summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx5/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5/main.c')
-rw-r--r--drivers/infiniband/hw/mlx5/main.c3254
1 files changed, 374 insertions, 2880 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 6f99ed03d88e..fbc45a5e76c5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/debugfs.h>
@@ -59,10 +32,13 @@
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"
+#include "devx.h"
+#include "fs.h"
#include "srq.h"
#include "qp.h"
#include "wr.h"
-#include <linux/mlx5/fs_helpers.h>
+#include "restrack.h"
+#include "counters.h"
#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
@@ -311,9 +287,6 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
*native_port_num = 1;
port = &ibdev->port[ib_port_num - 1];
- if (!port)
- return NULL;
-
spin_lock(&port->mp.mpi_lock);
mpi = ibdev->port[ib_port_num - 1].mp.mpi;
if (mpi && !mpi->unaffiliate) {
@@ -1765,6 +1738,92 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
mlx5_ib_disable_lb(dev, true, false);
}
+static int set_ucontext_resp(struct ib_ucontext *uctx,
+ struct mlx5_ib_alloc_ucontext_resp *resp)
+{
+ struct ib_device *ibdev = uctx->device;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_ucontext *context = to_mucontext(uctx);
+ struct mlx5_bfreg_info *bfregi = &context->bfregi;
+ int err;
+
+ if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+ err = mlx5_cmd_dump_fill_mkey(dev->mdev,
+ &resp->dump_fill_mkey);
+ if (err)
+ return err;
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
+ }
+
+ resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
+ if (dev->wc_support)
+ resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_bf_reg_size);
+ resp->cache_line_size = cache_line_size();
+ resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
+ resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
+ resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ resp->cqe_version = context->cqe_version;
+ resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
+ resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_CAP_GEN(dev->mdev,
+ num_of_uars_per_page) : 1;
+
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE) {
+ if (mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_EGRESS))
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
+ if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
+ /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
+ }
+
+ resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 :
+ bfregi->total_num_bfregs - bfregi->num_dyn_bfregs;
+ resp->num_ports = dev->num_ports;
+ resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
+ MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
+
+ if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
+ mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline);
+ resp->eth_min_inline++;
+ }
+
+ if (dev->mdev->clock_info)
+ resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
+
+ /*
+ * We don't want to expose information from the PCI bar that is located
+ * after 4096 bytes, so if the arch only supports larger pages, let's
+ * pretend we don't support reading the HCA's core clock. This is also
+ * forced by mmap function.
+ */
+ if (PAGE_SIZE <= 4096) {
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp->hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg,
+ internal_timer_h) % PAGE_SIZE;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, ece_support))
+ resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+
+ resp->num_dyn_bfregs = bfregi->num_dyn_bfregs;
+ return 0;
+}
+
static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
@@ -1772,14 +1831,12 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_ucontext *context = to_mucontext(uctx);
struct mlx5_bfreg_info *bfregi;
int ver;
int err;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
- u32 dump_fill_mkey;
bool lib_uar_4k;
bool lib_uar_dyn;
@@ -1808,37 +1865,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
return -EINVAL;
- resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
- if (dev->wc_support)
- resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
- resp.cache_line_size = cache_line_size();
- resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
- resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
- resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
- resp.cqe_version = min_t(__u8,
- (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
- req.max_cqe_version);
- resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
- MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
- resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
- MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
- resp.response_length = min(offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length), udata->outlen);
-
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) {
- if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS))
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
- if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
- /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
- }
-
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
bfregi = &context->bfregi;
@@ -1887,87 +1913,24 @@ uar_done:
if (err)
goto out_devx;
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
- err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
- if (err)
- goto out_mdev;
- }
-
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs;
- resp.num_ports = dev->num_ports;
-
- if (offsetofend(typeof(resp), cqe_version) <= udata->outlen)
- resp.response_length += sizeof(resp.cqe_version);
-
- if (offsetofend(typeof(resp), cmds_supp_uhw) <= udata->outlen) {
- resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
- MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
- resp.response_length += sizeof(resp.cmds_supp_uhw);
- }
-
- if (offsetofend(typeof(resp), eth_min_inline) <= udata->outlen) {
- if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
- mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
- resp.eth_min_inline++;
- }
- resp.response_length += sizeof(resp.eth_min_inline);
- }
-
- if (offsetofend(typeof(resp), clock_info_versions) <= udata->outlen) {
- if (mdev->clock_info)
- resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
- resp.response_length += sizeof(resp.clock_info_versions);
- }
-
- /*
- * We don't want to expose information from the PCI bar that is located
- * after 4096 bytes, so if the arch only supports larger pages, let's
- * pretend we don't support reading the HCA's core clock. This is also
- * forced by mmap function.
- */
- if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) {
- if (PAGE_SIZE <= 4096) {
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
- resp.hca_core_clock_offset =
- offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
- }
- resp.response_length += sizeof(resp.hca_core_clock_offset);
- }
-
- if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen)
- resp.response_length += sizeof(resp.log_uar_size);
-
- if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen)
- resp.response_length += sizeof(resp.num_uars_per_page);
-
- if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) {
- resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
- resp.response_length += sizeof(resp.num_dyn_bfregs);
- }
-
- if (offsetofend(typeof(resp), dump_fill_mkey) <= udata->outlen) {
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
- resp.dump_fill_mkey = dump_fill_mkey;
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
- }
- resp.response_length += sizeof(resp.dump_fill_mkey);
- }
+ context->cqe_version = min_t(__u8,
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+ req.max_cqe_version);
- if (MLX5_CAP_GEN(dev->mdev, ece_support))
- resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+ err = set_ucontext_resp(uctx, &resp);
+ if (err)
+ goto out_mdev;
+ resp.response_length = min(udata->outlen, sizeof(resp));
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto out_mdev;
bfregi->ver = ver;
bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
- context->cqe_version = resp.cqe_version;
context->lib_caps = req.lib_caps;
print_lib_caps(dev, context->lib_caps);
@@ -2000,6 +1963,29 @@ out_ctx:
return err;
}
+static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_alloc_ucontext_resp uctx_resp = {};
+ int ret;
+
+ ret = set_ucontext_resp(ibcontext, &uctx_resp);
+ if (ret)
+ return ret;
+
+ uctx_resp.response_length =
+ min_t(size_t,
+ uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX),
+ sizeof(uctx_resp));
+
+ ret = uverbs_copy_to_struct_or_zero(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ &uctx_resp,
+ sizeof(uctx_resp));
+ return ret;
+}
+
static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
@@ -2591,1820 +2577,6 @@ static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
}
-enum {
- MATCH_CRITERIA_ENABLE_OUTER_BIT,
- MATCH_CRITERIA_ENABLE_MISC_BIT,
- MATCH_CRITERIA_ENABLE_INNER_BIT,
- MATCH_CRITERIA_ENABLE_MISC2_BIT
-};
-
-#define HEADER_IS_ZERO(match_criteria, headers) \
- !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
- 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
-
-static u8 get_match_criteria_enable(u32 *match_criteria)
-{
- u8 match_criteria_enable;
-
- match_criteria_enable =
- (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
- MATCH_CRITERIA_ENABLE_OUTER_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
- MATCH_CRITERIA_ENABLE_MISC_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
- MATCH_CRITERIA_ENABLE_INNER_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
- MATCH_CRITERIA_ENABLE_MISC2_BIT;
-
- return match_criteria_enable;
-}
-
-static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- u8 entry_mask;
- u8 entry_val;
- int err = 0;
-
- if (!mask)
- goto out;
-
- entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
- ip_protocol);
- entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
- ip_protocol);
- if (!entry_mask) {
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
- goto out;
- }
- /* Don't override existing ip protocol */
- if (mask != entry_mask || val != entry_val)
- err = -EINVAL;
-out:
- return err;
-}
-
-static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
- bool inner)
-{
- if (inner) {
- MLX5_SET(fte_match_set_misc,
- misc_c, inner_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, inner_ipv6_flow_label, val);
- } else {
- MLX5_SET(fte_match_set_misc,
- misc_c, outer_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, outer_ipv6_flow_label, val);
- }
-}
-
-static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
-}
-
-static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
-{
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
- return -EOPNOTSUPP;
-
- return 0;
-}
-
-#define LAST_ETH_FIELD vlan_tag
-#define LAST_IB_FIELD sl
-#define LAST_IPV4_FIELD tos
-#define LAST_IPV6_FIELD traffic_class
-#define LAST_TCP_UDP_FIELD src_port
-#define LAST_TUNNEL_FIELD tunnel_id
-#define LAST_FLOW_TAG_FIELD tag_id
-#define LAST_DROP_FIELD size
-#define LAST_COUNTERS_FIELD counters
-
-/* Field is the last supported field */
-#define FIELDS_NOT_SUPPORTED(filter, field)\
- memchr_inv((void *)&filter.field +\
- sizeof(filter.field), 0,\
- sizeof(filter) -\
- offsetof(typeof(filter), field) -\
- sizeof(filter.field))
-
-int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
- bool is_egress,
- struct mlx5_flow_act *action)
-{
-
- switch (maction->ib_action.type) {
- case IB_FLOW_ACTION_ESP:
- if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
- return -EINVAL;
- /* Currently only AES_GCM keymat is supported by the driver */
- action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
- action->action |= is_egress ?
- MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
- return 0;
- case IB_FLOW_ACTION_UNSPECIFIED:
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- return -EINVAL;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- action->modify_hdr =
- maction->flow_action_raw.modify_hdr;
- return 0;
- }
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_DECAP) {
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
- return -EINVAL;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
- return 0;
- }
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
- if (action->action &
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
- return -EINVAL;
- action->action |=
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- action->pkt_reformat =
- maction->flow_action_raw.pkt_reformat;
- return 0;
- }
- /* fall through */
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int parse_flow_attr(struct mlx5_core_dev *mdev,
- struct mlx5_flow_spec *spec,
- const union ib_flow_spec *ib_spec,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_act *action, u32 prev_type)
-{
- struct mlx5_flow_context *flow_context = &spec->flow_context;
- u32 *match_c = spec->match_criteria;
- u32 *match_v = spec->match_value;
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters);
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
- misc_parameters);
- void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters_2);
- void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
- misc_parameters_2);
- void *headers_c;
- void *headers_v;
- int match_ipv;
- int ret;
-
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- inner_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- inner_headers);
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_ip_version);
- } else {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_ip_version);
- }
-
- switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
- case IB_FLOW_SPEC_ETH:
- if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
- return -EOPNOTSUPP;
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dmac_47_16),
- ib_spec->eth.mask.dst_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dmac_47_16),
- ib_spec->eth.val.dst_mac);
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- smac_47_16),
- ib_spec->eth.mask.src_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- smac_47_16),
- ib_spec->eth.val.src_mac);
-
- if (ib_spec->eth.mask.vlan_tag) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- cvlan_tag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- cvlan_tag, 1);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_vid, ntohs(ib_spec->eth.val.vlan_tag));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_cfi,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_cfi,
- ntohs(ib_spec->eth.val.vlan_tag) >> 12);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_prio,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_prio,
- ntohs(ib_spec->eth.val.vlan_tag) >> 13);
- }
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, ntohs(ib_spec->eth.mask.ether_type));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ntohs(ib_spec->eth.val.ether_type));
- break;
- case IB_FLOW_SPEC_IPV4:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
- return -EOPNOTSUPP;
-
- if (match_ipv) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ip_version, 0xf);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, MLX5_FS_IPV4_VERSION);
- } else {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IP);
- }
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.src_ip,
- sizeof(ib_spec->ipv4.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.src_ip,
- sizeof(ib_spec->ipv4.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.dst_ip,
- sizeof(ib_spec->ipv4.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.dst_ip,
- sizeof(ib_spec->ipv4.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
-
- if (set_proto(headers_c, headers_v,
- ib_spec->ipv4.mask.proto,
- ib_spec->ipv4.val.proto))
- return -EINVAL;
- break;
- case IB_FLOW_SPEC_IPV6:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
- return -EOPNOTSUPP;
-
- if (match_ipv) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ip_version, 0xf);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, MLX5_FS_IPV6_VERSION);
- } else {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IPV6);
- }
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.src_ip,
- sizeof(ib_spec->ipv6.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.src_ip,
- sizeof(ib_spec->ipv6.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.dst_ip,
- sizeof(ib_spec->ipv6.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.dst_ip,
- sizeof(ib_spec->ipv6.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv6.mask.traffic_class,
- ib_spec->ipv6.val.traffic_class);
-
- if (set_proto(headers_c, headers_v,
- ib_spec->ipv6.mask.next_hdr,
- ib_spec->ipv6.val.next_hdr))
- return -EINVAL;
-
- set_flow_label(misc_params_c, misc_params_v,
- ntohl(ib_spec->ipv6.mask.flow_label),
- ntohl(ib_spec->ipv6.val.flow_label),
- ib_spec->type & IB_FLOW_SPEC_INNER);
- break;
- case IB_FLOW_SPEC_ESP:
- if (ib_spec->esp.mask.seq)
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
- ntohl(ib_spec->esp.mask.spi));
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
- ntohl(ib_spec->esp.val.spi));
- break;
- case IB_FLOW_SPEC_TCP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_UDP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_GRE:
- if (ib_spec->gre.mask.c_ks_res0_ver)
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- 0xff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- IPPROTO_GRE);
-
- MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
- ntohs(ib_spec->gre.mask.protocol));
- MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
- ntohs(ib_spec->gre.val.protocol));
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
- gre_key.nvgre.hi),
- &ib_spec->gre.mask.key,
- sizeof(ib_spec->gre.mask.key));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
- gre_key.nvgre.hi),
- &ib_spec->gre.val.key,
- sizeof(ib_spec->gre.val.key));
- break;
- case IB_FLOW_SPEC_MPLS:
- switch (prev_type) {
- case IB_FLOW_SPEC_UDP:
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls_over_udp),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls_over_udp),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls_over_udp),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- break;
- case IB_FLOW_SPEC_GRE:
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls_over_gre),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls_over_gre),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls_over_gre),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- break;
- default:
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_first_mpls),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- inner_first_mpls),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- inner_first_mpls),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- } else {
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- }
- }
- break;
- case IB_FLOW_SPEC_VXLAN_TUNNEL:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
- LAST_TUNNEL_FIELD))
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
- ntohl(ib_spec->tunnel.mask.tunnel_id));
- MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
- ntohl(ib_spec->tunnel.val.tunnel_id));
- break;
- case IB_FLOW_SPEC_ACTION_TAG:
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
- LAST_FLOW_TAG_FIELD))
- return -EOPNOTSUPP;
- if (ib_spec->flow_tag.tag_id >= BIT(24))
- return -EINVAL;
-
- flow_context->flow_tag = ib_spec->flow_tag.tag_id;
- flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
- break;
- case IB_FLOW_SPEC_ACTION_DROP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
- LAST_DROP_FIELD))
- return -EOPNOTSUPP;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
- break;
- case IB_FLOW_SPEC_ACTION_HANDLE:
- ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
- flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
- if (ret)
- return ret;
- break;
- case IB_FLOW_SPEC_ACTION_COUNT:
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
- LAST_COUNTERS_FIELD))
- return -EOPNOTSUPP;
-
- /* for now support only one counters spec per flow */
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- return -EINVAL;
-
- action->counters = ib_spec->flow_count.counters;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* If a flow could catch both multicast and unicast packets,
- * it won't fall into the multicast flow steering table and this rule
- * could steal other multicast packets.
- */
-static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
-{
- union ib_flow_spec *flow_spec;
-
- if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
- ib_attr->num_of_specs < 1)
- return false;
-
- flow_spec = (union ib_flow_spec *)(ib_attr + 1);
- if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
- struct ib_flow_spec_ipv4 *ipv4_spec;
-
- ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
- if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
- return true;
-
- return false;
- }
-
- if (flow_spec->type == IB_FLOW_SPEC_ETH) {
- struct ib_flow_spec_eth *eth_spec;
-
- eth_spec = (struct ib_flow_spec_eth *)flow_spec;
- return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
- is_multicast_ether_addr(eth_spec->val.dst_mac);
- }
-
- return false;
-}
-
-enum valid_spec {
- VALID_SPEC_INVALID,
- VALID_SPEC_VALID,
- VALID_SPEC_NA,
-};
-
-static enum valid_spec
-is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- const u32 *match_c = spec->match_criteria;
- bool is_crypto =
- (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
- bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
- bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- /*
- * Currently only crypto is supported in egress, when regular egress
- * rules would be supported, always return VALID_SPEC_NA.
- */
- if (!is_crypto)
- return VALID_SPEC_NA;
-
- return is_crypto && is_ipsec &&
- (!egress || (!is_drop &&
- !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
- VALID_SPEC_VALID : VALID_SPEC_INVALID;
-}
-
-static bool is_valid_spec(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- /* We curretly only support ipsec egress flow */
- return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
-}
-
-static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
- const struct ib_flow_attr *flow_attr,
- bool check_inner)
-{
- union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
- int match_ipv = check_inner ?
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_ip_version) :
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_ip_version);
- int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
- bool ipv4_spec_valid, ipv6_spec_valid;
- unsigned int ip_spec_type = 0;
- bool has_ethertype = false;
- unsigned int spec_index;
- bool mask_valid = true;
- u16 eth_type = 0;
- bool type_valid;
-
- /* Validate that ethertype is correct */
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
- ib_spec->eth.mask.ether_type) {
- mask_valid = (ib_spec->eth.mask.ether_type ==
- htons(0xffff));
- has_ethertype = true;
- eth_type = ntohs(ib_spec->eth.val.ether_type);
- } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
- (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
- ip_spec_type = ib_spec->type;
- }
- ib_spec = (void *)ib_spec + ib_spec->size;
- }
-
- type_valid = (!has_ethertype) || (!ip_spec_type);
- if (!type_valid && mask_valid) {
- ipv4_spec_valid = (eth_type == ETH_P_IP) &&
- (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
- ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
- (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
-
- type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
- (((eth_type == ETH_P_MPLS_UC) ||
- (eth_type == ETH_P_MPLS_MC)) && match_ipv);
- }
-
- return type_valid;
-}
-
-static bool is_valid_attr(struct mlx5_core_dev *mdev,
- const struct ib_flow_attr *flow_attr)
-{
- return is_valid_ethertype(mdev, flow_attr, false) &&
- is_valid_ethertype(mdev, flow_attr, true);
-}
-
-static void put_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *prio, bool ft_added)
-{
- prio->refcount -= !!ft_added;
- if (!prio->refcount) {
- mlx5_destroy_flow_table(prio->flow_table);
- prio->flow_table = NULL;
- }
-}
-
-static void counters_clear_description(struct ib_counters *counters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-
- mutex_lock(&mcounters->mcntrs_mutex);
- kfree(mcounters->counters_data);
- mcounters->counters_data = NULL;
- mcounters->cntrs_max_index = 0;
- mutex_unlock(&mcounters->mcntrs_mutex);
-}
-
-static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
-{
- struct mlx5_ib_flow_handler *handler = container_of(flow_id,
- struct mlx5_ib_flow_handler,
- ibflow);
- struct mlx5_ib_flow_handler *iter, *tmp;
- struct mlx5_ib_dev *dev = handler->dev;
-
- mutex_lock(&dev->flow_db->lock);
-
- list_for_each_entry_safe(iter, tmp, &handler->list, list) {
- mlx5_del_flow_rules(iter->rule);
- put_flow_table(dev, iter->prio, true);
- list_del(&iter->list);
- kfree(iter);
- }
-
- mlx5_del_flow_rules(handler->rule);
- put_flow_table(dev, handler->prio, true);
- if (handler->ibcounters &&
- atomic_read(&handler->ibcounters->usecnt) == 1)
- counters_clear_description(handler->ibcounters);
-
- mutex_unlock(&dev->flow_db->lock);
- if (handler->flow_matcher)
- atomic_dec(&handler->flow_matcher->usecnt);
- kfree(handler);
-
- return 0;
-}
-
-static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
-{
- priority *= 2;
- if (!dont_trap)
- priority++;
- return priority;
-}
-
-enum flow_table_type {
- MLX5_IB_FT_RX,
- MLX5_IB_FT_TX
-};
-
-#define MLX5_FS_MAX_TYPES 6
-#define MLX5_FS_MAX_ENTRIES BIT(16)
-
-static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
- struct mlx5_ib_flow_prio *prio,
- int priority,
- int num_entries, int num_groups,
- u32 flags)
-{
- struct mlx5_flow_table_attr ft_attr = {};
- struct mlx5_flow_table *ft;
-
- ft_attr.prio = priority;
- ft_attr.max_fte = num_entries;
- ft_attr.flags = flags;
- ft_attr.autogroup.max_num_groups = num_groups;
- ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
- if (IS_ERR(ft))
- return ERR_CAST(ft);
-
- prio->flow_table = ft;
- prio->refcount = 0;
- return prio;
-}
-
-static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
- struct ib_flow_attr *flow_attr,
- enum flow_table_type ft_type)
-{
- bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
- struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio;
- struct mlx5_flow_table *ft;
- int max_table_size;
- int num_entries;
- int num_groups;
- bool esw_encap;
- u32 flags = 0;
- int priority;
-
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
- esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
- DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- enum mlx5_flow_namespace_type fn_type;
-
- if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
- priority = MLX5_IB_FLOW_MCAST_PRIO;
- else
- priority = ib_prio_to_core_prio(flow_attr->priority,
- dont_trap);
- if (ft_type == MLX5_IB_FT_RX) {
- fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
- prio = &dev->flow_db->prios[priority];
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
- log_max_ft_size));
- fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
- prio = &dev->flow_db->egress_prios[priority];
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- }
- ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
- num_entries = MLX5_FS_MAX_ENTRIES;
- num_groups = MLX5_FS_MAX_TYPES;
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- ns = mlx5_get_flow_namespace(dev->mdev,
- MLX5_FLOW_NAMESPACE_LEFTOVERS);
- build_leftovers_ft_param(&priority,
- &num_entries,
- &num_groups);
- prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- if (!MLX5_CAP_FLOWTABLE(dev->mdev,
- allow_sniffer_and_nic_rx_shared_tir))
- return ERR_PTR(-ENOTSUPP);
-
- ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
- MLX5_FLOW_NAMESPACE_SNIFFER_RX :
- MLX5_FLOW_NAMESPACE_SNIFFER_TX);
-
- prio = &dev->flow_db->sniffer[ft_type];
- priority = 0;
- num_entries = 1;
- num_groups = 1;
- }
-
- if (!ns)
- return ERR_PTR(-ENOTSUPP);
-
- max_table_size = min_t(int, num_entries, max_table_size);
-
- ft = prio->flow_table;
- if (!ft)
- return _get_prio(ns, prio, priority, max_table_size, num_groups,
- flags);
-
- return prio;
-}
-
-static void set_underlay_qp(struct mlx5_ib_dev *dev,
- struct mlx5_flow_spec *spec,
- u32 underlay_qpn)
-{
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
- spec->match_criteria,
- misc_parameters);
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
-
- if (underlay_qpn &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- ft_field_support.bth_dst_qp)) {
- MLX5_SET(fte_match_set_misc,
- misc_params_v, bth_dst_qp, underlay_qpn);
- MLX5_SET(fte_match_set_misc,
- misc_params_c, bth_dst_qp, 0xffffff);
- }
-}
-
-static int read_flow_counters(struct ib_device *ibdev,
- struct mlx5_read_counters_attr *read_attr)
-{
- struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
-
- return mlx5_fc_query(dev->mdev, fc,
- &read_attr->out[IB_COUNTER_PACKETS],
- &read_attr->out[IB_COUNTER_BYTES]);
-}
-
-/* flow counters currently expose two counters packets and bytes */
-#define FLOW_COUNTERS_NUM 2
-static int counters_set_description(struct ib_counters *counters,
- enum mlx5_ib_counters_type counters_type,
- struct mlx5_ib_flow_counters_desc *desc_data,
- u32 ncounters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
- u32 cntrs_max_index = 0;
- int i;
-
- if (counters_type != MLX5_IB_COUNTERS_FLOW)
- return -EINVAL;
-
- /* init the fields for the object */
- mcounters->type = counters_type;
- mcounters->read_counters = read_flow_counters;
- mcounters->counters_num = FLOW_COUNTERS_NUM;
- mcounters->ncounters = ncounters;
- /* each counter entry have both description and index pair */
- for (i = 0; i < ncounters; i++) {
- if (desc_data[i].description > IB_COUNTER_BYTES)
- return -EINVAL;
-
- if (cntrs_max_index <= desc_data[i].index)
- cntrs_max_index = desc_data[i].index + 1;
- }
-
- mutex_lock(&mcounters->mcntrs_mutex);
- mcounters->counters_data = desc_data;
- mcounters->cntrs_max_index = cntrs_max_index;
- mutex_unlock(&mcounters->mcntrs_mutex);
-
- return 0;
-}
-
-#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
-static int flow_counters_set_data(struct ib_counters *ibcounters,
- struct mlx5_ib_create_flow *ucmd)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
- struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
- struct mlx5_ib_flow_counters_desc *desc_data = NULL;
- bool hw_hndl = false;
- int ret = 0;
-
- if (ucmd && ucmd->ncounters_data != 0) {
- cntrs_data = ucmd->data;
- if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
- return -EINVAL;
-
- desc_data = kcalloc(cntrs_data->ncounters,
- sizeof(*desc_data),
- GFP_KERNEL);
- if (!desc_data)
- return -ENOMEM;
-
- if (copy_from_user(desc_data,
- u64_to_user_ptr(cntrs_data->counters_data),
- sizeof(*desc_data) * cntrs_data->ncounters)) {
- ret = -EFAULT;
- goto free;
- }
- }
-
- if (!mcounters->hw_cntrs_hndl) {
- mcounters->hw_cntrs_hndl = mlx5_fc_create(
- to_mdev(ibcounters->device)->mdev, false);
- if (IS_ERR(mcounters->hw_cntrs_hndl)) {
- ret = PTR_ERR(mcounters->hw_cntrs_hndl);
- goto free;
- }
- hw_hndl = true;
- }
-
- if (desc_data) {
- /* counters already bound to at least one flow */
- if (mcounters->cntrs_max_index) {
- ret = -EINVAL;
- goto free_hndl;
- }
-
- ret = counters_set_description(ibcounters,
- MLX5_IB_COUNTERS_FLOW,
- desc_data,
- cntrs_data->ncounters);
- if (ret)
- goto free_hndl;
-
- } else if (!mcounters->cntrs_max_index) {
- /* counters not bound yet, must have udata passed */
- ret = -EINVAL;
- goto free_hndl;
- }
-
- return 0;
-
-free_hndl:
- if (hw_hndl) {
- mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
- mcounters->hw_cntrs_hndl);
- mcounters->hw_cntrs_hndl = NULL;
- }
-free:
- kfree(desc_data);
- return ret;
-}
-
-static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
- struct mlx5_flow_spec *spec,
- struct mlx5_eswitch_rep *rep)
-{
- struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
- void *misc;
-
- if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters_2);
-
- MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_for_match(esw,
- rep->vport));
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters_2);
-
- MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_mask());
- } else {
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
-
- MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
-
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
-
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- }
-}
-
-static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst,
- u32 underlay_qpn,
- struct mlx5_ib_create_flow *ucmd)
-{
- struct mlx5_flow_table *ft = ft_prio->flow_table;
- struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {};
- struct mlx5_flow_spec *spec;
- struct mlx5_flow_destination dest_arr[2] = {};
- struct mlx5_flow_destination *rule_dst = dest_arr;
- const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
- unsigned int spec_index;
- u32 prev_type = 0;
- int err = 0;
- int dest_num = 0;
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
-
- if (!is_valid_attr(dev->mdev, flow_attr))
- return ERR_PTR(-EINVAL);
-
- if (dev->is_rep && is_egress)
- return ERR_PTR(-EINVAL);
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler || !spec) {
- err = -ENOMEM;
- goto free;
- }
-
- INIT_LIST_HEAD(&handler->list);
-
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(dev->mdev, spec,
- ib_flow, flow_attr, &flow_act,
- prev_type);
- if (err < 0)
- goto free;
-
- prev_type = ((union ib_flow_spec *)ib_flow)->type;
- ib_flow += ((union ib_flow_spec *)ib_flow)->size;
- }
-
- if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
- memcpy(&dest_arr[0], dst, sizeof(*dst));
- dest_num++;
- }
-
- if (!flow_is_multicast_only(flow_attr))
- set_underlay_qp(dev, spec, underlay_qpn);
-
- if (dev->is_rep) {
- struct mlx5_eswitch_rep *rep;
-
- rep = dev->port[flow_attr->port - 1].rep;
- if (!rep) {
- err = -EINVAL;
- goto free;
- }
-
- mlx5_ib_set_rule_source_port(dev, spec, rep);
- }
-
- spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
-
- if (is_egress &&
- !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
- err = -EINVAL;
- goto free;
- }
-
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- struct mlx5_ib_mcounters *mcounters;
-
- err = flow_counters_set_data(flow_act.counters, ucmd);
- if (err)
- goto free;
-
- mcounters = to_mcounters(flow_act.counters);
- handler->ibcounters = flow_act.counters;
- dest_arr[dest_num].type =
- MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest_arr[dest_num].counter_id =
- mlx5_fc_id(mcounters->hw_cntrs_hndl);
- dest_num++;
- }
-
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- if (!dest_num)
- rule_dst = NULL;
- } else {
- if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
- flow_act.action |=
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
- if (is_egress)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- else if (dest_num)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- }
-
- if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
- mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- spec->flow_context.flow_tag, flow_attr->type);
- err = -EINVAL;
- goto free;
- }
- handler->rule = mlx5_add_flow_rules(ft, spec,
- &flow_act,
- rule_dst, dest_num);
-
- if (IS_ERR(handler->rule)) {
- err = PTR_ERR(handler->rule);
- goto free;
- }
-
- ft_prio->refcount++;
- handler->prio = ft_prio;
- handler->dev = dev;
-
- ft_prio->flow_table = ft;
-free:
- if (err && handler) {
- if (handler->ibcounters &&
- atomic_read(&handler->ibcounters->usecnt) == 1)
- counters_clear_description(handler->ibcounters);
- kfree(handler);
- }
- kvfree(spec);
- return err ? ERR_PTR(err) : handler;
-}
-
-static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
-}
-
-enum {
- LEFTOVERS_MC,
- LEFTOVERS_UC,
-};
-
-static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_ucast = NULL;
- struct mlx5_ib_flow_handler *handler = NULL;
-
- static struct {
- struct ib_flow_attr flow_attr;
- struct ib_flow_spec_eth eth_flow;
- } leftovers_specs[] = {
- [LEFTOVERS_MC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {0x1} }
- }
- },
- [LEFTOVERS_UC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {} }
- }
- }
- };
-
- handler = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_MC].flow_attr,
- dst);
- if (!IS_ERR(handler) &&
- flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
- handler_ucast = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_UC].flow_attr,
- dst);
- if (IS_ERR(handler_ucast)) {
- mlx5_del_flow_rules(handler->rule);
- ft_prio->refcount--;
- kfree(handler);
- handler = handler_ucast;
- } else {
- list_add(&handler_ucast->list, &handler->list);
- }
- }
-
- return handler;
-}
-
-static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_rx,
- struct mlx5_ib_flow_prio *ft_tx,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_rx;
- struct mlx5_ib_flow_handler *handler_tx;
- int err;
- static const struct ib_flow_attr flow_attr = {
- .num_of_specs = 0,
- .size = sizeof(flow_attr)
- };
-
- handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
- if (IS_ERR(handler_rx)) {
- err = PTR_ERR(handler_rx);
- goto err;
- }
-
- handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
- if (IS_ERR(handler_tx)) {
- err = PTR_ERR(handler_tx);
- goto err_tx;
- }
-
- list_add(&handler_tx->list, &handler_rx->list);
-
- return handler_rx;
-
-err_tx:
- mlx5_del_flow_rules(handler_rx->rule);
- ft_rx->refcount--;
- kfree(handler_rx);
-err:
- return ERR_PTR(err);
-}
-
-static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain,
- struct ib_udata *udata)
-{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_qp *mqp = to_mqp(qp);
- struct mlx5_ib_flow_handler *handler = NULL;
- struct mlx5_flow_destination *dst = NULL;
- struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
- struct mlx5_ib_flow_prio *ft_prio;
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
- struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
- size_t min_ucmd_sz, required_ucmd_sz;
- int err;
- int underlay_qpn;
-
- if (udata && udata->inlen) {
- min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
- sizeof(ucmd_hdr.reserved);
- if (udata->inlen < min_ucmd_sz)
- return ERR_PTR(-EOPNOTSUPP);
-
- err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
- if (err)
- return ERR_PTR(err);
-
- /* currently supports only one counters data */
- if (ucmd_hdr.ncounters_data > 1)
- return ERR_PTR(-EINVAL);
-
- required_ucmd_sz = min_ucmd_sz +
- sizeof(struct mlx5_ib_flow_counters_data) *
- ucmd_hdr.ncounters_data;
- if (udata->inlen > required_ucmd_sz &&
- !ib_is_udata_cleared(udata, required_ucmd_sz,
- udata->inlen - required_ucmd_sz))
- return ERR_PTR(-EOPNOTSUPP);
-
- ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
- if (!ucmd)
- return ERR_PTR(-ENOMEM);
-
- err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
- if (err)
- goto free_ucmd;
- }
-
- if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
- err = -ENOMEM;
- goto free_ucmd;
- }
-
- if (domain != IB_FLOW_DOMAIN_USER ||
- flow_attr->port > dev->num_ports ||
- (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
- IB_FLOW_ATTR_FLAGS_EGRESS))) {
- err = -EINVAL;
- goto free_ucmd;
- }
-
- if (is_egress &&
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
- err = -EINVAL;
- goto free_ucmd;
- }
-
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
- if (!dst) {
- err = -ENOMEM;
- goto free_ucmd;
- }
-
- mutex_lock(&dev->flow_db->lock);
-
- ft_prio = get_flow_table(dev, flow_attr,
- is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
- if (IS_ERR(ft_prio)) {
- err = PTR_ERR(ft_prio);
- goto unlock;
- }
- if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
- if (IS_ERR(ft_prio_tx)) {
- err = PTR_ERR(ft_prio_tx);
- ft_prio_tx = NULL;
- goto destroy_ft;
- }
- }
-
- if (is_egress) {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- } else {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- if (mqp->is_rss)
- dst->tir_num = mqp->rss_qp.tirn;
- else
- dst->tir_num = mqp->raw_packet_qp.rq.tirn;
- }
-
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
- mqp->underlay_qpn :
- 0;
- handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
- underlay_qpn, ucmd);
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- handler = create_leftovers_rule(dev, ft_prio, flow_attr,
- dst);
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
- } else {
- err = -EINVAL;
- goto destroy_ft;
- }
-
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- handler = NULL;
- goto destroy_ft;
- }
-
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
- kfree(ucmd);
-
- return &handler->ibflow;
-
-destroy_ft:
- put_flow_table(dev, ft_prio, false);
- if (ft_prio_tx)
- put_flow_table(dev, ft_prio_tx, false);
-unlock:
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
-free_ucmd:
- kfree(ucmd);
- return ERR_PTR(err);
-}
-
-static struct mlx5_ib_flow_prio *
-_get_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
- bool mcast)
-{
- struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio = NULL;
- int max_table_size = 0;
- bool esw_encap;
- u32 flags = 0;
- int priority;
-
- if (mcast)
- priority = MLX5_IB_FLOW_MCAST_PRIO;
- else
- priority = ib_prio_to_core_prio(fs_matcher->priority, false);
-
- esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
- DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2) &&
- !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
- max_table_size = BIT(
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
- max_table_size = BIT(
- MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
- esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- priority = FDB_BYPASS_PATH;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
- log_max_ft_size));
- priority = fs_matcher->priority;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
- log_max_ft_size));
- priority = fs_matcher->priority;
- }
-
- max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
-
- ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
- if (!ns)
- return ERR_PTR(-ENOTSUPP);
-
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
- prio = &dev->flow_db->prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
- prio = &dev->flow_db->egress_prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
- prio = &dev->flow_db->fdb;
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
- prio = &dev->flow_db->rdma_rx[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
- prio = &dev->flow_db->rdma_tx[priority];
-
- if (!prio)
- return ERR_PTR(-EINVAL);
-
- if (prio->flow_table)
- return prio;
-
- return _get_prio(ns, prio, priority, max_table_size,
- MLX5_FS_MAX_TYPES, flags);
-}
-
-static struct mlx5_ib_flow_handler *
-_create_raw_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct mlx5_flow_destination *dst,
- struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_context *flow_context,
- struct mlx5_flow_act *flow_act,
- void *cmd_in, int inlen,
- int dst_num)
-{
- struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_spec *spec;
- struct mlx5_flow_table *ft = ft_prio->flow_table;
- int err = 0;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler || !spec) {
- err = -ENOMEM;
- goto free;
- }
-
- INIT_LIST_HEAD(&handler->list);
-
- memcpy(spec->match_value, cmd_in, inlen);
- memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
- fs_matcher->mask_len);
- spec->match_criteria_enable = fs_matcher->match_criteria_enable;
- spec->flow_context = *flow_context;
-
- handler->rule = mlx5_add_flow_rules(ft, spec,
- flow_act, dst, dst_num);
-
- if (IS_ERR(handler->rule)) {
- err = PTR_ERR(handler->rule);
- goto free;
- }
-
- ft_prio->refcount++;
- handler->prio = ft_prio;
- handler->dev = dev;
- ft_prio->flow_table = ft;
-
-free:
- if (err)
- kfree(handler);
- kvfree(spec);
- return err ? ERR_PTR(err) : handler;
-}
-
-static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
- void *match_v)
-{
- void *match_c;
- void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
- void *dmac, *dmac_mask;
- void *ipv4, *ipv4_mask;
-
- if (!(fs_matcher->match_criteria_enable &
- (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
- return false;
-
- match_c = fs_matcher->matcher_mask.match_params;
- match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
- match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
-
- dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
- dmac_47_16);
- dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
- dmac_47_16);
-
- if (is_multicast_ether_addr(dmac) &&
- is_multicast_ether_addr(dmac_mask))
- return true;
-
- ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
- ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
- return true;
-
- return false;
-}
-
-struct mlx5_ib_flow_handler *
-mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_context *flow_context,
- struct mlx5_flow_act *flow_act,
- u32 counter_id,
- void *cmd_in, int inlen, int dest_id,
- int dest_type)
-{
- struct mlx5_flow_destination *dst;
- struct mlx5_ib_flow_prio *ft_prio;
- struct mlx5_ib_flow_handler *handler;
- int dst_num = 0;
- bool mcast;
- int err;
-
- if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
- return ERR_PTR(-EOPNOTSUPP);
-
- if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
- return ERR_PTR(-ENOMEM);
-
- dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
- if (!dst)
- return ERR_PTR(-ENOMEM);
-
- mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
- mutex_lock(&dev->flow_db->lock);
-
- ft_prio = _get_flow_table(dev, fs_matcher, mcast);
- if (IS_ERR(ft_prio)) {
- err = PTR_ERR(ft_prio);
- goto unlock;
- }
-
- if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
- dst[dst_num].type = dest_type;
- dst[dst_num++].tir_num = dest_id;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
- dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
- dst[dst_num++].ft_num = dest_id;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
- dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- }
-
-
- if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dst[dst_num].counter_id = counter_id;
- dst_num++;
- }
-
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
- flow_context, flow_act,
- cmd_in, inlen, dst_num);
-
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- goto destroy_ft;
- }
-
- mutex_unlock(&dev->flow_db->lock);
- atomic_inc(&fs_matcher->usecnt);
- handler->flow_matcher = fs_matcher;
-
- kfree(dst);
-
- return handler;
-
-destroy_ft:
- put_flow_table(dev, ft_prio, false);
-unlock:
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
-
- return ERR_PTR(err);
-}
-
-static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
-{
- u32 flags = 0;
-
- if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
- flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
-
- return flags;
-}
-
-#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
-static struct ib_flow_action *
-mlx5_ib_create_flow_action_esp(struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_dev *mdev = to_mdev(device);
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
- struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
- struct mlx5_ib_flow_action *action;
- u64 action_flags;
- u64 flags;
- int err = 0;
-
- err = uverbs_get_flags64(
- &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
- ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
- if (err)
- return ERR_PTR(err);
-
- flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
-
- /* We current only support a subset of the standard features. Only a
- * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
- * (with overlap). Full offload mode isn't supported.
- */
- if (!attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
- return ERR_PTR(-EOPNOTSUPP);
-
- if (attr->keymat->protocol !=
- IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
- return ERR_PTR(-EOPNOTSUPP);
-
- aes_gcm = &attr->keymat->keymat.aes_gcm;
-
- if (aes_gcm->icv_len != 16 ||
- aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
- return ERR_PTR(-EOPNOTSUPP);
-
- action = kmalloc(sizeof(*action), GFP_KERNEL);
- if (!action)
- return ERR_PTR(-ENOMEM);
-
- action->esp_aes_gcm.ib_flags = attr->flags;
- memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
- sizeof(accel_attrs.keymat.aes_gcm.aes_key));
- accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
- memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
- sizeof(accel_attrs.keymat.aes_gcm.salt));
- memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
- sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
- accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
- accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
- accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
- accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
-
- action->esp_aes_gcm.ctx =
- mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
- if (IS_ERR(action->esp_aes_gcm.ctx)) {
- err = PTR_ERR(action->esp_aes_gcm.ctx);
- goto err_parse;
- }
-
- action->esp_aes_gcm.ib_flags = attr->flags;
-
- return &action->ib_action;
-
-err_parse:
- kfree(action);
- return ERR_PTR(err);
-}
-
-static int
-mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
- struct mlx5_accel_esp_xfrm_attrs accel_attrs;
- int err = 0;
-
- if (attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
- return -EOPNOTSUPP;
-
- /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
- * be modified.
- */
- if (!(maction->esp_aes_gcm.ib_flags &
- IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
- attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
- return -EINVAL;
-
- memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
- sizeof(accel_attrs));
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
- else
- accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
- &accel_attrs);
- if (err)
- return err;
-
- maction->esp_aes_gcm.ib_flags &=
- ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
- maction->esp_aes_gcm.ib_flags |=
- attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
-
- return 0;
-}
-
-static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
-
- switch (action->type) {
- case IB_FLOW_ACTION_ESP:
- /*
- * We only support aes_gcm by now, so we implicitly know this is
- * the underline crypto.
- */
- mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
- break;
- case IB_FLOW_ACTION_UNSPECIFIED:
- mlx5_ib_destroy_flow_action_raw(maction);
- break;
- default:
- WARN_ON(true);
- break;
- }
-
- kfree(maction);
- return 0;
-}
-
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4848,137 +3020,6 @@ static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
return __get_port_caps(dev, port);
}
-static void destroy_umrc_res(struct mlx5_ib_dev *dev)
-{
- int err;
-
- err = mlx5_mr_cache_cleanup(dev);
- if (err)
- mlx5_ib_warn(dev, "mr cache cleanup failed\n");
-
- if (dev->umrc.qp)
- mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
- if (dev->umrc.cq)
- ib_free_cq(dev->umrc.cq);
- if (dev->umrc.pd)
- ib_dealloc_pd(dev->umrc.pd);
-}
-
-enum {
- MAX_UMR_WR = 128,
-};
-
-static int create_umr_res(struct mlx5_ib_dev *dev)
-{
- struct ib_qp_init_attr *init_attr = NULL;
- struct ib_qp_attr *attr = NULL;
- struct ib_pd *pd;
- struct ib_cq *cq;
- struct ib_qp *qp;
- int ret;
-
- attr = kzalloc(sizeof(*attr), GFP_KERNEL);
- init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
- if (!attr || !init_attr) {
- ret = -ENOMEM;
- goto error_0;
- }
-
- pd = ib_alloc_pd(&dev->ib_dev, 0);
- if (IS_ERR(pd)) {
- mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
- ret = PTR_ERR(pd);
- goto error_0;
- }
-
- cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
- if (IS_ERR(cq)) {
- mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
- ret = PTR_ERR(cq);
- goto error_2;
- }
-
- init_attr->send_cq = cq;
- init_attr->recv_cq = cq;
- init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr->cap.max_send_wr = MAX_UMR_WR;
- init_attr->cap.max_send_sge = 1;
- init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
- init_attr->port_num = 1;
- qp = mlx5_ib_create_qp(pd, init_attr, NULL);
- if (IS_ERR(qp)) {
- mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
- ret = PTR_ERR(qp);
- goto error_3;
- }
- qp->device = &dev->ib_dev;
- qp->real_qp = qp;
- qp->uobject = NULL;
- qp->qp_type = MLX5_IB_QPT_REG_UMR;
- qp->send_cq = init_attr->send_cq;
- qp->recv_cq = init_attr->recv_cq;
-
- attr->qp_state = IB_QPS_INIT;
- attr->port_num = 1;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
- IB_QP_PORT, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
- goto error_4;
- }
-
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTR;
- attr->path_mtu = IB_MTU_256;
-
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
- goto error_4;
- }
-
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTS;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
- goto error_4;
- }
-
- dev->umrc.qp = qp;
- dev->umrc.cq = cq;
- dev->umrc.pd = pd;
-
- sema_init(&dev->umrc.sem, MAX_UMR_WR);
- ret = mlx5_mr_cache_init(dev);
- if (ret) {
- mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
- goto error_4;
- }
-
- kfree(attr);
- kfree(init_attr);
-
- return 0;
-
-error_4:
- mlx5_ib_destroy_qp(qp, NULL);
- dev->umrc.qp = NULL;
-
-error_3:
- ib_free_cq(cq);
- dev->umrc.cq = NULL;
-
-error_2:
- ib_dealloc_pd(pd);
- dev->umrc.pd = NULL;
-
-error_0:
- kfree(attr);
- kfree(init_attr);
- return ret;
-}
-
static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
{
switch (umr_fence_cap) {
@@ -4991,18 +3032,20 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
}
}
-static int create_dev_resources(struct mlx5_ib_resources *devr)
+static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
struct ib_srq_init_attr attr;
- struct mlx5_ib_dev *dev;
struct ib_device *ibdev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
int port;
int ret = 0;
- dev = container_of(devr, struct mlx5_ib_dev, devr);
ibdev = &dev->ib_dev;
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
+ return -EOPNOTSUPP;
+
mutex_init(&devr->mutex);
devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
@@ -5030,34 +3073,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
if (ret)
goto err_create_cq;
- devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
- if (IS_ERR(devr->x0)) {
- ret = PTR_ERR(devr->x0);
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
+ if (ret)
goto error2;
- }
- devr->x0->device = &dev->ib_dev;
- devr->x0->inode = NULL;
- atomic_set(&devr->x0->usecnt, 0);
- mutex_init(&devr->x0->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
-
- devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
- if (IS_ERR(devr->x1)) {
- ret = PTR_ERR(devr->x1);
+
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
+ if (ret)
goto error3;
- }
- devr->x1->device = &dev->ib_dev;
- devr->x1->inode = NULL;
- atomic_set(&devr->x1->usecnt, 0);
- mutex_init(&devr->x1->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_XRC;
attr.ext.cq = devr->c0;
- attr.ext.xrc.xrcd = devr->x0;
devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq);
if (!devr->s0) {
@@ -5068,13 +3096,11 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
devr->s0->device = &dev->ib_dev;
devr->s0->pd = devr->p0;
devr->s0->srq_type = IB_SRQT_XRC;
- devr->s0->ext.xrc.xrcd = devr->x0;
devr->s0->ext.cq = devr->c0;
ret = mlx5_ib_create_srq(devr->s0, &attr, NULL);
if (ret)
goto err_create;
- atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
atomic_inc(&devr->s0->ext.cq->usecnt);
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s0->usecnt, 0);
@@ -5116,9 +3142,9 @@ error5:
err_create:
kfree(devr->s0);
error4:
- mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
error3:
- mlx5_ib_dealloc_xrcd(devr->x0, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
error2:
mlx5_ib_destroy_cq(devr->c0, NULL);
err_create_cq:
@@ -5130,16 +3156,17 @@ error0:
return ret;
}
-static void destroy_dev_resources(struct mlx5_ib_resources *devr)
+static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
int port;
mlx5_ib_destroy_srq(devr->s1, NULL);
kfree(devr->s1);
mlx5_ib_destroy_srq(devr->s0, NULL);
kfree(devr->s0);
- mlx5_ib_dealloc_xrcd(devr->x0, NULL);
- mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
mlx5_ib_destroy_cq(devr->c0, NULL);
kfree(devr->c0);
mlx5_ib_dealloc_pd(devr->p0, NULL);
@@ -5332,466 +3359,6 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-struct mlx5_ib_counter {
- const char *name;
- size_t offset;
-};
-
-#define INIT_Q_COUNTER(_name) \
- { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
-
-static const struct mlx5_ib_counter basic_q_cnts[] = {
- INIT_Q_COUNTER(rx_write_requests),
- INIT_Q_COUNTER(rx_read_requests),
- INIT_Q_COUNTER(rx_atomic_requests),
- INIT_Q_COUNTER(out_of_buffer),
-};
-
-static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
- INIT_Q_COUNTER(out_of_sequence),
-};
-
-static const struct mlx5_ib_counter retrans_q_cnts[] = {
- INIT_Q_COUNTER(duplicate_request),
- INIT_Q_COUNTER(rnr_nak_retry_err),
- INIT_Q_COUNTER(packet_seq_err),
- INIT_Q_COUNTER(implied_nak_seq_err),
- INIT_Q_COUNTER(local_ack_timeout_err),
-};
-
-#define INIT_CONG_COUNTER(_name) \
- { .name = #_name, .offset = \
- MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
-
-static const struct mlx5_ib_counter cong_cnts[] = {
- INIT_CONG_COUNTER(rp_cnp_ignored),
- INIT_CONG_COUNTER(rp_cnp_handled),
- INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
- INIT_CONG_COUNTER(np_cnp_sent),
-};
-
-static const struct mlx5_ib_counter extended_err_cnts[] = {
- INIT_Q_COUNTER(resp_local_length_error),
- INIT_Q_COUNTER(resp_cqe_error),
- INIT_Q_COUNTER(req_cqe_error),
- INIT_Q_COUNTER(req_remote_invalid_request),
- INIT_Q_COUNTER(req_remote_access_errors),
- INIT_Q_COUNTER(resp_remote_access_errors),
- INIT_Q_COUNTER(resp_cqe_flush_error),
- INIT_Q_COUNTER(req_cqe_flush_error),
-};
-
-static const struct mlx5_ib_counter roce_accl_cnts[] = {
- INIT_Q_COUNTER(roce_adp_retrans),
- INIT_Q_COUNTER(roce_adp_retrans_to),
- INIT_Q_COUNTER(roce_slow_restart),
- INIT_Q_COUNTER(roce_slow_restart_cnps),
- INIT_Q_COUNTER(roce_slow_restart_trans),
-};
-
-#define INIT_EXT_PPCNT_COUNTER(_name) \
- { .name = #_name, .offset = \
- MLX5_BYTE_OFF(ppcnt_reg, \
- counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
-
-static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
- INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
-};
-
-static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
-{
- return MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
- MLX5_ESWITCH_OFFLOADS;
-}
-
-static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
-{
- u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
- int num_cnt_ports;
- int i;
-
- num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
-
- MLX5_SET(dealloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_DEALLOC_Q_COUNTER);
-
- for (i = 0; i < num_cnt_ports; i++) {
- if (dev->port[i].cnts.set_id) {
- MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
- dev->port[i].cnts.set_id);
- mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
- }
- kfree(dev->port[i].cnts.names);
- kfree(dev->port[i].cnts.offsets);
- }
-}
-
-static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
- struct mlx5_ib_counters *cnts)
-{
- u32 num_counters;
-
- num_counters = ARRAY_SIZE(basic_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
- num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
- num_counters += ARRAY_SIZE(retrans_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
- num_counters += ARRAY_SIZE(extended_err_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, roce_accl))
- num_counters += ARRAY_SIZE(roce_accl_cnts);
-
- cnts->num_q_counters = num_counters;
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
- num_counters += ARRAY_SIZE(cong_cnts);
- }
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
- num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
- }
- cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
- if (!cnts->names)
- return -ENOMEM;
-
- cnts->offsets = kcalloc(num_counters,
- sizeof(cnts->offsets), GFP_KERNEL);
- if (!cnts->offsets)
- goto err_names;
-
- return 0;
-
-err_names:
- kfree(cnts->names);
- cnts->names = NULL;
- return -ENOMEM;
-}
-
-static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
- const char **names,
- size_t *offsets)
-{
- int i;
- int j = 0;
-
- for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
- names[j] = basic_q_cnts[i].name;
- offsets[j] = basic_q_cnts[i].offset;
- }
-
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
- for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
- names[j] = out_of_seq_q_cnts[i].name;
- offsets[j] = out_of_seq_q_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
- names[j] = retrans_q_cnts[i].name;
- offsets[j] = retrans_q_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
- names[j] = extended_err_cnts[i].name;
- offsets[j] = extended_err_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
- for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
- names[j] = roce_accl_cnts[i].name;
- offsets[j] = roce_accl_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
- names[j] = cong_cnts[i].name;
- offsets[j] = cong_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
- names[j] = ext_ppcnt_cnts[i].name;
- offsets[j] = ext_ppcnt_cnts[i].offset;
- }
- }
-}
-
-static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
-{
- u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
- int num_cnt_ports;
- int err = 0;
- int i;
- bool is_shared;
-
- MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
- is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
- num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
-
- for (i = 0; i < num_cnt_ports; i++) {
- err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
- if (err)
- goto err_alloc;
-
- mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
- dev->port[i].cnts.offsets);
-
- MLX5_SET(alloc_q_counter_in, in, uid,
- is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
-
- err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
- if (err) {
- mlx5_ib_warn(dev,
- "couldn't allocate queue counter for port %d, err %d\n",
- i + 1, err);
- goto err_alloc;
- }
-
- dev->port[i].cnts.set_id =
- MLX5_GET(alloc_q_counter_out, out, counter_set_id);
- }
- return 0;
-
-err_alloc:
- mlx5_ib_dealloc_counters(dev);
- return err;
-}
-
-static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
- u8 port_num)
-{
- return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
- &dev->port[port_num].cnts;
-}
-
-/**
- * mlx5_ib_get_counters_id - Returns counters id to use for device+port
- * @dev: Pointer to mlx5 IB device
- * @port_num: Zero based port number
- *
- * mlx5_ib_get_counters_id() Returns counters set id to use for given
- * device port combination in switchdev and non switchdev mode of the
- * parent device.
- */
-u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
-{
- const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
-
- return cnts->set_id;
-}
-
-static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct mlx5_ib_counters *cnts;
- bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
-
- if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
- return NULL;
-
- cnts = get_counters(dev, port_num - 1);
-
- return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
- const struct mlx5_ib_counters *cnts,
- struct rdma_hw_stats *stats,
- u16 set_id)
-{
- u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
- __be32 val;
- int ret, i;
-
- MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
- MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
- ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
- if (ret)
- return ret;
-
- for (i = 0; i < cnts->num_q_counters; i++) {
- val = *(__be32 *)((void *)out + cnts->offsets[i]);
- stats->value[i] = (u64)be32_to_cpu(val);
- }
-
- return 0;
-}
-
-static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_counters *cnts,
- struct rdma_hw_stats *stats)
-{
- int offset = cnts->num_q_counters + cnts->num_cong_counters;
- int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
- int ret, i;
- void *out;
-
- out = kvzalloc(sz, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
-
- ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
- if (ret)
- goto free;
-
- for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
- stats->value[i + offset] =
- be64_to_cpup((__be64 *)(out +
- cnts->offsets[i + offset]));
-free:
- kvfree(out);
- return ret;
-}
-
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u8 port_num, int index)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
- struct mlx5_core_dev *mdev;
- int ret, num_counters;
- u8 mdev_port_num;
-
- if (!stats)
- return -EINVAL;
-
- num_counters = cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters;
-
- /* q_counters are per IB device, query the master mdev */
- ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
- if (ret)
- return ret;
-
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
- if (ret)
- return ret;
- }
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
- &mdev_port_num);
- if (!mdev) {
- /* If port is not affiliated yet, its in down state
- * which doesn't have any counters yet, so it would be
- * zero. So no need to read from the HCA.
- */
- goto done;
- }
- ret = mlx5_lag_query_cong_counters(dev->mdev,
- stats->value +
- cnts->num_q_counters,
- cnts->num_cong_counters,
- cnts->offsets +
- cnts->num_q_counters);
-
- mlx5_ib_put_native_port_mdev(dev, port_num);
- if (ret)
- return ret;
- }
-
-done:
- return num_counters;
-}
-
-static struct rdma_hw_stats *
-mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- const struct mlx5_ib_counters *cnts =
- get_counters(dev, counter->port - 1);
-
- return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- const struct mlx5_ib_counters *cnts =
- get_counters(dev, counter->port - 1);
-
- return mlx5_ib_query_q_counters(dev->mdev, cnts,
- counter->stats, counter->id);
-}
-
-static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
-
- if (!counter->id)
- return 0;
-
- MLX5_SET(dealloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_DEALLOC_Q_COUNTER);
- MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
- return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
-}
-
-static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- int err;
-
- if (!counter->id) {
- u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
-
- MLX5_SET(alloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_ALLOC_Q_COUNTER);
- MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
- err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
- if (err)
- return err;
- counter->id =
- MLX5_GET(alloc_q_counter_out, out, counter_set_id);
- }
-
- err = mlx5_ib_qp_set_counter(qp, counter);
- if (err)
- goto fail_set_counter;
-
- return 0;
-
-fail_set_counter:
- mlx5_ib_counter_dealloc(counter);
- counter->id = 0;
-
- return err;
-}
-
-static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
-{
- return mlx5_ib_qp_set_counter(qp, NULL);
-}
-
static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params)
@@ -5802,23 +3369,6 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params);
}
-static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
-{
- if (!dev->delay_drop.dir_debugfs)
- return;
- debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
- dev->delay_drop.dir_debugfs = NULL;
-}
-
-static void cancel_delay_drop(struct mlx5_ib_dev *dev)
-{
- if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
- return;
-
- cancel_work_sync(&dev->delay_drop.delay_drop_work);
- delay_drop_debugfs_cleanup(dev);
-}
-
static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
@@ -5858,40 +3408,6 @@ static const struct file_operations fops_delay_drop_timeout = {
.read = delay_drop_timeout_read,
};
-static void delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
-{
- struct dentry *root;
-
- if (!mlx5_debugfs_root)
- return;
-
- root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
- dev->delay_drop.dir_debugfs = root;
-
- debugfs_create_atomic_t("num_timeout_events", 0400, root,
- &dev->delay_drop.events_cnt);
- debugfs_create_atomic_t("num_rqs", 0400, root,
- &dev->delay_drop.rqs_cnt);
- debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
- &fops_delay_drop_timeout);
-}
-
-static void init_delay_drop(struct mlx5_ib_dev *dev)
-{
- if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
- return;
-
- mutex_init(&dev->delay_drop.lock);
- dev->delay_drop.dev = dev;
- dev->delay_drop.activate = false;
- dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
- INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
- atomic_set(&dev->delay_drop.rqs_cnt, 0);
- atomic_set(&dev->delay_drop.events_cnt, 0);
-
- delay_drop_debugfs_init(dev);
-}
-
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
@@ -6385,90 +3901,32 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
enum mlx5_ib_uapi_flow_action_flags));
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_query_context,
+ UVERBS_OBJECT_DEVICE,
+ UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp,
+ dump_fill_mkey),
+ UA_MANDATORY));
+
static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
UAPI_DEF_CHAIN(mlx5_ib_qos_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
&mlx5_ib_flow_action),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
{}
};
-static int mlx5_ib_read_counters(struct ib_counters *counters,
- struct ib_counters_read_attr *read_attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
- struct mlx5_read_counters_attr mread_attr = {};
- struct mlx5_ib_flow_counters_desc *desc;
- int ret, i;
-
- mutex_lock(&mcounters->mcntrs_mutex);
- if (mcounters->cntrs_max_index > read_attr->ncounters) {
- ret = -EINVAL;
- goto err_bound;
- }
-
- mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
- GFP_KERNEL);
- if (!mread_attr.out) {
- ret = -ENOMEM;
- goto err_bound;
- }
-
- mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
- mread_attr.flags = read_attr->flags;
- ret = mcounters->read_counters(counters->device, &mread_attr);
- if (ret)
- goto err_read;
-
- /* do the pass over the counters data array to assign according to the
- * descriptions and indexing pairs
- */
- desc = mcounters->counters_data;
- for (i = 0; i < mcounters->ncounters; i++)
- read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
-
-err_read:
- kfree(mread_attr.out);
-err_bound:
- mutex_unlock(&mcounters->mcntrs_mutex);
- return ret;
-}
-
-static int mlx5_ib_destroy_counters(struct ib_counters *counters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-
- counters_clear_description(counters);
- if (mcounters->hw_cntrs_hndl)
- mlx5_fc_destroy(to_mdev(counters->device)->mdev,
- mcounters->hw_cntrs_hndl);
-
- kfree(mcounters);
-
- return 0;
-}
-
-static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_mcounters *mcounters;
-
- mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
- if (!mcounters)
- return ERR_PTR(-ENOMEM);
-
- mutex_init(&mcounters->mcntrs_mutex);
-
- return &mcounters->ibcntrs;
-}
-
static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
@@ -6547,21 +4005,16 @@ err_mp:
return -ENOMEM;
}
-static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_enable_driver(struct ib_device *dev)
{
- dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
-
- if (!dev->flow_db)
- return -ENOMEM;
-
- mutex_init(&dev->flow_db->lock);
+ struct mlx5_ib_dev *mdev = to_mdev(dev);
+ int ret;
- return 0;
-}
+ ret = mlx5_ib_test_wc(mdev);
+ mlx5_ib_dbg(mdev, "Write-Combining %s",
+ mdev->wc_support ? "supported" : "not supported");
-static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
-{
- kfree(dev->flow_db);
+ return ret;
}
static const struct ib_device_ops mlx5_ib_dev_ops = {
@@ -6577,9 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.attach_mcast = mlx5_ib_mcg_attach,
.check_mr_status = mlx5_ib_check_mr_status,
.create_ah = mlx5_ib_create_ah,
- .create_counters = mlx5_ib_create_counters,
.create_cq = mlx5_ib_create_cq,
- .create_flow = mlx5_ib_create_flow,
.create_qp = mlx5_ib_create_qp,
.create_srq = mlx5_ib_create_srq,
.dealloc_pd = mlx5_ib_dealloc_pd,
@@ -6587,10 +4038,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.del_gid = mlx5_ib_del_gid,
.dereg_mr = mlx5_ib_dereg_mr,
.destroy_ah = mlx5_ib_destroy_ah,
- .destroy_counters = mlx5_ib_destroy_counters,
.destroy_cq = mlx5_ib_destroy_cq,
- .destroy_flow = mlx5_ib_destroy_flow,
- .destroy_flow_action = mlx5_ib_destroy_flow_action,
.destroy_qp = mlx5_ib_destroy_qp,
.destroy_srq = mlx5_ib_destroy_srq,
.detach_mcast = mlx5_ib_mcg_detach,
@@ -6598,8 +4046,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.drain_rq = mlx5_ib_drain_rq,
.drain_sq = mlx5_ib_drain_sq,
.enable_driver = mlx5_ib_enable_driver,
- .fill_res_entry = mlx5_ib_fill_res_entry,
- .fill_stat_entry = mlx5_ib_fill_stat_entry,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
@@ -6623,24 +4069,20 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.query_pkey = mlx5_ib_query_pkey,
.query_qp = mlx5_ib_query_qp,
.query_srq = mlx5_ib_query_srq,
- .read_counters = mlx5_ib_read_counters,
+ .query_ucontext = mlx5_ib_query_ucontext,
.reg_user_mr = mlx5_ib_reg_user_mr,
.req_notify_cq = mlx5_ib_arm_cq,
.rereg_user_mr = mlx5_ib_rereg_user_mr,
.resize_cq = mlx5_ib_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
};
-static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = {
- .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
- .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
-};
-
static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
.rdma_netdev_get_params = mlx5_ib_rn_get_params,
};
@@ -6661,6 +4103,8 @@ static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
.alloc_xrcd = mlx5_ib_alloc_xrcd,
.dealloc_xrcd = mlx5_ib_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd),
};
static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
@@ -6769,9 +4213,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
@@ -6829,65 +4270,36 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
.modify_wq = mlx5_ib_modify_wq,
};
-static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
-{
- u8 port_num;
-
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
-
- port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- /* Register only for native ports */
- return mlx5_add_netdev_notifier(dev, port_num);
-}
-
-static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- mlx5_remove_netdev_notifier(dev, port_num);
-}
-
-static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev)
-{
- struct mlx5_core_dev *mdev = dev->mdev;
- enum rdma_link_layer ll;
- int port_type_cap;
- int err = 0;
-
- port_type_cap = MLX5_CAP_GEN(mdev, port_type);
- ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
-
- if (ll == IB_LINK_LAYER_ETHERNET)
- err = mlx5_ib_stage_common_roce_init(dev);
-
- return err;
-}
-
-static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_stage_common_roce_cleanup(dev);
-}
-
-static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
+ u8 port_num = 0;
int err;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- err = mlx5_ib_stage_common_roce_init(dev);
- if (err)
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ /* Register only for native ports */
+ err = mlx5_add_netdev_notifier(dev, port_num);
+ if (err || dev->is_rep || !mlx5_is_roce_enabled(mdev))
+ /*
+ * We don't enable ETH interface for
+ * 1. IB representors
+ * 2. User disabled ROCE through devlink interface
+ */
return err;
err = mlx5_enable_eth(dev);
@@ -6897,71 +4309,27 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
return 0;
cleanup:
- mlx5_ib_stage_common_roce_cleanup(dev);
-
+ mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
-static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
+ u8 port_num;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- mlx5_disable_eth(dev);
- mlx5_ib_stage_common_roce_cleanup(dev);
- }
-}
-
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
-{
- return create_dev_resources(&dev->devr);
-}
+ if (!dev->is_rep)
+ mlx5_disable_eth(dev);
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
-{
- destroy_dev_resources(&dev->devr);
-}
-
-static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
-{
- return mlx5_ib_odp_init_one(dev);
-}
-
-static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_odp_cleanup_one(dev);
-}
-
-static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = {
- .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
- .get_hw_stats = mlx5_ib_get_hw_stats,
- .counter_bind_qp = mlx5_ib_counter_bind_qp,
- .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
- .counter_dealloc = mlx5_ib_counter_dealloc,
- .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
- .counter_update_stats = mlx5_ib_counter_update_stats,
-};
-
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops);
-
- return mlx5_ib_alloc_counters(dev);
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ mlx5_remove_netdev_notifier(dev, port_num);
}
-
- return 0;
-}
-
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_counters(dev);
}
static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
@@ -7023,7 +4391,18 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
- destroy_umrc_res(dev);
+ int err;
+
+ err = mlx5_mr_cache_cleanup(dev);
+ if (err)
+ mlx5_ib_warn(dev, "mr cache cleanup failed\n");
+
+ if (dev->umrc.qp)
+ mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
+ if (dev->umrc.cq)
+ ib_free_cq(dev->umrc.cq);
+ if (dev->umrc.pd)
+ ib_dealloc_pd(dev->umrc.pd);
}
static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@@ -7031,21 +4410,162 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
ib_unregister_device(&dev->ib_dev);
}
+enum {
+ MAX_UMR_WR = 128,
+};
+
static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
- return create_umr_res(dev);
+ struct ib_qp_init_attr *init_attr = NULL;
+ struct ib_qp_attr *attr = NULL;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ int ret;
+
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
+ if (!attr || !init_attr) {
+ ret = -ENOMEM;
+ goto error_0;
+ }
+
+ pd = ib_alloc_pd(&dev->ib_dev, 0);
+ if (IS_ERR(pd)) {
+ mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+ ret = PTR_ERR(pd);
+ goto error_0;
+ }
+
+ cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
+ if (IS_ERR(cq)) {
+ mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
+ ret = PTR_ERR(cq);
+ goto error_2;
+ }
+
+ init_attr->send_cq = cq;
+ init_attr->recv_cq = cq;
+ init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr->cap.max_send_wr = MAX_UMR_WR;
+ init_attr->cap.max_send_sge = 1;
+ init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
+ init_attr->port_num = 1;
+ qp = mlx5_ib_create_qp(pd, init_attr, NULL);
+ if (IS_ERR(qp)) {
+ mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
+ ret = PTR_ERR(qp);
+ goto error_3;
+ }
+ qp->device = &dev->ib_dev;
+ qp->real_qp = qp;
+ qp->uobject = NULL;
+ qp->qp_type = MLX5_IB_QPT_REG_UMR;
+ qp->send_cq = init_attr->send_cq;
+ qp->recv_cq = init_attr->recv_cq;
+
+ attr->qp_state = IB_QPS_INIT;
+ attr->port_num = 1;
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
+ IB_QP_PORT, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+ goto error_4;
+ }
+
+ memset(attr, 0, sizeof(*attr));
+ attr->qp_state = IB_QPS_RTR;
+ attr->path_mtu = IB_MTU_256;
+
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
+ goto error_4;
+ }
+
+ memset(attr, 0, sizeof(*attr));
+ attr->qp_state = IB_QPS_RTS;
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
+ goto error_4;
+ }
+
+ dev->umrc.qp = qp;
+ dev->umrc.cq = cq;
+ dev->umrc.pd = pd;
+
+ sema_init(&dev->umrc.sem, MAX_UMR_WR);
+ ret = mlx5_mr_cache_init(dev);
+ if (ret) {
+ mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
+ goto error_4;
+ }
+
+ kfree(attr);
+ kfree(init_attr);
+
+ return 0;
+
+error_4:
+ mlx5_ib_destroy_qp(qp, NULL);
+ dev->umrc.qp = NULL;
+
+error_3:
+ ib_free_cq(cq);
+ dev->umrc.cq = NULL;
+
+error_2:
+ ib_dealloc_pd(pd);
+ dev->umrc.pd = NULL;
+
+error_0:
+ kfree(attr);
+ kfree(init_attr);
+ return ret;
}
static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
{
- init_delay_drop(dev);
+ struct dentry *root;
+
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return 0;
+
+ mutex_init(&dev->delay_drop.lock);
+ dev->delay_drop.dev = dev;
+ dev->delay_drop.activate = false;
+ dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
+ INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
+ atomic_set(&dev->delay_drop.rqs_cnt, 0);
+ atomic_set(&dev->delay_drop.events_cnt, 0);
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
+ dev->delay_drop.dir_debugfs = root;
+ debugfs_create_atomic_t("num_timeout_events", 0400, root,
+ &dev->delay_drop.events_cnt);
+ debugfs_create_atomic_t("num_rqs", 0400, root,
+ &dev->delay_drop.rqs_cnt);
+ debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
+ &fops_delay_drop_timeout);
return 0;
}
static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
{
- cancel_delay_drop(dev);
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+ if (!dev->delay_drop.dir_debugfs)
+ return;
+
+ debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
+ dev->delay_drop.dir_debugfs = NULL;
}
static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
@@ -7060,38 +4580,6 @@ static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
}
-static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
-{
- int uid;
-
- uid = mlx5_ib_devx_create(dev, false);
- if (uid > 0) {
- dev->devx_whitelist_uid = uid;
- mlx5_ib_devx_init_event_table(dev);
- }
-
- return 0;
-}
-static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
-{
- if (dev->devx_whitelist_uid) {
- mlx5_ib_devx_cleanup_event_table(dev);
- mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
- }
-}
-
-int mlx5_ib_enable_driver(struct ib_device *dev)
-{
- struct mlx5_ib_dev *mdev = to_mdev(dev);
- int ret;
-
- ret = mlx5_ib_test_wc(mdev);
- mlx5_ib_dbg(mdev, "Write-Combining %s",
- mdev->wc_support ? "supported" : "not supported");
-
- return ret;
-}
-
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage)
@@ -7139,9 +4627,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
mlx5_ib_stage_caps_cleanup),
@@ -7149,8 +4637,8 @@ static const struct mlx5_ib_profile pf_profile = {
mlx5_ib_stage_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_roce_init,
- mlx5_ib_stage_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_QP,
mlx5_init_qp_table,
mlx5_cleanup_qp_table),
@@ -7158,17 +4646,17 @@ static const struct mlx5_ib_profile pf_profile = {
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_ODP,
- mlx5_ib_stage_odp_init,
- mlx5_ib_stage_odp_cleanup),
+ mlx5_ib_odp_init_one,
+ mlx5_ib_odp_cleanup_one),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
@@ -7182,8 +4670,8 @@ static const struct mlx5_ib_profile pf_profile = {
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
- mlx5_ib_stage_devx_init,
- mlx5_ib_stage_devx_cleanup),
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -7193,15 +4681,18 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
mlx5_ib_stage_delay_drop_init,
mlx5_ib_stage_delay_drop_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
};
const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
mlx5_ib_stage_caps_cleanup),
@@ -7209,8 +4700,8 @@ const struct mlx5_ib_profile raw_eth_profile = {
mlx5_ib_stage_raw_eth_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_raw_eth_roce_init,
- mlx5_ib_stage_raw_eth_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_QP,
mlx5_init_qp_table,
mlx5_cleanup_qp_table),
@@ -7218,14 +4709,14 @@ const struct mlx5_ib_profile raw_eth_profile = {
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
@@ -7239,14 +4730,17 @@ const struct mlx5_ib_profile raw_eth_profile = {
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
- mlx5_ib_stage_devx_init,
- mlx5_ib_stage_devx_cleanup),
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
};
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)