summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-12-07 20:09:21 -0800
committerJakub Kicinski <kuba@kernel.org>2022-12-07 20:09:21 -0800
commite1228581b38bd698d3502354c0807863c8b518f7 (patch)
tree32af511225fd8fd00af5e913af48edd2ba66ca16 /drivers
parent5955a948ac3d4e78857726d5e8d4bffb3fc71d4f (diff)
parente5b9642a33be6f6a9ec2f035299f4a5c0980d7c0 (diff)
downloadlinux-e1228581b38bd698d3502354c0807863c8b518f7.tar.bz2
Merge branch 'devlink-add-port-function-attribute-to-enable-disable-roce-and-migratable'
Shay Drory says: ==================== devlink: Add port function attribute to enable/disable Roce and migratable This series is a complete rewrite of the series "devlink: Add port function attribute to enable/disable roce" link: https://lore.kernel.org/netdev/20221102163954.279266-1-danielj@nvidia.com/ Currently mlx5 PCI VF and SF are enabled by default for RoCE functionality. And mlx5 PCI VF is disable by dafault for migratable functionality. Currently a user does not have the ability to disable RoCE for a PCI VF/SF device before such device is enumerated by the driver. User is also incapable to do such setting from smartnic scenario for a VF from the smartnic. Current 'enable_roce' device knob is limited to do setting only at driverinit time. By this time device is already created and firmware has already allocated necessary system memory for supporting RoCE. Also, Currently a user does not have the ability to enable migratable for a PCI VF. The above are a hyper visor level control, to set the functionality of devices passed through to guests. This is achieved by extending existing 'port function' object to control capabilities of a function. This enables users to control capability of the device before enumeration. Examples when user prefers to disable RoCE for a VF when using switchdev mode: $ devlink port show pci/0000:06:00.0/1 pci/0000:06:00.0/1: type eth netdev pf0vf0 flavour pcivf controller 0 pfnum 0 vfnum 0 external false splittable false function: hw_addr 00:00:00:00:00:00 roce enable $ devlink port function set pci/0000:06:00.0/1 roce disable $ devlink port show pci/0000:06:00.0/1 pci/0000:06:00.0/1: type eth netdev pf0vf0 flavour pcivf controller 0 pfnum 0 vfnum 0 external false splittable false function: hw_addr 00:00:00:00:00:00 roce disable FAQs: ----- 1. What does roce enable/disable do? Ans: It disables RoCE capability of the function before its enumerated, so when driver reads the capability from the device firmware, it is disabled. At this point RDMA stack will not be able to create UD, QP1, RC, XRC type of QPs. When RoCE is disabled, the GID table of all ports of the device is disabled in the device and software stack. 2. How is the roce 'port function' option different from existing devlink param? Ans: RoCE attribute at the port function level disables the RoCE capability at the specific function level; while enable_roce only does at the software level. 3. Why is this option for disabling only RoCE and not the whole RDMA device? Ans: Because user still wants to use the RDMA device for non RoCE commands in more memory efficient way. ==================== Link: https://lore.kernel.org/r/20221206185119.380138-1-shayd@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c211
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c30
7 files changed, 301 insertions, 6 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 751bc4a9edcf..ddb197970c22 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -314,6 +314,10 @@ static const struct devlink_ops mlx5_devlink_ops = {
.rate_node_new = mlx5_esw_devlink_rate_node_new,
.rate_node_del = mlx5_esw_devlink_rate_node_del,
.rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
+ .port_fn_roce_get = mlx5_devlink_port_fn_roce_get,
+ .port_fn_roce_set = mlx5_devlink_port_fn_roce_set,
+ .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get,
+ .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set,
#endif
#ifdef CONFIG_MLX5_SF_MANAGER
.port_new = mlx5_devlink_sf_port_new,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 374e3fbdc2cf..527e4bffda8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -772,6 +772,41 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
esw_vport_destroy_offloads_acl_tables(esw, vport);
}
+static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_ctx;
+ void *hca_caps;
+ int err;
+
+ if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
+ return 0;
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx)
+ return -ENOMEM;
+
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx,
+ MLX5_CAP_GENERAL);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce);
+
+ memset(query_ctx, 0, query_out_sz);
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx,
+ MLX5_CAP_GENERAL_2);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ vport->info.mig_enabled = MLX5_GET(cmd_hca_cap_2, hca_caps, migratable);
+out_free:
+ kfree(query_ctx);
+ return err;
+}
+
static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
u16 vport_num = vport->vport;
@@ -785,6 +820,10 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
if (mlx5_esw_is_manager_vport(esw, vport_num))
return 0;
+ err = mlx5_esw_vport_caps_get(esw, vport);
+ if (err)
+ goto err_caps;
+
mlx5_modify_vport_admin_state(esw->dev,
MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
vport_num, 1,
@@ -804,6 +843,10 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
vport->info.qos, flags);
return 0;
+
+err_caps:
+ esw_vport_cleanup_acl(esw, vport);
+ return err;
}
/* Don't cleanup vport->info, it's needed to restore vport configuration */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 42d9df417e20..5a85a5d32be7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -153,6 +153,8 @@ struct mlx5_vport_info {
u8 qos;
u8 spoofchk: 1;
u8 trusted: 1;
+ u8 roce_enabled: 1;
+ u8 mig_enabled: 1;
};
/* Vport context events */
@@ -508,7 +510,14 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
-
+int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled,
+ struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
+ struct netlink_ext_ack *extack);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 9b6fbb19c22a..c6a14202c62c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3889,7 +3889,7 @@ static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num,
if (!query_ctx)
return -ENOMEM;
- err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx);
+ err = mlx5_vport_get_other_func_general_cap(esw->dev, vport_num, query_ctx);
if (err)
goto out_free;
@@ -4022,3 +4022,212 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr);
}
+
+static struct mlx5_vport *
+mlx5_devlink_port_fn_get_vport(struct devlink_port *port, struct mlx5_eswitch *esw)
+{
+ u16 vport_num;
+
+ if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
+ if (!is_port_function_supported(esw, vport_num))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return mlx5_eswitch_get_vport(esw, vport_num);
+}
+
+int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw;
+ struct mlx5_vport *vport;
+ int err = -EOPNOTSUPP;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ if (!MLX5_CAP_GEN(esw->dev, migration)) {
+ NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration");
+ return err;
+ }
+
+ vport = mlx5_devlink_port_fn_get_vport(port, esw);
+ if (IS_ERR(vport)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+ return PTR_ERR(vport);
+ }
+
+ mutex_lock(&esw->state_lock);
+ if (vport->enabled) {
+ *is_enabled = vport->info.mig_enabled;
+ err = 0;
+ }
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ struct mlx5_eswitch *esw;
+ struct mlx5_vport *vport;
+ void *query_ctx;
+ void *hca_caps;
+ int err = -EOPNOTSUPP;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ if (!MLX5_CAP_GEN(esw->dev, migration)) {
+ NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration");
+ return err;
+ }
+
+ vport = mlx5_devlink_port_fn_get_vport(port, esw);
+ if (IS_ERR(vport)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+ return PTR_ERR(vport);
+ }
+
+ mutex_lock(&esw->state_lock);
+ if (!vport->enabled) {
+ NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
+ goto out;
+ }
+
+ if (vport->info.mig_enabled == enable) {
+ err = 0;
+ goto out;
+ }
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx,
+ MLX5_CAP_GENERAL_2);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
+ goto out_free;
+ }
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability),
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1);
+
+ err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA migratable cap");
+ goto out_free;
+ }
+
+ vport->info.mig_enabled = enable;
+
+out_free:
+ kfree(query_ctx);
+out:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw;
+ struct mlx5_vport *vport;
+ int err = -EOPNOTSUPP;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ vport = mlx5_devlink_port_fn_get_vport(port, esw);
+ if (IS_ERR(vport)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+ return PTR_ERR(vport);
+ }
+
+ mutex_lock(&esw->state_lock);
+ if (vport->enabled) {
+ *is_enabled = vport->info.roce_enabled;
+ err = 0;
+ }
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ struct mlx5_eswitch *esw;
+ struct mlx5_vport *vport;
+ int err = -EOPNOTSUPP;
+ void *query_ctx;
+ void *hca_caps;
+ u16 vport_num;
+
+ esw = mlx5_devlink_eswitch_get(port->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ vport = mlx5_devlink_port_fn_get_vport(port, esw);
+ if (IS_ERR(vport)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid port");
+ return PTR_ERR(vport);
+ }
+ vport_num = vport->vport;
+
+ mutex_lock(&esw->state_lock);
+ if (!vport->enabled) {
+ NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
+ goto out;
+ }
+
+ if (vport->info.roce_enabled == enable) {
+ err = 0;
+ goto out;
+ }
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
+ MLX5_CAP_GENERAL);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
+ goto out_free;
+ }
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability),
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ MLX5_SET(cmd_hca_cap, hca_caps, roce, enable);
+
+ err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA roce cap");
+ goto out_free;
+ }
+
+ vport->info.roce_enabled = enable;
+
+out_free:
+ kfree(query_ctx);
+out:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index a806e3de7b7c..029305a8b80a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -324,7 +324,10 @@ void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev);
int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery);
int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery);
-int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
+int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id,
+ u16 opmod);
+#define mlx5_vport_get_other_func_general_cap(dev, fid, out) \
+ mlx5_vport_get_other_func_cap(dev, fid, out, MLX5_CAP_GENERAL)
void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work);
static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 662f1d55e30e..6bde18bcd42f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -4,6 +4,7 @@
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
@@ -101,7 +102,7 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
goto out;
}
- ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
+ ret = mlx5_vport_get_other_func_general_cap(dev, function_id, query_cap);
if (ret)
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index d5c317325030..ba7e3df22413 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1160,14 +1160,40 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
-int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out)
+int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out,
+ u16 opmod)
{
- u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+ opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
MLX5_SET(query_hca_cap_in, in, function_id, function_id);
MLX5_SET(query_hca_cap_in, in, other_function, true);
return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
}
+EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap);
+
+int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap,
+ u16 function_id, u16 opmod)
+{
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ void *set_hca_cap;
+ void *set_ctx;
+ int ret;
+
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ return -ENOMEM;
+
+ MLX5_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+ MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1);
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+ memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap));
+ MLX5_SET(set_hca_cap_in, set_ctx, function_id, function_id);
+ MLX5_SET(set_hca_cap_in, set_ctx, other_function, true);
+ ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx);
+
+ kfree(set_ctx);
+ return ret;
+}