From 25a0ad85156a7b697d4340560fff0d25a3b19243 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 3 May 2018 08:40:49 -0700 Subject: RDMA/nldev: Add explicit pad attribute Add a specific RDMA_NLDEV_ATTR_PAD attribute to be used for 64b attribute padding. To preserve the ABI, make this attribute equal to RDMA_NLDEV_ATTR_UNSPEC, which has a value of 0, because that has been used up until now as the pad attribute. Change all the previous use of 0 as the pad with this new enum. Signed-off-by: Steve Wise Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/rdma_netlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 0ce0943fc808..fbf99aa8a03c 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -253,6 +253,9 @@ enum rdma_nldev_attr { /* don't change the order or add anything between, this is ABI! */ RDMA_NLDEV_ATTR_UNSPEC, + /* Pad attribute for 64b alignment */ + RDMA_NLDEV_ATTR_PAD = RDMA_NLDEV_ATTR_UNSPEC, + /* Identifier for ib_device */ RDMA_NLDEV_ATTR_DEV_INDEX, /* u32 */ -- cgit v1.2.3 From da5c8507821573b8ed6e3f47e009f273493ffaf7 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 3 May 2018 08:41:30 -0700 Subject: RDMA/nldev: add driver-specific resource tracking Each driver can register a "fill entry" function with the restrack core. This function will be called when filling out a resource, allowing the driver to add driver-specific details. The details consist of a nltable of nested attributes, that are in the form of tuples. Both key and value attributes are mandatory. The key nlattr must be a string, and the value nlattr can be one of the driver attributes that are generic, but typed, allowing the attributes to be validated. Currently the driver nlattr types include string, s32, u32, s64, and u64. The print-type nlattr allows a driver to specify an alternative display format for user tools displaying the attribute. For example, a u32 attribute will default to "%u", but a print-type attribute can be included for it to be displayed in hex. This allows the user tool to print the number in the format desired by the driver driver. More attrs can be defined as they become needed by drivers. Signed-off-by: Steve Wise Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/nldev.c | 29 +++++++++++++++++++++++++++++ drivers/infiniband/core/restrack.c | 7 +++++++ include/rdma/restrack.h | 9 +++++++++ include/uapi/rdma/rdma_netlink.h | 23 +++++++++++++++++++++++ 4 files changed, 68 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 6b0c1eb71ea0..50efca482a6c 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -98,6 +98,15 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ }, + [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, + [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 }, + [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, + [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, }; static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) @@ -285,6 +294,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb, struct rdma_restrack_entry *res, uint32_t port) { struct ib_qp *qp = container_of(res, struct ib_qp, res); + struct rdma_restrack_root *resroot = &qp->device->res; struct ib_qp_init_attr qp_init_attr; struct nlattr *entry_attr; struct ib_qp_attr qp_attr; @@ -334,6 +344,9 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; + if (resroot->fill_res_entry(msg, res)) + goto err; + nla_nest_end(msg, entry_attr); return 0; @@ -349,6 +362,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, { struct rdma_id_private *id_priv = container_of(res, struct rdma_id_private, res); + struct rdma_restrack_root *resroot = &id_priv->id.device->res; struct rdma_cm_id *cm_id = &id_priv->id; struct nlattr *entry_attr; @@ -390,6 +404,9 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, if (fill_res_name_pid(msg, res)) goto err; + if (resroot->fill_res_entry(msg, res)) + goto err; + nla_nest_end(msg, entry_attr); return 0; @@ -403,6 +420,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb, struct rdma_restrack_entry *res, uint32_t port) { struct ib_cq *cq = container_of(res, struct ib_cq, res); + struct rdma_restrack_root *resroot = &cq->device->res; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY); @@ -423,6 +441,9 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; + if (resroot->fill_res_entry(msg, res)) + goto err; + nla_nest_end(msg, entry_attr); return 0; @@ -436,6 +457,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb, struct rdma_restrack_entry *res, uint32_t port) { struct ib_mr *mr = container_of(res, struct ib_mr, res); + struct rdma_restrack_root *resroot = &mr->pd->device->res; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY); @@ -459,6 +481,9 @@ static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; + if (resroot->fill_res_entry(msg, res)) + goto err; + nla_nest_end(msg, entry_attr); return 0; @@ -472,6 +497,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb, struct rdma_restrack_entry *res, uint32_t port) { struct ib_pd *pd = container_of(res, struct ib_pd, res); + struct rdma_restrack_root *resroot = &pd->device->res; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY); @@ -498,6 +524,9 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; + if (resroot->fill_res_entry(msg, res)) + goto err; + nla_nest_end(msg, entry_attr); return 0; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index efddd13e3edb..172b517dc7b9 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -12,9 +12,16 @@ #include "cma_priv.h" +static int fill_res_noop(struct sk_buff *msg, + struct rdma_restrack_entry *entry) +{ + return 0; +} + void rdma_restrack_init(struct rdma_restrack_root *res) { init_rwsem(&res->rwsem); + res->fill_res_entry = fill_res_noop; } static const char *type2str(enum rdma_restrack_type type) diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index f3b3e3576f6a..14304e249685 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -44,6 +44,8 @@ enum rdma_restrack_type { }; #define RDMA_RESTRACK_HASH_BITS 8 +struct rdma_restrack_entry; + /** * struct rdma_restrack_root - main resource tracking management * entity, per-device @@ -57,6 +59,13 @@ struct rdma_restrack_root { * @hash: global database for all resources per-device */ DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS); + /** + * @fill_res_entry: driver-specific fill function + * + * Allows rdma drivers to add their own restrack attributes. + */ + int (*fill_res_entry)(struct sk_buff *msg, + struct rdma_restrack_entry *entry); }; /** diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index fbf99aa8a03c..07ff6c72fc50 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -249,6 +249,15 @@ enum rdma_nldev_command { RDMA_NLDEV_NUM_OPS }; +enum { + RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16, +}; + +enum rdma_nldev_print_type { + RDMA_NLDEV_PRINT_TYPE_UNSPEC, + RDMA_NLDEV_PRINT_TYPE_HEX, +}; + enum rdma_nldev_attr { /* don't change the order or add anything between, this is ABI! */ RDMA_NLDEV_ATTR_UNSPEC, @@ -390,6 +399,20 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_RES_PD_ENTRY, /* nested table */ RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, /* u32 */ RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, /* u32 */ + /* + * driver-specific attributes. + */ + RDMA_NLDEV_ATTR_DRIVER, /* nested table */ + RDMA_NLDEV_ATTR_DRIVER_ENTRY, /* nested table */ + RDMA_NLDEV_ATTR_DRIVER_STRING, /* string */ + /* + * u8 values from enum rdma_nldev_print_type + */ + RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, /* u8 */ + RDMA_NLDEV_ATTR_DRIVER_S32, /* s32 */ + RDMA_NLDEV_ATTR_DRIVER_U32, /* u32 */ + RDMA_NLDEV_ATTR_DRIVER_S64, /* s64 */ + RDMA_NLDEV_ATTR_DRIVER_U64, /* u64 */ /* * Provides logical name and index of netdevice which is -- cgit v1.2.3 From 0d52d803767e9dfab91348fbc8de1dfd388565fd Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Tue, 15 May 2018 15:54:46 -0400 Subject: RDMA/uapi: Fix uapi breakage During this merge window, we added support for addition RDMA netlink operations. Unfortunately, we added the items in the middle of our uapi enum. Fix that before final release. Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking") Signed-off-by: Doug Ledford --- include/uapi/rdma/rdma_netlink.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 07ff6c72fc50..edba6351ac13 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -399,6 +399,18 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_RES_PD_ENTRY, /* nested table */ RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, /* u32 */ RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, /* u32 */ + /* + * Provides logical name and index of netdevice which is + * connected to physical port. This information is relevant + * for RoCE and iWARP. + * + * The netdevices which are associated with containers are + * supposed to be exported together with GID table once it + * will be exposed through the netlink. Because the + * associated netdevices are properties of GIDs. + */ + RDMA_NLDEV_ATTR_NDEV_INDEX, /* u32 */ + RDMA_NLDEV_ATTR_NDEV_NAME, /* string */ /* * driver-specific attributes. */ @@ -415,18 +427,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_DRIVER_U64, /* u64 */ /* - * Provides logical name and index of netdevice which is - * connected to physical port. This information is relevant - * for RoCE and iWARP. - * - * The netdevices which are associated with containers are - * supposed to be exported together with GID table once it - * will be exposed through the netlink. Because the - * associated netdevices are properties of GIDs. + * Always the end */ - RDMA_NLDEV_ATTR_NDEV_INDEX, /* u32 */ - RDMA_NLDEV_ATTR_NDEV_NAME, /* string */ - RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 20b6563ba164efc1ae24f637cb269bb3a9eb5467 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sun, 13 May 2018 14:33:29 +0300 Subject: IB/uverbs: Expose GRE flow spec to the user-kernel ABI header Add ib_uverbs_flow_spec_gre to define a rule to match the GRE encapsulation protocol. The spec includes the generic specs header, type, size and reserved fields while the filter itself is defined as ib_uverbs_flow_gre_filter and includes: 1. Checksum present bit, key present bit and version bits in a single 16bit field. 2. Protocol type field - Indicates the ether protocol type of the encapsulated payload. 3. Key field - present if key bit is set and contains an application specific key value. Reviewed-by: Mark Bloch Signed-off-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/ib_user_verbs.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 6aeb03315b0b..77c1a9a76e71 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -1033,6 +1033,33 @@ struct ib_uverbs_flow_spec_esp { struct ib_uverbs_flow_spec_esp_filter mask; }; +struct ib_uverbs_flow_gre_filter { + /* c_ks_res0_ver field is bits 0-15 in offset 0 of a standard GRE header: + * bit 0 - C - checksum bit. + * bit 1 - reserved. set to 0. + * bit 2 - key bit. + * bit 3 - sequence number bit. + * bits 4:12 - reserved. set to 0. + * bits 13:15 - GRE version. + */ + __be16 c_ks_res0_ver; + __be16 protocol; + __be32 key; +}; + +struct ib_uverbs_flow_spec_gre { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_flow_gre_filter val; + struct ib_uverbs_flow_gre_filter mask; +}; + struct ib_uverbs_flow_attr { __u32 type; __u16 size; -- cgit v1.2.3 From 0d86bbec71b283803f844b35d50dba977be2db4a Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sun, 13 May 2018 14:33:31 +0300 Subject: IB/uverbs: Expose MPLS flow spec to the user-kernel ABI header Add ib_uverbs_flow_spec_mpls to define a rule to match the MPLS protocol. The spec includes the generic specs header, type, size and reserved fields while the filter itself is defined as ib_uverbs_flow_mpls_filter and includes a single 32bit field named 'label' which consists of: Bits 0:19 - The MPLS label. Bits 20:22 - Traffic class field. Bit 23 - Bottom of stack bit. Bits 24:31 - Time to live (TTL) field. Reviewed-by: Mark Bloch Signed-off-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/ib_user_verbs.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 77c1a9a76e71..409507f83b91 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -1060,6 +1060,29 @@ struct ib_uverbs_flow_spec_gre { struct ib_uverbs_flow_gre_filter mask; }; +struct ib_uverbs_flow_mpls_filter { + /* The field includes the entire MPLS label: + * bits 0:19 - label field. + * bits 20:22 - traffic class field. + * bits 23 - bottom of stack bit. + * bits 24:31 - ttl field. + */ + __be32 label; +}; + +struct ib_uverbs_flow_spec_mpls { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_flow_mpls_filter val; + struct ib_uverbs_flow_mpls_filter mask; +}; + struct ib_uverbs_flow_attr { __u32 type; __u16 size; -- cgit v1.2.3 From e818e255a58d64e86c8c93e3aa52498b1a3d1760 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sun, 13 May 2018 14:33:35 +0300 Subject: IB/mlx5: Expose MPLS related tunneling offloads This patch reports the device's capbilities to offload encapsulated MPLS tunnel protocols to user-space: - Capability to offload MPLS over GRE. - Capability to offload MPLS over UDP. Reviewed-by: Mark Bloch Signed-off-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 8 ++++++++ include/linux/mlx5/device.h | 5 +++++ include/linux/mlx5/mlx5_ifc.h | 4 +++- include/uapi/rdma/mlx5-abi.h | 4 +++- 4 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8792248034cb..ab8cd5c034a2 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1084,6 +1084,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_GRE; + if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_CW_MPLS_GRE) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE; + if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_CW_MPLS_UDP) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; } if (uhw->outlen) { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index fd1a9341edfa..5004ddc702e3 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1001,6 +1001,11 @@ enum mlx5_mpls_supported_fields { MLX5_FIELD_SUPPORT_MPLS_TTL = 1 << 3 }; +enum mlx5_flex_parser_protos { + MLX5_FLEX_PROTO_CW_MPLS_GRE = 1 << 4, + MLX5_FLEX_PROTO_CW_MPLS_UDP = 1 << 5, +}; + /* MLX5 DEV CAPs */ /* TODO: EAT.ME */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3fee2f74d09d..68f756ea550d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1138,7 +1138,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_500[0x20]; u8 num_of_uars_per_page[0x20]; - u8 reserved_at_540[0x40]; + + u8 flex_parser_protocols[0x20]; + u8 reserved_at_560[0x20]; u8 reserved_at_580[0x3d]; u8 cqe_128_always[0x1]; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index fdaf00e20649..508ea8c82da7 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -233,7 +233,9 @@ enum mlx5_ib_query_dev_resp_flags { enum mlx5_ib_tunnel_offloads { MLX5_IB_TUNNELED_OFFLOADS_VXLAN = 1 << 0, MLX5_IB_TUNNELED_OFFLOADS_GRE = 1 << 1, - MLX5_IB_TUNNELED_OFFLOADS_GENEVE = 1 << 2 + MLX5_IB_TUNNELED_OFFLOADS_GENEVE = 1 << 2, + MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE = 1 << 3, + MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP = 1 << 4, }; struct mlx5_ib_query_device_resp { -- cgit v1.2.3 From 6f1006a43869ff82745eea3b88204d0a3bcc0158 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Sun, 27 May 2018 13:42:34 +0300 Subject: IB/mlx5: Introduce a new mini-CQE format The new mini-CQE format includes the stride index, byte count and packet checksum. Stride index is needed for striding WQ feature. This patch exposes this capability and enables its setting via mlx5 UHW data as part of query device and cq creation. Reviewed-by: Yishai Hadas Reviewed-by: Guy Levi Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cq.c | 42 +++++++++++++++++++++++++++++---------- drivers/infiniband/hw/mlx5/main.c | 4 ++++ include/uapi/rdma/mlx5-abi.h | 2 +- 3 files changed, 37 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 6d52ea03574e..68775e12d721 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -742,6 +742,28 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, return 0; } +enum { + MLX5_CQE_RES_FORMAT_HASH = 0, + MLX5_CQE_RES_FORMAT_CSUM = 1, + MLX5_CQE_RES_FORMAT_CSUM_STRIDX = 3, +}; + +static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format) +{ + switch (format) { + case MLX5_IB_CQE_RES_FORMAT_HASH: + return MLX5_CQE_RES_FORMAT_HASH; + case MLX5_IB_CQE_RES_FORMAT_CSUM: + return MLX5_CQE_RES_FORMAT_CSUM; + case MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX: + if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index)) + return MLX5_CQE_RES_FORMAT_CSUM_STRIDX; + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct ib_ucontext *context, struct mlx5_ib_cq *cq, int entries, u32 **cqb, @@ -807,6 +829,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *index = to_mucontext(context)->bfregi.sys_pages[0]; if (ucmd.cqe_comp_en == 1) { + int mini_cqe_format; + if (!((*cqe_size == 128 && MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) || (*cqe_size == 64 && @@ -817,20 +841,18 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, goto err_cqb; } - if (unlikely(!ucmd.cqe_comp_res_format || - !(ucmd.cqe_comp_res_format < - MLX5_IB_CQE_RES_RESERVED) || - (ucmd.cqe_comp_res_format & - (ucmd.cqe_comp_res_format - 1)))) { - err = -EOPNOTSUPP; - mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n", - ucmd.cqe_comp_res_format); + mini_cqe_format = + mini_cqe_res_format_to_hw(dev, + ucmd.cqe_comp_res_format); + if (mini_cqe_format < 0) { + err = mini_cqe_format; + mlx5_ib_dbg(dev, "CQE compression res format %d error: %d\n", + ucmd.cqe_comp_res_format, err); goto err_cqb; } MLX5_SET(cqc, cqc, cqe_comp_en, 1); - MLX5_SET(cqc, cqc, mini_cqe_res_format, - ilog2(ucmd.cqe_comp_res_format)); + MLX5_SET(cqc, cqc, mini_cqe_res_format, mini_cqe_format); } if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 029c310a0dd1..e0894b203d59 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -993,6 +993,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.cqe_comp_caps.supported_format = MLX5_IB_CQE_RES_FORMAT_HASH | MLX5_IB_CQE_RES_FORMAT_CSUM; + + if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index)) + resp.cqe_comp_caps.supported_format |= + MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX; } } diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index cb4a02c4a1ce..9783648d5511 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -163,7 +163,7 @@ struct mlx5_ib_rss_caps { enum mlx5_ib_cqe_comp_res_format { MLX5_IB_CQE_RES_FORMAT_HASH = 1 << 0, MLX5_IB_CQE_RES_FORMAT_CSUM = 1 << 1, - MLX5_IB_CQE_RES_RESERVED = 1 << 2, + MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX = 1 << 2, }; struct mlx5_ib_cqe_comp_caps { -- cgit v1.2.3 From d9a5a6441e9dde080e9d69e736c623f7369472ed Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 31 May 2018 16:43:32 +0300 Subject: IB/uverbs: Add create/destroy counters support User space application which uses counters functionality, is expected to allocate/release the counters resources by calling create/destroy verbs and in turn get a unique handle that can be used to attach the counters to its counted type. Reviewed-by: Yishai Hadas Signed-off-by: Raed Salem Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_std_types.c | 3 +- .../infiniband/core/uverbs_std_types_counters.c | 100 +++++++++++++++++++++ include/uapi/rdma/ib_user_ioctl_cmds.h | 14 +++ 5 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 drivers/infiniband/core/uverbs_std_types_counters.c (limited to 'include/uapi') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index dda9e856e3fa..589d3b744b1e 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -36,4 +36,4 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ uverbs_ioctl_merge.o uverbs_std_types_cq.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ - uverbs_std_types_mr.o + uverbs_std_types_mr.o uverbs_std_types_counters.o diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index cfb51618ab7a..5b2461fa634d 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -287,6 +287,7 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL); extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD); extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION); extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS); #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 569f48bd821e..b570acbd94af 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -302,7 +302,8 @@ static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), - &UVERBS_OBJECT(UVERBS_OBJECT_DM)); + &UVERBS_OBJECT(UVERBS_OBJECT_DM), + &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); const struct uverbs_object_tree_def *uverbs_default_get_objects(void) { diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c new file mode 100644 index 000000000000..4b6a985aad97 --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "uverbs.h" +#include + +static int uverbs_free_counters(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_counters *counters = uobject->object; + + if (why == RDMA_REMOVE_DESTROY && + atomic_read(&counters->usecnt)) + return -EBUSY; + + return counters->device->destroy_counters(counters); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_counters *counters; + struct ib_uobject *uobj; + int ret; + + /* + * This check should be removed once the infrastructure + * have the ability to remove methods from parse tree once + * such condition is met. + */ + if (!ib_dev->create_counters) + return -EOPNOTSUPP; + + uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); + counters = ib_dev->create_counters(ib_dev, attrs); + if (IS_ERR(counters)) { + ret = PTR_ERR(counters); + goto err_create_counters; + } + + counters->device = ib_dev; + counters->uobject = uobj; + uobj->object = counters; + atomic_set(&counters->usecnt, 0); + + return 0; + +err_create_counters: + return ret; +} + +static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE, + &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY, + uverbs_destroy_def_handler, + &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters), + &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY)); + diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 83e3890eef20..c28ce62d2e40 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -55,6 +55,7 @@ enum uverbs_default_objects { UVERBS_OBJECT_WQ, UVERBS_OBJECT_FLOW_ACTION, UVERBS_OBJECT_DM, + UVERBS_OBJECT_COUNTERS, }; enum { @@ -131,4 +132,17 @@ enum uverbs_methods_mr { UVERBS_METHOD_DM_MR_REG, }; +enum uverbs_attrs_create_counters_cmd_attr_ids { + UVERBS_ATTR_CREATE_COUNTERS_HANDLE, +}; + +enum uverbs_attrs_destroy_counters_cmd_attr_ids { + UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, +}; + +enum uverbs_methods_actions_counters_ops { + UVERBS_METHOD_COUNTERS_CREATE, + UVERBS_METHOD_COUNTERS_DESTROY, +}; + #endif -- cgit v1.2.3 From ebb6796bd397f3fb9b2b46398b2fbb585e1b8bb6 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 31 May 2018 16:43:34 +0300 Subject: IB/uverbs: Add read counters support This patch exposes the read counters verb to user space applications. By that verb the user can read the hardware counters which are associated with the counters object. The application needs to provide a sufficient memory to hold the statistics. Reviewed-by: Yishai Hadas Signed-off-by: Raed Salem Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- .../infiniband/core/uverbs_std_types_counters.c | 59 +++++++++++++++++++++- include/uapi/rdma/ib_user_ioctl_cmds.h | 7 +++ 2 files changed, 65 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 4b6a985aad97..03b182a684a6 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -80,6 +80,49 @@ err_create_counters: return ret; } +static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_counters_read_attr read_attr = {}; + const struct uverbs_attr *uattr; + struct ib_counters *counters = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE); + int ret; + + if (!ib_dev->read_counters) + return -EOPNOTSUPP; + + if (!atomic_read(&counters->usecnt)) + return -EINVAL; + + ret = uverbs_copy_from(&read_attr.flags, attrs, + UVERBS_ATTR_READ_COUNTERS_FLAGS); + if (ret) + return ret; + + uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF); + read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64); + read_attr.counters_buff = kcalloc(read_attr.ncounters, + sizeof(u64), GFP_KERNEL); + if (!read_attr.counters_buff) + return -ENOMEM; + + ret = ib_dev->read_counters(counters, + &read_attr, + attrs); + if (ret) + goto err_read; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF, + read_attr.counters_buff, + read_attr.ncounters * sizeof(u64)); + +err_read: + kfree(read_attr.counters_buff); + return ret; +} + static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE, &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, UVERBS_OBJECT_COUNTERS, @@ -93,8 +136,22 @@ static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY, UVERBS_ACCESS_DESTROY, UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX +static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ, + &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, + UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, + UVERBS_ATTR_TYPE(__u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters), &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY)); + &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ)); diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index c28ce62d2e40..888ac5975a6c 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -140,9 +140,16 @@ enum uverbs_attrs_destroy_counters_cmd_attr_ids { UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, }; +enum uverbs_attrs_read_counters_cmd_attr_ids { + UVERBS_ATTR_READ_COUNTERS_HANDLE, + UVERBS_ATTR_READ_COUNTERS_BUFF, + UVERBS_ATTR_READ_COUNTERS_FLAGS, +}; + enum uverbs_methods_actions_counters_ops { UVERBS_METHOD_COUNTERS_CREATE, UVERBS_METHOD_COUNTERS_DESTROY, + UVERBS_METHOD_COUNTERS_READ, }; #endif -- cgit v1.2.3 From b6ba4a9aa59fe99c6e9ca6ec941cd5f9823b0cae Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 31 May 2018 16:43:37 +0300 Subject: IB/uverbs: Add support for flow counters The struct ib_uverbs_flow_spec_action_count associates a counters object with the flow. Post this association the flow counters can be read via the counters object. Reviewed-by: Yishai Hadas Signed-off-by: Raed Salem Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 81 +++++++++++++++++++++++++++++++----- include/uapi/rdma/ib_user_verbs.h | 13 ++++++ 3 files changed, 84 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 5b2461fa634d..c0d40fc3a53a 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -263,6 +263,7 @@ struct ib_uverbs_flow_spec { struct ib_uverbs_flow_spec_action_tag flow_tag; struct ib_uverbs_flow_spec_action_drop drop; struct ib_uverbs_flow_spec_action_handle action; + struct ib_uverbs_flow_spec_action_count flow_count; }; }; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index c735c13e53b0..8fbeece7f831 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2742,43 +2742,82 @@ out_put: struct ib_uflow_resources { size_t max; size_t num; - struct ib_flow_action *collection[0]; + size_t collection_num; + size_t counters_num; + struct ib_counters **counters; + struct ib_flow_action **collection; }; static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) { struct ib_uflow_resources *resources; - resources = - kmalloc(sizeof(*resources) + - num_specs * sizeof(*resources->collection), GFP_KERNEL); + resources = kzalloc(sizeof(*resources), GFP_KERNEL); if (!resources) - return NULL; + goto err_res; + + resources->counters = + kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL); + + if (!resources->counters) + goto err_cnt; + + resources->collection = + kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL); + + if (!resources->collection) + goto err_collection; - resources->num = 0; resources->max = num_specs; return resources; + +err_collection: + kfree(resources->counters); +err_cnt: + kfree(resources); +err_res: + return NULL; } void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) { unsigned int i; - for (i = 0; i < uflow_res->num; i++) + for (i = 0; i < uflow_res->collection_num; i++) atomic_dec(&uflow_res->collection[i]->usecnt); + for (i = 0; i < uflow_res->counters_num; i++) + atomic_dec(&uflow_res->counters[i]->usecnt); + + kfree(uflow_res->collection); + kfree(uflow_res->counters); kfree(uflow_res); } static void flow_resources_add(struct ib_uflow_resources *uflow_res, - struct ib_flow_action *action) + enum ib_flow_spec_type type, + void *ibobj) { WARN_ON(uflow_res->num >= uflow_res->max); - atomic_inc(&action->usecnt); - uflow_res->collection[uflow_res->num++] = action; + switch (type) { + case IB_FLOW_SPEC_ACTION_HANDLE: + atomic_inc(&((struct ib_flow_action *)ibobj)->usecnt); + uflow_res->collection[uflow_res->collection_num++] = + (struct ib_flow_action *)ibobj; + break; + case IB_FLOW_SPEC_ACTION_COUNT: + atomic_inc(&((struct ib_counters *)ibobj)->usecnt); + uflow_res->counters[uflow_res->counters_num++] = + (struct ib_counters *)ibobj; + break; + default: + WARN_ON(1); + } + + uflow_res->num++; } static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, @@ -2815,9 +2854,29 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, return -EINVAL; ib_spec->action.size = sizeof(struct ib_flow_spec_action_handle); - flow_resources_add(uflow_res, ib_spec->action.act); + flow_resources_add(uflow_res, + IB_FLOW_SPEC_ACTION_HANDLE, + ib_spec->action.act); uobj_put_obj_read(ib_spec->action.act); break; + case IB_FLOW_SPEC_ACTION_COUNT: + if (kern_spec->flow_count.size != + sizeof(struct ib_uverbs_flow_spec_action_count)) + return -EINVAL; + ib_spec->flow_count.counters = + uobj_get_obj_read(counters, + UVERBS_OBJECT_COUNTERS, + kern_spec->flow_count.handle, + ucontext); + if (!ib_spec->flow_count.counters) + return -EINVAL; + ib_spec->flow_count.size = + sizeof(struct ib_flow_spec_action_count); + flow_resources_add(uflow_res, + IB_FLOW_SPEC_ACTION_COUNT, + ib_spec->flow_count.counters); + uobj_put_obj_read(ib_spec->flow_count.counters); + break; default: return -EINVAL; } diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 9be07394fdbe..f83e8ef6f056 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -998,6 +998,19 @@ struct ib_uverbs_flow_spec_action_handle { __u32 reserved1; }; +struct ib_uverbs_flow_spec_action_count { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + __u32 handle; + __u32 reserved1; +}; + struct ib_uverbs_flow_tunnel_filter { __be32 tunnel_id; }; -- cgit v1.2.3 From 3b3233fbf02ee4c5de4d635ca6c4f2566d9716df Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 31 May 2018 16:43:39 +0300 Subject: IB/mlx5: Add flow counters binding support Associates a counters with a flow when IB_FLOW_SPEC_ACTION_COUNT is part of the flow specifications. The counters user space placements of location and description (index, description) pairs are passed as private data of the counters flow specification. Reviewed-by: Yishai Hadas Signed-off-by: Raed Salem Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 222 +++++++++++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 15 +++ include/linux/mlx5/fs.h | 1 + include/uapi/rdma/mlx5-abi.h | 24 ++++ 4 files changed, 249 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 81471013b776..c52841bad4e7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2449,6 +2449,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) #define LAST_TUNNEL_FIELD tunnel_id #define LAST_FLOW_TAG_FIELD tag_id #define LAST_DROP_FIELD size +#define LAST_COUNTERS_FIELD counters /* Field is the last supported field */ #define FIELDS_NOT_SUPPORTED(filter, field)\ @@ -2721,6 +2722,18 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, if (ret) return ret; break; + case IB_FLOW_SPEC_ACTION_COUNT: + if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count, + LAST_COUNTERS_FIELD)) + return -EOPNOTSUPP; + + /* for now support only one counters spec per flow */ + if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + return -EINVAL; + + action->counters = ib_spec->flow_count.counters; + action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + break; default: return -EINVAL; } @@ -2868,6 +2881,17 @@ static void put_flow_table(struct mlx5_ib_dev *dev, } } +static void counters_clear_description(struct ib_counters *counters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + + mutex_lock(&mcounters->mcntrs_mutex); + kfree(mcounters->counters_data); + mcounters->counters_data = NULL; + mcounters->cntrs_max_index = 0; + mutex_unlock(&mcounters->mcntrs_mutex); +} + static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) { struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); @@ -2887,8 +2911,11 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) mlx5_del_flow_rules(handler->rule); put_flow_table(dev, handler->prio, true); - mutex_unlock(&dev->flow_db->lock); + if (handler->ibcounters && + atomic_read(&handler->ibcounters->usecnt) == 1) + counters_clear_description(handler->ibcounters); + mutex_unlock(&dev->flow_db->lock); kfree(handler); return 0; @@ -3008,21 +3035,127 @@ static void set_underlay_qp(struct mlx5_ib_dev *dev, } } +static int counters_set_description(struct ib_counters *counters, + enum mlx5_ib_counters_type counters_type, + struct mlx5_ib_flow_counters_desc *desc_data, + u32 ncounters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + u32 cntrs_max_index = 0; + int i; + + if (counters_type != MLX5_IB_COUNTERS_FLOW) + return -EINVAL; + + /* init the fields for the object */ + mcounters->type = counters_type; + mcounters->ncounters = ncounters; + /* each counter entry have both description and index pair */ + for (i = 0; i < ncounters; i++) { + if (desc_data[i].description > IB_COUNTER_BYTES) + return -EINVAL; + + if (cntrs_max_index <= desc_data[i].index) + cntrs_max_index = desc_data[i].index + 1; + } + + mutex_lock(&mcounters->mcntrs_mutex); + mcounters->counters_data = desc_data; + mcounters->cntrs_max_index = cntrs_max_index; + mutex_unlock(&mcounters->mcntrs_mutex); + + return 0; +} + +#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) +static int flow_counters_set_data(struct ib_counters *ibcounters, + struct mlx5_ib_create_flow *ucmd) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); + struct mlx5_ib_flow_counters_data *cntrs_data = NULL; + struct mlx5_ib_flow_counters_desc *desc_data = NULL; + bool hw_hndl = false; + int ret = 0; + + if (ucmd && ucmd->ncounters_data != 0) { + cntrs_data = ucmd->data; + if (cntrs_data->ncounters > MAX_COUNTERS_NUM) + return -EINVAL; + + desc_data = kcalloc(cntrs_data->ncounters, + sizeof(*desc_data), + GFP_KERNEL); + if (!desc_data) + return -ENOMEM; + + if (copy_from_user(desc_data, + u64_to_user_ptr(cntrs_data->counters_data), + sizeof(*desc_data) * cntrs_data->ncounters)) { + ret = -EFAULT; + goto free; + } + } + + if (!mcounters->hw_cntrs_hndl) { + mcounters->hw_cntrs_hndl = mlx5_fc_create( + to_mdev(ibcounters->device)->mdev, false); + if (!mcounters->hw_cntrs_hndl) { + ret = -ENOMEM; + goto free; + } + hw_hndl = true; + } + + if (desc_data) { + /* counters already bound to at least one flow */ + if (mcounters->cntrs_max_index) { + ret = -EINVAL; + goto free_hndl; + } + + ret = counters_set_description(ibcounters, + MLX5_IB_COUNTERS_FLOW, + desc_data, + cntrs_data->ncounters); + if (ret) + goto free_hndl; + + } else if (!mcounters->cntrs_max_index) { + /* counters not bound yet, must have udata passed */ + ret = -EINVAL; + goto free_hndl; + } + + return 0; + +free_hndl: + if (hw_hndl) { + mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, + mcounters->hw_cntrs_hndl); + mcounters->hw_cntrs_hndl = NULL; + } +free: + kfree(desc_data); + return ret; +} + static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, const struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst, - u32 underlay_qpn) + u32 underlay_qpn, + struct mlx5_ib_create_flow *ucmd) { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_flow_spec *spec; - struct mlx5_flow_destination *rule_dst = dst; + struct mlx5_flow_destination dest_arr[2] = {}; + struct mlx5_flow_destination *rule_dst = dest_arr; const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; int err = 0; - int dest_num = 1; + int dest_num = 0; bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; if (!is_valid_attr(dev->mdev, flow_attr)) @@ -3036,6 +3169,10 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } INIT_LIST_HEAD(&handler->list); + if (dst) { + memcpy(&dest_arr[0], dst, sizeof(*dst)); + dest_num++; + } for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { err = parse_flow_attr(dev->mdev, spec->match_criteria, @@ -3070,15 +3207,30 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, goto free; } + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + err = flow_counters_set_data(flow_act.counters, ucmd); + if (err) + goto free; + + handler->ibcounters = flow_act.counters; + dest_arr[dest_num].type = + MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest_arr[dest_num].counter = + to_mcounters(flow_act.counters)->hw_cntrs_hndl; + dest_num++; + } + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { - rule_dst = NULL; - dest_num = 0; + if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) { + rule_dst = NULL; + dest_num = 0; + } } else { if (is_egress) flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; else flow_act.action |= - dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + dest_num ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } @@ -3104,8 +3256,12 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, ft_prio->flow_table = ft; free: - if (err) + if (err && handler) { + if (handler->ibcounters && + atomic_read(&handler->ibcounters->usecnt) == 1) + counters_clear_description(handler->ibcounters); kfree(handler); + } kvfree(spec); return err ? ERR_PTR(err) : handler; } @@ -3115,7 +3271,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, const struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst) { - return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0); + return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); } static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, @@ -3255,12 +3411,43 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; + struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr; + size_t min_ucmd_sz, required_ucmd_sz; int err; int underlay_qpn; - if (udata && - udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen)) - return ERR_PTR(-EOPNOTSUPP); + if (udata && udata->inlen) { + min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + + sizeof(ucmd_hdr.reserved); + if (udata->inlen < min_ucmd_sz) + return ERR_PTR(-EOPNOTSUPP); + + err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz); + if (err) + return ERR_PTR(err); + + /* currently supports only one counters data */ + if (ucmd_hdr.ncounters_data > 1) + return ERR_PTR(-EINVAL); + + required_ucmd_sz = min_ucmd_sz + + sizeof(struct mlx5_ib_flow_counters_data) * + ucmd_hdr.ncounters_data; + if (udata->inlen > required_ucmd_sz && + !ib_is_udata_cleared(udata, required_ucmd_sz, + udata->inlen - required_ucmd_sz)) + return ERR_PTR(-EOPNOTSUPP); + + ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL); + if (!ucmd) + return ERR_PTR(-ENOMEM); + + err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); + if (err) { + kfree(ucmd); + return ERR_PTR(err); + } + } if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); @@ -3315,7 +3502,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ? mqp->underlay_qpn : 0; handler = _create_flow_rule(dev, ft_prio, flow_attr, - dst, underlay_qpn); + dst, underlay_qpn, ucmd); } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { @@ -3336,6 +3523,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, mutex_unlock(&dev->flow_db->lock); kfree(dst); + kfree(ucmd); return &handler->ibflow; @@ -3346,6 +3534,7 @@ destroy_ft: unlock: mutex_unlock(&dev->flow_db->lock); kfree(dst); + kfree(ucmd); kfree(handler); return ERR_PTR(err); } @@ -5010,6 +5199,11 @@ static int mlx5_ib_destroy_counters(struct ib_counters *counters) { struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + counters_clear_description(counters); + if (mcounters->hw_cntrs_hndl) + mlx5_fc_destroy(to_mdev(counters->device)->mdev, + mcounters->hw_cntrs_hndl); + kfree(mcounters); return 0; @@ -5024,6 +5218,8 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, if (!mcounters) return ERR_PTR(-ENOMEM); + mutex_init(&mcounters->mcntrs_mutex); + return &mcounters->ibcntrs; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index fd27ec1aed08..155bca627222 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -175,6 +175,7 @@ struct mlx5_ib_flow_handler { struct ib_flow ibflow; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_handle *rule; + struct ib_counters *ibcounters; }; struct mlx5_ib_flow_db { @@ -813,8 +814,22 @@ struct mlx5_memic { DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); }; +enum mlx5_ib_counters_type { + MLX5_IB_COUNTERS_FLOW, +}; + struct mlx5_ib_mcounters { struct ib_counters ibcntrs; + enum mlx5_ib_counters_type type; + void *hw_cntrs_hndl; + /* max index set as part of create_flow */ + u32 cntrs_max_index; + /* number of counters data entries ( pair) */ + u32 ncounters; + /* counters data array for descriptions and indexes */ + struct mlx5_ib_flow_counters_desc *counters_data; + /* protects access to mcounters internal data */ + struct mutex mcntrs_mutex; }; static inline struct mlx5_ib_mcounters * diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 3b4c3298061c..757b4a30281e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -160,6 +160,7 @@ struct mlx5_flow_act { u32 modify_id; uintptr_t esp_id; struct mlx5_fs_vlan vlan; + struct ib_counters *counters; }; #define MLX5_DECLARE_FLOW_ACT(name) \ diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index cb4a02c4a1ce..ab71e939eb78 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -36,6 +36,7 @@ #include #include /* For ETH_ALEN. */ +#include enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, @@ -441,4 +442,27 @@ enum { enum { MLX5_IB_CLOCK_INFO_V1 = 0, }; + +struct mlx5_ib_flow_counters_desc { + __u32 description; + __u32 index; +}; + +struct mlx5_ib_flow_counters_data { + RDMA_UAPI_PTR(struct mlx5_ib_flow_counters_desc *, counters_data); + __u32 ncounters; + __u32 reserved; +}; + +struct mlx5_ib_create_flow { + __u32 ncounters_data; + __u32 reserved; + /* + * Following are counters data based on ncounters_data, each + * entry in the data[] should match a corresponding counter object + * that was pointed by a counters spec upon the flow creation + */ + struct mlx5_ib_flow_counters_data data[]; +}; + #endif /* MLX5_ABI_USER_H */ -- cgit v1.2.3