From 68602120e496a31d8e3b36d0bfc7d9d2456fb05c Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 14 Jun 2012 20:31:39 +0000 Subject: RDMA/cma: Allow user to restrict listens to bound address family Provide an option for the user to specify that listens should only accept connections where the incoming address family matches that of the locally bound address. This is used to support the equivalent of IPV6_V6ONLY socket option, which allows an app to only accept connection requests directed to IPv6 addresses. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/rdma_cm.h | 10 ++++++++++ include/rdma/rdma_user_cm.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 51988f808181..ad3a3142383a 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -357,4 +357,14 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos); */ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse); +/** + * rdma_set_afonly - Specify that listens are restricted to the + * bound address family only. + * @id: Communication identifer to configure. + * @afonly: Value indicating if listens are restricted. + * + * Must be set before identifier is in the listening state. + */ +int rdma_set_afonly(struct rdma_cm_id *id, int afonly); + #endif /* RDMA_CM_H */ diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index 5348a000c8f3..1ee9239ff8c2 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -224,6 +224,7 @@ enum { enum { RDMA_OPTION_ID_TOS = 0, RDMA_OPTION_ID_REUSEADDR = 1, + RDMA_OPTION_ID_AFONLY = 2, RDMA_OPTION_IB_PATH = 1 }; -- cgit v1.2.3 From 752a50cab600c6d46c5a1921c6a6d2fb116c8a4b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:33 +0300 Subject: mlx4_core: Pass an invalid PCI id number to VFs Currently, VFs have 0 in their dev->caps.function field. This is a valid pci id (usually of the PF). Instead, pass an invalid PCI id to the VF via QUERY_FW, so that if the value gets accessed in the VF driver, we'll catch the problem. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/fw.c | 10 +++++++--- include/linux/mlx4/device.h | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9c83bb8151ea..4281ce09add8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -881,11 +881,12 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev) ((fw_ver & 0xffff0000ull) >> 16) | ((fw_ver & 0x0000ffffull) << 16); + MLX4_GET(lg, outbox, QUERY_FW_PPF_ID); + dev->caps.function = lg; + if (mlx4_is_slave(dev)) goto out; - MLX4_GET(lg, outbox, QUERY_FW_PPF_ID); - dev->caps.function = lg; MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET); if (cmd_if_rev < MLX4_COMMAND_INTERFACE_MIN_REV || @@ -966,9 +967,12 @@ int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; - /* for slaves, zero out everything except FW version */ + /* for slaves, set pci PPF ID to invalid and zero out everything + * else except FW version */ outbuf[0] = outbuf[1] = 0; memset(&outbuf[8], 0, QUERY_FW_OUT_SIZE - 8); + outbuf[QUERY_FW_PPF_ID] = MLX4_INVALID_SLAVE_ID; + return 0; } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6a8f002b8ed3..8eadf0f14cc5 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -534,6 +534,8 @@ struct mlx4_init_port_param { if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) +#define MLX4_INVALID_SLAVE_ID 0xFF + static inline int mlx4_is_master(struct mlx4_dev *dev) { return dev->flags & MLX4_FLAG_MASTER; -- cgit v1.2.3 From aeab97ed1503bedbe14d1e1c5ab7b90253a67664 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Tue, 19 Jun 2012 11:21:38 +0300 Subject: IB/sa: Add GuidInfoRecord query support This query is needed for SRIOV alias GUID support. The query is implemented per the IB Spec definition in section 15.2.5.18 (GuidInfoRecord). Signed-off-by: Erez Shitrit Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/sa_query.c | 133 +++++++++++++++++++++++++++++++++++++ include/rdma/ib_sa.h | 33 +++++++++ 2 files changed, 166 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index fbbfa24cf572..a8905abc56e4 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -94,6 +94,12 @@ struct ib_sa_path_query { struct ib_sa_query sa_query; }; +struct ib_sa_guidinfo_query { + void (*callback)(int, struct ib_sa_guidinfo_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + struct ib_sa_mcmember_query { void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void *context; @@ -347,6 +353,34 @@ static const struct ib_field service_rec_table[] = { .size_bits = 2*64 }, }; +#define GUIDINFO_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ + .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ + .field_name = "sa_guidinfo_rec:" #field + +static const struct ib_field guidinfo_rec_table[] = { + { GUIDINFO_REC_FIELD(lid), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 16 }, + { GUIDINFO_REC_FIELD(block_num), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 8 }, + { GUIDINFO_REC_FIELD(res1), + .offset_words = 0, + .offset_bits = 24, + .size_bits = 8 }, + { GUIDINFO_REC_FIELD(res2), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 32 }, + { GUIDINFO_REC_FIELD(guid_info_list), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 512 }, +}; + static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -945,6 +979,105 @@ err1: return ret; } +/* Support GuidInfoRecord */ +static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_guidinfo_query *query = + container_of(sa_query, struct ib_sa_guidinfo_query, sa_query); + + if (mad) { + struct ib_sa_guidinfo_rec rec; + + ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else + query->callback(status, NULL, query->context); +} + +static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query) +{ + kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query)); +} + +int ib_sa_guid_info_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_guidinfo_rec *rec, + ib_sa_comp_mask comp_mask, u8 method, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_guidinfo_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_guidinfo_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; + int ret; + + if (!sa_dev) + return -ENODEV; + + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_SA_METHOD_DELETE) { + return -EINVAL; + } + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + query = kmalloc(sizeof *query, gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); + + query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL; + query->sa_query.release = ib_sa_guidinfo_rec_release; + + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC); + mad->sa_hdr.comp_mask = comp_mask; + + ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec, + mad->data); + + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + free_mad(&query->sa_query); + +err1: + kfree(query); + return ret; +} +EXPORT_SYMBOL(ib_sa_guid_info_rec_query); + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index d44a56388a3e..8275e539bace 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -251,6 +251,28 @@ struct ib_sa_service_rec { u64 data64[2]; }; +#define IB_SA_GUIDINFO_REC_LID IB_SA_COMP_MASK(0) +#define IB_SA_GUIDINFO_REC_BLOCK_NUM IB_SA_COMP_MASK(1) +#define IB_SA_GUIDINFO_REC_RES1 IB_SA_COMP_MASK(2) +#define IB_SA_GUIDINFO_REC_RES2 IB_SA_COMP_MASK(3) +#define IB_SA_GUIDINFO_REC_GID0 IB_SA_COMP_MASK(4) +#define IB_SA_GUIDINFO_REC_GID1 IB_SA_COMP_MASK(5) +#define IB_SA_GUIDINFO_REC_GID2 IB_SA_COMP_MASK(6) +#define IB_SA_GUIDINFO_REC_GID3 IB_SA_COMP_MASK(7) +#define IB_SA_GUIDINFO_REC_GID4 IB_SA_COMP_MASK(8) +#define IB_SA_GUIDINFO_REC_GID5 IB_SA_COMP_MASK(9) +#define IB_SA_GUIDINFO_REC_GID6 IB_SA_COMP_MASK(10) +#define IB_SA_GUIDINFO_REC_GID7 IB_SA_COMP_MASK(11) + +struct ib_sa_guidinfo_rec { + __be16 lid; + u8 block_num; + /* reserved */ + u8 res1; + __be32 res2; + u8 guid_info_list[64]; +}; + struct ib_sa_client { atomic_t users; struct completion comp; @@ -385,4 +407,15 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, */ void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec); +/* Support GuidInfoRecord */ +int ib_sa_guid_info_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_guidinfo_rec *rec, + ib_sa_comp_mask comp_mask, u8 method, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_guidinfo_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query); #endif /* IB_SA_H */ -- cgit v1.2.3 From 3045f0920367e625bbec7d66fadb444e673515af Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:39 +0300 Subject: IB/core: Move CM_xxx_ATTR_ID macros from cm_msgs.h to ib_cm.h These macros will be reused by the mlx4 SRIOV-IB CM paravirtualization code, and there is no reason to have them declared both in the IB core in the mlx4 IB driver. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm_msgs.h | 12 ------------ include/rdma/ib_cm.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 7da9b2102341..be068f47e47e 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -44,18 +44,6 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -#define CM_REQ_ATTR_ID cpu_to_be16(0x0010) -#define CM_MRA_ATTR_ID cpu_to_be16(0x0011) -#define CM_REJ_ATTR_ID cpu_to_be16(0x0012) -#define CM_REP_ATTR_ID cpu_to_be16(0x0013) -#define CM_RTU_ATTR_ID cpu_to_be16(0x0014) -#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015) -#define CM_DREP_ATTR_ID cpu_to_be16(0x0016) -#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017) -#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018) -#define CM_LAP_ATTR_ID cpu_to_be16(0x0019) -#define CM_APR_ATTR_ID cpu_to_be16(0x001A) - enum cm_msg_sequence { CM_MSG_SEQUENCE_REQ, CM_MSG_SEQUENCE_LAP, diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 83f77ac33957..0e3ff30647d5 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -262,6 +262,18 @@ struct ib_cm_event { void *private_data; }; +#define CM_REQ_ATTR_ID cpu_to_be16(0x0010) +#define CM_MRA_ATTR_ID cpu_to_be16(0x0011) +#define CM_REJ_ATTR_ID cpu_to_be16(0x0012) +#define CM_REP_ATTR_ID cpu_to_be16(0x0013) +#define CM_RTU_ATTR_ID cpu_to_be16(0x0014) +#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015) +#define CM_DREP_ATTR_ID cpu_to_be16(0x0016) +#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017) +#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018) +#define CM_LAP_ATTR_ID cpu_to_be16(0x0019) +#define CM_APR_ATTR_ID cpu_to_be16(0x001A) + /** * ib_cm_handler - User-defined callback to process communication events. * @cm_id: Communication identifier associated with the reported event. -- cgit v1.2.3 From 00f5ce99dc6ee46c3113393cc8fa12173f9bbcd7 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:40 +0300 Subject: mlx4: Use port management change event instead of smp_snoop The port management change event can replace smp_snoop. If the capability bit for this event is set in dev-caps, the event is used (by the driver setting the PORT_MNG_CHG_EVENT bit in the async event mask in the MAP_EQ fw command). In this case, when the driver passes incoming SMP PORT_INFO SET mads to the FW, the FW generates port management change events to signal any changes to the driver. If the FW generates these events, smp_snoop shouldn't be invoked in ib_process_mad(), or duplicate events will occur (once from the FW-generated event, and once from smp_snoop). In the case where the FW does not generate port management change events smp_snoop needs to be invoked to create these events. The flow in smp_snoop has been modified to make use of the same procedures as in the fw-generated-event event case to generate the port management events (LID change, Client-rereg, Pkey change, and/or GID change). Port management change event handling required changing the mlx4_ib_event and mlx4_dispatch_event prototypes; the "param" argument (last argument) had to be changed to unsigned long in order to accomodate passing the EQE pointer. We also needed to move the definition of struct mlx4_eqe from net/mlx4.h to file device.h -- to make it available to the IB driver, to handle port management change events. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 118 ++++++++++++++++++++------- drivers/infiniband/hw/mlx4/main.c | 29 +++++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 9 ++ drivers/net/ethernet/mellanox/mlx4/en_main.c | 5 +- drivers/net/ethernet/mellanox/mlx4/eq.c | 22 ++++- drivers/net/ethernet/mellanox/mlx4/fw.c | 1 + drivers/net/ethernet/mellanox/mlx4/intf.c | 5 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 63 +------------- include/linux/mlx4/device.h | 99 +++++++++++++++++++++- include/linux/mlx4/driver.h | 3 +- 10 files changed, 249 insertions(+), 105 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 84786a9fb64f..58c45fb5bd31 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -147,47 +147,49 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) } /* - * Snoop SM MADs for port info and P_Key table sets, so we can - * synthesize LID change and P_Key change events. + * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can + * synthesize LID change, Client-Rereg, GID change, and P_Key change events. */ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, - u16 prev_lid) + u16 prev_lid) { - struct ib_event event; + struct ib_port_info *pinfo; + u16 lid; + struct mlx4_ib_dev *dev = to_mdev(ibdev); if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && - mad->mad_hdr.method == IB_MGMT_METHOD_SET) { - if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { - struct ib_port_info *pinfo = - (struct ib_port_info *) ((struct ib_smp *) mad)->data; - u16 lid = be16_to_cpu(pinfo->lid); + mad->mad_hdr.method == IB_MGMT_METHOD_SET) + switch (mad->mad_hdr.attr_id) { + case IB_SMP_ATTR_PORT_INFO: + pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; + lid = be16_to_cpu(pinfo->lid); - update_sm_ah(to_mdev(ibdev), port_num, + update_sm_ah(dev, port_num, be16_to_cpu(pinfo->sm_lid), pinfo->neighbormtu_mastersmsl & 0xf); - event.device = ibdev; - event.element.port_num = port_num; + if (pinfo->clientrereg_resv_subnetto & 0x80) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_CLIENT_REREGISTER); - if (pinfo->clientrereg_resv_subnetto & 0x80) { - event.event = IB_EVENT_CLIENT_REREGISTER; - ib_dispatch_event(&event); - } + if (prev_lid != lid) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_LID_CHANGE); + break; - if (prev_lid != lid) { - event.event = IB_EVENT_LID_CHANGE; - ib_dispatch_event(&event); - } - } + case IB_SMP_ATTR_PKEY_TABLE: + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_PKEY_CHANGE); + break; - if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { - event.device = ibdev; - event.event = IB_EVENT_PKEY_CHANGE; - event.element.port_num = port_num; - ib_dispatch_event(&event); + case IB_SMP_ATTR_GUID_INFO: + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_GID_CHANGE); + break; + default: + break; } - } } static void node_desc_override(struct ib_device *dev, @@ -305,7 +307,8 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_FAILURE; if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad, prev_lid); + if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) + smp_snoop(ibdev, port_num, in_mad, prev_lid); node_desc_override(ibdev, out_mad); } @@ -446,3 +449,62 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) ib_destroy_ah(dev->sm_ah[p]); } } + +void handle_port_mgmt_change_event(struct work_struct *work) +{ + struct ib_event_work *ew = container_of(work, struct ib_event_work, work); + struct mlx4_ib_dev *dev = ew->ib_dev; + struct mlx4_eqe *eqe = &(ew->ib_eqe); + u8 port = eqe->event.port_mgmt_change.port; + u32 changed_attr; + + switch (eqe->subtype) { + case MLX4_DEV_PMC_SUBTYPE_PORT_INFO: + changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr); + + /* Update the SM ah - This should be done before handling + the other changed attributes so that MADs can be sent to the SM */ + if (changed_attr & MSTR_SM_CHANGE_MASK) { + u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid); + u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf; + update_sm_ah(dev, port, lid, sl); + } + + /* Check if it is a lid change event */ + if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_LID_CHANGE); + + /* Generate GUID changed event */ + if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + + if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK) + mlx4_ib_dispatch_event(dev, port, + IB_EVENT_CLIENT_REREGISTER); + break; + + case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE: + mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); + break; + case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + break; + default: + pr_warn("Unsupported subtype 0x%x for " + "Port Management Change event\n", eqe->subtype); + } + + kfree(ew); +} + +void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, + enum ib_event_type type) +{ + struct ib_event event; + + event.device = &dev->ib_dev; + event.element.port_num = port_num; + event.event = type; + + ib_dispatch_event(&event); +} diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 5266b49c46ee..4f230c26622d 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -898,7 +898,6 @@ static void update_gids_task(struct work_struct *work) union ib_gid *gids; int err; struct mlx4_dev *dev = gw->dev->dev; - struct ib_event event; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { @@ -916,10 +915,7 @@ static void update_gids_task(struct work_struct *work) pr_warn("set port command failed\n"); else { memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); - event.device = &gw->dev->ib_dev; - event.element.port_num = gw->port; - event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); + mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); } mlx4_free_cmd_mailbox(dev, mailbox); @@ -1383,10 +1379,18 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) } static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, - enum mlx4_dev_event event, int port) + enum mlx4_dev_event event, unsigned long param) { struct ib_event ibev; struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); + struct mlx4_eqe *eqe = NULL; + struct ib_event_work *ew; + int port = 0; + + if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) + eqe = (struct mlx4_eqe *)param; + else + port = (u8)param; if (port > ibdev->num_ports) return; @@ -1405,6 +1409,19 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, ibev.event = IB_EVENT_DEVICE_FATAL; break; + case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: + ew = kmalloc(sizeof *ew, GFP_ATOMIC); + if (!ew) { + pr_err("failed to allocate memory for events work\n"); + break; + } + + INIT_WORK(&ew->work, handle_port_mgmt_change_event); + memcpy(&ew->ib_eqe, eqe, sizeof *eqe); + ew->ib_dev = ibdev; + handle_port_mgmt_change_event(&ew->work); + return; + default: return; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 5f298afaa81f..23bfbf9ee0e0 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -224,6 +224,12 @@ struct mlx4_ib_dev { int eq_added; }; +struct ib_event_work { + struct work_struct work; + struct mlx4_ib_dev *ib_dev; + struct mlx4_eqe ib_eqe; +}; + static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); @@ -381,4 +387,7 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid); +void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, + enum ib_event_type type); + #endif /* MLX4_IB_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 69ba57270481..a52922ed85c1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -131,7 +131,7 @@ static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port) } static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr, - enum mlx4_dev_event event, int port) + enum mlx4_dev_event event, unsigned long port) { struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr; struct mlx4_en_priv *priv; @@ -156,7 +156,8 @@ static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr, if (port < 1 || port > dev->caps.num_ports || !mdev->pndev[port]) return; - mlx4_warn(mdev, "Unhandled event %d for port %d\n", event, port); + mlx4_warn(mdev, "Unhandled event %d for port %d\n", event, + (int) port); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index bce98d9c0039..9b15d0219950 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -82,6 +82,15 @@ enum { (1ull << MLX4_EVENT_TYPE_FLR_EVENT) | \ (1ull << MLX4_EVENT_TYPE_FATAL_WARNING)) +static u64 get_async_ev_mask(struct mlx4_dev *dev) +{ + u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT); + + return async_ev_mask; +} + static void eq_set_ci(struct mlx4_eq *eq, int req_not) { __raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) | @@ -473,6 +482,11 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; + case MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT: + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_MGMT_CHANGE, + (unsigned long) eqe); + break; + case MLX4_EVENT_TYPE_EEC_CATAS_ERROR: case MLX4_EVENT_TYPE_ECC_DETECT: default: @@ -956,7 +970,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) priv->eq_table.have_irq = 1; } - err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); if (err) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", @@ -996,7 +1010,7 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; - mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1, + mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1, priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); mlx4_free_irqs(dev); @@ -1040,7 +1054,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) mlx4_cmd_use_polling(dev); /* Map the new eq to handle all asyncronous events */ - err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, priv->eq_table.eq[i].eqn); if (err) { mlx4_warn(dev, "Failed mapping eq for interrupt test\n"); @@ -1054,7 +1068,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) } /* Return to default */ - mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 4281ce09add8..ee9d6b0b4d20 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -109,6 +109,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [41] = "Unicast VEP steering support", [42] = "Multicast VEP steering support", [48] = "Counters support", + [59] = "Port management change event support", }; int i; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index b4e9f6f5cc04..116895ac8b35 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -115,7 +115,8 @@ void mlx4_unregister_interface(struct mlx4_interface *intf) } EXPORT_SYMBOL_GPL(mlx4_unregister_interface); -void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port) +void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, + unsigned long param) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_device_context *dev_ctx; @@ -125,7 +126,7 @@ void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int por list_for_each_entry(dev_ctx, &priv->ctx_list, list) if (dev_ctx->intf->event) - dev_ctx->intf->event(dev, dev_ctx->context, type, port); + dev_ctx->intf->event(dev, dev_ctx->context, type, param); spin_unlock_irqrestore(&priv->ctx_lock, flags); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e5d20220762c..4d11d12b9db4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -338,66 +338,6 @@ struct mlx4_srq_context { __be64 db_rec_addr; }; -struct mlx4_eqe { - u8 reserved1; - u8 type; - u8 reserved2; - u8 subtype; - union { - u32 raw[6]; - struct { - __be32 cqn; - } __packed comp; - struct { - u16 reserved1; - __be16 token; - u32 reserved2; - u8 reserved3[3]; - u8 status; - __be64 out_param; - } __packed cmd; - struct { - __be32 qpn; - } __packed qp; - struct { - __be32 srqn; - } __packed srq; - struct { - __be32 cqn; - u32 reserved1; - u8 reserved2[3]; - u8 syndrome; - } __packed cq_err; - struct { - u32 reserved1[2]; - __be32 port; - } __packed port_change; - struct { - #define COMM_CHANNEL_BIT_ARRAY_SIZE 4 - u32 reserved; - u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE]; - } __packed comm_channel_arm; - struct { - u8 port; - u8 reserved[3]; - __be64 mac; - } __packed mac_update; - struct { - u8 port; - } __packed sw_event; - struct { - __be32 slave_id; - } __packed flr_event; - struct { - __be16 current_temperature; - __be16 warning_threshold; - } __packed warming; - } event; - u8 slave_id; - u8 reserved3[2]; - u8 owner; -} __packed; - struct mlx4_eq { struct mlx4_dev *dev; void __iomem *doorbell; @@ -887,7 +827,8 @@ void mlx4_catas_init(void); int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); -void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port); +void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, + unsigned long param); struct mlx4_dev_cap; struct mlx4_init_hca_param; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8eadf0f14cc5..560b2201519f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -96,7 +96,8 @@ enum { MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41, MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42, MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, - MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55 + MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, + MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, }; enum { @@ -138,6 +139,7 @@ enum mlx4_event { MLX4_EVENT_TYPE_COMM_CHANNEL = 0x18, MLX4_EVENT_TYPE_FATAL_WARNING = 0x1b, MLX4_EVENT_TYPE_FLR_EVENT = 0x1c, + MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT = 0x1d, MLX4_EVENT_TYPE_NONE = 0xff, }; @@ -235,6 +237,24 @@ enum { MLX4_MAX_FAST_REG_PAGES = 511, }; +enum { + MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14, + MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15, + MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE = 0x16, +}; + +/* Port mgmt change event handling */ +enum { + MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK = 1 << 0, + MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK = 1 << 1, + MLX4_EQ_PORT_INFO_LID_CHANGE_MASK = 1 << 2, + MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK = 1 << 3, + MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, +}; + +#define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ + MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) + static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; @@ -511,6 +531,81 @@ struct mlx4_dev { int num_vfs; }; +struct mlx4_eqe { + u8 reserved1; + u8 type; + u8 reserved2; + u8 subtype; + union { + u32 raw[6]; + struct { + __be32 cqn; + } __packed comp; + struct { + u16 reserved1; + __be16 token; + u32 reserved2; + u8 reserved3[3]; + u8 status; + __be64 out_param; + } __packed cmd; + struct { + __be32 qpn; + } __packed qp; + struct { + __be32 srqn; + } __packed srq; + struct { + __be32 cqn; + u32 reserved1; + u8 reserved2[3]; + u8 syndrome; + } __packed cq_err; + struct { + u32 reserved1[2]; + __be32 port; + } __packed port_change; + struct { + #define COMM_CHANNEL_BIT_ARRAY_SIZE 4 + u32 reserved; + u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE]; + } __packed comm_channel_arm; + struct { + u8 port; + u8 reserved[3]; + __be64 mac; + } __packed mac_update; + struct { + __be32 slave_id; + } __packed flr_event; + struct { + __be16 current_temperature; + __be16 warning_threshold; + } __packed warming; + struct { + u8 reserved[3]; + u8 port; + union { + struct { + __be16 mstr_sm_lid; + __be16 port_lid; + __be32 changed_attr; + u8 reserved[3]; + u8 mstr_sm_sl; + __be64 gid_prefix; + } __packed port_info; + struct { + __be32 block_ptr; + __be32 tbl_entries_mask; + } __packed tbl_change_info; + } params; + } __packed port_mgmt_change; + } event; + u8 slave_id; + u8 reserved3[2]; + u8 owner; +} __packed; + struct mlx4_init_port_param { int set_guid0; int set_node_guid; @@ -536,6 +631,8 @@ struct mlx4_init_port_param { #define MLX4_INVALID_SLAVE_ID 0xFF +void handle_port_mgmt_change_event(struct work_struct *work); + static inline int mlx4_is_master(struct mlx4_dev *dev) { return dev->flags & MLX4_FLAG_MASTER; diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 5f1298b1b5ef..0f509229fb3d 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -42,13 +42,14 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_PORT_UP, MLX4_DEV_EVENT_PORT_DOWN, MLX4_DEV_EVENT_PORT_REINIT, + MLX4_DEV_EVENT_PORT_MGMT_CHANGE, }; struct mlx4_interface { void * (*add) (struct mlx4_dev *dev); void (*remove)(struct mlx4_dev *dev, void *context); void (*event) (struct mlx4_dev *dev, void *context, - enum mlx4_dev_event event, int port); + enum mlx4_dev_event event, unsigned long param); void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port); struct list_head list; enum mlx4_protocol protocol; -- cgit v1.2.3 From 2aca1172c2f5b27fbc37297574f716c1c15f4153 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:41 +0300 Subject: net/mlx4_core: Initialize IB port capabilities for all slaves With IB SR-IOV, each slave has its own separate copy of the port capabilities flags. For example, the master can run a subnet manager (which causes the IsSM bit to be set in the master's port capabilities) without affecting the port capabilities seen by the slaves (the IsSM bit will be seen as cleared in the slaves). Also add a static inline mlx4_master_func_num() to enhance readability of the code. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ include/linux/mlx4/device.h | 5 +++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a0313de122de..83afb1541a74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1477,6 +1477,17 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) "with caps = 0\n", port, err); dev->caps.ib_port_def_cap[port] = ib_port_default_caps; + /* initialize per-slave default ib port capabilities */ + if (mlx4_is_master(dev)) { + int i; + for (i = 0; i < dev->num_slaves; i++) { + if (i == mlx4_master_func_num(dev)) + continue; + priv->mfunc.master.slave_state[i].ib_cap_mask[port] = + ib_port_default_caps; + } + } + if (mlx4_is_mfunc(dev)) dev->caps.port_ib_mtu[port] = IB_MTU_2048; else diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 560b2201519f..7fbdc89de495 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -633,6 +633,11 @@ struct mlx4_init_port_param { void handle_port_mgmt_change_event(struct work_struct *work); +static inline int mlx4_master_func_num(struct mlx4_dev *dev) +{ + return dev->caps.function; +} + static inline int mlx4_is_master(struct mlx4_dev *dev) { return dev->flags & MLX4_FLAG_MASTER; -- cgit v1.2.3 From 396f2feb05d7cc5549c611c05abfb4108cd1c6d6 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:42 +0300 Subject: mlx4_core: Implement mechanism for reserved Q_Keys The SR-IOV special QP tunneling mechanism uses proxy special QPs (instead of the real special QPs) for MADs on guests. These proxy QPs send their packets to a "tunnel" QP owned by the master. The master then forwards the MAD (after any required paravirtualization) to the real special QP, which sends out the MAD. For security reasons (i.e., to prevent guests from sending MADs to tunnel QPs belonging to other guests), each proxy-tunnel QP pair is assigned a unique, reserved, Q_Key. These Q_Keys are available only for proxy and tunnel QPs -- if the guest tries to use these Q_Keys with other QPs, it will fail. This patch introduces a mechanism for reserving a block of 64K Q_Keys for proxy/tunneling use. The patch introduces also two new fields into mlx4_dev: base_sqpn and base_tunnel_sqpn. In SR-IOV mode, the QP numbers for the "real," proxy, and tunnel sqps are added to the reserved QPN area (so that they will not change). There are 8 special QPs per port in the HCA, and each of them is assigned both a proxy and a tunnel QP, for each VF and for the PF as well in SR-IOV mode. The QPNs for these QPs are arranged as follows: 1. The real SQP numbers (8) 2. The proxy SQPs (8 * (max number of VFs + max number of PFs) 3. The tunnel SQPs (8 * (max number of VFs + max number of PFs) To support these QPs, two new fields are added to struct mlx4_dev: base_sqp: this is the QP number of the first of the real SQPs base_tunnel_sqp: this is the qp number of the first qp in the tunnel sqp region. (On guests, this is the first tunnel sqp of the 8 which are assigned to that guest). In addition, in SR-IOV mode, sqp_start is the number of the first proxy SQP in the proxy SQP region. (In guests, this is the first proxy SQP of the 8 which are assigned to that guest) Note that in non-SR-IOV mode, there are no proxies and no tunnels. In this case, sqp_start is set to sqp_base -- which minimizes code changes. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/main.c | 17 +++++++++++++++++ include/linux/mlx4/device.h | 11 +++++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 83afb1541a74..81154a16d6b8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -391,6 +391,23 @@ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) return ret; } +int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) +{ + u32 qk = MLX4_RESERVED_QKEY_BASE; + if (qpn >= dev->caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || + qpn < dev->caps.sqp_start) + return -EINVAL; + + if (qpn >= dev->caps.base_tunnel_sqpn) + /* tunnel qp */ + qk += qpn - dev->caps.base_tunnel_sqpn; + else + qk += qpn - dev->caps.sqp_start; + *qkey = qk; + return 0; +} +EXPORT_SYMBOL(mlx4_get_parav_qkey); + int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7fbdc89de495..c30a314e095c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -56,6 +56,13 @@ enum { MLX4_MAX_PORTS = 2 }; +/* base qkey for use in sriov tunnel-qp/proxy-qp communication. + * These qkeys must not be allowed for general use. This is a 64k range, + * and to test for violation, we use the mask (protect against future chg). + */ +#define MLX4_RESERVED_QKEY_BASE (0xFFFF0000) +#define MLX4_RESERVED_QKEY_MASK (0xFFFF0000) + enum { MLX4_BOARD_ID_LEN = 64 }; @@ -293,6 +300,8 @@ struct mlx4_caps { int max_qp_init_rdma; int max_qp_dest_rdma; int sqp_start; + u32 base_sqpn; + u32 base_tunnel_sqpn; int num_srqs; int max_srq_wqes; int max_srq_sge; @@ -772,4 +781,6 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); + #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 6634961c14d38ef64ec284c07aecb03d3dd03b4a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Jun 2012 11:21:44 +0300 Subject: mlx4: Put physical GID and P_Key table sizes in mlx4_phys_caps struct and paravirtualize them To allow easy paravirtualization of P_Key and GID table sizes, keep paravirtualized sizes in mlx4_dev->caps, but save the actual physical sizes from FW in struct: mlx4_dev->phys_cap. In addition, in SR-IOV mode, do the following: 1. Reduce reported P_Key table size by 1. This is done to reserve the highest P_Key index for internal use, for declaring an invalid P_Key in P_Key paravirtualization. We require a P_Key index which always contain an invalid P_Key value for this purpose (i.e., one which cannot be modified by the subnet manager). The way to do this is to reduce the P_Key table size reported to the subnet manager by 1, so that it will not attempt to access the P_Key at index #127. 2. Paravirtualize the GID table size to 1. Thus, each guest sees only a single GID (at its paravirtualized index 0). In addition, since we are paravirtualizing the GID table size to 1, we add paravirtualization of the master GID event here (i.e., we do not do ib_dispatch_event() for the GUID change event on the master, since its (only) GUID never changes). Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 10 ++++--- drivers/net/ethernet/mellanox/mlx4/fw.c | 43 +++++++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 32 ++++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 ++- drivers/net/ethernet/mellanox/mlx4/port.c | 11 ++++++-- include/linux/mlx4/device.h | 2 ++ 6 files changed, 92 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 58c45fb5bd31..c27141fef1ab 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -184,8 +184,10 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, break; case IB_SMP_ATTR_GUID_INFO: - mlx4_ib_dispatch_event(dev, port_num, - IB_EVENT_GID_CHANGE); + /* paravirtualized master's guid is guid 0 -- does not change */ + if (!mlx4_is_master(dev->dev)) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_GID_CHANGE); break; default: break; @@ -487,7 +489,9 @@ void handle_port_mgmt_change_event(struct work_struct *work) mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); break; case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: - mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + /* paravirtualized master's guid is guid 0 -- does not change */ + if (!mlx4_is_master(dev->dev)) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); break; default: pr_warn("Unsupported subtype 0x%x for " diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 5549f6b3bb67..473d63b63b4e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -730,9 +730,12 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, { u64 def_mac; u8 port_type; + u16 short_field; int err; #define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0 +#define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c +#define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, @@ -755,11 +758,51 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); + + short_field = 1; /* slave max gids */ + MLX4_PUT(outbox->buf, short_field, + QUERY_PORT_CUR_MAX_GID_OFFSET); + + short_field = dev->caps.pkey_table_len[vhcr->in_modifier]; + MLX4_PUT(outbox->buf, short_field, + QUERY_PORT_CUR_MAX_PKEY_OFFSET); } return err; } +int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port, + int *gid_tbl_len, int *pkey_tbl_len) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u16 field; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + goto out; + + outbox = mailbox->buf; + + MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_GID_OFFSET); + *gid_tbl_len = field; + + MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_PKEY_OFFSET); + *pkey_tbl_len = field; + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_get_slave_pkey_gid_tbl_len); + int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 58544b72bacb..5df3ac40a490 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -215,6 +215,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.vl_cap[i] = dev_cap->max_vl[i]; dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; + dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; + dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; + /* set gid and pkey table operating lengths by default + * to non-sriov values */ dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; @@ -498,8 +502,13 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } - for (i = 1; i <= dev->caps.num_ports; ++i) + for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.port_mask[i] = dev->caps.port_type[i]; + if (mlx4_get_slave_pkey_gid_tbl_len(dev, i, + &dev->caps.gid_table_len[i], + &dev->caps.pkey_table_len[i])) + return -ENODEV; + } if (dev->caps.uar_page_size * (dev->caps.num_uars - dev->caps.reserved_uars) > @@ -536,7 +545,7 @@ int mlx4_change_port_types(struct mlx4_dev *dev, for (port = 1; port <= dev->caps.num_ports; port++) { mlx4_CLOSE_PORT(dev, port); dev->caps.port_type[port] = port_types[port - 1]; - err = mlx4_SET_PORT(dev, port); + err = mlx4_SET_PORT(dev, port, -1); if (err) { mlx4_err(dev, "Failed to set port %d, " "aborting\n", port); @@ -722,7 +731,7 @@ static ssize_t set_port_ib_mtu(struct device *dev, mlx4_unregister_device(mdev); for (port = 1; port <= mdev->caps.num_ports; port++) { mlx4_CLOSE_PORT(mdev, port); - err = mlx4_SET_PORT(mdev, port); + err = mlx4_SET_PORT(mdev, port, -1); if (err) { mlx4_err(mdev, "Failed to set port %d, " "aborting\n", port); @@ -1173,6 +1182,17 @@ err: return -EIO; } +static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) +{ + int i; + + for (i = 1; i <= dev->caps.num_ports; i++) { + dev->caps.gid_table_len[i] = 1; + dev->caps.pkey_table_len[i] = + dev->phys_caps.pkey_phys_table_len[i] - 1; + } +} + static int mlx4_init_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1212,6 +1232,9 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto err_stop_fw; } + if (mlx4_is_master(dev)) + mlx4_parav_master_pf_caps(dev); + profile = default_profile; icm_size = mlx4_make_profile(dev, &profile, &dev_cap, @@ -1500,7 +1523,8 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) else dev->caps.port_ib_mtu[port] = IB_MTU_4096; - err = mlx4_SET_PORT(dev, port); + err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ? + dev->caps.pkey_table_len[port] : -1); if (err) { mlx4_err(dev, "Failed to set port %d, aborting\n", port); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 4d11d12b9db4..cde6e511899f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -969,7 +969,7 @@ int mlx4_change_port_types(struct mlx4_dev *dev, void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); -int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port); +int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz); /* resource tracker functions*/ int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, enum mlx4_resource resource_type, @@ -1012,6 +1012,8 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd); int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps); +int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port, + int *gid_tbl_len, int *pkey_tbl_len); int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index a8fb52992c64..90dc47542b8b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -726,14 +726,15 @@ int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave, enum { MLX4_SET_PORT_VL_CAP = 4, /* bits 7:4 */ MLX4_SET_PORT_MTU_CAP = 12, /* bits 15:12 */ + MLX4_CHANGE_PORT_PKEY_TBL_SZ = 20, MLX4_CHANGE_PORT_VL_CAP = 21, MLX4_CHANGE_PORT_MTU_CAP = 22, }; -int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port) +int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz) { struct mlx4_cmd_mailbox *mailbox; - int err, vl_cap; + int err, vl_cap, pkey_tbl_flag = 0; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) return 0; @@ -746,11 +747,17 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port) ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port]; + if (pkey_tbl_sz >= 0 && mlx4_is_master(dev)) { + pkey_tbl_flag = 1; + ((__be16 *) mailbox->buf)[20] = cpu_to_be16(pkey_tbl_sz); + } + /* IB VL CAP enum isn't used by the firmware, just numerical values */ for (vl_cap = 8; vl_cap >= 1; vl_cap >>= 1) { ((__be32 *) mailbox->buf)[0] = cpu_to_be32( (1 << MLX4_CHANGE_PORT_MTU_CAP) | (1 << MLX4_CHANGE_PORT_VL_CAP) | + (pkey_tbl_flag << MLX4_CHANGE_PORT_PKEY_TBL_SZ) | (dev->caps.port_ib_mtu[port] << MLX4_SET_PORT_MTU_CAP) | (vl_cap << MLX4_SET_PORT_VL_CAP)); err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c30a314e095c..441caf1a497d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -268,6 +268,8 @@ static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) } struct mlx4_phys_caps { + u32 gid_phys_table_len[MLX4_MAX_PORTS + 1]; + u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1]; u32 num_phys_eqs; }; -- cgit v1.2.3