From a917374e8a20cdfbaaaaaa2b01331dc063d38848 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Mon, 2 Oct 2017 11:04:33 -0700 Subject: IB/core: Use __be32 for LIDs in opa_is_extended_lid The LIDs passed to opa_extended_lid are in __be32 format, change function signature accordingly. This fixes the following sparse warnings: drivers/infiniband/core/cm.c:1181:60: warning: incorrect type in argument 1 (different ba drivers/infiniband/core/cm.c:1182:60: warning: incorrect type in argument 2 (different ba drivers/infiniband/core/cm.c:1242:68: warning: incorrect type in argument 1 (different ba drivers/infiniband/core/cm.c:1243:68: warning: incorrect type in argument 2 (different ba drivers/infiniband/core/cm.c:2922:66: warning: incorrect type in argument 1 (different ba drivers/infiniband/core/cm.c:2923:66: warning: incorrect type in argument 2 (different ba include/rdma/opa_addr.h:102:14: warning: cast to restricted __be32 Fixes: e92aa00a5189 ("IB/CM: Add OPA Path record support to CM") Reviewed-by: Mike Marciniszyn Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/opa_addr.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index e6e90f18e6d5..f68fca296631 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -97,15 +97,15 @@ static inline u32 opa_get_lid_from_gid(const union ib_gid *gid) * @dlid: The DLID * @slid: The SLID */ -static inline bool opa_is_extended_lid(u32 dlid, u32 slid) +static inline bool opa_is_extended_lid(__be32 dlid, __be32 slid) { if ((be32_to_cpu(dlid) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) || (be32_to_cpu(slid) >= be16_to_cpu(IB_MULTICAST_LID_BASE))) return true; - else - return false; + + return false; } /* Get multicast lid base */ -- cgit v1.2.3 From 7ebfc93edcefa0c4c09f194ebdace9ddae15efa6 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Mon, 2 Oct 2017 11:04:41 -0700 Subject: IB/rdmavt: Correct issues with read-mostly and send size cache lines The s_ahgpsn was incorrectly placed in the read-mostly section of the QP and the s_curr_size and s_hdrwords are oversized. The misplaced s_ahgpsn will cause the read-mostly cachelines to thrash. Place s_ahgpsn in the send side cache lines and correctly size and s_hdrwords and s_cur_size to keep the send side cachelines at the same size. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 ++-- include/rdma/rdmavt_qp.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 98bcd06c82d7..7befba8fceb2 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8739,8 +8739,8 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state) return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL); } -int _load_8051_config(struct hfi1_devdata *dd, u8 field_id, - u8 lane_id, u32 config_data) +static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id, + u8 lane_id, u32 config_data) { u64 data; int ret; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 0eed3d8752fa..89ab88c342b6 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -282,7 +282,6 @@ struct rvt_qp { u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ - u32 s_ahgpsn; /* set to the psn in the copy of the header */ u16 pmtu; /* decoded from path_mtu */ u8 log_pmtu; /* shift for pmtu */ @@ -344,7 +343,6 @@ struct rvt_qp { struct rvt_swqe *s_wqe; struct rvt_sge_state s_sge; /* current send request data */ struct rvt_mregion *s_rdma_mr; - u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ u32 s_last_psn; /* last response PSN processed */ @@ -358,8 +356,10 @@ struct rvt_qp { u32 s_acked; /* last un-ACK'ed entry */ u32 s_last; /* last completed entry */ u32 s_lsn; /* limit sequence number (credit) */ - u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u32 s_ahgpsn; /* set to the psn in the copy of the header */ + u16 s_cur_size; /* size of send packet in bytes */ u16 s_rdma_ack_cnt; + u8 s_hdrwords; /* size of s_hdr in 32 bit words */ s8 s_ahgidx; u8 s_state; /* opcode of last packet sent */ u8 s_ack_state; /* opcode of packet to ACK */ -- cgit v1.2.3 From 9d18717790c43c904fabe9da7a4c6b2ebed2c4d8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 6 Oct 2017 13:06:17 -0700 Subject: IB/core: Simplify sa_path_set_[sd]lid() calls Instead of making every caller convert the second argument of sa_path_set_slid() and sa_path_set_dlid() to big endian format, make these two functions accept LIDs in CPU endian format. This patch does not change any functionality. Signed-off-by: Bart Van Assche Cc: Sean Hefty Cc: Dasaratharaman Chandramouli Cc: Don Hiatt Cc: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 26 ++++++++++++-------------- drivers/infiniband/core/uverbs_marshall.c | 10 +++++----- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- include/rdma/ib_sa.h | 12 ++++++------ 4 files changed, 24 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4c4b46586af2..d80911d4abb7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1472,31 +1472,29 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) { sa_path_set_dlid(primary_path, - htonl(ntohs(req_msg->primary_local_lid))); + ntohs(req_msg->primary_local_lid)); sa_path_set_slid(primary_path, - htonl(ntohs(req_msg->primary_remote_lid))); + ntohs(req_msg->primary_remote_lid)); } else { lid = opa_get_lid_from_gid(&req_msg->primary_local_gid); - sa_path_set_dlid(primary_path, cpu_to_be32(lid)); + sa_path_set_dlid(primary_path, lid); lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid); - sa_path_set_slid(primary_path, cpu_to_be32(lid)); + sa_path_set_slid(primary_path, lid); } if (!cm_req_has_alt_path(req_msg)) return; if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(alt_path, - htonl(ntohs(req_msg->alt_local_lid))); - sa_path_set_slid(alt_path, - htonl(ntohs(req_msg->alt_remote_lid))); + sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid)); + sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid)); } else { lid = opa_get_lid_from_gid(&req_msg->alt_local_gid); - sa_path_set_dlid(alt_path, cpu_to_be32(lid)); + sa_path_set_dlid(alt_path, lid); lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid); - sa_path_set_slid(alt_path, cpu_to_be32(lid)); + sa_path_set_slid(alt_path, lid); } } @@ -3037,14 +3035,14 @@ static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg, u32 lid; if (path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid))); - sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid))); + sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid)); + sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid)); } else { lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid); - sa_path_set_dlid(path, cpu_to_be32(lid)); + sa_path_set_dlid(path, lid); lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid); - sa_path_set_slid(path, cpu_to_be32(lid)); + sa_path_set_slid(path, lid); } } diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index bd0acf376af0..aeb2824efbc7 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -176,18 +176,18 @@ EXPORT_SYMBOL(ib_copy_path_rec_to_user); void ib_copy_path_rec_from_user(struct sa_path_rec *dst, struct ib_user_path_rec *src) { - __be32 slid, dlid; + u32 slid, dlid; memset(dst, 0, sizeof(*dst)); if ((ib_is_opa_gid((union ib_gid *)src->sgid)) || (ib_is_opa_gid((union ib_gid *)src->dgid))) { dst->rec_type = SA_PATH_REC_TYPE_OPA; - slid = htonl(opa_get_lid_from_gid((union ib_gid *)src->sgid)); - dlid = htonl(opa_get_lid_from_gid((union ib_gid *)src->dgid)); + slid = opa_get_lid_from_gid((union ib_gid *)src->sgid); + dlid = opa_get_lid_from_gid((union ib_gid *)src->dgid); } else { dst->rec_type = SA_PATH_REC_TYPE_IB; - slid = htonl(ntohs(src->slid)); - dlid = htonl(ntohs(src->dlid)); + slid = ntohs(src->slid); + dlid = ntohs(src->dlid); } memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid); memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index fa5ccdb3bb2a..778a08250f16 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2403,7 +2403,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, switch (event->param.rej_rcvd.reason) { case IB_CM_REJ_PORT_CM_REDIRECT: cpi = event->param.rej_rcvd.ari; - sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid))); + sa_path_set_dlid(&ch->path, ntohs(cpi->redirect_lid)); ch->path.pkey = cpi->redirect_pkey; cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16); diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 355b81f4242d..1f7f604db5aa 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -590,20 +590,20 @@ static inline bool sa_path_is_roce(struct sa_path_rec *rec) (rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2)); } -static inline void sa_path_set_slid(struct sa_path_rec *rec, __be32 slid) +static inline void sa_path_set_slid(struct sa_path_rec *rec, u32 slid) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) - rec->ib.slid = htons(ntohl(slid)); + rec->ib.slid = cpu_to_be16(slid); else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) - rec->opa.slid = slid; + rec->opa.slid = cpu_to_be32(slid); } -static inline void sa_path_set_dlid(struct sa_path_rec *rec, __be32 dlid) +static inline void sa_path_set_dlid(struct sa_path_rec *rec, u32 dlid) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) - rec->ib.dlid = htons(ntohl(dlid)); + rec->ib.dlid = cpu_to_be16(dlid); else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) - rec->opa.dlid = dlid; + rec->opa.dlid = cpu_to_be32(dlid); } static inline void sa_path_set_raw_traffic(struct sa_path_rec *rec, -- cgit v1.2.3 From 1c3aea2bc8f0b2e5b57375ead40457ff75a3a2ec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:43 -0700 Subject: IB/core: Fix endianness annotation in rdma_is_multicast_addr() Since ipv4_addr is a big endian 32-bit number, annotate it as such. Fixes: commit be1d325a3358 ("IB/core: Set RoCEv2 MGID according to spec") Signed-off-by: Bart Van Assche Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index ec5008cf5d51..cfa82d16573d 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -305,12 +305,12 @@ static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) static inline int rdma_is_multicast_addr(struct in6_addr *addr) { - u32 ipv4_addr; + __be32 ipv4_addr; if (addr->s6_addr[0] == 0xff) return 1; - memcpy(&ipv4_addr, addr->s6_addr + 12, 4); + ipv4_addr = addr->s6_addr32[3]; return (ipv6_addr_v4mapped(addr) && ipv4_is_multicast(ipv4_addr)); } -- cgit v1.2.3 From 5cda6587feec790a089703dde2e6e1f82de50bbd Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:12 +0300 Subject: IB/core: Introduce and use rdma_create_user_ah Introduce rdma_create_user_ah API which allows passing udata to provider driver and additionally which resolves DMAC for RoCE. ib_resolve_eth_dmac() resolves destination mac address for unicast, multicast, link local ipv4 mapped ipv6 and ipv6 destination gid entry. This allows all RoCE provider drivers to avoid duplicating such code. Such change brings consistency where IB core always resolves dmac and pass it to RoCE provider drivers for user and kernel consumers, with this ah_attr->roce.dmac is always an input field for provider drivers. This uniformity avoids exporting ib_resolve_eth_dmac symbol to providers or other modules. Therefore its removed as exported symbol at later in the patch series. Now uverbs and umad both makes use of rdma_create_user_ah API which fixes the issue where umad has invalid DMAC for address. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 10 +-------- drivers/infiniband/core/verbs.c | 40 ++++++++++++++++++++++++++++++++++-- include/rdma/ib_verbs.h | 15 ++++++++++++++ 4 files changed, 55 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index c1696e6084b2..38809962c105 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -506,7 +506,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, rdma_ah_set_dgid_raw(&ah_attr, packet->mad.hdr.gid); } - ah = rdma_create_ah(agent->qp->pd, &ah_attr); + ah = rdma_create_user_ah(agent->qp->pd, &ah_attr, NULL); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_up; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 52a2cf2d83aa..0cef1863e2dc 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2537,7 +2537,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct rdma_ah_attr attr; int ret; struct ib_udata udata; - u8 *dmac; if (out_len < sizeof resp) return -ENOSPC; @@ -2580,20 +2579,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, } else { rdma_ah_set_ah_flags(&attr, 0); } - dmac = rdma_ah_retrieve_dmac(&attr); - if (dmac) - memset(dmac, 0, ETH_ALEN); - - ah = pd->device->create_ah(pd, &attr, &udata); + ah = rdma_create_user_ah(pd, &attr, &udata); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; } - ah->device = pd->device; - ah->pd = pd; - atomic_inc(&pd->usecnt); ah->uobject = uobj; uobj->user_handle = cmd.user_handle; uobj->object = ah; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index de57d6c11a25..4dcfe47c479d 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -302,11 +302,13 @@ EXPORT_SYMBOL(ib_dealloc_pd); /* Address handles */ -struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) +static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata) { struct ib_ah *ah; - ah = pd->device->create_ah(pd, ah_attr, NULL); + ah = pd->device->create_ah(pd, ah_attr, udata); if (!IS_ERR(ah)) { ah->device = pd->device; @@ -318,8 +320,42 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) return ah; } + +struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) +{ + return _rdma_create_ah(pd, ah_attr, NULL); +} EXPORT_SYMBOL(rdma_create_ah); +/** + * rdma_create_user_ah - Creates an address handle for the + * given address vector. + * It resolves destination mac address for ah attribute of RoCE type. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * @udata: pointer to user's input output buffer information need by + * provider driver. + * + * It returns 0 on success and returns appropriate error code on error. + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata) +{ + int err; + + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { + err = ib_resolve_eth_dmac(pd->device, ah_attr); + if (err) + return ERR_PTR(err); + } + + return _rdma_create_ah(pd, ah_attr, udata); +} +EXPORT_SYMBOL(rdma_create_user_ah); + int ib_get_rdma_header_version(const union rdma_network_hdr *hdr) { const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e8608b2dc844..09c4a695155e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2857,6 +2857,21 @@ void ib_dealloc_pd(struct ib_pd *pd); */ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr); +/** + * rdma_create_user_ah - Creates an address handle for the given address vector. + * It resolves destination mac address for ah attribute of RoCE type. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * @udata: pointer to user's input output buffer information need by + * provider driver. + * + * It returns 0 on success and returns appropriate error code on error. + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata); /** * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header * work completion. -- cgit v1.2.3 From c0348eb069687a2f27c0cd23dafb35918edf9e75 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:13 +0300 Subject: IB: Let ib_core resolve destination mac address Since IB/core resolves the destination mac address for user and kernel consumers, avoid resolving in multiple provider drivers. Only ib_core resolves DMAC now, therefore resolve_eth_dmac is removed as exported symbol. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 8 +++++--- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 8 -------- drivers/infiniband/hw/hns/hns_roce_ah.c | 14 +------------- drivers/infiniband/hw/mlx4/ah.c | 8 +++----- drivers/infiniband/hw/mlx5/ah.c | 4 ---- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 15 --------------- include/rdma/ib_verbs.h | 2 -- 7 files changed, 9 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 4dcfe47c479d..d8f1a5d34f4f 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -53,6 +53,9 @@ #include "core_priv.h" +static int ib_resolve_eth_dmac(struct ib_device *device, + struct rdma_ah_attr *ah_attr); + static const char * const ib_events[] = { [IB_EVENT_CQ_ERR] = "CQ error", [IB_EVENT_QP_FATAL] = "QP fatal error", @@ -1257,8 +1260,8 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); -int ib_resolve_eth_dmac(struct ib_device *device, - struct rdma_ah_attr *ah_attr) +static int ib_resolve_eth_dmac(struct ib_device *device, + struct rdma_ah_attr *ah_attr) { int ret = 0; struct ib_global_route *grh; @@ -1317,7 +1320,6 @@ int ib_resolve_eth_dmac(struct ib_device *device, out: return ret; } -EXPORT_SYMBOL(ib_resolve_eth_dmac); /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0d89621d9fe8..20e5eb9290ad 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -729,14 +729,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V1; break; } - rc = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, - ah_attr->roce.dmac, &vlan_tag, - &sgid_attr.ndev->ifindex, - NULL); - if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to get dmac\n"); - goto fail; - } } memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index d545302b8ef8..05bab4a39d91 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -48,7 +48,6 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_gid_attr gid_attr; struct hns_roce_ah *ah; u16 vlan_tag = 0xffff; - struct in6_addr in6; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); union ib_gid sgid; int ret; @@ -58,18 +57,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, return ERR_PTR(-ENOMEM); /* Get mac address */ - memcpy(&in6, grh->dgid.raw, sizeof(grh->dgid.raw)); - if (rdma_is_multicast_addr(&in6)) { - rdma_get_mcast_mac(&in6, ah->av.mac); - } else { - u8 *dmac = rdma_ah_retrieve_dmac(ah_attr); - - if (!dmac) { - kfree(ah); - return ERR_PTR(-EINVAL); - } - memcpy(ah->av.mac, dmac, ETH_ALEN); - } + memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); /* Get source gid */ ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr), diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 538c46a73248..6dee4fdc5d67 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -92,12 +92,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, int ret; memcpy(&in6, grh->dgid.raw, sizeof(in6)); - if (rdma_is_multicast_addr(&in6)) { + if (rdma_is_multicast_addr(&in6)) is_mcast = 1; - rdma_get_mcast_mac(&in6, ah->av.eth.mac); - } else { - memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); - } + + memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr), grh->sgid_index, &sgid, &gid_attr); if (ret) diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 3363e29157f6..fe269f680103 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -89,10 +89,6 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, resp.response_length = min_resp_len; - err = ib_resolve_eth_dmac(pd->device, ah_attr); - if (err) - return ERR_PTR(err); - memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index d0249e463338..dec650930ca6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -201,21 +201,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, /* Get network header type for this GID */ ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); - if ((pd->uctx) && - (!rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw)) && - (!rdma_link_local_addr((struct in6_addr *)grh->dgid.raw))) { - status = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, - attr->roce.dmac, - &vlan_tag, - &sgid_attr.ndev->ifindex, - NULL); - if (status) { - pr_err("%s(): Failed to resolve dmac from gid." - "status = %d\n", __func__, status); - goto av_conf_err; - } - } - status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag); if (status) goto av_conf_err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 09c4a695155e..9810e4568635 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3622,8 +3622,6 @@ void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); -int ib_resolve_eth_dmac(struct ib_device *device, - struct rdma_ah_attr *ah_attr); int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width); static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) -- cgit v1.2.3 From 99260132fde7bddc6e0132ce53da94d1c9ccabcb Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:16 +0300 Subject: IB/core: Fix calculation of maximum RoCE MTU The original code only took into consideration the largest header possible after the IB_BTH_BYTES. This was incorrect, as the largest possible header size is the largest possible combination of headers we might run into. The new code accounts for all possible headers in the largest possible combination and subtracts that from the MTU to make sure that all packets will fit on the wire. Link: https://www.spinics.net/lists/linux-rdma/msg54558.html Fixes: 3c86aa70bf67 ("RDMA/cm: Add RDMA CM support for IBoE devices") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reported-by: Roland Dreier Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 7 ++++--- include/rdma/ib_pack.h | 19 +++++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index ec5008cf5d51..8815989301ab 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -245,10 +245,11 @@ static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_g static inline enum ib_mtu iboe_get_mtu(int mtu) { /* - * reduce IB headers from effective IBoE MTU. 28 stands for - * atomic header which is the biggest possible header after BTH + * Reduce IB headers from effective IBoE MTU. */ - mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; + mtu = mtu - (IB_GRH_BYTES + IB_UDP_BYTES + IB_BTH_BYTES + + IB_EXT_XRC_BYTES + IB_EXT_ATOMICETH_BYTES + + IB_ICRC_BYTES); if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) return IB_MTU_4096; diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index 36655899ee02..7ea1382ad0e5 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -37,14 +37,17 @@ #include enum { - IB_LRH_BYTES = 8, - IB_ETH_BYTES = 14, - IB_VLAN_BYTES = 4, - IB_GRH_BYTES = 40, - IB_IP4_BYTES = 20, - IB_UDP_BYTES = 8, - IB_BTH_BYTES = 12, - IB_DETH_BYTES = 8 + IB_LRH_BYTES = 8, + IB_ETH_BYTES = 14, + IB_VLAN_BYTES = 4, + IB_GRH_BYTES = 40, + IB_IP4_BYTES = 20, + IB_UDP_BYTES = 8, + IB_BTH_BYTES = 12, + IB_DETH_BYTES = 8, + IB_EXT_ATOMICETH_BYTES = 28, + IB_EXT_XRC_BYTES = 4, + IB_ICRC_BYTES = 4 }; struct ib_field { -- cgit v1.2.3 From b4f34597a5ce148b88a47da621037537c384d565 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 17 Oct 2017 18:01:12 +0300 Subject: IB/mlx5: Expose multi-packet RQ capabilities This patch reports the device's striding RQ capabilities to the user-space: - min/max_single_stride_log_num_of_bytes: Log of min/max number of bytes in a single stride. - min/max_single_wqe_log_num_of_strides: Log of min/max number of strides in a single WQE. - supported_qpts: A bit mask to know which QP types support multi- packet RQ, for now only Raw Packet QPs. Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 16 ++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +++++ include/uapi/rdma/mlx5-abi.h | 14 ++++++++++++++ 3 files changed, 35 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 552f7bd4ecc3..02da3f58f296 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -848,6 +848,22 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } + if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen)) { + resp.response_length += sizeof(resp.striding_rq_caps); + if (MLX5_CAP_GEN(mdev, striding_rq)) { + resp.striding_rq_caps.min_single_stride_log_num_of_bytes = + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES; + resp.striding_rq_caps.max_single_stride_log_num_of_bytes = + MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES; + resp.striding_rq_caps.min_single_wqe_log_num_of_strides = + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + resp.striding_rq_caps.max_single_wqe_log_num_of_strides = + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES; + resp.striding_rq_caps.supported_qpts = + BIT(IB_QPT_RAW_PACKET); + } + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 189e80cd6b2f..8de40852818b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -256,6 +256,11 @@ enum mlx5_ib_wq_flags { MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1, }; +#define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9 +#define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16 +#define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6 +#define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13 + struct mlx5_ib_rwq { struct ib_wq ibwq; struct mlx5_core_qp core_qp; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 1791bf123ba9..0832d9502200 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -190,6 +190,19 @@ struct mlx5_ib_sw_parsing_caps { __u32 supported_qpts; }; +struct mlx5_ib_striding_rq_caps { + __u32 min_single_stride_log_num_of_bytes; + __u32 max_single_stride_log_num_of_bytes; + __u32 min_single_wqe_log_num_of_strides; + __u32 max_single_wqe_log_num_of_strides; + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_RAW_PACKET + */ + __u32 supported_qpts; +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -200,6 +213,7 @@ struct mlx5_ib_query_device_resp { __u32 mlx5_ib_support_multi_pkt_send_wqes; __u32 reserved; struct mlx5_ib_sw_parsing_caps sw_parsing_caps; + struct mlx5_ib_striding_rq_caps striding_rq_caps; }; struct mlx5_ib_create_cq { -- cgit v1.2.3 From ccc8708790273811db24676223b040710793cba7 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 17 Oct 2017 18:01:13 +0300 Subject: IB/mlx5: Allow creation of a multi-packet RQ Allow creation of a multi-packet receive queue. In order to create a multi-packet RQ, the following fields in the mlx5_ib_rwq should be set: - log_num_strides: Log of number of strides per WQE - single_stride_log_num_of_bytes: Log of a single stride size - two_byte_shift_en: When enabled, hardware pads 2 bytes of zeros before writing the message to memory (e.g. for the IP alignment). Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 +++ drivers/infiniband/hw/mlx5/qp.c | 52 ++++++++++++++++++++++++++++++------ include/linux/mlx5/mlx5_ifc.h | 1 + include/uapi/rdma/mlx5-abi.h | 8 +++++- 4 files changed, 56 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 8de40852818b..e7deaa08535b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -254,6 +254,7 @@ struct mlx5_ib_wq { enum mlx5_ib_wq_flags { MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1, + MLX5_IB_WQ_FLAGS_STRIDING_RQ = 0x2, }; #define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9 @@ -269,6 +270,9 @@ struct mlx5_ib_rwq { u32 log_rq_size; u32 rq_page_offset; u32 log_page_size; + u32 log_num_strides; + u32 two_byte_shift_en; + u32 single_stride_log_num_of_bytes; struct ib_umem *umem; size_t buf_size; unsigned int page_shift; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 37a0976240fd..d209c684d729 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4706,9 +4706,19 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); wq = MLX5_ADDR_OF(rqc, rqc, wq); - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, wq_type, + rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ ? + MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ : MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); + if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) { + MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en); + MLX5_SET(wq, wq, log_wqe_stride_size, + rwq->single_stride_log_num_of_bytes - + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES); + MLX5_SET(wq, wq, log_wqe_num_of_strides, rwq->log_num_strides - + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES); + } MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size); MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn); MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset); @@ -4790,7 +4800,8 @@ static int prepare_user_rq(struct ib_pd *pd, int err; size_t required_cmd_sz; - required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + required_cmd_sz = offsetof(typeof(ucmd), single_stride_log_num_of_bytes) + + sizeof(ucmd.single_stride_log_num_of_bytes); if (udata->inlen < required_cmd_sz) { mlx5_ib_dbg(dev, "invalid inlen\n"); return -EINVAL; @@ -4808,14 +4819,39 @@ static int prepare_user_rq(struct ib_pd *pd, return -EFAULT; } - if (ucmd.comp_mask) { + if (ucmd.comp_mask & (~MLX5_IB_CREATE_WQ_STRIDING_RQ)) { mlx5_ib_dbg(dev, "invalid comp mask\n"); return -EOPNOTSUPP; - } - - if (ucmd.reserved) { - mlx5_ib_dbg(dev, "invalid reserved\n"); - return -EOPNOTSUPP; + } else if (ucmd.comp_mask & MLX5_IB_CREATE_WQ_STRIDING_RQ) { + if (!MLX5_CAP_GEN(dev->mdev, striding_rq)) { + mlx5_ib_dbg(dev, "Striding RQ is not supported\n"); + return -EOPNOTSUPP; + } + if ((ucmd.single_stride_log_num_of_bytes < + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES) || + (ucmd.single_stride_log_num_of_bytes > + MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES)) { + mlx5_ib_dbg(dev, "Invalid log stride size (%u. Range is %u - %u)\n", + ucmd.single_stride_log_num_of_bytes, + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES, + MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES); + return -EINVAL; + } + if ((ucmd.single_wqe_log_num_of_strides > + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) || + (ucmd.single_wqe_log_num_of_strides < + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) { + mlx5_ib_dbg(dev, "Invalid log num strides (%u. Range is %u - %u)\n", + ucmd.single_wqe_log_num_of_strides, + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES, + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES); + return -EINVAL; + } + rwq->single_stride_log_num_of_bytes = + ucmd.single_stride_log_num_of_bytes; + rwq->log_num_strides = ucmd.single_wqe_log_num_of_strides; + rwq->two_byte_shift_en = !!ucmd.two_byte_shift_en; + rwq->create_flags |= MLX5_IB_WQ_FLAGS_STRIDING_RQ; } err = set_user_rq_size(dev, init_attr, &ucmd, rwq); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 69772347f866..db655db45b77 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -744,6 +744,7 @@ enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ = 0x2, + MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ = 0x3, }; enum { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 0832d9502200..b1d5b87ba3fd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -308,6 +308,10 @@ struct mlx5_ib_alloc_mw { __u16 reserved2; }; +enum mlx5_ib_create_wq_mask { + MLX5_IB_CREATE_WQ_STRIDING_RQ = (1 << 0), +}; + struct mlx5_ib_create_wq { __u64 buf_addr; __u64 db_addr; @@ -316,7 +320,9 @@ struct mlx5_ib_create_wq { __u32 user_index; __u32 flags; __u32 comp_mask; - __u32 reserved; + __u32 single_stride_log_num_of_bytes; + __u32 single_wqe_log_num_of_strides; + __u32 two_byte_shift_en; }; struct mlx5_ib_create_ah_resp { -- cgit v1.2.3 From 0ff8e79ca7c81fafa3f5c91a1b58efc85cbc2302 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 19 Oct 2017 08:25:51 +0300 Subject: IB/mlx5: Add 128B CQE compression and padding HW bits Adding new bits in mlx5_ifc_cmd_hca_cap to get the hardware capabilities for: - compression_128: Support 128B CQE compression - cqe_128_always: Support 128B CQE padding Signed-off-by: Guy Levi Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index db655db45b77..d49928c314ed 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1047,7 +1047,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 num_of_uars_per_page[0x20]; u8 reserved_at_540[0x40]; - u8 reserved_at_580[0x3f]; + u8 reserved_at_580[0x3d]; + u8 cqe_128_always[0x1]; + u8 cqe_compression_128[0x1]; u8 cqe_compression[0x1]; u8 cqe_compression_timeout[0x10]; -- cgit v1.2.3 From de57f2ad06d5bf01015b955600cbfc77059b2b6e Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 19 Oct 2017 08:25:52 +0300 Subject: IB/mlx5: Support 128B CQE compression feature In commit 1cbe6fc86ccf ("IB/mlx5: Add support for CQE compressing") the concept of CQE compression was introduced and added a support for 64B CQE size. This change update the code to support 128B CQE size as well. Signed-off-by: Guy Levi Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 6 ++++-- drivers/infiniband/hw/mlx5/main.c | 8 ++++++-- include/uapi/rdma/mlx5-abi.h | 7 ++++++- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b8a116d0e063..51871f049c57 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -804,8 +804,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *index = to_mucontext(context)->bfregi.sys_pages[0]; if (ucmd.cqe_comp_en == 1) { - if (unlikely((*cqe_size != 64) || - !MLX5_CAP_GEN(dev->mdev, cqe_compression))) { + if (!((*cqe_size == 128 && + MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) || + (*cqe_size == 64 && + MLX5_CAP_GEN(dev->mdev, cqe_compression)))) { err = -EOPNOTSUPP; mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n", *cqe_size); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 02da3f58f296..b9337562aa90 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -824,8 +824,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } - if (field_avail(typeof(resp), reserved, uhw->outlen)) - resp.response_length += sizeof(resp.reserved); + if (field_avail(typeof(resp), flags, uhw->outlen)) { + resp.response_length += sizeof(resp.flags); + if (MLX5_CAP_GEN(mdev, cqe_compression_128)) + resp.flags |= + MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP; + } if (field_avail(typeof(resp), sw_parsing_caps, uhw->outlen)) { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index b1d5b87ba3fd..a8fc1f0956d0 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -203,6 +203,11 @@ struct mlx5_ib_striding_rq_caps { __u32 supported_qpts; }; +enum mlx5_ib_query_dev_resp_flags { + /* Support 128B CQE compression */ + MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -211,7 +216,7 @@ struct mlx5_ib_query_device_resp { struct mlx5_ib_cqe_comp_caps cqe_comp_caps; struct mlx5_packet_pacing_caps packet_pacing_caps; __u32 mlx5_ib_support_multi_pkt_send_wqes; - __u32 reserved; + __u32 flags; /* Use enum mlx5_ib_query_dev_resp_flags */ struct mlx5_ib_sw_parsing_caps sw_parsing_caps; struct mlx5_ib_striding_rq_caps striding_rq_caps; }; -- cgit v1.2.3 From 7a0c8f4244e9ec7a630563d294b211342b46223d Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 19 Oct 2017 08:25:53 +0300 Subject: IB/mlx5: Support padded 128B CQE feature In some benchmarks and some CPU architectures, writing the CQE on a full cache line size improves performance by saving memory access operations (read-modify-write) relative to partial cache line change. This patch lets the user to configure the device to pad the CQE up to 128B in case its content is less than 128B. Currently the driver supports only padding for a CQE size of 128B. Signed-off-by: Guy Levi Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 27 +++++++++++++++++++++++---- drivers/infiniband/hw/mlx5/main.c | 4 ++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +++++ include/linux/mlx5/cq.h | 6 ++++-- include/uapi/rdma/mlx5-abi.h | 7 ++++++- 5 files changed, 42 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 51871f049c57..01b218a3c277 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -754,13 +754,13 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int err; ucmdlen = udata->inlen < sizeof(ucmd) ? - (sizeof(ucmd) - sizeof(ucmd.reserved)) : sizeof(ucmd); + (sizeof(ucmd) - sizeof(ucmd.flags)) : sizeof(ucmd); if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; if (ucmdlen == sizeof(ucmd) && - ucmd.reserved != 0) + (ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD))) return -EINVAL; if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) @@ -830,6 +830,19 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, ilog2(ucmd.cqe_comp_res_format)); } + if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) { + if (*cqe_size != 128 || + !MLX5_CAP_GEN(dev->mdev, cqe_128_always)) { + err = -EOPNOTSUPP; + mlx5_ib_warn(dev, + "CQE padding is not supported for CQE size of %dB!\n", + *cqe_size); + goto err_cqb; + } + + cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD; + } + return 0; err_cqb: @@ -989,7 +1002,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, cq->cqe_size = cqe_size; cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context); - MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, cqe_sz, + cqe_sz_to_mlx_sz(cqe_size, + cq->private_flags & + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); MLX5_SET(cqc, cqc, uar_page, index); MLX5_SET(cqc, cqc, c_eqn, eqn); @@ -1339,7 +1355,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, cqe_sz, + cqe_sz_to_mlx_sz(cqe_size, + cq->private_flags & + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b9337562aa90..1edd41e3be1b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -826,9 +826,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), flags, uhw->outlen)) { resp.response_length += sizeof(resp.flags); + if (MLX5_CAP_GEN(mdev, cqe_compression_128)) resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP; + + if (MLX5_CAP_GEN(mdev, cqe_128_always)) + resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD; } if (field_avail(typeof(resp), sw_parsing_caps, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e7deaa08535b..137f2116911f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -444,6 +444,10 @@ struct mlx5_shared_mr_info { struct ib_umem *umem; }; +enum mlx5_ib_cq_pr_flags { + MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0, +}; + struct mlx5_ib_cq { struct ib_cq ibcq; struct mlx5_core_cq mcq; @@ -466,6 +470,7 @@ struct mlx5_ib_cq { struct list_head wc_list; enum ib_cq_notify_flags notify_flags; struct work_struct notify_work; + u16 private_flags; /* Use mlx5_ib_cq_pr_flags */ }; struct mlx5_ib_wc { diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 95898847c7d4..cc718e245b1e 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -125,11 +125,13 @@ struct mlx5_cq_modify_params { enum { CQE_SIZE_64 = 0, CQE_SIZE_128 = 1, + CQE_SIZE_128_PAD = 2, }; -static inline int cqe_sz_to_mlx_sz(u8 size) +static inline int cqe_sz_to_mlx_sz(u8 size, int padding_128_en) { - return size == 64 ? CQE_SIZE_64 : CQE_SIZE_128; + return padding_128_en ? CQE_SIZE_128_PAD : + size == 64 ? CQE_SIZE_64 : CQE_SIZE_128; } static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq) diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index a8fc1f0956d0..201a60f032dd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -206,6 +206,7 @@ struct mlx5_ib_striding_rq_caps { enum mlx5_ib_query_dev_resp_flags { /* Support 128B CQE compression */ MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, + MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, }; struct mlx5_ib_query_device_resp { @@ -221,13 +222,17 @@ struct mlx5_ib_query_device_resp { struct mlx5_ib_striding_rq_caps striding_rq_caps; }; +enum mlx5_ib_create_cq_flags { + MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD = 1 << 0, +}; + struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; __u32 cqe_size; __u8 cqe_comp_en; __u8 cqe_comp_res_format; - __u16 reserved; /* explicit padding (optional on i386) */ + __u16 flags; }; struct mlx5_ib_create_cq_resp { -- cgit v1.2.3 From 4d350f1f89ee8ea84be1ef09717bf392fa5a3b45 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Oct 2017 08:25:54 +0300 Subject: IB/mlx5: Update tunnel offloads bits This patch updates the mlx5_ifc with the following: - Fix tunnel_stateless_gre typo. - max_geneve_opt_len - Maximum geneve options length. - tunnel_stateless_geneve_rx - If set, receive Stateless Offloads for Geneve tunneled (inner) packets are supported. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d49928c314ed..d4c29c183d28 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -614,7 +614,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 swp[0x1]; u8 swp_csum[0x1]; u8 swp_lso[0x1]; - u8 reserved_at_23[0x1d]; + u8 reserved_at_23[0x1b]; + u8 max_geneve_opt_len[0x1]; + u8 tunnel_stateless_geneve_rx[0x1]; u8 reserved_at_40[0x10]; u8 lro_min_mss_size[0x10]; -- cgit v1.2.3 From f95ef6cbae61fa1dd563f5c0f6a0e5b512fda5ba Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Oct 2017 08:25:55 +0300 Subject: IB/mlx5: Add tunneling offloads support The device can support receive Stateless Offloads for the inner packet's fields only when the packet is processed by TIR which is enabled to support tunneling. Otherwise, the device treats the packet as an ordinary non-tunneling packet and receive offloads can be done only for the outer packet's field. In order to enable receive Stateless Offloading support for incoming tunneling traffic the TIR should be created with tunneled_offload_en. Tunneling offloads is supported only be raw ethernet QP. This patch includes: * New QP creation flag for tunneling offloads. * Reports device capabilities. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 14 +++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/qp.c | 39 +++++++++++++++++++++++++++++++----- include/uapi/rdma/mlx5-abi.h | 11 +++++++++- 4 files changed, 61 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1edd41e3be1b..260f8be1d0ed 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -872,6 +872,20 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } + if (field_avail(typeof(resp), tunnel_offloads_caps, + uhw->outlen)) { + resp.response_length += sizeof(resp.tunnel_offloads_caps); + if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan)) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_VXLAN; + if (MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx)) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_GENEVE; + if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_GRE; + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 137f2116911f..0a328d6c6494 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -398,6 +398,7 @@ struct mlx5_ib_qp { struct list_head cq_send_list; u32 rate_limit; u32 underlay_qpn; + bool tunnel_offload_en; }; struct mlx5_ib_cq_buf { @@ -420,6 +421,8 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_RSS = 1 << 8, MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9, MLX5_IB_QP_UNDERLAY = 1 << 10, + /* Reserved for PCI_WRITE_PAD = 1 << 11, */ + MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d209c684d729..53bb0d5cad3d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1204,8 +1204,16 @@ static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev, mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp); } +static bool tunnel_offload_supported(struct mlx5_core_dev *dev) +{ + return (MLX5_CAP_ETH(dev, tunnel_stateless_vxlan) || + MLX5_CAP_ETH(dev, tunnel_stateless_gre) || + MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx)); +} + static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, - struct mlx5_ib_rq *rq, u32 tdn) + struct mlx5_ib_rq *rq, u32 tdn, + bool tunnel_offload_en) { u32 *in; void *tirc; @@ -1221,6 +1229,8 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn); MLX5_SET(tirc, tirc, transport_domain, tdn); + if (tunnel_offload_en) + MLX5_SET(tirc, tirc, tunneled_offload_en, 1); err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); @@ -1271,7 +1281,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, goto err_destroy_sq; - err = create_raw_packet_qp_tir(dev, rq, tdn); + err = create_raw_packet_qp_tir(dev, rq, tdn, + qp->tunnel_offload_en); if (err) goto err_destroy_rq; } @@ -1358,7 +1369,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (udata->outlen < min_resp_len) return -EINVAL; - required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1); + required_cmd_sz = offsetof(typeof(ucmd), flags) + sizeof(ucmd.flags); if (udata->inlen < required_cmd_sz) { mlx5_ib_dbg(dev, "invalid inlen\n"); return -EINVAL; @@ -1381,8 +1392,14 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } - if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) { - mlx5_ib_dbg(dev, "invalid reserved\n"); + if (ucmd.flags & ~MLX5_QP_FLAG_TUNNEL_OFFLOADS) { + mlx5_ib_dbg(dev, "invalid flags\n"); + return -EOPNOTSUPP; + } + + if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS && + !tunnel_offload_supported(dev->mdev)) { + mlx5_ib_dbg(dev, "tunnel offloads isn't supported\n"); return -EOPNOTSUPP; } @@ -1405,6 +1422,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(tirc, tirc, transport_domain, tdn); hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + + if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) + MLX5_SET(tirc, tirc, tunneled_offload_en, 1); + switch (ucmd.rx_hash_function) { case MLX5_RX_HASH_FUNC_TOEPLITZ: { @@ -1604,6 +1625,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); + if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) { + if (init_attr->qp_type != IB_QPT_RAW_PACKET || + !tunnel_offload_supported(mdev)) { + mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n"); + return -EOPNOTSUPP; + } + qp->tunnel_offload_en = true; + } if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { if (init_attr->qp_type != IB_QPT_UD || diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 201a60f032dd..791655ec4aff 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -39,6 +39,7 @@ enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, + MLX5_QP_FLAG_TUNNEL_OFFLOADS = 1 << 2, }; enum { @@ -209,6 +210,12 @@ enum mlx5_ib_query_dev_resp_flags { MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, }; +enum mlx5_ib_tunnel_offloads { + MLX5_IB_TUNNELED_OFFLOADS_VXLAN = 1 << 0, + MLX5_IB_TUNNELED_OFFLOADS_GRE = 1 << 1, + MLX5_IB_TUNNELED_OFFLOADS_GENEVE = 1 << 2 +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -220,6 +227,8 @@ struct mlx5_ib_query_device_resp { __u32 flags; /* Use enum mlx5_ib_query_dev_resp_flags */ struct mlx5_ib_sw_parsing_caps sw_parsing_caps; struct mlx5_ib_striding_rq_caps striding_rq_caps; + __u32 tunnel_offloads_caps; /* enum mlx5_ib_tunnel_offloads */ + __u32 reserved; }; enum mlx5_ib_create_cq_flags { @@ -304,7 +313,7 @@ struct mlx5_ib_create_qp_rss { __u8 reserved[6]; __u8 rx_hash_key[128]; /* valid only for Toeplitz */ __u32 comp_mask; - __u32 reserved1; + __u32 flags; }; struct mlx5_ib_create_qp_resp { -- cgit v1.2.3 From 309fa3470fcaf96b295d2106ab17c00dbf7f3920 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Oct 2017 08:25:56 +0300 Subject: IB/mlx5: Add support for RSS on the inner packet Some user space application would like to do RSS on the inner packet fields instead on the outer. When MLX5_RX_HASH_INNER is set with one or more of the other hash fields, then the RSS will be done using the inner packet. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 11 +++++++++++ include/uapi/rdma/mlx5-abi.h | 4 +++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 53bb0d5cad3d..9e22dead259a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1403,6 +1403,12 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } + if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER && + !(ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) { + mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n"); + return -EOPNOTSUPP; + } + err = ib_copy_to_udata(udata, &resp, min_resp_len); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); @@ -1426,6 +1432,11 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) MLX5_SET(tirc, tirc, tunneled_offload_en, 1); + if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER) + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); + else + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + switch (ucmd.rx_hash_function) { case MLX5_RX_HASH_FUNC_TOEPLITZ: { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 791655ec4aff..442b46b74a3a 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -303,7 +303,9 @@ enum mlx5_rx_hash_fields { MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4, MLX5_RX_HASH_DST_PORT_TCP = 1 << 5, MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6, - MLX5_RX_HASH_DST_PORT_UDP = 1 << 7 + MLX5_RX_HASH_DST_PORT_UDP = 1 << 7, + /* Save bits for future fields */ + MLX5_RX_HASH_INNER = 1 << 31 }; struct mlx5_ib_create_qp_rss { -- cgit v1.2.3 From fec99ededf6be46178d7f571b34dae80fc05f090 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 25 Oct 2017 18:56:49 +0300 Subject: RDMA/umem: Avoid partial declaration of non-static function The RDMA/umem uses generic RB-trees macros to generate various ib_umem access functions. The generation is performed with INTERVAL_TREE_DEFINE macro, which allows one of two modes: declare all functions as static or declare none of the function to be static. The second mode of operation produces the following sparse errors: drivers/infiniband/core/umem_rbtree.c:69:1: warning: symbol 'rbt_ib_umem_iter_first' was not declared. Should it be static? drivers/infiniband/core/umem_rbtree.c:69:1: warning: symbol 'rbt_ib_umem_iter_next' was not declared. Should it be static? Code relocation together with declaration of such functions to be "static" solves the issue. Because there is no need to have separate file for two functions, let's consolidate umem_rtree.c and umem_odp.c into one file. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/umem_odp.c | 72 ++++++++++++++++++++++ drivers/infiniband/core/umem_rbtree.c | 109 ---------------------------------- include/rdma/ib_umem_odp.h | 4 -- 4 files changed, 73 insertions(+), 114 deletions(-) delete mode 100644 drivers/infiniband/core/umem_rbtree.c (limited to 'include') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index b4df164f71a6..336ace50b0ab 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -14,7 +14,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ security.o nldev.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o -ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o +ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o ib_cm-y := cm.o diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 55e8f5ed8b3c..2aadf5813a40 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -39,11 +39,44 @@ #include #include #include +#include #include #include #include +/* + * The ib_umem list keeps track of memory regions for which the HW + * device request to receive notification when the related memory + * mapping is changed. + * + * ib_umem_lock protects the list. + */ + +static u64 node_start(struct umem_odp_node *n) +{ + struct ib_umem_odp *umem_odp = + container_of(n, struct ib_umem_odp, interval_tree); + + return ib_umem_start(umem_odp->umem); +} + +/* Note that the representation of the intervals in the interval tree + * considers the ending point as contained in the interval, while the + * function ib_umem_end returns the first address which is not contained + * in the umem. + */ +static u64 node_last(struct umem_odp_node *n) +{ + struct ib_umem_odp *umem_odp = + container_of(n, struct ib_umem_odp, interval_tree); + + return ib_umem_end(umem_odp->umem) - 1; +} + +INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, + node_start, node_last, static, rbt_ib_umem) + static void ib_umem_notifier_start_account(struct ib_umem *item) { mutex_lock(&item->odp_data->umem_mutex); @@ -754,3 +787,42 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, mutex_unlock(&umem->odp_data->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); + +/* @last is not a part of the interval. See comment for function + * node_last. + */ +int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, + u64 start, u64 last, + umem_call_back cb, + void *cookie) +{ + int ret_val = 0; + struct umem_odp_node *node, *next; + struct ib_umem_odp *umem; + + if (unlikely(start == last)) + return ret_val; + + for (node = rbt_ib_umem_iter_first(root, start, last - 1); + node; node = next) { + next = rbt_ib_umem_iter_next(node, start, last - 1); + umem = container_of(node, struct ib_umem_odp, interval_tree); + ret_val = cb(umem->umem, start, last, cookie) || ret_val; + } + + return ret_val; +} +EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); + +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, + u64 addr, u64 length) +{ + struct umem_odp_node *node; + + node = rbt_ib_umem_iter_first(root, addr, addr + length - 1); + if (node) + return container_of(node, struct ib_umem_odp, interval_tree); + return NULL; + +} +EXPORT_SYMBOL(rbt_ib_umem_lookup); diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c deleted file mode 100644 index fc801920e341..000000000000 --- a/drivers/infiniband/core/umem_rbtree.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2014 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -/* - * The ib_umem list keeps track of memory regions for which the HW - * device request to receive notification when the related memory - * mapping is changed. - * - * ib_umem_lock protects the list. - */ - -static inline u64 node_start(struct umem_odp_node *n) -{ - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_start(umem_odp->umem); -} - -/* Note that the representation of the intervals in the interval tree - * considers the ending point as contained in the interval, while the - * function ib_umem_end returns the first address which is not contained - * in the umem. - */ -static inline u64 node_last(struct umem_odp_node *n) -{ - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_end(umem_odp->umem) - 1; -} - -INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, - node_start, node_last, , rbt_ib_umem) - -/* @last is not a part of the interval. See comment for function - * node_last. - */ -int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, - u64 start, u64 last, - umem_call_back cb, - void *cookie) -{ - int ret_val = 0; - struct umem_odp_node *node, *next; - struct ib_umem_odp *umem; - - if (unlikely(start == last)) - return ret_val; - - for (node = rbt_ib_umem_iter_first(root, start, last - 1); - node; node = next) { - next = rbt_ib_umem_iter_next(node, start, last - 1); - umem = container_of(node, struct ib_umem_odp, interval_tree); - ret_val = cb(umem->umem, start, last, cookie) || ret_val; - } - - return ret_val; -} -EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); - -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, - u64 addr, u64 length) -{ - struct umem_odp_node *node; - - node = rbt_ib_umem_iter_first(root, addr, addr + length - 1); - if (node) - return container_of(node, struct ib_umem_odp, interval_tree); - return NULL; - -} -EXPORT_SYMBOL(rbt_ib_umem_lookup); diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 5eb7f5bc8248..6a17f856f841 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -111,10 +111,6 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bound); -void rbt_ib_umem_insert(struct umem_odp_node *node, - struct rb_root_cached *root); -void rbt_ib_umem_remove(struct umem_odp_node *node, - struct rb_root_cached *root); typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, void *cookie); /* -- cgit v1.2.3 From e1d2e88733695038754d3303b180f8005a02b6f1 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sun, 29 Oct 2017 13:59:44 +0200 Subject: IB/core: Add PCI write end padding flags for WQ and QP There are root complexes that are able to optimize their performance when incoming data is multiple full cache lines. PCI write end padding is the device's ability to pad the ending of incoming packets (scatter) to full cache line such that the last upstream write generated by an incoming packet will be a full cache line. Add a relevant entry to ib_device_cap_flags to report such capability of an RDMA device. Add the QP and WQ create flags: * A QP/WQ created with a scatter end padding flag will cause HW to pad the last upstream write generated by a packet to cache line. User should consider several factors before activating this feature: - In case of high CPU memory load (which may cause PCI back pressure in turn), if a large percent of the writes are partial cache line, this feature should be checked as an optional solution. - This feature might reduce performance if most packets are between one and two cache lines and PCIe throughput has reached its maximum capacity. E.g. 65B packet from the network port will lead to 128B write on PCIe, which may cause traffic on PCIe to reach high throughput. Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 3 ++- include/rdma/ib_verbs.h | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d31e4bc58e9a..8ca36843ef38 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1491,7 +1491,8 @@ static int create_qp(struct ib_uverbs_file *file, IB_QP_CREATE_MANAGED_RECV | IB_QP_CREATE_SCATTER_FCS | IB_QP_CREATE_CVLAN_STRIPPING | - IB_QP_CREATE_SOURCE_QPN)) { + IB_QP_CREATE_SOURCE_QPN | + IB_QP_CREATE_PCI_WRITE_END_PADDING)) { ret = -EINVAL; goto err_put; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9810e4568635..0b671982bbb3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -229,6 +229,8 @@ enum ib_device_cap_flags { /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */ IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35), + /* The device supports padding incoming writes to cacheline. */ + IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), }; enum ib_signature_prot_cap { @@ -1098,6 +1100,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_SCATTER_FCS = 1 << 8, IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, IB_QP_CREATE_SOURCE_QPN = 1 << 10, + IB_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, @@ -1621,6 +1624,7 @@ enum ib_wq_flags { IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0, IB_WQ_FLAGS_SCATTER_FCS = 1 << 1, IB_WQ_FLAGS_DELAY_DROP = 1 << 2, + IB_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3, }; struct ib_wq_init_attr { -- cgit v1.2.3 From f17966f19575eac9d5dea68b08f6292dd3d4d3db Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 2 Nov 2017 15:22:28 +0200 Subject: IB/mlx5: Fix ABI alignment to 64 bit Struct mlx5_ib_striding_rq_caps was not aligned to 64 bit as it should have been. Add a 32 bit reserved field. Fixes: b4f34597a5ce ('IB/mlx5: Expose multi-packet RQ capabilities') Signed-off-by: Noa Osherovich Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx5-abi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 442b46b74a3a..21722e3c2c70 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -202,6 +202,7 @@ struct mlx5_ib_striding_rq_caps { * supported_qpts |= 1 << IB_QPT_RAW_PACKET */ __u32 supported_qpts; + __u32 reserved; }; enum mlx5_ib_query_dev_resp_flags { -- cgit v1.2.3 From 8b10ba783c9d0c69d53e7d78ff7f2cd921f80729 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Mon, 6 Nov 2017 11:48:53 -0800 Subject: RDMA/vmw_pvrdma: Add shared receive queue support Add the required functions needed to support SRQs. Currently, kernel clients are not supported. SRQs will only be available in userspace. Reviewed-by: Adit Ranadive Reviewed-by: Aditya Sarwade Reviewed-by: Jorgen Hansen Reviewed-by: Nitish Bhat Signed-off-by: Bryan Tan Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/Makefile | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 25 ++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 54 ++++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 59 +++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 55 +++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 319 ++++++++++++++++++++++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 3 + drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 18 ++ include/uapi/rdma/vmw_pvrdma-abi.h | 2 + 9 files changed, 523 insertions(+), 14 deletions(-) create mode 100644 drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c (limited to 'include') diff --git a/drivers/infiniband/hw/vmw_pvrdma/Makefile b/drivers/infiniband/hw/vmw_pvrdma/Makefile index 0194ed19f542..2f52e0a044a0 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/Makefile +++ b/drivers/infiniband/hw/vmw_pvrdma/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma.o -vmw_pvrdma-y := pvrdma_cmd.o pvrdma_cq.o pvrdma_doorbell.o pvrdma_main.o pvrdma_misc.o pvrdma_mr.o pvrdma_qp.o pvrdma_verbs.o +vmw_pvrdma-y := pvrdma_cmd.o pvrdma_cq.o pvrdma_doorbell.o pvrdma_main.o pvrdma_misc.o pvrdma_mr.o pvrdma_qp.o pvrdma_srq.o pvrdma_verbs.o diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 984aa3484928..63bc2efc34eb 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -162,6 +162,22 @@ struct pvrdma_ah { struct pvrdma_av av; }; +struct pvrdma_srq { + struct ib_srq ibsrq; + int offset; + spinlock_t lock; /* SRQ lock. */ + int wqe_cnt; + int wqe_size; + int max_gs; + struct ib_umem *umem; + struct pvrdma_ring_state *ring; + struct pvrdma_page_dir pdir; + u32 srq_handle; + int npages; + refcount_t refcnt; + wait_queue_head_t wait; +}; + struct pvrdma_qp { struct ib_qp ibqp; u32 qp_handle; @@ -171,6 +187,7 @@ struct pvrdma_qp { struct ib_umem *rumem; struct ib_umem *sumem; struct pvrdma_page_dir pdir; + struct pvrdma_srq *srq; int npages; int npages_send; int npages_recv; @@ -210,6 +227,8 @@ struct pvrdma_dev { struct pvrdma_page_dir cq_pdir; struct pvrdma_cq **cq_tbl; spinlock_t cq_tbl_lock; + struct pvrdma_srq **srq_tbl; + spinlock_t srq_tbl_lock; struct pvrdma_qp **qp_tbl; spinlock_t qp_tbl_lock; struct pvrdma_uar_table uar_table; @@ -221,6 +240,7 @@ struct pvrdma_dev { bool ib_active; atomic_t num_qps; atomic_t num_cqs; + atomic_t num_srqs; atomic_t num_pds; atomic_t num_ahs; @@ -256,6 +276,11 @@ static inline struct pvrdma_cq *to_vcq(struct ib_cq *ibcq) return container_of(ibcq, struct pvrdma_cq, ibcq); } +static inline struct pvrdma_srq *to_vsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct pvrdma_srq, ibsrq); +} + static inline struct pvrdma_user_mr *to_vmr(struct ib_mr *ibmr) { return container_of(ibmr, struct pvrdma_user_mr, ibmr); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index df0a6b525021..6fd5a8f4e2f6 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -339,6 +339,10 @@ enum { PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_DESTROY_BIND, + PVRDMA_CMD_CREATE_SRQ, + PVRDMA_CMD_MODIFY_SRQ, + PVRDMA_CMD_QUERY_SRQ, + PVRDMA_CMD_DESTROY_SRQ, PVRDMA_CMD_MAX, }; @@ -361,6 +365,10 @@ enum { PVRDMA_CMD_DESTROY_UC_RESP_NOOP, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, + PVRDMA_CMD_CREATE_SRQ_RESP, + PVRDMA_CMD_MODIFY_SRQ_RESP, + PVRDMA_CMD_QUERY_SRQ_RESP, + PVRDMA_CMD_DESTROY_SRQ_RESP, PVRDMA_CMD_MAX_RESP, }; @@ -495,6 +503,46 @@ struct pvrdma_cmd_destroy_cq { u8 reserved[4]; }; +struct pvrdma_cmd_create_srq { + struct pvrdma_cmd_hdr hdr; + u64 pdir_dma; + u32 pd_handle; + u32 nchunks; + struct pvrdma_srq_attr attrs; + u8 srq_type; + u8 reserved[7]; +}; + +struct pvrdma_cmd_create_srq_resp { + struct pvrdma_cmd_resp_hdr hdr; + u32 srqn; + u8 reserved[4]; +}; + +struct pvrdma_cmd_modify_srq { + struct pvrdma_cmd_hdr hdr; + u32 srq_handle; + u32 attr_mask; + struct pvrdma_srq_attr attrs; +}; + +struct pvrdma_cmd_query_srq { + struct pvrdma_cmd_hdr hdr; + u32 srq_handle; + u8 reserved[4]; +}; + +struct pvrdma_cmd_query_srq_resp { + struct pvrdma_cmd_resp_hdr hdr; + struct pvrdma_srq_attr attrs; +}; + +struct pvrdma_cmd_destroy_srq { + struct pvrdma_cmd_hdr hdr; + u32 srq_handle; + u8 reserved[4]; +}; + struct pvrdma_cmd_create_qp { struct pvrdma_cmd_hdr hdr; u64 pdir_dma; @@ -594,6 +642,10 @@ union pvrdma_cmd_req { struct pvrdma_cmd_destroy_qp destroy_qp; struct pvrdma_cmd_create_bind create_bind; struct pvrdma_cmd_destroy_bind destroy_bind; + struct pvrdma_cmd_create_srq create_srq; + struct pvrdma_cmd_modify_srq modify_srq; + struct pvrdma_cmd_query_srq query_srq; + struct pvrdma_cmd_destroy_srq destroy_srq; }; union pvrdma_cmd_resp { @@ -608,6 +660,8 @@ union pvrdma_cmd_resp { struct pvrdma_cmd_create_qp_resp create_qp_resp; struct pvrdma_cmd_query_qp_resp query_qp_resp; struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; + struct pvrdma_cmd_create_srq_resp create_srq_resp; + struct pvrdma_cmd_query_srq_resp query_srq_resp; }; #endif /* __PVRDMA_DEV_API_H__ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 6ce709a67959..1f4e18717a00 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -118,6 +118,7 @@ static int pvrdma_init_device(struct pvrdma_dev *dev) spin_lock_init(&dev->cmd_lock); sema_init(&dev->cmd_sema, 1); atomic_set(&dev->num_qps, 0); + atomic_set(&dev->num_srqs, 0); atomic_set(&dev->num_cqs, 0); atomic_set(&dev->num_pds, 0); atomic_set(&dev->num_ahs, 0); @@ -254,9 +255,32 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) goto err_cq_free; spin_lock_init(&dev->qp_tbl_lock); + /* Check if SRQ is supported by backend */ + if (dev->dsr->caps.max_srq) { + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); + + dev->ib_dev.create_srq = pvrdma_create_srq; + dev->ib_dev.modify_srq = pvrdma_modify_srq; + dev->ib_dev.query_srq = pvrdma_query_srq; + dev->ib_dev.destroy_srq = pvrdma_destroy_srq; + dev->ib_dev.post_srq_recv = pvrdma_post_srq_recv; + + dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, + sizeof(struct pvrdma_srq *), + GFP_KERNEL); + if (!dev->srq_tbl) + goto err_qp_free; + } + spin_lock_init(&dev->srq_tbl_lock); + ret = ib_register_device(&dev->ib_dev, NULL); if (ret) - goto err_qp_free; + goto err_srq_free; for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) { ret = device_create_file(&dev->ib_dev.dev, @@ -271,6 +295,8 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) err_class: ib_unregister_device(&dev->ib_dev); +err_srq_free: + kfree(dev->srq_tbl); err_qp_free: kfree(dev->qp_tbl); err_cq_free: @@ -353,6 +379,35 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type) } } +static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type) +{ + struct pvrdma_srq *srq; + unsigned long flags; + + spin_lock_irqsave(&dev->srq_tbl_lock, flags); + if (dev->srq_tbl) + srq = dev->srq_tbl[srqn % dev->dsr->caps.max_srq]; + else + srq = NULL; + if (srq) + refcount_inc(&srq->refcnt); + spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); + + if (srq && srq->ibsrq.event_handler) { + struct ib_srq *ibsrq = &srq->ibsrq; + struct ib_event e; + + e.device = ibsrq->device; + e.element.srq = ibsrq; + e.event = type; /* 1:1 mapping for now. */ + ibsrq->event_handler(&e, ibsrq->srq_context); + } + if (srq) { + if (refcount_dec_and_test(&srq->refcnt)) + wake_up(&srq->wait); + } +} + static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, enum ib_event_type event) { @@ -423,6 +478,7 @@ static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id) case PVRDMA_EVENT_SRQ_ERR: case PVRDMA_EVENT_SRQ_LIMIT_REACHED: + pvrdma_srq_event(dev, eqe->info, eqe->type); break; case PVRDMA_EVENT_PORT_ACTIVE: @@ -1059,6 +1115,7 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) iounmap(dev->regs); kfree(dev->sgid_tbl); kfree(dev->cq_tbl); + kfree(dev->srq_tbl); kfree(dev->qp_tbl); pvrdma_uar_table_cleanup(dev); iounmap(dev->driver_uar.map); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index ed34d5a581fa..10420a18d02f 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -198,6 +198,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct pvrdma_create_qp ucmd; unsigned long flags; int ret; + bool is_srq = !!init_attr->srq; if (init_attr->create_flags) { dev_warn(&dev->pdev->dev, @@ -214,6 +215,12 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } + if (is_srq && !dev->dsr->caps.max_srq) { + dev_warn(&dev->pdev->dev, + "SRQs not supported by device\n"); + return ERR_PTR(-EINVAL); + } + if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp)) return ERR_PTR(-ENOMEM); @@ -252,26 +259,36 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, goto err_qp; } - /* set qp->sq.wqe_cnt, shift, buf_size.. */ - qp->rumem = ib_umem_get(pd->uobject->context, - ucmd.rbuf_addr, - ucmd.rbuf_size, 0, 0); - if (IS_ERR(qp->rumem)) { - ret = PTR_ERR(qp->rumem); - goto err_qp; + if (!is_srq) { + /* set qp->sq.wqe_cnt, shift, buf_size.. */ + qp->rumem = ib_umem_get(pd->uobject->context, + ucmd.rbuf_addr, + ucmd.rbuf_size, 0, 0); + if (IS_ERR(qp->rumem)) { + ret = PTR_ERR(qp->rumem); + goto err_qp; + } + qp->srq = NULL; + } else { + qp->rumem = NULL; + qp->srq = to_vsrq(init_attr->srq); } qp->sumem = ib_umem_get(pd->uobject->context, ucmd.sbuf_addr, ucmd.sbuf_size, 0, 0); if (IS_ERR(qp->sumem)) { - ib_umem_release(qp->rumem); + if (!is_srq) + ib_umem_release(qp->rumem); ret = PTR_ERR(qp->sumem); goto err_qp; } qp->npages_send = ib_umem_page_count(qp->sumem); - qp->npages_recv = ib_umem_page_count(qp->rumem); + if (!is_srq) + qp->npages_recv = ib_umem_page_count(qp->rumem); + else + qp->npages_recv = 0; qp->npages = qp->npages_send + qp->npages_recv; } else { qp->is_kernel = true; @@ -312,12 +329,14 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, if (!qp->is_kernel) { pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0); - pvrdma_page_dir_insert_umem(&qp->pdir, qp->rumem, - qp->npages_send); + if (!is_srq) + pvrdma_page_dir_insert_umem(&qp->pdir, + qp->rumem, + qp->npages_send); } else { /* Ring state is always the first page. */ qp->sq.ring = qp->pdir.pages[0]; - qp->rq.ring = &qp->sq.ring[1]; + qp->rq.ring = is_srq ? NULL : &qp->sq.ring[1]; } break; default: @@ -333,6 +352,10 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, cmd->pd_handle = to_vpd(pd)->pd_handle; cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle; cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle; + if (is_srq) + cmd->srq_handle = to_vsrq(init_attr->srq)->srq_handle; + else + cmd->srq_handle = 0; cmd->max_send_wr = init_attr->cap.max_send_wr; cmd->max_recv_wr = init_attr->cap.max_recv_wr; cmd->max_send_sge = init_attr->cap.max_send_sge; @@ -340,6 +363,8 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, cmd->max_inline_data = init_attr->cap.max_inline_data; cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type); + cmd->is_srq = is_srq; + cmd->lkey = 0; cmd->access_flags = IB_ACCESS_LOCAL_WRITE; cmd->total_chunks = qp->npages; cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES; @@ -815,6 +840,12 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, return -EINVAL; } + if (qp->srq) { + dev_warn(&dev->pdev->dev, "QP associated with SRQ\n"); + *bad_wr = wr; + return -EINVAL; + } + spin_lock_irqsave(&qp->rq.lock, flags); while (wr) { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c new file mode 100644 index 000000000000..826ccb864596 --- /dev/null +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2016-2017 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of EITHER the GNU General Public License + * version 2 as published by the Free Software Foundation or the BSD + * 2-Clause License. This program is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License version 2 for more details at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html. + * + * You should have received a copy of the GNU General Public License + * along with this program available in the file COPYING in the main + * directory of this source tree. + * + * The BSD 2-Clause License + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include "pvrdma.h" + +int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + /* No support for kernel clients. */ + return -EOPNOTSUPP; +} + +/** + * pvrdma_query_srq - query shared receive queue + * @ibsrq: the shared receive queue to query + * @srq_attr: attributes to query and return to client + * + * @return: 0 for success, otherwise returns an errno. + */ +int pvrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct pvrdma_dev *dev = to_vdev(ibsrq->device); + struct pvrdma_srq *srq = to_vsrq(ibsrq); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_query_srq *cmd = &req.query_srq; + struct pvrdma_cmd_query_srq_resp *resp = &rsp.query_srq_resp; + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_QUERY_SRQ; + cmd->srq_handle = srq->srq_handle; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_SRQ_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not query shared receive queue, error: %d\n", + ret); + return -EINVAL; + } + + srq_attr->srq_limit = resp->attrs.srq_limit; + srq_attr->max_wr = resp->attrs.max_wr; + srq_attr->max_sge = resp->attrs.max_sge; + + return 0; +} + +/** + * pvrdma_create_srq - create shared receive queue + * @pd: protection domain + * @init_attr: shared receive queue attributes + * @udata: user data + * + * @return: the ib_srq pointer on success, otherwise returns an errno. + */ +struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct pvrdma_srq *srq = NULL; + struct pvrdma_dev *dev = to_vdev(pd->device); + union pvrdma_cmd_req req; + union pvrdma_cmd_resp rsp; + struct pvrdma_cmd_create_srq *cmd = &req.create_srq; + struct pvrdma_cmd_create_srq_resp *resp = &rsp.create_srq_resp; + struct pvrdma_create_srq ucmd; + unsigned long flags; + int ret; + + if (!(pd->uobject && udata)) { + /* No support for kernel clients. */ + dev_warn(&dev->pdev->dev, + "no shared receive queue support for kernel client\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->srq_type != IB_SRQT_BASIC) { + dev_warn(&dev->pdev->dev, + "shared receive queue type %d not supported\n", + init_attr->srq_type); + return ERR_PTR(-EINVAL); + } + + if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr || + init_attr->attr.max_sge > dev->dsr->caps.max_srq_sge) { + dev_warn(&dev->pdev->dev, + "shared receive queue size invalid\n"); + return ERR_PTR(-EINVAL); + } + + if (!atomic_add_unless(&dev->num_srqs, 1, dev->dsr->caps.max_srq)) + return ERR_PTR(-ENOMEM); + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) { + ret = -ENOMEM; + goto err_srq; + } + + spin_lock_init(&srq->lock); + refcount_set(&srq->refcnt, 1); + init_waitqueue_head(&srq->wait); + + dev_dbg(&dev->pdev->dev, + "create shared receive queue from user space\n"); + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ret = -EFAULT; + goto err_srq; + } + + srq->umem = ib_umem_get(pd->uobject->context, + ucmd.buf_addr, + ucmd.buf_size, 0, 0); + if (IS_ERR(srq->umem)) { + ret = PTR_ERR(srq->umem); + goto err_srq; + } + + srq->npages = ib_umem_page_count(srq->umem); + + if (srq->npages < 0 || srq->npages > PVRDMA_PAGE_DIR_MAX_PAGES) { + dev_warn(&dev->pdev->dev, + "overflow pages in shared receive queue\n"); + ret = -EINVAL; + goto err_umem; + } + + ret = pvrdma_page_dir_init(dev, &srq->pdir, srq->npages, false); + if (ret) { + dev_warn(&dev->pdev->dev, + "could not allocate page directory\n"); + goto err_umem; + } + + pvrdma_page_dir_insert_umem(&srq->pdir, srq->umem, 0); + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_CREATE_SRQ; + cmd->srq_type = init_attr->srq_type; + cmd->nchunks = srq->npages; + cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->attrs.max_wr = init_attr->attr.max_wr; + cmd->attrs.max_sge = init_attr->attr.max_sge; + cmd->attrs.srq_limit = init_attr->attr.srq_limit; + cmd->pdir_dma = srq->pdir.dir_dma; + + ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_SRQ_RESP); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not create shared receive queue, error: %d\n", + ret); + goto err_page_dir; + } + + srq->srq_handle = resp->srqn; + spin_lock_irqsave(&dev->srq_tbl_lock, flags); + dev->srq_tbl[srq->srq_handle % dev->dsr->caps.max_srq] = srq; + spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); + + /* Copy udata back. */ + if (ib_copy_to_udata(udata, &srq->srq_handle, sizeof(__u32))) { + dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); + pvrdma_destroy_srq(&srq->ibsrq); + return ERR_PTR(-EINVAL); + } + + return &srq->ibsrq; + +err_page_dir: + pvrdma_page_dir_cleanup(dev, &srq->pdir); +err_umem: + ib_umem_release(srq->umem); +err_srq: + kfree(srq); + atomic_dec(&dev->num_srqs); + + return ERR_PTR(ret); +} + +static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->srq_tbl_lock, flags); + dev->srq_tbl[srq->srq_handle] = NULL; + spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); + + refcount_dec(&srq->refcnt); + wait_event(srq->wait, !refcount_read(&srq->refcnt)); + + /* There is no support for kernel clients, so this is safe. */ + ib_umem_release(srq->umem); + + pvrdma_page_dir_cleanup(dev, &srq->pdir); + + kfree(srq); + + atomic_dec(&dev->num_srqs); +} + +/** + * pvrdma_destroy_srq - destroy shared receive queue + * @srq: the shared receive queue to destroy + * + * @return: 0 for success. + */ +int pvrdma_destroy_srq(struct ib_srq *srq) +{ + struct pvrdma_srq *vsrq = to_vsrq(srq); + union pvrdma_cmd_req req; + struct pvrdma_cmd_destroy_srq *cmd = &req.destroy_srq; + struct pvrdma_dev *dev = to_vdev(srq->device); + int ret; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_DESTROY_SRQ; + cmd->srq_handle = vsrq->srq_handle; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) + dev_warn(&dev->pdev->dev, + "destroy shared receive queue failed, error: %d\n", + ret); + + pvrdma_free_srq(dev, vsrq); + + return 0; +} + +/** + * pvrdma_modify_srq - modify shared receive queue attributes + * @ibsrq: the shared receive queue to modify + * @attr: the shared receive queue's new attributes + * @attr_mask: attributes mask + * @udata: user data + * + * @returns 0 on success, otherwise returns an errno. + */ +int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct pvrdma_srq *vsrq = to_vsrq(ibsrq); + union pvrdma_cmd_req req; + struct pvrdma_cmd_modify_srq *cmd = &req.modify_srq; + struct pvrdma_dev *dev = to_vdev(ibsrq->device); + int ret; + + /* Only support SRQ limit. */ + if (!(attr_mask & IB_SRQ_LIMIT)) + return -EINVAL; + + memset(cmd, 0, sizeof(*cmd)); + cmd->hdr.cmd = PVRDMA_CMD_MODIFY_SRQ; + cmd->srq_handle = vsrq->srq_handle; + cmd->attrs.srq_limit = attr->srq_limit; + cmd->attr_mask = attr_mask; + + ret = pvrdma_cmd_post(dev, &req, NULL, 0); + if (ret < 0) { + dev_warn(&dev->pdev->dev, + "could not modify shared receive queue, error: %d\n", + ret); + + return -EINVAL; + } + + return ret; +} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 48776f5ffb0e..16b96616ef7e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -85,6 +85,9 @@ int pvrdma_query_device(struct ib_device *ibdev, props->max_sge = dev->dsr->caps.max_sge; props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge, dev->dsr->caps.max_sge_rd); + props->max_srq = dev->dsr->caps.max_srq; + props->max_srq_wr = dev->dsr->caps.max_srq_wr; + props->max_srq_sge = dev->dsr->caps.max_srq_sge; props->max_cq = dev->dsr->caps.max_cq; props->max_cqe = dev->dsr->caps.max_cqe; props->max_mr = dev->dsr->caps.max_mr; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 002a9b066e70..b7b25728a7e5 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -324,6 +324,13 @@ enum pvrdma_mw_type { PVRDMA_MW_TYPE_2 = 2, }; +struct pvrdma_srq_attr { + u32 max_wr; + u32 max_sge; + u32 srq_limit; + u32 reserved; +}; + struct pvrdma_qp_attr { enum pvrdma_qp_state qp_state; enum pvrdma_qp_state cur_qp_state; @@ -420,6 +427,17 @@ int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata); int pvrdma_destroy_ah(struct ib_ah *ah); + +struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); +int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); +int pvrdma_destroy_srq(struct ib_srq *srq); +int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index c6569b0032ec..846c6f4859db 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -158,6 +158,8 @@ struct pvrdma_resize_cq { struct pvrdma_create_srq { __u64 buf_addr; + __u32 buf_size; + __u32 reserved; }; struct pvrdma_create_srq_resp { -- cgit v1.2.3 From e08ce2e82b2fc5cdd07de170e8b9e8327625005c Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Tue, 7 Nov 2017 10:49:09 +0200 Subject: RDMA/core: Make function rdma_copy_addr return void Function returns zero - make it void. While there make struct net_device const. Signed-off-by: Yuval Shaia Reviewed-by: Parav Pandit Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 27 ++++++++++++++------------- drivers/infiniband/core/cma.c | 8 ++------ include/rdma/ib_addr.h | 5 +++-- 3 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index d2f74721b3ba..f4e8185bccd3 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -229,8 +229,9 @@ void rdma_addr_unregister_client(struct rdma_addr_client *client) } EXPORT_SYMBOL(rdma_addr_unregister_client); -int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, - const unsigned char *dst_dev_addr) +void rdma_copy_addr(struct rdma_dev_addr *dev_addr, + const struct net_device *dev, + const unsigned char *dst_dev_addr) { dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); @@ -238,7 +239,6 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); dev_addr->bound_dev_if = dev->ifindex; - return 0; } EXPORT_SYMBOL(rdma_copy_addr); @@ -247,15 +247,14 @@ int rdma_translate_ip(const struct sockaddr *addr, u16 *vlan_id) { struct net_device *dev; - int ret = -EADDRNOTAVAIL; if (dev_addr->bound_dev_if) { dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!dev) return -ENODEV; - ret = rdma_copy_addr(dev_addr, dev, NULL); + rdma_copy_addr(dev_addr, dev, NULL); dev_put(dev); - return ret; + return 0; } switch (addr->sa_family) { @@ -264,9 +263,9 @@ int rdma_translate_ip(const struct sockaddr *addr, ((const struct sockaddr_in *)addr)->sin_addr.s_addr); if (!dev) - return ret; + return -EADDRNOTAVAIL; - ret = rdma_copy_addr(dev_addr, dev, NULL); + rdma_copy_addr(dev_addr, dev, NULL); dev_addr->bound_dev_if = dev->ifindex; if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); @@ -279,7 +278,7 @@ int rdma_translate_ip(const struct sockaddr *addr, if (ipv6_chk_addr(dev_addr->net, &((const struct sockaddr_in6 *)addr)->sin6_addr, dev, 1)) { - ret = rdma_copy_addr(dev_addr, dev, NULL); + rdma_copy_addr(dev_addr, dev, NULL); dev_addr->bound_dev_if = dev->ifindex; if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); @@ -290,7 +289,7 @@ int rdma_translate_ip(const struct sockaddr *addr, break; #endif } - return ret; + return 0; } EXPORT_SYMBOL(rdma_translate_ip); @@ -336,7 +335,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const void *daddr) { struct neighbour *n; - int ret; + int ret = 0; n = dst_neigh_lookup(dst, daddr); @@ -346,7 +345,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, neigh_event_send(n, NULL); ret = -ENODATA; } else { - ret = rdma_copy_addr(dev_addr, dst->dev, n->ha); + rdma_copy_addr(dev_addr, dst->dev, n->ha); } rcu_read_unlock(); @@ -494,7 +493,9 @@ static int addr_resolve_neigh(struct dst_entry *dst, if (!(dst->dev->flags & IFF_NOARP)) return fetch_ha(dst, addr, dst_in, seq); - return rdma_copy_addr(addr, dst->dev, NULL); + rdma_copy_addr(addr, dst->dev, NULL); + + return 0; } static int addr_resolve(struct sockaddr *src_in, diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fa79c7076ccd..1fdb473b5df7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1846,9 +1846,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (net_dev) { - ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); - if (ret) - goto err; + rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); } else { if (!cma_protocol_roce(listen_id) && cma_any_addr(cma_src_addr(id_priv))) { @@ -1894,9 +1892,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, goto err; if (net_dev) { - ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); - if (ret) - goto err; + rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); } else { if (!cma_any_addr(cma_src_addr(id_priv))) { ret = cma_translate_addr(cma_src_addr(id_priv), diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index b2a10c762304..18c564f60e93 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -125,8 +125,9 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, void rdma_addr_cancel(struct rdma_dev_addr *addr); -int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, - const unsigned char *dst_dev_addr); +void rdma_copy_addr(struct rdma_dev_addr *dev_addr, + const struct net_device *dev, + const unsigned char *dst_dev_addr); int rdma_addr_size(struct sockaddr *addr); -- cgit v1.2.3 From 869ddcf8b351ace5bf8860f3cd6265dccb382426 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:13 +0200 Subject: IB/uverbs: Allow CQ moderation with modify CQ Uverbs support in modify_cq for CQ moderation only. Gives ability to change cq_max_count and cq_period. CQ moderation enhance performance by moderating the number of CQEs needed to create an event instead of application having to suffer from event per-CQE. To achieve CQ moderation the application needs to set cq_max_count and cq_period. cq_max_count - defines the number of CQEs needed to create an event. cq_period - defines the timeout (micro seconds) between last event and a new one that will occur even if cq_max_count was not satisfied Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 42 +++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 1 + include/rdma/ib_verbs.h | 4 ++++ include/uapi/rdma/ib_user_verbs.h | 15 ++++++++++++- 5 files changed, 62 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index ee2739ae4305..deccefb71a6b 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -306,5 +306,6 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_wq); IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(modify_qp); +IB_UVERBS_DECLARE_EX_CMD(modify_cq); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 8ca36843ef38..3c2673cd4090 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3856,3 +3856,45 @@ end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; } + +int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_cq cmd = {}; + struct ib_cq *cq; + size_t required_cmd_sz; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), reserved) + + sizeof(cmd.reserved); + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + /* sanity checks */ + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (!cmd.attr_mask || cmd.reserved) + return -EINVAL; + + if (cmd.attr_mask > IB_CQ_MODERATE) + return -EOPNOTSUPP; + + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); + if (!cq) + return -EINVAL; + + ret = ib_modify_cq(cq, cmd.attr.cq_count, cmd.attr.cq_period); + + uobj_put_obj_read(cq); + + return ret; +} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index b5febfd84ee5..381fd9c096ae 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -128,6 +128,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp, + [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq, }; static void ib_uverbs_add_one(struct ib_device *device); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0b671982bbb3..8e0d3780ce4e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -311,6 +311,10 @@ struct ib_cq_init_attr { u32 flags; }; +enum ib_cq_attr_mask { + IB_CQ_MODERATE = 1 << 0, +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index d4e0b53bfc75..cfa09d6095d6 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -100,7 +100,8 @@ enum { IB_USER_VERBS_EX_CMD_MODIFY_WQ, IB_USER_VERBS_EX_CMD_DESTROY_WQ, IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, - IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL + IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, + IB_USER_VERBS_EX_CMD_MODIFY_CQ }; /* @@ -1150,6 +1151,18 @@ struct ib_uverbs_ex_destroy_rwq_ind_table { __u32 ind_tbl_handle; }; +struct ib_uverbs_cq_moderation { + __u16 cq_count; + __u16 cq_period; +}; + +struct ib_uverbs_ex_modify_cq { + __u32 cq_handle; + __u32 attr_mask; + struct ib_uverbs_cq_moderation attr; + __u32 reserved; +}; + #define IB_DEVICE_NAME_MAX 64 #endif /* IB_USER_VERBS_H */ -- cgit v1.2.3 From 34d9a270e74a412f041b528c33b75e3e6bc7a242 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:14 +0200 Subject: IB/mlx4: Exposing modify CQ callback to uverbs layer Exposed mlx4_ib_modify_cq to be called from ib device verb list. Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 3 +++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + include/linux/mlx4/cq.h | 3 +++ 3 files changed, 7 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d8f0b94f1dc4..28c10ffc19e2 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2750,6 +2750,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; + ibdev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); + if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 719dae354066..e14919c15b06 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -47,6 +47,7 @@ #include #include #include +#include #define MLX4_IB_DRV_NAME "mlx4_ib" diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h index 09cebe528488..508e8cc5ee86 100644 --- a/include/linux/mlx4/cq.h +++ b/include/linux/mlx4/cq.h @@ -136,6 +136,9 @@ enum { MLX4_CQE_BAD_FCS = 1 << 4, }; +#define MLX4_MAX_CQ_PERIOD (BIT(16) - 1) +#define MLX4_MAX_CQ_COUNT (BIT(16) - 1) + static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd, void __iomem *uar_page, spinlock_t *doorbell_lock) -- cgit v1.2.3 From b0e9df6da25890448ebd134b7f647f16bced9abc Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:15 +0200 Subject: IB/mlx5: Exposing modify CQ callback to uverbs layer Exposed mlx5_ib_modify_cq to be called from ib device verb list. Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 3 +++ drivers/infiniband/hw/mlx5/main.c | 3 ++- include/linux/mlx5/cq.h | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 01b218a3c277..18705cbcdc8c 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1149,6 +1149,9 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) return -ENOSYS; + if (cq_period > MLX5_MAX_CQ_PERIOD) + return -EINVAL; + err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq, cq_period, cq_count); if (err) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d2aadb13637b..5a0ed6a499f4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4022,7 +4022,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP); + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); dev->ib_dev.query_device = mlx5_ib_query_device; dev->ib_dev.query_port = mlx5_ib_query_port; diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index cc718e245b1e..6be357b219ec 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -128,6 +128,9 @@ enum { CQE_SIZE_128_PAD = 2, }; +#define MLX5_MAX_CQ_PERIOD (BIT(__mlx5_bit_sz(cqc, cq_period)) - 1) +#define MLX5_MAX_CQ_COUNT (BIT(__mlx5_bit_sz(cqc, cq_max_count)) - 1) + static inline int cqe_sz_to_mlx_sz(u8 size, int padding_128_en) { return padding_128_en ? CQE_SIZE_128_PAD : -- cgit v1.2.3 From 18bd90729237dc6ddbad01bc9618148224f03590 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:16 +0200 Subject: IB/uverbs: Add CQ moderation capability to query_device The query_device function can now obtain the maximum values for cq_max_count and cq_period, needed for CQ moderation. cq_max_count is a 16 bits number that determines the number of CQEs to accumulate before generating an event. cq_period is a 16 bits number that determines the timeout in micro seconds from the last event generated, upon which a new event will be generated even if cq_max_count was not reached. Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 9 +++++++++ include/rdma/ib_verbs.h | 6 ++++++ include/uapi/rdma/ib_user_verbs.h | 7 +++++++ 3 files changed, 22 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3c2673cd4090..53143e4b1c50 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3852,6 +3852,15 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.tm_caps.max_sge = attr.tm_caps.max_sge; resp.tm_caps.flags = attr.tm_caps.flags; resp.response_length += sizeof(resp.tm_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.cq_moderation_caps)) + goto end; + + resp.cq_moderation_caps.max_cq_moderation_count = + attr.cq_caps.max_cq_moderation_count; + resp.cq_moderation_caps.max_cq_moderation_period = + attr.cq_caps.max_cq_moderation_period; + resp.response_length += sizeof(resp.cq_moderation_caps); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8e0d3780ce4e..0b484c023fa9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -315,6 +315,11 @@ enum ib_cq_attr_mask { IB_CQ_MODERATE = 1 << 0, }; +struct ib_cq_caps { + u16 max_cq_moderation_count; + u16 max_cq_moderation_period; +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; @@ -365,6 +370,7 @@ struct ib_device_attr { u32 max_wq_type_rq; u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ struct ib_tm_caps tm_caps; + struct ib_cq_caps cq_caps; }; enum ib_mtu { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index cfa09d6095d6..5186fb12629b 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -125,6 +125,12 @@ struct ib_uverbs_comp_event_desc { __u64 cq_handle; }; +struct ib_uverbs_cq_moderation_caps { + __u16 max_cq_moderation_count; + __u16 max_cq_moderation_period; + __u32 reserved; +}; + /* * All commands from userspace should start with a __u32 command field * followed by __u16 in_words and out_words fields (which give the @@ -263,6 +269,7 @@ struct ib_uverbs_ex_query_device_resp { __u32 max_wq_type_rq; __u32 raw_packet_caps; struct ib_uverbs_tm_caps tm_caps; + struct ib_uverbs_cq_moderation_caps cq_moderation_caps; }; struct ib_uverbs_query_port { -- cgit v1.2.3 From 4190b4e96954e2c3597021d29435c3f8db8d3129 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 13 Nov 2017 10:51:19 +0200 Subject: RDMA/core: Rename kernel modify_cq to better describe its usage Current ib_modify_cq() is used to set CQ moderation parameters. This patch renames ib_modify_cq() to be rdma_set_cq_moderation(), because the kernel version of RDMA API doesn't need to follow already exposed to user's API pattern (create_XXX/modify_XXX/query_XXX/destroy_XXX) and better to have more accurate name which describes the actual usage. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/verbs.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 5 +++-- include/rdma/ib_verbs.h | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 53143e4b1c50..16d55710b116 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3901,7 +3901,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, if (!cq) return -EINVAL; - ret = ib_modify_cq(cq, cmd.attr.cq_count, cmd.attr.cq_period); + ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period); uobj_put_obj_read(cq); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index d8f1a5d34f4f..3fb8fb6cc824 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1550,12 +1550,12 @@ struct ib_cq *ib_create_cq(struct ib_device *device, } EXPORT_SYMBOL(ib_create_cq); -int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) { return cq->device->modify_cq ? cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS; } -EXPORT_SYMBOL(ib_modify_cq); +EXPORT_SYMBOL(rdma_set_cq_moderation); int ib_destroy_cq(struct ib_cq *cq) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 8dc1e6225cc8..2706bf26cbac 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -99,8 +99,9 @@ static int ipoib_set_coalesce(struct net_device *dev, coal->rx_max_coalesced_frames > 0xffff) return -EINVAL; - ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames, - coal->rx_coalesce_usecs); + ret = rdma_set_cq_moderation(priv->recv_cq, + coal->rx_max_coalesced_frames, + coal->rx_coalesce_usecs); if (ret && ret != -ENOSYS) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); return ret; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0b484c023fa9..fd84cda5ed7c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3169,13 +3169,13 @@ struct ib_cq *ib_create_cq(struct ib_device *device, int ib_resize_cq(struct ib_cq *cq, int cqe); /** - * ib_modify_cq - Modifies moderation params of the CQ + * rdma_set_cq_moderation - Modifies moderation params of the CQ * @cq: The CQ to modify. * @cq_count: number of CQEs that will trigger an event * @cq_period: max period of time in usec before triggering an event * */ -int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period); /** * ib_destroy_cq - Destroys the specified CQ. -- cgit v1.2.3