From a917374e8a20cdfbaaaaaa2b01331dc063d38848 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Mon, 2 Oct 2017 11:04:33 -0700 Subject: IB/core: Use __be32 for LIDs in opa_is_extended_lid The LIDs passed to opa_is_extended_lid are in __be32 format, so change the function signature accordingly. This fixes the following sparse warnings: drivers/infiniband/core/cm.c:1181:60: warning: incorrect type in argument 1 (different ba drivers/infiniband/core/cm.c:1182:60: warning: incorrect type in argument 2 (different ba drivers/infiniband/core/cm.c:1242:68: warning: incorrect type in argument 1 (different ba drivers/infiniband/core/cm.c:1243:68: warning: incorrect type in argument 2 (different ba drivers/infiniband/core/cm.c:2922:66: warning: incorrect type in argument 1 (different ba drivers/infiniband/core/cm.c:2923:66: warning: incorrect type in argument 2 (different ba include/rdma/opa_addr.h:102:14: warning: cast to restricted __be32 Fixes: e92aa00a5189 ("IB/CM: Add OPA Path record support to CM") Reviewed-by: Mike Marciniszyn Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/opa_addr.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index e6e90f18e6d5..f68fca296631 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -97,15 +97,15 @@ static inline u32 opa_get_lid_from_gid(const union ib_gid *gid) * @dlid: The DLID * @slid: The SLID */ -static inline bool opa_is_extended_lid(u32 dlid, u32 slid) +static inline bool opa_is_extended_lid(__be32 dlid, __be32 slid) { if ((be32_to_cpu(dlid) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) || (be32_to_cpu(slid) >= be16_to_cpu(IB_MULTICAST_LID_BASE))) return true; - else - return false; + + return false; } /* Get multicast lid base */ -- cgit v1.2.3 From 7ebfc93edcefa0c4c09f194ebdace9ddae15efa6 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Mon, 2 Oct 2017 11:04:41 -0700 Subject: IB/rdmavt: Correct issues with read-mostly and send size cache lines The s_ahgpsn field was incorrectly placed in the read-mostly section of the QP, and the s_cur_size and s_hdrwords fields are oversized. The misplaced s_ahgpsn will cause the read-mostly cachelines to thrash. Place s_ahgpsn in the send side cache lines and correctly size s_hdrwords and s_cur_size to keep the send side cachelines at the same size.
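As an editorial aside, the layout rule the rdmavt patch applies can be shown with a small, self-contained C sketch. Everything below is hypothetical (field names are borrowed for illustration; this is not the kernel's struct rvt_qp): per-packet send-side stores such as s_ahgpsn must not land on the cache line that holds read-mostly fields like remote_qpn, and shrinking s_cur_size/s_hdrwords keeps the send-side group from growing by an extra line.

/*
 * Hypothetical standalone sketch, not the kernel's struct rvt_qp.
 * Build with: cc -std=c11 -o qp_layout qp_layout.c
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CACHE_LINE 64

struct example_qp {
	/* read-mostly: written at QP setup, read on every send */
	uint32_t remote_qpn;
	uint32_t qkey;
	uint32_t s_size;
	uint16_t pmtu;
	uint8_t  log_pmtu;

	uint8_t  pad[CACHE_LINE - 15];	/* start the send group on a new line */

	/* send side: written by the sending CPU on every packet */
	uint32_t s_ahgpsn;	/* moved here from the read-mostly section */
	uint16_t s_cur_size;	/* packet size in bytes fits in 16 bits */
	uint8_t  s_hdrwords;	/* header length in 32-bit words fits in 8 bits */
};

/* If this ever fails, a send-path store would thrash the read-mostly line. */
_Static_assert(offsetof(struct example_qp, s_ahgpsn) / CACHE_LINE !=
	       offsetof(struct example_qp, remote_qpn) / CACHE_LINE,
	       "send-side fields share a cache line with read-mostly fields");

int main(void)
{
	printf("read-mostly line: %zu, send-side line: %zu\n",
	       offsetof(struct example_qp, remote_qpn) / CACHE_LINE,
	       offsetof(struct example_qp, s_ahgpsn) / CACHE_LINE);
	return 0;
}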
Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 ++-- include/rdma/rdmavt_qp.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 98bcd06c82d7..7befba8fceb2 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8739,8 +8739,8 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state) return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL); } -int _load_8051_config(struct hfi1_devdata *dd, u8 field_id, - u8 lane_id, u32 config_data) +static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id, + u8 lane_id, u32 config_data) { u64 data; int ret; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 0eed3d8752fa..89ab88c342b6 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -282,7 +282,6 @@ struct rvt_qp { u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ - u32 s_ahgpsn; /* set to the psn in the copy of the header */ u16 pmtu; /* decoded from path_mtu */ u8 log_pmtu; /* shift for pmtu */ @@ -344,7 +343,6 @@ struct rvt_qp { struct rvt_swqe *s_wqe; struct rvt_sge_state s_sge; /* current send request data */ struct rvt_mregion *s_rdma_mr; - u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ u32 s_last_psn; /* last response PSN processed */ @@ -358,8 +356,10 @@ struct rvt_qp { u32 s_acked; /* last un-ACK'ed entry */ u32 s_last; /* last completed entry */ u32 s_lsn; /* limit sequence number (credit) */ - u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u32 s_ahgpsn; /* set to the psn in the copy of the header */ + u16 s_cur_size; /* size of send packet in bytes */ u16 s_rdma_ack_cnt; + u8 s_hdrwords; /* size of s_hdr in 32 bit words */ s8 s_ahgidx; u8 s_state; /* opcode of last packet sent */ u8 s_ack_state; /* opcode of packet to ACK */ -- cgit v1.2.3 From 9d18717790c43c904fabe9da7a4c6b2ebed2c4d8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 6 Oct 2017 13:06:17 -0700 Subject: IB/core: Simplify sa_path_set_[sd]lid() calls Instead of making every caller convert the second argument of sa_path_set_slid() and sa_path_set_dlid() to big endian format, make these two functions accept LIDs in CPU endian format. This patch does not change any functionality. 
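As a quick illustration of the new calling convention (a standalone sketch with a simplified record type, not the kernel's struct sa_path_rec), the setter owns the CPU-to-wire conversion, which removes the htonl(ntohs(...)) chains the old cm.c callers needed:

/*
 * Standalone sketch, simplified types.  Build with: cc -o setlid setlid.c
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct path_rec {
	int      is_opa;
	uint16_t ib_dlid;   /* 16-bit IB LID, stored big endian */
	uint32_t opa_dlid;  /* 32-bit OPA LID, stored big endian */
};

/* dlid is in CPU order, as in the reworked sa_path_set_dlid() above */
static void path_set_dlid(struct path_rec *rec, uint32_t dlid)
{
	if (rec->is_opa)
		rec->opa_dlid = htonl(dlid);
	else
		rec->ib_dlid = htons((uint16_t)dlid);
}

int main(void)
{
	struct path_rec rec = { .is_opa = 0 };
	uint16_t wire_lid = htons(0xC001);	/* LID as it arrives off the wire */

	path_set_dlid(&rec, ntohs(wire_lid));	/* caller converts to CPU order once */
	printf("stored dlid, wire order: 0x%04x\n", ntohs(rec.ib_dlid));
	return 0;
}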
Signed-off-by: Bart Van Assche Cc: Sean Hefty Cc: Dasaratharaman Chandramouli Cc: Don Hiatt Cc: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 26 ++++++++++++-------------- drivers/infiniband/core/uverbs_marshall.c | 10 +++++----- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- include/rdma/ib_sa.h | 12 ++++++------ 4 files changed, 24 insertions(+), 26 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4c4b46586af2..d80911d4abb7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1472,31 +1472,29 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) { sa_path_set_dlid(primary_path, - htonl(ntohs(req_msg->primary_local_lid))); + ntohs(req_msg->primary_local_lid)); sa_path_set_slid(primary_path, - htonl(ntohs(req_msg->primary_remote_lid))); + ntohs(req_msg->primary_remote_lid)); } else { lid = opa_get_lid_from_gid(&req_msg->primary_local_gid); - sa_path_set_dlid(primary_path, cpu_to_be32(lid)); + sa_path_set_dlid(primary_path, lid); lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid); - sa_path_set_slid(primary_path, cpu_to_be32(lid)); + sa_path_set_slid(primary_path, lid); } if (!cm_req_has_alt_path(req_msg)) return; if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(alt_path, - htonl(ntohs(req_msg->alt_local_lid))); - sa_path_set_slid(alt_path, - htonl(ntohs(req_msg->alt_remote_lid))); + sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid)); + sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid)); } else { lid = opa_get_lid_from_gid(&req_msg->alt_local_gid); - sa_path_set_dlid(alt_path, cpu_to_be32(lid)); + sa_path_set_dlid(alt_path, lid); lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid); - sa_path_set_slid(alt_path, cpu_to_be32(lid)); + sa_path_set_slid(alt_path, lid); } } @@ -3037,14 +3035,14 @@ static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg, u32 lid; if (path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid))); - sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid))); + sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid)); + sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid)); } else { lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid); - sa_path_set_dlid(path, cpu_to_be32(lid)); + sa_path_set_dlid(path, lid); lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid); - sa_path_set_slid(path, cpu_to_be32(lid)); + sa_path_set_slid(path, lid); } } diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index bd0acf376af0..aeb2824efbc7 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -176,18 +176,18 @@ EXPORT_SYMBOL(ib_copy_path_rec_to_user); void ib_copy_path_rec_from_user(struct sa_path_rec *dst, struct ib_user_path_rec *src) { - __be32 slid, dlid; + u32 slid, dlid; memset(dst, 0, sizeof(*dst)); if ((ib_is_opa_gid((union ib_gid *)src->sgid)) || (ib_is_opa_gid((union ib_gid *)src->dgid))) { dst->rec_type = SA_PATH_REC_TYPE_OPA; - slid = htonl(opa_get_lid_from_gid((union ib_gid *)src->sgid)); - dlid = htonl(opa_get_lid_from_gid((union ib_gid *)src->dgid)); + slid = opa_get_lid_from_gid((union ib_gid *)src->sgid); + dlid = opa_get_lid_from_gid((union ib_gid *)src->dgid); } else { dst->rec_type = SA_PATH_REC_TYPE_IB; - slid = htonl(ntohs(src->slid)); - dlid = htonl(ntohs(src->dlid)); + slid = 
ntohs(src->slid); + dlid = ntohs(src->dlid); } memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid); memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index fa5ccdb3bb2a..778a08250f16 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2403,7 +2403,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, switch (event->param.rej_rcvd.reason) { case IB_CM_REJ_PORT_CM_REDIRECT: cpi = event->param.rej_rcvd.ari; - sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid))); + sa_path_set_dlid(&ch->path, ntohs(cpi->redirect_lid)); ch->path.pkey = cpi->redirect_pkey; cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16); diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 355b81f4242d..1f7f604db5aa 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -590,20 +590,20 @@ static inline bool sa_path_is_roce(struct sa_path_rec *rec) (rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2)); } -static inline void sa_path_set_slid(struct sa_path_rec *rec, __be32 slid) +static inline void sa_path_set_slid(struct sa_path_rec *rec, u32 slid) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) - rec->ib.slid = htons(ntohl(slid)); + rec->ib.slid = cpu_to_be16(slid); else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) - rec->opa.slid = slid; + rec->opa.slid = cpu_to_be32(slid); } -static inline void sa_path_set_dlid(struct sa_path_rec *rec, __be32 dlid) +static inline void sa_path_set_dlid(struct sa_path_rec *rec, u32 dlid) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) - rec->ib.dlid = htons(ntohl(dlid)); + rec->ib.dlid = cpu_to_be16(dlid); else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) - rec->opa.dlid = dlid; + rec->opa.dlid = cpu_to_be32(dlid); } static inline void sa_path_set_raw_traffic(struct sa_path_rec *rec, -- cgit v1.2.3 From 1c3aea2bc8f0b2e5b57375ead40457ff75a3a2ec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 11 Oct 2017 10:48:43 -0700 Subject: IB/core: Fix endianness annotation in rdma_is_multicast_addr() Since ipv4_addr is a big endian 32-bit number, annotate it as such. Fixes: commit be1d325a3358 ("IB/core: Set RoCEv2 MGID according to spec") Signed-off-by: Bart Van Assche Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index ec5008cf5d51..cfa82d16573d 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -305,12 +305,12 @@ static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) static inline int rdma_is_multicast_addr(struct in6_addr *addr) { - u32 ipv4_addr; + __be32 ipv4_addr; if (addr->s6_addr[0] == 0xff) return 1; - memcpy(&ipv4_addr, addr->s6_addr + 12, 4); + ipv4_addr = addr->s6_addr32[3]; return (ipv6_addr_v4mapped(addr) && ipv4_is_multicast(ipv4_addr)); } -- cgit v1.2.3 From 5cda6587feec790a089703dde2e6e1f82de50bbd Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:12 +0300 Subject: IB/core: Introduce and use rdma_create_user_ah Introduce rdma_create_user_ah API which allows passing udata to provider driver and additionally which resolves DMAC for RoCE. ib_resolve_eth_dmac() resolves destination mac address for unicast, multicast, link local ipv4 mapped ipv6 and ipv6 destination gid entry. This allows all RoCE provider drivers to avoid duplicating such code. 
Such change brings consistency where IB core always resolves dmac and pass it to RoCE provider drivers for user and kernel consumers, with this ah_attr->roce.dmac is always an input field for provider drivers. This uniformity avoids exporting ib_resolve_eth_dmac symbol to providers or other modules. Therefore its removed as exported symbol at later in the patch series. Now uverbs and umad both makes use of rdma_create_user_ah API which fixes the issue where umad has invalid DMAC for address. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 10 +-------- drivers/infiniband/core/verbs.c | 40 ++++++++++++++++++++++++++++++++++-- include/rdma/ib_verbs.h | 15 ++++++++++++++ 4 files changed, 55 insertions(+), 12 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index c1696e6084b2..38809962c105 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -506,7 +506,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, rdma_ah_set_dgid_raw(&ah_attr, packet->mad.hdr.gid); } - ah = rdma_create_ah(agent->qp->pd, &ah_attr); + ah = rdma_create_user_ah(agent->qp->pd, &ah_attr, NULL); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_up; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 52a2cf2d83aa..0cef1863e2dc 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2537,7 +2537,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct rdma_ah_attr attr; int ret; struct ib_udata udata; - u8 *dmac; if (out_len < sizeof resp) return -ENOSPC; @@ -2580,20 +2579,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, } else { rdma_ah_set_ah_flags(&attr, 0); } - dmac = rdma_ah_retrieve_dmac(&attr); - if (dmac) - memset(dmac, 0, ETH_ALEN); - - ah = pd->device->create_ah(pd, &attr, &udata); + ah = rdma_create_user_ah(pd, &attr, &udata); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; } - ah->device = pd->device; - ah->pd = pd; - atomic_inc(&pd->usecnt); ah->uobject = uobj; uobj->user_handle = cmd.user_handle; uobj->object = ah; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index de57d6c11a25..4dcfe47c479d 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -302,11 +302,13 @@ EXPORT_SYMBOL(ib_dealloc_pd); /* Address handles */ -struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) +static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata) { struct ib_ah *ah; - ah = pd->device->create_ah(pd, ah_attr, NULL); + ah = pd->device->create_ah(pd, ah_attr, udata); if (!IS_ERR(ah)) { ah->device = pd->device; @@ -318,8 +320,42 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) return ah; } + +struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) +{ + return _rdma_create_ah(pd, ah_attr, NULL); +} EXPORT_SYMBOL(rdma_create_ah); +/** + * rdma_create_user_ah - Creates an address handle for the + * given address vector. + * It resolves destination mac address for ah attribute of RoCE type. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. 
+ * @udata: pointer to user's input output buffer information need by + * provider driver. + * + * It returns 0 on success and returns appropriate error code on error. + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata) +{ + int err; + + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { + err = ib_resolve_eth_dmac(pd->device, ah_attr); + if (err) + return ERR_PTR(err); + } + + return _rdma_create_ah(pd, ah_attr, udata); +} +EXPORT_SYMBOL(rdma_create_user_ah); + int ib_get_rdma_header_version(const union rdma_network_hdr *hdr) { const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e8608b2dc844..09c4a695155e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2857,6 +2857,21 @@ void ib_dealloc_pd(struct ib_pd *pd); */ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr); +/** + * rdma_create_user_ah - Creates an address handle for the given address vector. + * It resolves destination mac address for ah attribute of RoCE type. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * @udata: pointer to user's input output buffer information need by + * provider driver. + * + * It returns 0 on success and returns appropriate error code on error. + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + struct ib_udata *udata); /** * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header * work completion. -- cgit v1.2.3 From c0348eb069687a2f27c0cd23dafb35918edf9e75 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:13 +0300 Subject: IB: Let ib_core resolve destination mac address Since IB/core resolves the destination mac address for user and kernel consumers, avoid resolving in multiple provider drivers. Only ib_core resolves DMAC now, therefore resolve_eth_dmac is removed as exported symbol. 
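To make the resulting contract concrete, here is a hedged sketch with invented exdrv_* types (not code from any driver touched above): after these two patches a RoCE provider's ->create_ah() treats ah_attr->roce.dmac purely as an input and copies it, with no per-driver call to rdma_addr_find_l2_eth_by_grh().

/* Hypothetical provider sketch; all exdrv_* names are invented. */
#include <linux/err.h>
#include <linux/etherdevice.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct exdrv_ah {
	struct ib_ah ibah;
	u8 hw_dmac[ETH_ALEN];
};

static struct ib_ah *exdrv_create_ah(struct ib_pd *pd,
				     struct rdma_ah_attr *ah_attr,
				     struct ib_udata *udata)
{
	struct exdrv_ah *ah;

	if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE)
		return ERR_PTR(-EINVAL);

	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
	if (!ah)
		return ERR_PTR(-ENOMEM);

	/*
	 * ib_core (rdma_create_ah/rdma_create_user_ah) has already run
	 * ib_resolve_eth_dmac(), so the DMAC is ready to consume here for
	 * unicast, multicast and link-local destinations alike.
	 */
	ether_addr_copy(ah->hw_dmac, ah_attr->roce.dmac);

	return &ah->ibah;
}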
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 8 +++++--- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 8 -------- drivers/infiniband/hw/hns/hns_roce_ah.c | 14 +------------- drivers/infiniband/hw/mlx4/ah.c | 8 +++----- drivers/infiniband/hw/mlx5/ah.c | 4 ---- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 15 --------------- include/rdma/ib_verbs.h | 2 -- 7 files changed, 9 insertions(+), 50 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 4dcfe47c479d..d8f1a5d34f4f 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -53,6 +53,9 @@ #include "core_priv.h" +static int ib_resolve_eth_dmac(struct ib_device *device, + struct rdma_ah_attr *ah_attr); + static const char * const ib_events[] = { [IB_EVENT_CQ_ERR] = "CQ error", [IB_EVENT_QP_FATAL] = "QP fatal error", @@ -1257,8 +1260,8 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); -int ib_resolve_eth_dmac(struct ib_device *device, - struct rdma_ah_attr *ah_attr) +static int ib_resolve_eth_dmac(struct ib_device *device, + struct rdma_ah_attr *ah_attr) { int ret = 0; struct ib_global_route *grh; @@ -1317,7 +1320,6 @@ int ib_resolve_eth_dmac(struct ib_device *device, out: return ret; } -EXPORT_SYMBOL(ib_resolve_eth_dmac); /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0d89621d9fe8..20e5eb9290ad 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -729,14 +729,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V1; break; } - rc = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, - ah_attr->roce.dmac, &vlan_tag, - &sgid_attr.ndev->ifindex, - NULL); - if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to get dmac\n"); - goto fail; - } } memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index d545302b8ef8..05bab4a39d91 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -48,7 +48,6 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_gid_attr gid_attr; struct hns_roce_ah *ah; u16 vlan_tag = 0xffff; - struct in6_addr in6; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); union ib_gid sgid; int ret; @@ -58,18 +57,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, return ERR_PTR(-ENOMEM); /* Get mac address */ - memcpy(&in6, grh->dgid.raw, sizeof(grh->dgid.raw)); - if (rdma_is_multicast_addr(&in6)) { - rdma_get_mcast_mac(&in6, ah->av.mac); - } else { - u8 *dmac = rdma_ah_retrieve_dmac(ah_attr); - - if (!dmac) { - kfree(ah); - return ERR_PTR(-EINVAL); - } - memcpy(ah->av.mac, dmac, ETH_ALEN); - } + memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); /* Get source gid */ ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr), diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 538c46a73248..6dee4fdc5d67 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -92,12 +92,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, int ret; memcpy(&in6, grh->dgid.raw, sizeof(in6)); - if (rdma_is_multicast_addr(&in6)) { + if 
(rdma_is_multicast_addr(&in6)) is_mcast = 1; - rdma_get_mcast_mac(&in6, ah->av.eth.mac); - } else { - memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); - } + + memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr), grh->sgid_index, &sgid, &gid_attr); if (ret) diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 3363e29157f6..fe269f680103 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -89,10 +89,6 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, resp.response_length = min_resp_len; - err = ib_resolve_eth_dmac(pd->device, ah_attr); - if (err) - return ERR_PTR(err); - memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index d0249e463338..dec650930ca6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -201,21 +201,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, /* Get network header type for this GID */ ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); - if ((pd->uctx) && - (!rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw)) && - (!rdma_link_local_addr((struct in6_addr *)grh->dgid.raw))) { - status = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, - attr->roce.dmac, - &vlan_tag, - &sgid_attr.ndev->ifindex, - NULL); - if (status) { - pr_err("%s(): Failed to resolve dmac from gid." - "status = %d\n", __func__, status); - goto av_conf_err; - } - } - status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag); if (status) goto av_conf_err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 09c4a695155e..9810e4568635 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3622,8 +3622,6 @@ void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); -int ib_resolve_eth_dmac(struct ib_device *device, - struct rdma_ah_attr *ah_attr); int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width); static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) -- cgit v1.2.3 From 99260132fde7bddc6e0132ce53da94d1c9ccabcb Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 16 Oct 2017 08:45:16 +0300 Subject: IB/core: Fix calculation of maximum RoCE MTU The original code only took into consideration the largest header possible after the IB_BTH_BYTES. This was incorrect, as the largest possible header size is the largest possible combination of headers we might run into. The new code accounts for all possible headers in the largest possible combination and subtracts that from the MTU to make sure that all packets will fit on the wire. 
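To make the arithmetic explicit, here is the iboe_get_mtu() logic reworked as a standalone user-space program (the enum values mirror the ones this patch adds to ib_pack.h; the lookup table is a simplification of ib_mtu_enum_to_int()):

/*
 * The worst case stacked on a RoCEv2 payload is GRH(40) + UDP(8) + BTH(12) +
 * XRCETH(4) + AtomicETH(28) + ICRC(4) = 96 bytes, not the old 40 + 12 + 28 = 80.
 * Build with: cc -std=c99 -o iboe_mtu iboe_mtu.c
 */
#include <stdio.h>

enum {
	IB_GRH_BYTES = 40, IB_UDP_BYTES = 8, IB_BTH_BYTES = 12,
	IB_EXT_XRC_BYTES = 4, IB_EXT_ATOMICETH_BYTES = 28, IB_ICRC_BYTES = 4,
};

static int iboe_effective_ib_mtu(int netdev_mtu)
{
	int mtu = netdev_mtu - (IB_GRH_BYTES + IB_UDP_BYTES + IB_BTH_BYTES +
				IB_EXT_XRC_BYTES + IB_EXT_ATOMICETH_BYTES +
				IB_ICRC_BYTES);
	static const int ib_mtus[] = { 4096, 2048, 1024, 512, 256 };

	for (unsigned i = 0; i < sizeof(ib_mtus) / sizeof(ib_mtus[0]); i++)
		if (mtu >= ib_mtus[i])
			return ib_mtus[i];
	return 0;	/* link MTU too small for any IB MTU */
}

int main(void)
{
	printf("eth 1500 -> ib %d\n", iboe_effective_ib_mtu(1500));
	printf("eth 4200 -> ib %d\n", iboe_effective_ib_mtu(4200));
	printf("eth 4096 -> ib %d\n", iboe_effective_ib_mtu(4096));
	return 0;
}

With the old 80-byte reduction a 4176-byte link MTU was enough to report IB_MTU_4096; after this fix it takes at least 4192 bytes, so a worst-case packet always fits on the wire.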
Link: https://www.spinics.net/lists/linux-rdma/msg54558.html Fixes: 3c86aa70bf67 ("RDMA/cm: Add RDMA CM support for IBoE devices") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reported-by: Roland Dreier Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 7 ++++--- include/rdma/ib_pack.h | 19 +++++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index ec5008cf5d51..8815989301ab 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -245,10 +245,11 @@ static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_g static inline enum ib_mtu iboe_get_mtu(int mtu) { /* - * reduce IB headers from effective IBoE MTU. 28 stands for - * atomic header which is the biggest possible header after BTH + * Reduce IB headers from effective IBoE MTU. */ - mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; + mtu = mtu - (IB_GRH_BYTES + IB_UDP_BYTES + IB_BTH_BYTES + + IB_EXT_XRC_BYTES + IB_EXT_ATOMICETH_BYTES + + IB_ICRC_BYTES); if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) return IB_MTU_4096; diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index 36655899ee02..7ea1382ad0e5 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -37,14 +37,17 @@ #include enum { - IB_LRH_BYTES = 8, - IB_ETH_BYTES = 14, - IB_VLAN_BYTES = 4, - IB_GRH_BYTES = 40, - IB_IP4_BYTES = 20, - IB_UDP_BYTES = 8, - IB_BTH_BYTES = 12, - IB_DETH_BYTES = 8 + IB_LRH_BYTES = 8, + IB_ETH_BYTES = 14, + IB_VLAN_BYTES = 4, + IB_GRH_BYTES = 40, + IB_IP4_BYTES = 20, + IB_UDP_BYTES = 8, + IB_BTH_BYTES = 12, + IB_DETH_BYTES = 8, + IB_EXT_ATOMICETH_BYTES = 28, + IB_EXT_XRC_BYTES = 4, + IB_ICRC_BYTES = 4 }; struct ib_field { -- cgit v1.2.3 From fec99ededf6be46178d7f571b34dae80fc05f090 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 25 Oct 2017 18:56:49 +0300 Subject: RDMA/umem: Avoid partial declaration of non-static function The RDMA/umem uses generic RB-trees macros to generate various ib_umem access functions. The generation is performed with INTERVAL_TREE_DEFINE macro, which allows one of two modes: declare all functions as static or declare none of the function to be static. The second mode of operation produces the following sparse errors: drivers/infiniband/core/umem_rbtree.c:69:1: warning: symbol 'rbt_ib_umem_iter_first' was not declared. Should it be static? drivers/infiniband/core/umem_rbtree.c:69:1: warning: symbol 'rbt_ib_umem_iter_next' was not declared. Should it be static? Code relocation together with declaration of such functions to be "static" solves the issue. Because there is no need to have separate file for two functions, let's consolidate umem_rtree.c and umem_odp.c into one file. 
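For readers unfamiliar with the macro, the sketch below (hypothetical node type, not ib_umem_odp) shows the storage-class argument the commit message is talking about; passing "static" only works when the macro is expanded in the same .c file that calls the generated helpers, which is why umem_rbtree.c is folded into umem_odp.c.

/* Hypothetical kernel-style sketch of INTERVAL_TREE_DEFINE usage. */
#include <linux/interval_tree_generic.h>
#include <linux/rbtree.h>
#include <linux/types.h>

struct ex_node {
	struct rb_node rb;
	u64 start;
	u64 last;		/* inclusive end point */
	u64 __subtree_last;	/* maintained by the generated code */
};

#define EX_NODE_START(n) ((n)->start)
#define EX_NODE_LAST(n)  ((n)->last)

/*
 * With "static" as the storage-class argument, ex_insert(), ex_remove(),
 * ex_iter_first() and ex_iter_next() are generated as static functions
 * visible only to this translation unit; with an empty argument they are
 * global and sparse expects matching declarations somewhere.
 */
INTERVAL_TREE_DEFINE(struct ex_node, rb, u64, __subtree_last,
		     EX_NODE_START, EX_NODE_LAST, static, ex)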
Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/umem_odp.c | 72 ++++++++++++++++++++++ drivers/infiniband/core/umem_rbtree.c | 109 ---------------------------------- include/rdma/ib_umem_odp.h | 4 -- 4 files changed, 73 insertions(+), 114 deletions(-) delete mode 100644 drivers/infiniband/core/umem_rbtree.c (limited to 'include/rdma') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index b4df164f71a6..336ace50b0ab 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -14,7 +14,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ security.o nldev.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o -ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o +ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o ib_cm-y := cm.o diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 55e8f5ed8b3c..2aadf5813a40 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -39,11 +39,44 @@ #include #include #include +#include #include #include #include +/* + * The ib_umem list keeps track of memory regions for which the HW + * device request to receive notification when the related memory + * mapping is changed. + * + * ib_umem_lock protects the list. + */ + +static u64 node_start(struct umem_odp_node *n) +{ + struct ib_umem_odp *umem_odp = + container_of(n, struct ib_umem_odp, interval_tree); + + return ib_umem_start(umem_odp->umem); +} + +/* Note that the representation of the intervals in the interval tree + * considers the ending point as contained in the interval, while the + * function ib_umem_end returns the first address which is not contained + * in the umem. + */ +static u64 node_last(struct umem_odp_node *n) +{ + struct ib_umem_odp *umem_odp = + container_of(n, struct ib_umem_odp, interval_tree); + + return ib_umem_end(umem_odp->umem) - 1; +} + +INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, + node_start, node_last, static, rbt_ib_umem) + static void ib_umem_notifier_start_account(struct ib_umem *item) { mutex_lock(&item->odp_data->umem_mutex); @@ -754,3 +787,42 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, mutex_unlock(&umem->odp_data->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); + +/* @last is not a part of the interval. See comment for function + * node_last. 
+ */ +int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, + u64 start, u64 last, + umem_call_back cb, + void *cookie) +{ + int ret_val = 0; + struct umem_odp_node *node, *next; + struct ib_umem_odp *umem; + + if (unlikely(start == last)) + return ret_val; + + for (node = rbt_ib_umem_iter_first(root, start, last - 1); + node; node = next) { + next = rbt_ib_umem_iter_next(node, start, last - 1); + umem = container_of(node, struct ib_umem_odp, interval_tree); + ret_val = cb(umem->umem, start, last, cookie) || ret_val; + } + + return ret_val; +} +EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); + +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, + u64 addr, u64 length) +{ + struct umem_odp_node *node; + + node = rbt_ib_umem_iter_first(root, addr, addr + length - 1); + if (node) + return container_of(node, struct ib_umem_odp, interval_tree); + return NULL; + +} +EXPORT_SYMBOL(rbt_ib_umem_lookup); diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c deleted file mode 100644 index fc801920e341..000000000000 --- a/drivers/infiniband/core/umem_rbtree.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2014 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -/* - * The ib_umem list keeps track of memory regions for which the HW - * device request to receive notification when the related memory - * mapping is changed. - * - * ib_umem_lock protects the list. - */ - -static inline u64 node_start(struct umem_odp_node *n) -{ - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_start(umem_odp->umem); -} - -/* Note that the representation of the intervals in the interval tree - * considers the ending point as contained in the interval, while the - * function ib_umem_end returns the first address which is not contained - * in the umem. 
- */ -static inline u64 node_last(struct umem_odp_node *n) -{ - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_end(umem_odp->umem) - 1; -} - -INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, - node_start, node_last, , rbt_ib_umem) - -/* @last is not a part of the interval. See comment for function - * node_last. - */ -int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, - u64 start, u64 last, - umem_call_back cb, - void *cookie) -{ - int ret_val = 0; - struct umem_odp_node *node, *next; - struct ib_umem_odp *umem; - - if (unlikely(start == last)) - return ret_val; - - for (node = rbt_ib_umem_iter_first(root, start, last - 1); - node; node = next) { - next = rbt_ib_umem_iter_next(node, start, last - 1); - umem = container_of(node, struct ib_umem_odp, interval_tree); - ret_val = cb(umem->umem, start, last, cookie) || ret_val; - } - - return ret_val; -} -EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); - -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, - u64 addr, u64 length) -{ - struct umem_odp_node *node; - - node = rbt_ib_umem_iter_first(root, addr, addr + length - 1); - if (node) - return container_of(node, struct ib_umem_odp, interval_tree); - return NULL; - -} -EXPORT_SYMBOL(rbt_ib_umem_lookup); diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 5eb7f5bc8248..6a17f856f841 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -111,10 +111,6 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bound); -void rbt_ib_umem_insert(struct umem_odp_node *node, - struct rb_root_cached *root); -void rbt_ib_umem_remove(struct umem_odp_node *node, - struct rb_root_cached *root); typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, void *cookie); /* -- cgit v1.2.3 From e1d2e88733695038754d3303b180f8005a02b6f1 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sun, 29 Oct 2017 13:59:44 +0200 Subject: IB/core: Add PCI write end padding flags for WQ and QP There are root complexes that are able to optimize their performance when incoming data is multiple full cache lines. PCI write end padding is the device's ability to pad the ending of incoming packets (scatter) to full cache line such that the last upstream write generated by an incoming packet will be a full cache line. Add a relevant entry to ib_device_cap_flags to report such capability of an RDMA device. Add the QP and WQ create flags: * A QP/WQ created with a scatter end padding flag will cause HW to pad the last upstream write generated by a packet to cache line. User should consider several factors before activating this feature: - In case of high CPU memory load (which may cause PCI back pressure in turn), if a large percent of the writes are partial cache line, this feature should be checked as an optional solution. - This feature might reduce performance if most packets are between one and two cache lines and PCIe throughput has reached its maximum capacity. E.g. 65B packet from the network port will lead to 128B write on PCIe, which may cause traffic on PCIe to reach high throughput. 
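A short, hedged sketch of how a kernel consumer would opt in (the helper below is hypothetical; the capability bit and create flag are the ones added by this patch):

/* Hypothetical ULP helper: request end padding only when supported. */
#include <rdma/ib_verbs.h>

static struct ib_qp *exulp_create_qp(struct ib_pd *pd,
				     struct ib_qp_init_attr *init_attr)
{
	struct ib_device *dev = pd->device;

	/*
	 * IB_DEVICE_PCI_WRITE_END_PADDING is only advertised by devices
	 * that can pad the last upstream write of a packet to a full
	 * cache line; a device that cannot pad is expected to reject
	 * the create flag.
	 */
	if (dev->attrs.device_cap_flags & IB_DEVICE_PCI_WRITE_END_PADDING)
		init_attr->create_flags |= IB_QP_CREATE_PCI_WRITE_END_PADDING;

	return ib_create_qp(pd, init_attr);
}

The same capability bit gates IB_WQ_FLAGS_PCI_WRITE_END_PADDING for WQ creation.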
Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 3 ++- include/rdma/ib_verbs.h | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d31e4bc58e9a..8ca36843ef38 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1491,7 +1491,8 @@ static int create_qp(struct ib_uverbs_file *file, IB_QP_CREATE_MANAGED_RECV | IB_QP_CREATE_SCATTER_FCS | IB_QP_CREATE_CVLAN_STRIPPING | - IB_QP_CREATE_SOURCE_QPN)) { + IB_QP_CREATE_SOURCE_QPN | + IB_QP_CREATE_PCI_WRITE_END_PADDING)) { ret = -EINVAL; goto err_put; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9810e4568635..0b671982bbb3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -229,6 +229,8 @@ enum ib_device_cap_flags { /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */ IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35), + /* The device supports padding incoming writes to cacheline. */ + IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), }; enum ib_signature_prot_cap { @@ -1098,6 +1100,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_SCATTER_FCS = 1 << 8, IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, IB_QP_CREATE_SOURCE_QPN = 1 << 10, + IB_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, @@ -1621,6 +1624,7 @@ enum ib_wq_flags { IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0, IB_WQ_FLAGS_SCATTER_FCS = 1 << 1, IB_WQ_FLAGS_DELAY_DROP = 1 << 2, + IB_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3, }; struct ib_wq_init_attr { -- cgit v1.2.3 From e08ce2e82b2fc5cdd07de170e8b9e8327625005c Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Tue, 7 Nov 2017 10:49:09 +0200 Subject: RDMA/core: Make function rdma_copy_addr return void Function returns zero - make it void. While there make struct net_device const. 
Signed-off-by: Yuval Shaia Reviewed-by: Parav Pandit Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 27 ++++++++++++++------------- drivers/infiniband/core/cma.c | 8 ++------ include/rdma/ib_addr.h | 5 +++-- 3 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index d2f74721b3ba..f4e8185bccd3 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -229,8 +229,9 @@ void rdma_addr_unregister_client(struct rdma_addr_client *client) } EXPORT_SYMBOL(rdma_addr_unregister_client); -int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, - const unsigned char *dst_dev_addr) +void rdma_copy_addr(struct rdma_dev_addr *dev_addr, + const struct net_device *dev, + const unsigned char *dst_dev_addr) { dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); @@ -238,7 +239,6 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); dev_addr->bound_dev_if = dev->ifindex; - return 0; } EXPORT_SYMBOL(rdma_copy_addr); @@ -247,15 +247,14 @@ int rdma_translate_ip(const struct sockaddr *addr, u16 *vlan_id) { struct net_device *dev; - int ret = -EADDRNOTAVAIL; if (dev_addr->bound_dev_if) { dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!dev) return -ENODEV; - ret = rdma_copy_addr(dev_addr, dev, NULL); + rdma_copy_addr(dev_addr, dev, NULL); dev_put(dev); - return ret; + return 0; } switch (addr->sa_family) { @@ -264,9 +263,9 @@ int rdma_translate_ip(const struct sockaddr *addr, ((const struct sockaddr_in *)addr)->sin_addr.s_addr); if (!dev) - return ret; + return -EADDRNOTAVAIL; - ret = rdma_copy_addr(dev_addr, dev, NULL); + rdma_copy_addr(dev_addr, dev, NULL); dev_addr->bound_dev_if = dev->ifindex; if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); @@ -279,7 +278,7 @@ int rdma_translate_ip(const struct sockaddr *addr, if (ipv6_chk_addr(dev_addr->net, &((const struct sockaddr_in6 *)addr)->sin6_addr, dev, 1)) { - ret = rdma_copy_addr(dev_addr, dev, NULL); + rdma_copy_addr(dev_addr, dev, NULL); dev_addr->bound_dev_if = dev->ifindex; if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); @@ -290,7 +289,7 @@ int rdma_translate_ip(const struct sockaddr *addr, break; #endif } - return ret; + return 0; } EXPORT_SYMBOL(rdma_translate_ip); @@ -336,7 +335,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const void *daddr) { struct neighbour *n; - int ret; + int ret = 0; n = dst_neigh_lookup(dst, daddr); @@ -346,7 +345,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, neigh_event_send(n, NULL); ret = -ENODATA; } else { - ret = rdma_copy_addr(dev_addr, dst->dev, n->ha); + rdma_copy_addr(dev_addr, dst->dev, n->ha); } rcu_read_unlock(); @@ -494,7 +493,9 @@ static int addr_resolve_neigh(struct dst_entry *dst, if (!(dst->dev->flags & IFF_NOARP)) return fetch_ha(dst, addr, dst_in, seq); - return rdma_copy_addr(addr, dst->dev, NULL); + rdma_copy_addr(addr, dst->dev, NULL); + + return 0; } static int addr_resolve(struct sockaddr *src_in, diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fa79c7076ccd..1fdb473b5df7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1846,9 +1846,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, 
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (net_dev) { - ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); - if (ret) - goto err; + rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); } else { if (!cma_protocol_roce(listen_id) && cma_any_addr(cma_src_addr(id_priv))) { @@ -1894,9 +1892,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, goto err; if (net_dev) { - ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); - if (ret) - goto err; + rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); } else { if (!cma_any_addr(cma_src_addr(id_priv))) { ret = cma_translate_addr(cma_src_addr(id_priv), diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index b2a10c762304..18c564f60e93 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -125,8 +125,9 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, void rdma_addr_cancel(struct rdma_dev_addr *addr); -int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, - const unsigned char *dst_dev_addr); +void rdma_copy_addr(struct rdma_dev_addr *dev_addr, + const struct net_device *dev, + const unsigned char *dst_dev_addr); int rdma_addr_size(struct sockaddr *addr); -- cgit v1.2.3 From 869ddcf8b351ace5bf8860f3cd6265dccb382426 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:13 +0200 Subject: IB/uverbs: Allow CQ moderation with modify CQ Uverbs support in modify_cq for CQ moderation only. Gives ability to change cq_max_count and cq_period. CQ moderation enhance performance by moderating the number of CQEs needed to create an event instead of application having to suffer from event per-CQE. To achieve CQ moderation the application needs to set cq_max_count and cq_period. cq_max_count - defines the number of CQEs needed to create an event. 
cq_period - defines the timeout (micro seconds) between last event and a new one that will occur even if cq_max_count was not satisfied Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 42 +++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 1 + include/rdma/ib_verbs.h | 4 ++++ include/uapi/rdma/ib_user_verbs.h | 15 ++++++++++++- 5 files changed, 62 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index ee2739ae4305..deccefb71a6b 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -306,5 +306,6 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_wq); IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(modify_qp); +IB_UVERBS_DECLARE_EX_CMD(modify_cq); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 8ca36843ef38..3c2673cd4090 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3856,3 +3856,45 @@ end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; } + +int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_cq cmd = {}; + struct ib_cq *cq; + size_t required_cmd_sz; + int ret; + + required_cmd_sz = offsetof(typeof(cmd), reserved) + + sizeof(cmd.reserved); + if (ucore->inlen < required_cmd_sz) + return -EINVAL; + + /* sanity checks */ + if (ucore->inlen > sizeof(cmd) && + !ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (!cmd.attr_mask || cmd.reserved) + return -EINVAL; + + if (cmd.attr_mask > IB_CQ_MODERATE) + return -EOPNOTSUPP; + + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); + if (!cq) + return -EINVAL; + + ret = ib_modify_cq(cq, cmd.attr.cq_count, cmd.attr.cq_period); + + uobj_put_obj_read(cq); + + return ret; +} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index b5febfd84ee5..381fd9c096ae 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -128,6 +128,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp, + [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq, }; static void ib_uverbs_add_one(struct ib_device *device); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0b671982bbb3..8e0d3780ce4e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -311,6 +311,10 @@ struct ib_cq_init_attr { u32 flags; }; +enum ib_cq_attr_mask { + IB_CQ_MODERATE = 1 << 0, +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index d4e0b53bfc75..cfa09d6095d6 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -100,7 +100,8 @@ enum { IB_USER_VERBS_EX_CMD_MODIFY_WQ, 
IB_USER_VERBS_EX_CMD_DESTROY_WQ, IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, - IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL + IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL, + IB_USER_VERBS_EX_CMD_MODIFY_CQ }; /* @@ -1150,6 +1151,18 @@ struct ib_uverbs_ex_destroy_rwq_ind_table { __u32 ind_tbl_handle; }; +struct ib_uverbs_cq_moderation { + __u16 cq_count; + __u16 cq_period; +}; + +struct ib_uverbs_ex_modify_cq { + __u32 cq_handle; + __u32 attr_mask; + struct ib_uverbs_cq_moderation attr; + __u32 reserved; +}; + #define IB_DEVICE_NAME_MAX 64 #endif /* IB_USER_VERBS_H */ -- cgit v1.2.3 From 18bd90729237dc6ddbad01bc9618148224f03590 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:16 +0200 Subject: IB/uverbs: Add CQ moderation capability to query_device The query_device function can now obtain the maximum values for cq_max_count and cq_period, needed for CQ moderation. cq_max_count is a 16 bits number that determines the number of CQEs to accumulate before generating an event. cq_period is a 16 bits number that determines the timeout in micro seconds from the last event generated, upon which a new event will be generated even if cq_max_count was not reached. Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 9 +++++++++ include/rdma/ib_verbs.h | 6 ++++++ include/uapi/rdma/ib_user_verbs.h | 7 +++++++ 3 files changed, 22 insertions(+) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3c2673cd4090..53143e4b1c50 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3852,6 +3852,15 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.tm_caps.max_sge = attr.tm_caps.max_sge; resp.tm_caps.flags = attr.tm_caps.flags; resp.response_length += sizeof(resp.tm_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.cq_moderation_caps)) + goto end; + + resp.cq_moderation_caps.max_cq_moderation_count = + attr.cq_caps.max_cq_moderation_count; + resp.cq_moderation_caps.max_cq_moderation_period = + attr.cq_caps.max_cq_moderation_period; + resp.response_length += sizeof(resp.cq_moderation_caps); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8e0d3780ce4e..0b484c023fa9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -315,6 +315,11 @@ enum ib_cq_attr_mask { IB_CQ_MODERATE = 1 << 0, }; +struct ib_cq_caps { + u16 max_cq_moderation_count; + u16 max_cq_moderation_period; +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; @@ -365,6 +370,7 @@ struct ib_device_attr { u32 max_wq_type_rq; u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ struct ib_tm_caps tm_caps; + struct ib_cq_caps cq_caps; }; enum ib_mtu { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index cfa09d6095d6..5186fb12629b 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -125,6 +125,12 @@ struct ib_uverbs_comp_event_desc { __u64 cq_handle; }; +struct ib_uverbs_cq_moderation_caps { + __u16 max_cq_moderation_count; + __u16 max_cq_moderation_period; + __u32 reserved; +}; + /* * All commands from userspace should start with a __u32 command field * followed by __u16 in_words and out_words fields (which give the @@ -263,6 +269,7 @@ struct ib_uverbs_ex_query_device_resp { __u32 max_wq_type_rq; __u32 
raw_packet_caps; struct ib_uverbs_tm_caps tm_caps; + struct ib_uverbs_cq_moderation_caps cq_moderation_caps; }; struct ib_uverbs_query_port { -- cgit v1.2.3 From 4190b4e96954e2c3597021d29435c3f8db8d3129 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 13 Nov 2017 10:51:19 +0200 Subject: RDMA/core: Rename kernel modify_cq to better describe its usage Current ib_modify_cq() is used to set CQ moderation parameters. This patch renames ib_modify_cq() to be rdma_set_cq_moderation(), because the kernel version of RDMA API doesn't need to follow already exposed to user's API pattern (create_XXX/modify_XXX/query_XXX/destroy_XXX) and better to have more accurate name which describes the actual usage. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/verbs.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 5 +++-- include/rdma/ib_verbs.h | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 53143e4b1c50..16d55710b116 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3901,7 +3901,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, if (!cq) return -EINVAL; - ret = ib_modify_cq(cq, cmd.attr.cq_count, cmd.attr.cq_period); + ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period); uobj_put_obj_read(cq); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index d8f1a5d34f4f..3fb8fb6cc824 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1550,12 +1550,12 @@ struct ib_cq *ib_create_cq(struct ib_device *device, } EXPORT_SYMBOL(ib_create_cq); -int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) { return cq->device->modify_cq ? cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS; } -EXPORT_SYMBOL(ib_modify_cq); +EXPORT_SYMBOL(rdma_set_cq_moderation); int ib_destroy_cq(struct ib_cq *cq) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 8dc1e6225cc8..2706bf26cbac 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -99,8 +99,9 @@ static int ipoib_set_coalesce(struct net_device *dev, coal->rx_max_coalesced_frames > 0xffff) return -EINVAL; - ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames, - coal->rx_coalesce_usecs); + ret = rdma_set_cq_moderation(priv->recv_cq, + coal->rx_max_coalesced_frames, + coal->rx_coalesce_usecs); if (ret && ret != -ENOSYS) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); return ret; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0b484c023fa9..fd84cda5ed7c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3169,13 +3169,13 @@ struct ib_cq *ib_create_cq(struct ib_device *device, int ib_resize_cq(struct ib_cq *cq, int cqe); /** - * ib_modify_cq - Modifies moderation params of the CQ + * rdma_set_cq_moderation - Modifies moderation params of the CQ * @cq: The CQ to modify. 
* @cq_count: number of CQEs that will trigger an event * @cq_period: max period of time in usec before triggering an event * */ -int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period); /** * ib_destroy_cq - Destroys the specified CQ. -- cgit v1.2.3
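To close the CQ moderation series from the application side, below is a hedged user-space sketch. It assumes an rdma-core new enough to ship ibv_modify_cq() and IBV_CQ_ATTR_MODERATE (the user-space face of the IB_USER_VERBS_EX_CMD_MODIFY_CQ command added above); the struct member names follow the ibv_modify_cq(3) manual page and should be double-checked against your headers.

/* Hedged sketch: moderate a CQ to one event per 16 CQEs or 100 us. */
#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **devs = ibv_get_device_list(NULL);
	struct ibv_modify_cq_attr attr = {
		.attr_mask = IBV_CQ_ATTR_MODERATE,
		.moderate  = { .cq_count = 16, .cq_period = 100 /* usec */ },
	};
	struct ibv_context *ctx;
	struct ibv_cq *cq;
	int ret;

	if (!devs || !devs[0]) {
		fprintf(stderr, "no RDMA devices\n");
		return 1;
	}

	ctx = ibv_open_device(devs[0]);
	ibv_free_device_list(devs);
	if (!ctx)
		return 1;

	cq = ibv_create_cq(ctx, 1024, NULL, NULL, 0);
	if (!cq) {
		ibv_close_device(ctx);
		return 1;
	}

	/* Raise one completion event after 16 CQEs, or 100 us after the
	 * last event, whichever comes first, instead of one event per CQE.
	 */
	ret = ibv_modify_cq(cq, &attr);
	if (ret)
		fprintf(stderr, "ibv_modify_cq: %d\n", ret);

	ibv_destroy_cq(cq);
	ibv_close_device(ctx);
	return ret ? 1 : 0;
}

Kernel consumers get the same knob through rdma_set_cq_moderation(), as the ipoib_ethtool.c hunk in the last patch shows.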