From 39289bfc221423b27570e7c9157b690828e786cb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 22 Jul 2019 17:01:30 +0000 Subject: RDMA: Make most headers compile stand alone So that rdma can work with CONFIG_KERNEL_HEADER_TEST and CONFIG_HEADERS_CHECK. Link: https://lore.kernel.org/r/20190722170126.GA16453@ziepe.ca Signed-off-by: Jason Gunthorpe --- include/rdma/ib.h | 2 ++ include/rdma/iw_portmap.h | 3 +++ include/rdma/opa_port_info.h | 2 ++ include/rdma/rdmavt_cq.h | 1 + include/rdma/signature.h | 2 ++ 5 files changed, 10 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib.h b/include/rdma/ib.h index 4f385ec54f80..fe2fc9e91588 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -36,6 +36,8 @@ #include #include #include +#include +#include struct ib_addr { union { diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h index b9fee7feeeb5..c89535047c42 100644 --- a/include/rdma/iw_portmap.h +++ b/include/rdma/iw_portmap.h @@ -33,6 +33,9 @@ #ifndef _IW_PORTMAP_H #define _IW_PORTMAP_H +#include +#include + #define IWPM_ULIBNAME_SIZE 32 #define IWPM_DEVNAME_SIZE 32 #define IWPM_IFNAME_SIZE 16 diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index 7147a9263011..bdbfe25d3854 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -33,6 +33,8 @@ #if !defined(OPA_PORT_INFO_H) #define OPA_PORT_INFO_H +#include + #define OPA_PORT_LINK_MODE_NOP 0 /* No change */ #define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */ diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h index 04c519ef6d71..574eb7278f46 100644 --- a/include/rdma/rdmavt_cq.h +++ b/include/rdma/rdmavt_cq.h @@ -53,6 +53,7 @@ #include #include +#include /* * Define an ib_cq_notify value that is not valid so we know when CQ diff --git a/include/rdma/signature.h b/include/rdma/signature.h index f24cc2a1d3c5..d16b0fcc8344 100644 --- a/include/rdma/signature.h +++ b/include/rdma/signature.h @@ -6,6 +6,8 @@ #ifndef _RDMA_SIGNATURE_H_ #define _RDMA_SIGNATURE_H_ +#include + enum ib_signature_prot_cap { IB_PROT_T10DIF_TYPE_1 = 1, IB_PROT_T10DIF_TYPE_2 = 1 << 1, -- cgit v1.2.3 From 1d2fedd8561dc469a7503855ee602f4bb3eccfa7 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 23 Jul 2019 10:02:05 +0300 Subject: RDMA/core: Support netlink commands in non init_net net namespaces Now that IB core supports RDMA device binding with specific net namespace, enable IB core to accept netlink commands in non init_net namespaces. This is done by having per net namespace netlink socket. At present only netlink device handling client RDMA_NL_NLDEV supports device handling in multiple net namespaces. Hence do not accept netlink messages for other clients in non init_net net namespaces. Link: https://lore.kernel.org/r/20190723070205.6247-1-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/addr.c | 2 +- drivers/infiniband/core/core_priv.h | 24 +++++++++++++- drivers/infiniband/core/device.c | 38 ++++++++-------------- drivers/infiniband/core/iwpm_msg.c | 8 ++--- drivers/infiniband/core/iwpm_util.c | 6 ++-- drivers/infiniband/core/netlink.c | 63 +++++++++++++++++++++++++------------ drivers/infiniband/core/nldev.c | 20 ++++++------ drivers/infiniband/core/sa_query.c | 2 +- include/rdma/rdma_netlink.h | 10 ++++-- 9 files changed, 105 insertions(+), 68 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 9b76a8fcdd24..1dd467bed8fc 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -183,7 +183,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); - rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL); + rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL); /* Make the request retry, so when we get the response from userspace * we will have something. diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 888d89ce81df..589ed805e0ad 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -54,8 +56,26 @@ struct pkey_index_qp_list { struct list_head qp_list; }; +/** + * struct rdma_dev_net - rdma net namespace metadata for a net + * @nl_sock: Pointer to netlink socket + * @net: Pointer to owner net namespace + * @id: xarray id to identify the net namespace. + */ +struct rdma_dev_net { + struct sock *nl_sock; + possible_net_t net; + u32 id; +}; + extern const struct attribute_group ib_dev_attr_group; extern bool ib_devices_shared_netns; +extern unsigned int rdma_dev_net_id; + +static inline struct rdma_dev_net *rdma_net_to_dev_net(struct net *net) +{ + return net_generic(net, rdma_dev_net_id); +} int ib_device_register_sysfs(struct ib_device *device); void ib_device_unregister_sysfs(struct ib_device *device); @@ -179,7 +199,6 @@ void ib_mad_cleanup(void); int ib_sa_init(void); void ib_sa_cleanup(void); -int rdma_nl_init(void); void rdma_nl_exit(void); int ib_nl_handle_resolve_resp(struct sk_buff *skb, @@ -362,4 +381,7 @@ void ib_port_unregister_module_stat(struct kobject *kobj); int ib_device_set_netns_put(struct sk_buff *skb, struct ib_device *dev, u32 ns_fd); + +int rdma_nl_net_init(struct rdma_dev_net *rnet); +void rdma_nl_net_exit(struct rdma_dev_net *rnet); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 309210d2d4a8..c3576c7d2e8f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -105,17 +104,7 @@ static DECLARE_RWSEM(clients_rwsem); */ #define CLIENT_DATA_REGISTERED XA_MARK_1 -/** - * struct rdma_dev_net - rdma net namespace metadata for a net - * @net: Pointer to owner net namespace - * @id: xarray id to identify the net namespace. - */ -struct rdma_dev_net { - possible_net_t net; - u32 id; -}; - -static unsigned int rdma_dev_net_id; +unsigned int rdma_dev_net_id; /* * A list of net namespaces is maintained in an xarray. This is necessary @@ -1050,7 +1039,7 @@ int rdma_compatdev_set(u8 enable) static void rdma_dev_exit_net(struct net *net) { - struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); struct ib_device *dev; unsigned long index; int ret; @@ -1084,25 +1073,32 @@ static void rdma_dev_exit_net(struct net *net) } up_read(&devices_rwsem); + rdma_nl_net_exit(rnet); xa_erase(&rdma_nets, rnet->id); } static __net_init int rdma_dev_init_net(struct net *net) { - struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); unsigned long index; struct ib_device *dev; int ret; + write_pnet(&rnet->net, net); + + ret = rdma_nl_net_init(rnet); + if (ret) + return ret; + /* No need to create any compat devices in default init_net. */ if (net_eq(net, &init_net)) return 0; - write_pnet(&rnet->net, net); - ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL); - if (ret) + if (ret) { + rdma_nl_net_exit(rnet); return ret; + } down_read(&devices_rwsem); xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { @@ -2629,12 +2625,6 @@ static int __init ib_core_init(void) goto err_comp_unbound; } - ret = rdma_nl_init(); - if (ret) { - pr_warn("Couldn't init IB netlink interface: err %d\n", ret); - goto err_sysfs; - } - ret = addr_init(); if (ret) { pr_warn("Could't init IB address resolution\n"); @@ -2680,8 +2670,6 @@ err_mad: err_addr: addr_cleanup(); err_ibnl: - rdma_nl_exit(); -err_sysfs: class_unregister(&ib_class); err_comp_unbound: destroy_workqueue(ib_comp_unbound_wq); diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 2452b0ddcf0d..f1a873d4e842 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -112,7 +112,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n", __func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name); - ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL); + ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ iwpm_user_pid = IWPM_PID_UNAVAILABLE; @@ -202,7 +202,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) nlmsg_end(skb, nlh); nlmsg_request->req_buffer = pm_msg; - ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); + ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ iwpm_user_pid = IWPM_PID_UNDEFINED; @@ -297,7 +297,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) nlmsg_end(skb, nlh); nlmsg_request->req_buffer = pm_msg; - ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); + ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ err_str = "Unable to send a nlmsg"; @@ -364,7 +364,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) nlmsg_end(skb, nlh); - ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); + ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ iwpm_user_pid = IWPM_PID_UNDEFINED; diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 41929bb83739..c7ad3499228c 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -645,7 +645,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) nlmsg_end(skb, nlh); - ret = rdma_nl_unicast(skb, iwpm_pid); + ret = rdma_nl_unicast(&init_net, skb, iwpm_pid); if (ret) { skb = NULL; err_str = "Unable to send a nlmsg"; @@ -674,7 +674,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid) return -ENOMEM; } nlh->nlmsg_type = NLMSG_DONE; - ret = rdma_nl_unicast(skb, iwpm_pid); + ret = rdma_nl_unicast(&init_net, skb, iwpm_pid); if (ret) pr_warn("%s Unable to send a nlmsg\n", __func__); return ret; @@ -824,7 +824,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version) goto hello_num_error; nlmsg_end(skb, nlh); - ret = rdma_nl_unicast(skb, iwpm_pid); + ret = rdma_nl_unicast(&init_net, skb, iwpm_pid); if (ret) { skb = NULL; err_str = "Unable to send a nlmsg"; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index eecfc0b377c9..67a76aca2dd6 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -36,20 +36,22 @@ #include #include #include +#include #include #include #include #include "core_priv.h" static DEFINE_MUTEX(rdma_nl_mutex); -static struct sock *nls; static struct { const struct rdma_nl_cbs *cb_table; } rdma_nl_types[RDMA_NL_NUM_CLIENTS]; bool rdma_nl_chk_listeners(unsigned int group) { - return netlink_has_listeners(nls, group); + struct rdma_dev_net *rnet = rdma_net_to_dev_net(&init_net); + + return netlink_has_listeners(rnet->nl_sock, group); } EXPORT_SYMBOL(rdma_nl_chk_listeners); @@ -73,13 +75,21 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op) return (op < max_num_ops[type]) ? true : false; } -static bool is_nl_valid(unsigned int type, unsigned int op) +static bool +is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op) { const struct rdma_nl_cbs *cb_table; if (!is_nl_msg_valid(type, op)) return false; + /* + * Currently only NLDEV client is supporting netlink commands in + * non init_net net namespace. + */ + if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV) + return false; + if (!rdma_nl_types[type].cb_table) { mutex_unlock(&rdma_nl_mutex); request_module("rdma-netlink-subsys-%d", type); @@ -161,7 +171,7 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int op = RDMA_NL_GET_OP(type); const struct rdma_nl_cbs *cb_table; - if (!is_nl_valid(index, op)) + if (!is_nl_valid(skb, index, op)) return -EINVAL; cb_table = rdma_nl_types[index].cb_table; @@ -185,7 +195,7 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, .dump = cb_table[op].dump, }; if (c.dump) - return netlink_dump_start(nls, skb, nlh, &c); + return netlink_dump_start(skb->sk, skb, nlh, &c); return -EINVAL; } @@ -258,52 +268,65 @@ static void rdma_nl_rcv(struct sk_buff *skb) mutex_unlock(&rdma_nl_mutex); } -int rdma_nl_unicast(struct sk_buff *skb, u32 pid) +int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid) { + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); int err; - err = netlink_unicast(nls, skb, pid, MSG_DONTWAIT); + err = netlink_unicast(rnet->nl_sock, skb, pid, MSG_DONTWAIT); return (err < 0) ? err : 0; } EXPORT_SYMBOL(rdma_nl_unicast); -int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid) +int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid) { + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); int err; - err = netlink_unicast(nls, skb, pid, 0); + err = netlink_unicast(rnet->nl_sock, skb, pid, 0); return (err < 0) ? err : 0; } EXPORT_SYMBOL(rdma_nl_unicast_wait); -int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags) +int rdma_nl_multicast(struct net *net, struct sk_buff *skb, + unsigned int group, gfp_t flags) { - return nlmsg_multicast(nls, skb, 0, group, flags); + struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); + + return nlmsg_multicast(rnet->nl_sock, skb, 0, group, flags); } EXPORT_SYMBOL(rdma_nl_multicast); -int __init rdma_nl_init(void) +void rdma_nl_exit(void) +{ + int idx; + + for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) + WARN(rdma_nl_types[idx].cb_table, + "Nelink client %d wasn't released prior to unloading %s\n", + idx, KBUILD_MODNAME); +} + +int rdma_nl_net_init(struct rdma_dev_net *rnet) { + struct net *net = read_pnet(&rnet->net); struct netlink_kernel_cfg cfg = { .input = rdma_nl_rcv, }; + struct sock *nls; - nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg); + nls = netlink_kernel_create(net, NETLINK_RDMA, &cfg); if (!nls) return -ENOMEM; nls->sk_sndtimeo = 10 * HZ; + rnet->nl_sock = nls; return 0; } -void rdma_nl_exit(void) +void rdma_nl_net_exit(struct rdma_dev_net *rnet) { - int idx; - - for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) - rdma_nl_unregister(idx); - - netlink_kernel_release(nls); + netlink_kernel_release(rnet->nl_sock); } MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 783e465e7c41..e287b71a1cfd 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -832,7 +832,7 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); @@ -972,7 +972,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); @@ -1074,7 +1074,7 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); @@ -1251,7 +1251,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); @@ -1596,7 +1596,7 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, put_device(data.cdev); if (ibdev) ib_device_put(ibdev); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); out_data: put_device(data.cdev); @@ -1636,7 +1636,7 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } nlmsg_end(msg, nlh); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); } static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1734,7 +1734,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_fill: rdma_counter_unbind_qpn(device, port, qpn, cntn); @@ -1802,7 +1802,7 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_fill: rdma_counter_bind_qpn(device, port, qpn, cntn); @@ -1893,7 +1893,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, mutex_unlock(&stats->lock); nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_table: nla_nest_cancel(msg, table_attr); @@ -1961,7 +1961,7 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, nlmsg_end(msg, nlh); ib_device_put(device); - return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_msg: nlmsg_free(msg); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 7d8071c7e564..17fc2936c077 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -860,7 +860,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); - return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask); + return rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_mask); } static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 6631624e4d7c..ab22759de7ea 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -76,28 +76,32 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, /** * Send the supplied skb to a specific userspace PID. + * @net: Net namespace in which to send the skb * @skb: The netlink skb * @pid: Userspace netlink process ID * Returns 0 on success or a negative error code. */ -int rdma_nl_unicast(struct sk_buff *skb, u32 pid); +int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid); /** * Send, with wait/1 retry, the supplied skb to a specific userspace PID. + * @net: Net namespace in which to send the skb * @skb: The netlink skb * @pid: Userspace netlink process ID * Returns 0 on success or a negative error code. */ -int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid); +int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid); /** * Send the supplied skb to a netlink group. + * @net: Net namespace in which to send the skb * @skb: The netlink skb * @group: Netlink group ID * @flags: allocation flags * Returns 0 on success or a negative error code. */ -int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags); +int rdma_nl_multicast(struct net *net, struct sk_buff *skb, + unsigned int group, gfp_t flags); /** * Check if there are any listeners to the netlink group -- cgit v1.2.3 From 20cf4e026730104892fa1268de0371a631cee294 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 29 Jul 2019 13:22:09 -0400 Subject: rdma: Enable ib_alloc_cq to spread work over a device's comp_vectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Send and Receive completion is handled on a single CPU selected at the time each Completion Queue is allocated. Typically this is when an initiator instantiates an RDMA transport, or when a target accepts an RDMA connection. Some ULPs cannot open a connection per CPU to spread completion workload across available CPUs and MSI vectors. For such ULPs, provide an API that allows the RDMA core to select a completion vector based on the device's complement of available comp_vecs. ULPs that invoke ib_alloc_cq() with only comp_vector 0 are converted to use the new API so that their completion workloads interfere less with each other. Suggested-by: HÃ¥kon Bugge Signed-off-by: Chuck Lever Reviewed-by: Leon Romanovsky Cc: Cc: Link: https://lore.kernel.org/r/20190729171923.13428.52555.stgit@manet.1015granger.net Signed-off-by: Doug Ledford --- drivers/infiniband/core/cq.c | 28 ++++++++++++++++++++++++++++ drivers/infiniband/ulp/srpt/ib_srpt.c | 4 ++-- fs/cifs/smbdirect.c | 10 ++++++---- include/rdma/ib_verbs.h | 19 +++++++++++++++++++ net/9p/trans_rdma.c | 6 +++--- net/sunrpc/xprtrdma/svc_rdma_transport.c | 8 ++++---- net/sunrpc/xprtrdma/verbs.c | 13 ++++++------- 7 files changed, 68 insertions(+), 20 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 7c599878ccf7..bbfded6d5d3d 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -252,6 +252,34 @@ out_free_cq: } EXPORT_SYMBOL(__ib_alloc_cq_user); +/** + * __ib_alloc_cq_any - allocate a completion queue + * @dev: device to allocate the CQ for + * @private: driver private data, accessible from cq->cq_context + * @nr_cqe: number of CQEs to allocate + * @poll_ctx: context to poll the CQ from + * @caller: module owner name + * + * Attempt to spread ULP Completion Queues over each device's interrupt + * vectors. A simple best-effort mechanism is used. + */ +struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, + int nr_cqe, enum ib_poll_context poll_ctx, + const char *caller) +{ + static atomic_t counter; + int comp_vector = 0; + + if (dev->num_comp_vectors > 1) + comp_vector = + atomic_inc_return(&counter) % + min_t(int, dev->num_comp_vectors, num_online_cpus()); + + return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, + caller, NULL); +} +EXPORT_SYMBOL(__ib_alloc_cq_any); + /** * ib_free_cq_user - free a completion queue * @cq: completion queue to free. diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 1a039f16d315..e25c70a56be6 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1767,8 +1767,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) goto out; retry: - ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + sq_size, - 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE); + ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size, + IB_POLL_WORKQUEUE); if (IS_ERR(ch->cq)) { ret = PTR_ERR(ch->cq); pr_err("failed to create CQ cqe= %d ret= %d\n", diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index cd07e5301d42..3c91fa97c9a8 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1654,15 +1654,17 @@ static struct smbd_connection *_smbd_get_connection( info->send_cq = NULL; info->recv_cq = NULL; - info->send_cq = ib_alloc_cq(info->id->device, info, - info->send_credit_target, 0, IB_POLL_SOFTIRQ); + info->send_cq = + ib_alloc_cq_any(info->id->device, info, + info->send_credit_target, IB_POLL_SOFTIRQ); if (IS_ERR(info->send_cq)) { info->send_cq = NULL; goto alloc_cq_failed; } - info->recv_cq = ib_alloc_cq(info->id->device, info, - info->receive_credit_max, 0, IB_POLL_SOFTIRQ); + info->recv_cq = + ib_alloc_cq_any(info->id->device, info, + info->receive_credit_max, IB_POLL_SOFTIRQ); if (IS_ERR(info->recv_cq)) { info->recv_cq = NULL; goto alloc_cq_failed; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c5f8a9f17063..2a1523ccd7ab 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3711,6 +3711,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, NULL); } +struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, + int nr_cqe, enum ib_poll_context poll_ctx, + const char *caller); + +/** + * ib_alloc_cq_any: Allocate kernel CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @poll_ctx: Context used for polling the CQ + */ +static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev, + void *private, int nr_cqe, + enum ib_poll_context poll_ctx) +{ + return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx, + KBUILD_MODNAME); +} + /** * ib_free_cq_user - Free kernel/user CQ * @cq: The CQ to free diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index bac8dad5dd69..b21c3c209815 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -685,9 +685,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) goto error; /* Create the Completion Queue */ - rdma->cq = ib_alloc_cq(rdma->cm_id->device, client, - opts.sq_depth + opts.rq_depth + 1, - 0, IB_POLL_SOFTIRQ); + rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client, + opts.sq_depth + opts.rq_depth + 1, + IB_POLL_SOFTIRQ); if (IS_ERR(rdma->cq)) goto error; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 3fe665152d95..4d3db6ee7f09 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -454,14 +454,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk("svcrdma: error creating PD for connect request\n"); goto errout; } - newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth, - 0, IB_POLL_WORKQUEUE); + newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth, + IB_POLL_WORKQUEUE); if (IS_ERR(newxprt->sc_sq_cq)) { dprintk("svcrdma: error creating SQ CQ for connect request\n"); goto errout; } - newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth, - 0, IB_POLL_WORKQUEUE); + newxprt->sc_rq_cq = + ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE); if (IS_ERR(newxprt->sc_rq_cq)) { dprintk("svcrdma: error creating RQ CQ for connect request\n"); goto errout; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 805b1f35e1ca..b10aa16557f0 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -521,18 +521,17 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) init_waitqueue_head(&ep->rep_connect_wait); ep->rep_receive_count = 0; - sendcq = ib_alloc_cq(ia->ri_id->device, NULL, - ep->rep_attr.cap.max_send_wr + 1, - ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0, - IB_POLL_WORKQUEUE); + sendcq = ib_alloc_cq_any(ia->ri_id->device, NULL, + ep->rep_attr.cap.max_send_wr + 1, + IB_POLL_WORKQUEUE); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); goto out1; } - recvcq = ib_alloc_cq(ia->ri_id->device, NULL, - ep->rep_attr.cap.max_recv_wr + 1, - 0, IB_POLL_WORKQUEUE); + recvcq = ib_alloc_cq_any(ia->ri_id->device, NULL, + ep->rep_attr.cap.max_recv_wr + 1, + IB_POLL_WORKQUEUE); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); goto out2; -- cgit v1.2.3 From 05bb411ada9508b48044ef5d84dd8bc46cece607 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 1 Aug 2019 20:14:46 +0300 Subject: RDMA/core: Introduce ratelimited ibdev printk functions Add ratelimited helpers to the ibdev_* printk functions. Implementation inspired by counterpart dev_*_ratelimited functions. Signed-off-by: Gal Pressman Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190801171447.54440-2-galpress@amazon.com Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 2a1523ccd7ab..0ecda7d15df2 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -107,6 +107,48 @@ static inline void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {} #endif +#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + if (__ratelimit(&_rs)) \ + ibdev_level(ibdev, fmt, ##__VA_ARGS__); \ +} while (0) + +#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_alert_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_crit_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_err_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_warn_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_notice_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__) +#define ibdev_info_ratelimited(ibdev, fmt, ...) \ + ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__) + +#if defined(CONFIG_DYNAMIC_DEBUG) +/* descriptor check is first to prevent flooding with "callbacks suppressed" */ +#define ibdev_dbg_ratelimited(ibdev, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) \ + __dynamic_ibdev_dbg(&descriptor, ibdev, fmt, \ + ##__VA_ARGS__); \ +} while (0) +#else +__printf(2, 3) __cold +static inline +void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {} +#endif + union ib_gid { u8 raw[16]; struct { -- cgit v1.2.3 From 72a7720fca37fec0daf295923f17ac5d88a613e1 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 7 Aug 2019 13:31:35 +0300 Subject: RDMA: Introduce ib_port_phys_state enum In order to improve readability, add ib_port_phys_state enum to replace the use of magic numbers. Signed-off-by: Kamal Heib Reviewed-by: Andrew Boyer Acked-by: Michal Kalderon Acked-by: Bernard Metzler Link: https://lore.kernel.org/r/20190807103138.17219-2-kamalheib1@gmail.com Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 30 ++++++++++++++++++---------- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 ++-- drivers/infiniband/hw/efa/efa_verbs.c | 2 +- drivers/infiniband/hw/hns/hns_roce_device.h | 10 ---------- drivers/infiniband/hw/hns/hns_roce_main.c | 3 ++- drivers/infiniband/hw/mlx4/main.c | 3 ++- drivers/infiniband/hw/mlx5/main.c | 4 ++-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 4 ++-- drivers/infiniband/hw/qedr/verbs.c | 4 ++-- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 7 ++++--- drivers/infiniband/sw/rxe/rxe.h | 4 ---- drivers/infiniband/sw/rxe/rxe_param.h | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 6 +++--- drivers/infiniband/sw/siw/siw_verbs.c | 3 ++- include/rdma/ib_verbs.h | 10 ++++++++++ 15 files changed, 53 insertions(+), 43 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index b477295a96c2..7a50cedcef1f 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -289,6 +289,24 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, ib_width_enum_to_int(attr.active_width), speed); } +static const char *phys_state_to_str(enum ib_port_phys_state phys_state) +{ + static const char * phys_state_str[] = { + "", + "Sleep", + "Polling", + "Disabled", + "PortConfigurationTraining", + "LinkUp", + "LinkErrorRecovery", + "Phy Test", + }; + + if (phys_state < ARRAY_SIZE(phys_state_str)) + return phys_state_str[phys_state]; + return ""; +} + static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, char *buf) { @@ -300,16 +318,8 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - switch (attr.phys_state) { - case 1: return sprintf(buf, "1: Sleep\n"); - case 2: return sprintf(buf, "2: Polling\n"); - case 3: return sprintf(buf, "3: Disabled\n"); - case 4: return sprintf(buf, "4: PortConfigurationTraining\n"); - case 5: return sprintf(buf, "5: LinkUp\n"); - case 6: return sprintf(buf, "6: LinkErrorRecovery\n"); - case 7: return sprintf(buf, "7: Phy Test\n"); - default: return sprintf(buf, "%d: \n", attr.phys_state); - } + return sprintf(buf, "%d: %s\n", attr.phys_state, + phys_state_to_str(attr.phys_state)); } static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 098ab883733e..f9e97d0cc459 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -220,10 +220,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, if (netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev)) { port_attr->state = IB_PORT_ACTIVE; - port_attr->phys_state = 5; + port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } else { port_attr->state = IB_PORT_DOWN; - port_attr->phys_state = 3; + port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } port_attr->max_mtu = IB_MTU_4096; port_attr->active_mtu = iboe_get_mtu(rdev->netdev->mtu); diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 32d3b3deabce..70851bd7f801 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -333,7 +333,7 @@ int efa_query_port(struct ib_device *ibdev, u8 port, props->lmc = 1; props->state = IB_PORT_ACTIVE; - props->phys_state = 5; + props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; props->active_speed = IB_SPEED_EDR; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index b39497a13b61..12a2b8565771 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -989,16 +989,6 @@ struct hns_roce_hw { const struct ib_device_ops *hns_roce_dev_srq_ops; }; -enum hns_phy_state { - HNS_ROCE_PHY_SLEEP = 1, - HNS_ROCE_PHY_POLLING = 2, - HNS_ROCE_PHY_DISABLED = 3, - HNS_ROCE_PHY_TRAINING = 4, - HNS_ROCE_PHY_LINKUP = 5, - HNS_ROCE_PHY_LINKERR = 6, - HNS_ROCE_PHY_TEST = 7 -}; - struct hns_roce_dev { struct ib_device ib_dev; struct platform_device *pdev; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 1e4ba48f5613..1b757cc924c3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -262,7 +262,8 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; props->phys_state = (props->state == IB_PORT_ACTIVE) ? - HNS_ROCE_PHY_LINKUP : HNS_ROCE_PHY_DISABLED; + IB_PORT_PHYS_STATE_LINK_UP : + IB_PORT_PHYS_STATE_DISABLED; spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8790101facb7..8d2f1e38b891 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -734,7 +734,8 @@ out: static u8 state_to_phys_state(enum ib_port_state state) { - return state == IB_PORT_ACTIVE ? 5 : 3; + return state == IB_PORT_ACTIVE ? + IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; } static int eth_link_query_port(struct ib_device *ibdev, u8 port, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4a3d700cd783..af1986b4aa7d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -535,7 +535,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); props->pkey_tbl_len = 1; props->state = IB_PORT_DOWN; - props->phys_state = 3; + props->phys_state = IB_PORT_PHYS_STATE_DISABLED; mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); props->qkey_viol_cntr = qkey_viol_cntr; @@ -561,7 +561,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, if (netif_running(ndev) && netif_carrier_ok(ndev)) { props->state = IB_PORT_ACTIVE; - props->phys_state = 5; + props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } ndev_ib_mtu = iboe_get_mtu(ndev->mtu); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index bccc11378109..e8267e590772 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -163,10 +163,10 @@ int ocrdma_query_port(struct ib_device *ibdev, netdev = dev->nic_info.netdev; if (netif_running(netdev) && netif_oper_up(netdev)) { port_state = IB_PORT_ACTIVE; - props->phys_state = 5; + props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } else { port_state = IB_PORT_DOWN; - props->phys_state = 3; + props->phys_state = IB_PORT_PHYS_STATE_DISABLED; } props->max_mtu = IB_MTU_4096; props->active_mtu = iboe_get_mtu(netdev->mtu); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 0c6a4bc848f5..6f3ce86019b7 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -221,10 +221,10 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) /* *attr being zeroed by the caller, avoid zeroing it here */ if (rdma_port->port_state == QED_RDMA_PORT_UP) { attr->state = IB_PORT_ACTIVE; - attr->phys_state = 5; + attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } else { attr->state = IB_PORT_DOWN; - attr->phys_state = 3; + attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } attr->max_mtu = IB_MTU_4096; attr->active_mtu = iboe_get_mtu(dev->ndev->mtu); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index a354c7c86547..556b8e44a51c 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -356,13 +356,14 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, if (!us_ibdev->ufdev->link_up) { props->state = IB_PORT_DOWN; - props->phys_state = 3; + props->phys_state = IB_PORT_PHYS_STATE_DISABLED; } else if (!us_ibdev->ufdev->inaddr) { props->state = IB_PORT_INIT; - props->phys_state = 4; + props->phys_state = + IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING; } else { props->state = IB_PORT_ACTIVE; - props->phys_state = 5; + props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } props->port_cap_flags = 0; diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index ecf6e659c0da..fb07eed9e402 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -65,10 +65,6 @@ */ #define RXE_UVERBS_ABI_VERSION 2 -#define RDMA_LINK_PHYS_STATE_LINK_UP (5) -#define RDMA_LINK_PHYS_STATE_DISABLED (3) -#define RDMA_LINK_PHYS_STATE_POLLING (2) - #define RXE_ROCE_V2_SPORT (0xc000) static inline u32 rxe_crc32(struct rxe_dev *rxe, diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 1abed47ca221..fe5207386700 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -154,7 +154,7 @@ enum rxe_port_param { RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X, RXE_PORT_ACTIVE_SPEED = 1, RXE_PORT_PKEY_TBL_LEN = 64, - RXE_PORT_PHYS_STATE = 2, + RXE_PORT_PHYS_STATE = IB_PORT_PHYS_STATE_POLLING, RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL, }; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4ebdfcf4d33e..623129f27f5a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -69,11 +69,11 @@ static int rxe_query_port(struct ib_device *dev, &attr->active_width); if (attr->state == IB_PORT_ACTIVE) - attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP; + attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; else if (dev_get_flags(rxe->ndev) & IFF_UP) - attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING; + attr->phys_state = IB_PORT_PHYS_STATE_POLLING; else - attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED; + attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; mutex_unlock(&rxe->usdev_lock); diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 32dc79d0e898..404e7ca4b30c 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -206,7 +206,8 @@ int siw_query_port(struct ib_device *base_dev, u8 port, attr->gid_tbl_len = 1; attr->max_msg_sz = -1; attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); - attr->phys_state = sdev->state == IB_PORT_ACTIVE ? 5 : 3; + attr->phys_state = sdev->state == IB_PORT_ACTIVE ? + IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; attr->pkey_tbl_len = 1; attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; attr->state = sdev->state; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0ecda7d15df2..391499008a22 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -493,6 +493,16 @@ enum ib_port_state { IB_PORT_ACTIVE_DEFER = 5 }; +enum ib_port_phys_state { + IB_PORT_PHYS_STATE_SLEEP = 1, + IB_PORT_PHYS_STATE_POLLING = 2, + IB_PORT_PHYS_STATE_DISABLED = 3, + IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4, + IB_PORT_PHYS_STATE_LINK_UP = 5, + IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6, + IB_PORT_PHYS_STATE_PHY_TEST = 7, +}; + enum ib_port_width { IB_WIDTH_1X = 1, IB_WIDTH_2X = 16, -- cgit v1.2.3 From b2299e83815c59ab59c4ee4fb4842b3b28e5072f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 19 Aug 2019 14:45:47 +0300 Subject: RDMA: Delete DEBUG code There is no need to keep DEBUG defines for out-of-the tree testing. Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190819114547.20704-1-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/fmr_pool.c | 13 ------------- include/rdma/ib_verbs.h | 3 --- 2 files changed, 16 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 7d841b689a1e..e08aec427027 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -148,13 +148,6 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) hlist_del_init(&fmr->cache_node); fmr->remap_count = 0; list_add_tail(&fmr->fmr->list, &fmr_list); - -#ifdef DEBUG - if (fmr->ref_count !=0) { - pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n", - fmr, fmr->ref_count); - } -#endif } list_splice_init(&pool->dirty_list, &unmap_list); @@ -496,12 +489,6 @@ void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) } } -#ifdef DEBUG - if (fmr->ref_count < 0) - pr_warn(PFX "FMR %p has ref count %d < 0\n", - fmr, fmr->ref_count); -#endif - spin_unlock_irqrestore(&pool->pool_lock, flags); } EXPORT_SYMBOL(ib_fmr_pool_unmap); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 391499008a22..08e966c8081a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -98,9 +98,6 @@ void ibdev_info(const struct ib_device *ibdev, const char *format, ...); #if defined(CONFIG_DYNAMIC_DEBUG) #define ibdev_dbg(__dev, format, args...) \ dynamic_ibdev_dbg(__dev, format, ##args) -#elif defined(DEBUG) -#define ibdev_dbg(__dev, format, args...) \ - ibdev_printk(KERN_DEBUG, __dev, format, ##args) #else __printf(2, 3) __cold static inline -- cgit v1.2.3 From 7199435414868bd656284349edc1a1f528fe3662 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Wed, 11 Sep 2019 07:30:47 -0400 Subject: IB/{rdmavt, hfi1, qib}: Add a counter for credit waits This patch adds a counter for credit waits to assist field debugging. Link: https://lore.kernel.org/r/20190911113047.126040.10857.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 2 ++ drivers/infiniband/hw/hfi1/chip.h | 1 + drivers/infiniband/hw/hfi1/rc.c | 10 ++-------- drivers/infiniband/hw/qib/qib_rc.c | 10 ++-------- drivers/infiniband/hw/qib/qib_sysfs.c | 2 ++ include/rdma/rdma_vt.h | 1 + include/rdma/rdmavt_qp.h | 35 +++++++++++++++++++++++++++++++++++ 7 files changed, 45 insertions(+), 16 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 67052dc3100c..9b1fb84a3d45 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -4101,6 +4101,7 @@ def_access_ibp_counter(rc_dupreq); def_access_ibp_counter(rdma_seq); def_access_ibp_counter(unaligned); def_access_ibp_counter(seq_naks); +def_access_ibp_counter(rc_crwaits); static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH), @@ -5119,6 +5120,7 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = { [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq), [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned), [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks), +[C_SW_IBP_RC_CRWAITS] = SW_IBP_CNTR(RcCrWait, rc_crwaits), [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL, access_sw_cpu_rc_acks), [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL, diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index b76cf81f927f..4ca5ac8d7e9e 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1245,6 +1245,7 @@ enum { C_SW_IBP_RDMA_SEQ, C_SW_IBP_UNALIGNED, C_SW_IBP_SEQ_NAK, + C_SW_IBP_RC_CRWAITS, C_SW_CPU_RC_ACKS, C_SW_CPU_RC_QACKS, C_SW_CPU_RC_DELAYED_COMP, diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index eeca08d3b470..513a8aac9ccd 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -595,11 +595,8 @@ check_s_state: case IB_WR_SEND_WITH_IMM: case IB_WR_SEND_WITH_INV: /* If no credit, return. */ - if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && - rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + if (!rvt_rc_credit_avail(qp, wqe)) goto bail; - } if (len > pmtu) { qp->s_state = OP(SEND_FIRST); len = pmtu; @@ -632,11 +629,8 @@ check_s_state: goto no_flow_control; case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ - if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && - rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + if (!rvt_rc_credit_avail(qp, wqe)) goto bail; - } no_flow_control: put_ib_reth_vaddr( wqe->rdma_wr.remote_addr, diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 1d5e2d4ee257..aaf7438258fa 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -313,11 +313,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: /* If no credit, return. */ - if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && - rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + if (!rvt_rc_credit_avail(qp, wqe)) goto bail; - } if (len > pmtu) { qp->s_state = OP(SEND_FIRST); len = pmtu; @@ -344,11 +341,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) goto no_flow_control; case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ - if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && - rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + if (!rvt_rc_credit_avail(qp, wqe)) goto bail; - } no_flow_control: ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr); diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 905206a0c2d5..3926be78036e 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -436,6 +436,7 @@ QIB_DIAGC_ATTR(dmawait); QIB_DIAGC_ATTR(unaligned); QIB_DIAGC_ATTR(rc_dupreq); QIB_DIAGC_ATTR(rc_seqnak); +QIB_DIAGC_ATTR(rc_crwaits); static struct attribute *diagc_default_attributes[] = { &qib_diagc_attr_rc_resends.attr, @@ -453,6 +454,7 @@ static struct attribute *diagc_default_attributes[] = { &qib_diagc_attr_unaligned.attr, &qib_diagc_attr_rc_dupreq.attr, &qib_diagc_attr_rc_seqnak.attr, + &qib_diagc_attr_rc_crwaits.attr, NULL }; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 525848e227dc..ac5a9430abb6 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -116,6 +116,7 @@ struct rvt_ibport { u64 n_unaligned; u64 n_rc_dupreq; u64 n_rc_seqnak; + u64 n_rc_crwaits; u16 pkey_violations; u16 qkey_violations; u16 mkey_violations; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index e06c77d76463..b550ae89bf85 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -973,6 +973,41 @@ static inline void rvt_free_rq(struct rvt_rq *rq) rq->wq = NULL; } +/** + * rvt_to_iport - Get the ibport pointer + * @qp: the qp pointer + * + * This function returns the ibport pointer from the qp pointer. + */ +static inline struct rvt_ibport *rvt_to_iport(struct rvt_qp *qp) +{ + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + return rdi->ports[qp->port_num - 1]; +} + +/** + * rvt_rc_credit_avail - Check if there are enough RC credits for the request + * @qp: the qp + * @wqe: the request + * + * This function returns false when there are not enough credits for the given + * request and true otherwise. + */ +static inline bool rvt_rc_credit_avail(struct rvt_qp *qp, struct rvt_swqe *wqe) +{ + lockdep_assert_held(&qp->s_lock); + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && + rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { + struct rvt_ibport *rvp = rvt_to_iport(qp); + + qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + rvp->n_rc_crwaits++; + return false; + } + return true; +} + struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi, u64 v, void (*cb)(struct rvt_qp *qp, u64 v)); -- cgit v1.2.3