diff options
Diffstat (limited to 'drivers')
143 files changed, 7626 insertions, 4107 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 6425c0e5d18a..2137adfbd8c3 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -85,4 +85,6 @@ source "drivers/infiniband/ulp/isert/Kconfig" source "drivers/infiniband/sw/rdmavt/Kconfig" +source "drivers/infiniband/hw/hfi1/Kconfig" + endif # INFINIBAND diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 26987d9d7e1c..edaae9f9853c 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,8 +1,7 @@ infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o -obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ - ib_cm.o iw_cm.o ib_addr.o \ +obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \ $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ @@ -10,14 +9,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ - roce_gid_mgmt.o mr_pool.o + roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ + multicast.o mad.o smi.o agent.o mad_rmpp.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o -ib_mad-y := mad.o smi.o agent.o mad_rmpp.o - -ib_sa-y := sa_query.o multicast.o - ib_cm-y := cm.o iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o @@ -28,8 +24,6 @@ rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o rdma_ucm-y := ucma.o -ib_addr-y := addr.o - ib_umad-y := user_mad.o ib_ucm-y := ucm.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 337353d86cfa..1374541a4528 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -46,10 +46,10 @@ #include <net/ip6_route.h> #include <rdma/ib_addr.h> #include <rdma/ib.h> +#include <rdma/rdma_netlink.h> +#include <net/netlink.h> -MODULE_AUTHOR("Sean Hefty"); -MODULE_DESCRIPTION("IB Address Translation"); -MODULE_LICENSE("Dual BSD/GPL"); +#include "core_priv.h" struct addr_req { struct list_head list; @@ -62,8 +62,11 @@ struct addr_req { struct rdma_dev_addr *addr, void *context); unsigned long timeout; int status; + u32 seq; }; +static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0); + static void process_req(struct work_struct *work); static DEFINE_MUTEX(lock); @@ -71,6 +74,126 @@ static LIST_HEAD(req_list); static DECLARE_DELAYED_WORK(work, process_req); static struct workqueue_struct *addr_wq; +static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { + [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, + .len = sizeof(struct rdma_nla_ls_gid)}, +}; + +static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) +{ + struct nlattr *tb[LS_NLA_TYPE_MAX] = {}; + int ret; + + if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) + return false; + + ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), + nlmsg_len(nlh), ib_nl_addr_policy); + if (ret) + return false; + + return true; +} + +static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh) +{ + const struct nlattr *head, *curr; + union ib_gid gid; + struct addr_req *req; + int len, rem; + int found = 0; + + head = (const struct nlattr *)nlmsg_data(nlh); + len = nlmsg_len(nlh); + + nla_for_each_attr(curr, head, len, rem) { + if (curr->nla_type == LS_NLA_TYPE_DGID) + memcpy(&gid, nla_data(curr), nla_len(curr)); + } + + mutex_lock(&lock); + list_for_each_entry(req, &req_list, list) { + if (nlh->nlmsg_seq != req->seq) + continue; + /* We set the DGID part, the rest was set earlier */ + rdma_addr_set_dgid(req->addr, &gid); + req->status = 0; + found = 1; + break; + } + mutex_unlock(&lock); + + if (!found) + pr_info("Couldn't find request waiting for DGID: %pI6\n", + &gid); +} + +int ib_nl_handle_ip_res_resp(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; + + if ((nlh->nlmsg_flags & NLM_F_REQUEST) || + !(NETLINK_CB(skb).sk) || + !netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; + + if (ib_nl_is_good_ip_resp(nlh)) + ib_nl_process_good_ip_rsep(nlh); + + return skb->len; +} + +static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, + const void *daddr, + u32 seq, u16 family) +{ + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + struct rdma_ls_ip_resolve_header *header; + void *data; + size_t size; + int attrtype; + int len; + + if (family == AF_INET) { + size = sizeof(struct in_addr); + attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4; + } else { + size = sizeof(struct in6_addr); + attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6; + } + + len = nla_total_size(sizeof(size)); + len += NLMSG_ALIGN(sizeof(*header)); + + skb = nlmsg_new(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS, + RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST); + if (!data) { + nlmsg_free(skb); + return -ENODATA; + } + + /* Construct the family header first */ + header = (struct rdma_ls_ip_resolve_header *) + skb_put(skb, NLMSG_ALIGN(sizeof(*header))); + header->ifindex = dev_addr->bound_dev_if; + nla_put(skb, attrtype, size, daddr); + + /* Repair the nlmsg header length */ + nlmsg_end(skb, nlh); + ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL); + + /* Make the request retry, so when we get the response from userspace + * we will have something. + */ + return -ENODATA; +} + int rdma_addr_size(struct sockaddr *addr) { switch (addr->sa_family) { @@ -199,6 +322,17 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } +static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, + const void *daddr, u32 seq, u16 family) +{ + if (ibnl_chk_listeners(RDMA_NL_GROUP_LS)) + return -EADDRNOTAVAIL; + + /* We fill in what we can, the response will fill the rest */ + rdma_copy_addr(dev_addr, dst->dev, NULL); + return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); +} + static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const void *daddr) { @@ -223,6 +357,39 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, return ret; } +static bool has_gateway(struct dst_entry *dst, sa_family_t family) +{ + struct rtable *rt; + struct rt6_info *rt6; + + if (family == AF_INET) { + rt = container_of(dst, struct rtable, dst); + return rt->rt_uses_gateway; + } + + rt6 = container_of(dst, struct rt6_info, dst); + return rt6->rt6i_flags & RTF_GATEWAY; +} + +static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, + const struct sockaddr *dst_in, u32 seq) +{ + const struct sockaddr_in *dst_in4 = + (const struct sockaddr_in *)dst_in; + const struct sockaddr_in6 *dst_in6 = + (const struct sockaddr_in6 *)dst_in; + const void *daddr = (dst_in->sa_family == AF_INET) ? + (const void *)&dst_in4->sin_addr.s_addr : + (const void *)&dst_in6->sin6_addr; + sa_family_t family = dst_in->sa_family; + + /* Gateway + ARPHRD_INFINIBAND -> IB router */ + if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND) + return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family); + else + return dst_fetch_ha(dst, dev_addr, daddr); +} + static int addr4_resolve(struct sockaddr_in *src_in, const struct sockaddr_in *dst_in, struct rdma_dev_addr *addr, @@ -246,10 +413,11 @@ static int addr4_resolve(struct sockaddr_in *src_in, src_in->sin_family = AF_INET; src_in->sin_addr.s_addr = fl4.saddr; - /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't - * routable) and we could set the network type accordingly. + /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're + * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network + * type accordingly. */ - if (rt->rt_uses_gateway) + if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND) addr->network = RDMA_NETWORK_IPV4; addr->hoplimit = ip4_dst_hoplimit(&rt->dst); @@ -291,10 +459,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, src_in->sin6_addr = fl6.saddr; } - /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't - * routable) and we could set the network type accordingly. + /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're + * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network + * type accordingly. */ - if (rt->rt6i_flags & RTF_GATEWAY) + if (rt->rt6i_flags & RTF_GATEWAY && + ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND) addr->network = RDMA_NETWORK_IPV6; addr->hoplimit = ip6_dst_hoplimit(dst); @@ -317,7 +487,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, static int addr_resolve_neigh(struct dst_entry *dst, const struct sockaddr *dst_in, - struct rdma_dev_addr *addr) + struct rdma_dev_addr *addr, + u32 seq) { if (dst->dev->flags & IFF_LOOPBACK) { int ret; @@ -331,17 +502,8 @@ static int addr_resolve_neigh(struct dst_entry *dst, } /* If the device doesn't do ARP internally */ - if (!(dst->dev->flags & IFF_NOARP)) { - const struct sockaddr_in *dst_in4 = - (const struct sockaddr_in *)dst_in; - const struct sockaddr_in6 *dst_in6 = - (const struct sockaddr_in6 *)dst_in; - - return dst_fetch_ha(dst, addr, - dst_in->sa_family == AF_INET ? - (const void *)&dst_in4->sin_addr.s_addr : - (const void *)&dst_in6->sin6_addr); - } + if (!(dst->dev->flags & IFF_NOARP)) + return fetch_ha(dst, addr, dst_in, seq); return rdma_copy_addr(addr, dst->dev, NULL); } @@ -349,7 +511,8 @@ static int addr_resolve_neigh(struct dst_entry *dst, static int addr_resolve(struct sockaddr *src_in, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, - bool resolve_neigh) + bool resolve_neigh, + u32 seq) { struct net_device *ndev; struct dst_entry *dst; @@ -366,7 +529,7 @@ static int addr_resolve(struct sockaddr *src_in, return ret; if (resolve_neigh) - ret = addr_resolve_neigh(&rt->dst, dst_in, addr); + ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); ndev = rt->dst.dev; dev_hold(ndev); @@ -383,7 +546,7 @@ static int addr_resolve(struct sockaddr *src_in, return ret; if (resolve_neigh) - ret = addr_resolve_neigh(dst, dst_in, addr); + ret = addr_resolve_neigh(dst, dst_in, addr, seq); ndev = dst->dev; dev_hold(ndev); @@ -412,7 +575,7 @@ static void process_req(struct work_struct *work) src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; req->status = addr_resolve(src_in, dst_in, req->addr, - true); + true, req->seq); if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; else if (req->status == -ENODATA) @@ -471,8 +634,9 @@ int rdma_resolve_ip(struct rdma_addr_client *client, req->context = context; req->client = client; atomic_inc(&client->refcount); + req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); - req->status = addr_resolve(src_in, dst_in, addr, true); + req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); switch (req->status) { case 0: req->timeout = jiffies; @@ -510,7 +674,7 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, src_in->sa_family = dst_addr->sa_family; } - return addr_resolve(src_in, dst_addr, addr, false); + return addr_resolve(src_in, dst_addr, addr, false, 0); } EXPORT_SYMBOL(rdma_resolve_ip_route); @@ -634,7 +798,7 @@ static struct notifier_block nb = { .notifier_call = netevent_callback }; -static int __init addr_init(void) +int addr_init(void) { addr_wq = create_singlethread_workqueue("ib_addr"); if (!addr_wq) @@ -642,15 +806,13 @@ static int __init addr_init(void) register_netevent_notifier(&nb); rdma_addr_register_client(&self); + return 0; } -static void __exit addr_cleanup(void) +void addr_cleanup(void) { rdma_addr_unregister_client(&self); unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); } - -module_init(addr_init); -module_exit(addr_cleanup); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index eab32215756b..19d499dcab76 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -137,4 +137,20 @@ static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, return _upper == upper; } +int addr_init(void); +void addr_cleanup(void); + +int ib_mad_init(void); +void ib_mad_cleanup(void); + +int ib_sa_init(void); +void ib_sa_cleanup(void); + +int ib_nl_handle_resolve_resp(struct sk_buff *skb, + struct netlink_callback *cb); +int ib_nl_handle_set_timeout(struct sk_buff *skb, + struct netlink_callback *cb); +int ib_nl_handle_ip_res_resp(struct sk_buff *skb, + struct netlink_callback *cb); + #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 10979844026a..5516fb070344 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -955,6 +955,29 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, } EXPORT_SYMBOL(ib_get_net_dev_by_params); +static struct ibnl_client_cbs ibnl_ls_cb_table[] = { + [RDMA_NL_LS_OP_RESOLVE] = { + .dump = ib_nl_handle_resolve_resp, + .module = THIS_MODULE }, + [RDMA_NL_LS_OP_SET_TIMEOUT] = { + .dump = ib_nl_handle_set_timeout, + .module = THIS_MODULE }, + [RDMA_NL_LS_OP_IP_RESOLVE] = { + .dump = ib_nl_handle_ip_res_resp, + .module = THIS_MODULE }, +}; + +static int ib_add_ibnl_clients(void) +{ + return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table), + ibnl_ls_cb_table); +} + +static void ib_remove_ibnl_clients(void) +{ + ibnl_remove_client(RDMA_NL_LS); +} + static int __init ib_core_init(void) { int ret; @@ -983,10 +1006,41 @@ static int __init ib_core_init(void) goto err_sysfs; } + ret = addr_init(); + if (ret) { + pr_warn("Could't init IB address resolution\n"); + goto err_ibnl; + } + + ret = ib_mad_init(); + if (ret) { + pr_warn("Couldn't init IB MAD\n"); + goto err_addr; + } + + ret = ib_sa_init(); + if (ret) { + pr_warn("Couldn't init SA\n"); + goto err_mad; + } + + if (ib_add_ibnl_clients()) { + pr_warn("Couldn't register ibnl clients\n"); + goto err_sa; + } + ib_cache_setup(); return 0; +err_sa: + ib_sa_cleanup(); +err_mad: + ib_mad_cleanup(); +err_addr: + addr_cleanup(); +err_ibnl: + ibnl_cleanup(); err_sysfs: class_unregister(&ib_class); err_comp: @@ -999,6 +1053,10 @@ err: static void __exit ib_core_cleanup(void) { ib_cache_cleanup(); + ib_remove_ibnl_clients(); + ib_sa_cleanup(); + ib_mad_cleanup(); + addr_cleanup(); ibnl_cleanup(); class_unregister(&ib_class); destroy_workqueue(ib_comp_wq); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 9fa5bf33f5a3..82fb511112da 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -47,11 +47,7 @@ #include "smi.h" #include "opa_smi.h" #include "agent.h" - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("kernel IB MAD API"); -MODULE_AUTHOR("Hal Rosenstock"); -MODULE_AUTHOR("Sean Hefty"); +#include "core_priv.h" static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; @@ -3316,7 +3312,7 @@ static struct ib_client mad_client = { .remove = ib_mad_remove_device }; -static int __init ib_mad_init_module(void) +int ib_mad_init(void) { mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); @@ -3334,10 +3330,7 @@ static int __init ib_mad_init_module(void) return 0; } -static void __exit ib_mad_cleanup_module(void) +void ib_mad_cleanup(void) { ib_unregister_client(&mad_client); } - -module_init(ib_mad_init_module); -module_exit(ib_mad_cleanup_module); diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 250937cb9a1a..a83ec28a147b 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -93,6 +93,18 @@ enum { struct mcast_member; +/* +* There are 4 types of join states: +* FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember. +*/ +enum { + FULLMEMBER_JOIN, + NONMEMBER_JOIN, + SENDONLY_NONMEBER_JOIN, + SENDONLY_FULLMEMBER_JOIN, + NUM_JOIN_MEMBERSHIP_TYPES, +}; + struct mcast_group { struct ib_sa_mcmember_rec rec; struct rb_node node; @@ -102,7 +114,7 @@ struct mcast_group { struct list_head pending_list; struct list_head active_list; struct mcast_member *last_join; - int members[3]; + int members[NUM_JOIN_MEMBERSHIP_TYPES]; atomic_t refcount; enum mcast_group_state state; struct ib_sa_query *query; @@ -220,8 +232,9 @@ static void queue_join(struct mcast_member *member) } /* - * A multicast group has three types of members: full member, non member, and - * send only member. We need to keep track of the number of members of each + * A multicast group has four types of members: full member, non member, + * sendonly non member and sendonly full member. + * We need to keep track of the number of members of each * type based on their join state. Adjust the number of members the belong to * the specified join states. */ @@ -229,7 +242,7 @@ static void adjust_membership(struct mcast_group *group, u8 join_state, int inc) { int i; - for (i = 0; i < 3; i++, join_state >>= 1) + for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1) if (join_state & 0x1) group->members[i] += inc; } @@ -245,7 +258,7 @@ static u8 get_leave_state(struct mcast_group *group) u8 leave_state = 0; int i; - for (i = 0; i < 3; i++) + for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++) if (!group->members[i]) leave_state |= (0x1 << i); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 3ebd108bcc5f..e95538650dc6 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -53,10 +53,6 @@ #include "sa.h" #include "core_priv.h" -MODULE_AUTHOR("Roland Dreier"); -MODULE_DESCRIPTION("InfiniBand subnet administration query support"); -MODULE_LICENSE("Dual BSD/GPL"); - #define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 #define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 @@ -119,6 +115,12 @@ struct ib_sa_guidinfo_query { struct ib_sa_query sa_query; }; +struct ib_sa_classport_info_query { + void (*callback)(int, struct ib_class_port_info *, void *); + void *context; + struct ib_sa_query sa_query; +}; + struct ib_sa_mcmember_query { void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void *context; @@ -392,6 +394,82 @@ static const struct ib_field service_rec_table[] = { .size_bits = 2*64 }, }; +#define CLASSPORTINFO_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \ + .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \ + .field_name = "ib_class_port_info:" #field + +static const struct ib_field classport_info_rec_table[] = { + { CLASSPORTINFO_REC_FIELD(base_version), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 8 }, + { CLASSPORTINFO_REC_FIELD(class_version), + .offset_words = 0, + .offset_bits = 8, + .size_bits = 8 }, + { CLASSPORTINFO_REC_FIELD(capability_mask), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 }, + { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(redirect_gid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { CLASSPORTINFO_REC_FIELD(redirect_tcslfl), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(redirect_lid), + .offset_words = 7, + .offset_bits = 0, + .size_bits = 16 }, + { CLASSPORTINFO_REC_FIELD(redirect_pkey), + .offset_words = 7, + .offset_bits = 16, + .size_bits = 16 }, + + { CLASSPORTINFO_REC_FIELD(redirect_qp), + .offset_words = 8, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(redirect_qkey), + .offset_words = 9, + .offset_bits = 0, + .size_bits = 32 }, + + { CLASSPORTINFO_REC_FIELD(trap_gid), + .offset_words = 10, + .offset_bits = 0, + .size_bits = 128 }, + { CLASSPORTINFO_REC_FIELD(trap_tcslfl), + .offset_words = 14, + .offset_bits = 0, + .size_bits = 32 }, + + { CLASSPORTINFO_REC_FIELD(trap_lid), + .offset_words = 15, + .offset_bits = 0, + .size_bits = 16 }, + { CLASSPORTINFO_REC_FIELD(trap_pkey), + .offset_words = 15, + .offset_bits = 16, + .size_bits = 16 }, + + { CLASSPORTINFO_REC_FIELD(trap_hlqp), + .offset_words = 16, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(trap_qkey), + .offset_words = 17, + .offset_bits = 0, + .size_bits = 32 }, +}; + #define GUIDINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ @@ -705,8 +783,8 @@ static void ib_nl_request_timeout(struct work_struct *work) spin_unlock_irqrestore(&ib_nl_request_lock, flags); } -static int ib_nl_handle_set_timeout(struct sk_buff *skb, - struct netlink_callback *cb) +int ib_nl_handle_set_timeout(struct sk_buff *skb, + struct netlink_callback *cb) { const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; int timeout, delta, abs_delta; @@ -782,8 +860,8 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) return 1; } -static int ib_nl_handle_resolve_resp(struct sk_buff *skb, - struct netlink_callback *cb) +int ib_nl_handle_resolve_resp(struct sk_buff *skb, + struct netlink_callback *cb) { const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; unsigned long flags; @@ -838,15 +916,6 @@ resp_out: return skb->len; } -static struct ibnl_client_cbs ib_sa_cb_table[] = { - [RDMA_NL_LS_OP_RESOLVE] = { - .dump = ib_nl_handle_resolve_resp, - .module = THIS_MODULE }, - [RDMA_NL_LS_OP_SET_TIMEOUT] = { - .dump = ib_nl_handle_set_timeout, - .module = THIS_MODULE }, -}; - static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -1645,6 +1714,97 @@ err1: } EXPORT_SYMBOL(ib_sa_guid_info_rec_query); +/* Support get SA ClassPortInfo */ +static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_classport_info_query *query = + container_of(sa_query, struct ib_sa_classport_info_query, sa_query); + + if (mad) { + struct ib_class_port_info rec; + + ib_unpack(classport_info_rec_table, + ARRAY_SIZE(classport_info_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else { + query->callback(status, NULL, query->context); + } +} + +static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query) +{ + kfree(container_of(sa_query, struct ib_sa_classport_info_query, + sa_query)); +} + +int ib_sa_classport_info_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_class_port_info *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_classport_info_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; + int ret; + + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + query = kzalloc(sizeof(*query), gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); + + query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL; + + query->sa_query.release = ib_sa_portclass_info_rec_release; + /* support GET only */ + mad->mad_hdr.method = IB_MGMT_METHOD_GET; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO); + mad->sa_hdr.comp_mask = 0; + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + free_mad(&query->sa_query); + +err1: + kfree(query); + return ret; +} +EXPORT_SYMBOL(ib_sa_classport_info_rec_query); + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { @@ -1794,7 +1954,7 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data) kfree(sa_dev); } -static int __init ib_sa_init(void) +int ib_sa_init(void) { int ret; @@ -1820,17 +1980,10 @@ static int __init ib_sa_init(void) goto err3; } - if (ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ib_sa_cb_table), - ib_sa_cb_table)) { - pr_err("Failed to add netlink callback\n"); - ret = -EINVAL; - goto err4; - } INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout); return 0; -err4: - destroy_workqueue(ib_nl_wq); + err3: mcast_cleanup(); err2: @@ -1839,9 +1992,8 @@ err1: return ret; } -static void __exit ib_sa_cleanup(void) +void ib_sa_cleanup(void) { - ibnl_remove_client(RDMA_NL_LS); cancel_delayed_work(&ib_nl_timed_work); flush_workqueue(ib_nl_wq); destroy_workqueue(ib_nl_wq); @@ -1849,6 +2001,3 @@ static void __exit ib_sa_cleanup(void) ib_unregister_client(&sa_client); idr_destroy(&query_idr); } - -module_init(ib_sa_init); -module_exit(ib_sa_cleanup); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 14606afbfaa8..5e573bb18660 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -56,8 +56,10 @@ struct ib_port { struct gid_attr_group *gid_attr_group; struct attribute_group gid_group; struct attribute_group pkey_group; - u8 port_num; struct attribute_group *pma_table; + struct attribute_group *hw_stats_ag; + struct rdma_hw_stats *hw_stats; + u8 port_num; }; struct port_attribute { @@ -80,6 +82,18 @@ struct port_table_attribute { __be16 attr_id; }; +struct hw_stats_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); + ssize_t (*store)(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t count); + int index; + u8 port_num; +}; + static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -733,6 +747,212 @@ static struct attribute_group *get_counter_table(struct ib_device *dev, return &pma_group; } +static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, + u8 port_num, int index) +{ + int ret; + + if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan)) + return 0; + ret = dev->get_hw_stats(dev, stats, port_num, index); + if (ret < 0) + return ret; + if (ret == stats->num_counters) + stats->timestamp = jiffies; + + return 0; +} + +static ssize_t print_hw_stat(struct rdma_hw_stats *stats, int index, char *buf) +{ + return sprintf(buf, "%llu\n", stats->value[index]); +} + +static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct ib_device *dev; + struct ib_port *port; + struct hw_stats_attribute *hsa; + struct rdma_hw_stats *stats; + int ret; + + hsa = container_of(attr, struct hw_stats_attribute, attr); + if (!hsa->port_num) { + dev = container_of((struct device *)kobj, + struct ib_device, dev); + stats = dev->hw_stats; + } else { + port = container_of(kobj, struct ib_port, kobj); + dev = port->ibdev; + stats = port->hw_stats; + } + ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index); + if (ret) + return ret; + return print_hw_stat(stats, hsa->index, buf); +} + +static ssize_t show_stats_lifespan(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct hw_stats_attribute *hsa; + int msecs; + + hsa = container_of(attr, struct hw_stats_attribute, attr); + if (!hsa->port_num) { + struct ib_device *dev = container_of((struct device *)kobj, + struct ib_device, dev); + msecs = jiffies_to_msecs(dev->hw_stats->lifespan); + } else { + struct ib_port *p = container_of(kobj, struct ib_port, kobj); + msecs = jiffies_to_msecs(p->hw_stats->lifespan); + } + return sprintf(buf, "%d\n", msecs); +} + +static ssize_t set_stats_lifespan(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct hw_stats_attribute *hsa; + int msecs; + int jiffies; + int ret; + + ret = kstrtoint(buf, 10, &msecs); + if (ret) + return ret; + if (msecs < 0 || msecs > 10000) + return -EINVAL; + jiffies = msecs_to_jiffies(msecs); + hsa = container_of(attr, struct hw_stats_attribute, attr); + if (!hsa->port_num) { + struct ib_device *dev = container_of((struct device *)kobj, + struct ib_device, dev); + dev->hw_stats->lifespan = jiffies; + } else { + struct ib_port *p = container_of(kobj, struct ib_port, kobj); + p->hw_stats->lifespan = jiffies; + } + return count; +} + +static void free_hsag(struct kobject *kobj, struct attribute_group *attr_group) +{ + struct attribute **attr; + + sysfs_remove_group(kobj, attr_group); + + for (attr = attr_group->attrs; *attr; attr++) + kfree(*attr); + kfree(attr_group); +} + +static struct attribute *alloc_hsa(int index, u8 port_num, const char *name) +{ + struct hw_stats_attribute *hsa; + + hsa = kmalloc(sizeof(*hsa), GFP_KERNEL); + if (!hsa) + return NULL; + + hsa->attr.name = (char *)name; + hsa->attr.mode = S_IRUGO; + hsa->show = show_hw_stats; + hsa->store = NULL; + hsa->index = index; + hsa->port_num = port_num; + + return &hsa->attr; +} + +static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num) +{ + struct hw_stats_attribute *hsa; + + hsa = kmalloc(sizeof(*hsa), GFP_KERNEL); + if (!hsa) + return NULL; + + hsa->attr.name = name; + hsa->attr.mode = S_IWUSR | S_IRUGO; + hsa->show = show_stats_lifespan; + hsa->store = set_stats_lifespan; + hsa->index = 0; + hsa->port_num = port_num; + + return &hsa->attr; +} + +static void setup_hw_stats(struct ib_device *device, struct ib_port *port, + u8 port_num) +{ + struct attribute_group *hsag = NULL; + struct rdma_hw_stats *stats; + int i = 0, ret; + + stats = device->alloc_hw_stats(device, port_num); + + if (!stats) + return; + + if (!stats->names || stats->num_counters <= 0) + goto err; + + hsag = kzalloc(sizeof(*hsag) + + // 1 extra for the lifespan config entry + sizeof(void *) * (stats->num_counters + 1), + GFP_KERNEL); + if (!hsag) + return; + + ret = device->get_hw_stats(device, stats, port_num, + stats->num_counters); + if (ret != stats->num_counters) + goto err; + + stats->timestamp = jiffies; + + hsag->name = "hw_counters"; + hsag->attrs = (void *)hsag + sizeof(*hsag); + + for (i = 0; i < stats->num_counters; i++) { + hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]); + if (!hsag->attrs[i]) + goto err; + } + + /* treat an error here as non-fatal */ + hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num); + + if (port) { + struct kobject *kobj = &port->kobj; + ret = sysfs_create_group(kobj, hsag); + if (ret) + goto err; + port->hw_stats_ag = hsag; + port->hw_stats = stats; + } else { + struct kobject *kobj = &device->dev.kobj; + ret = sysfs_create_group(kobj, hsag); + if (ret) + goto err; + device->hw_stats_ag = hsag; + device->hw_stats = stats; + } + + return; + +err: + kfree(stats); + for (; i >= 0; i--) + kfree(hsag->attrs[i]); + kfree(hsag); + return; +} + static int add_port(struct ib_device *device, int port_num, int (*port_callback)(struct ib_device *, u8, struct kobject *)) @@ -835,6 +1055,14 @@ static int add_port(struct ib_device *device, int port_num, goto err_remove_pkey; } + /* + * If port == 0, it means we have only one port and the parent + * device, not this port device, should be the holder of the + * hw_counters + */ + if (device->alloc_hw_stats && port_num) + setup_hw_stats(device, p, port_num); + list_add_tail(&p->kobj.entry, &device->port_list); kobject_uevent(&p->kobj, KOBJ_ADD); @@ -972,120 +1200,6 @@ static struct device_attribute *ib_class_attributes[] = { &dev_attr_node_desc }; -/* Show a given an attribute in the statistics group */ -static ssize_t show_protocol_stat(const struct device *device, - struct device_attribute *attr, char *buf, - unsigned offset) -{ - struct ib_device *dev = container_of(device, struct ib_device, dev); - union rdma_protocol_stats stats; - ssize_t ret; - - ret = dev->get_protocol_stats(dev, &stats); - if (ret) - return ret; - - return sprintf(buf, "%llu\n", - (unsigned long long) ((u64 *) &stats)[offset]); -} - -/* generate a read-only iwarp statistics attribute */ -#define IW_STATS_ENTRY(name) \ -static ssize_t show_##name(struct device *device, \ - struct device_attribute *attr, char *buf) \ -{ \ - return show_protocol_stat(device, attr, buf, \ - offsetof(struct iw_protocol_stats, name) / \ - sizeof (u64)); \ -} \ -static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) - -IW_STATS_ENTRY(ipInReceives); -IW_STATS_ENTRY(ipInHdrErrors); -IW_STATS_ENTRY(ipInTooBigErrors); -IW_STATS_ENTRY(ipInNoRoutes); -IW_STATS_ENTRY(ipInAddrErrors); -IW_STATS_ENTRY(ipInUnknownProtos); -IW_STATS_ENTRY(ipInTruncatedPkts); -IW_STATS_ENTRY(ipInDiscards); -IW_STATS_ENTRY(ipInDelivers); -IW_STATS_ENTRY(ipOutForwDatagrams); -IW_STATS_ENTRY(ipOutRequests); -IW_STATS_ENTRY(ipOutDiscards); -IW_STATS_ENTRY(ipOutNoRoutes); -IW_STATS_ENTRY(ipReasmTimeout); -IW_STATS_ENTRY(ipReasmReqds); -IW_STATS_ENTRY(ipReasmOKs); -IW_STATS_ENTRY(ipReasmFails); -IW_STATS_ENTRY(ipFragOKs); -IW_STATS_ENTRY(ipFragFails); -IW_STATS_ENTRY(ipFragCreates); -IW_STATS_ENTRY(ipInMcastPkts); -IW_STATS_ENTRY(ipOutMcastPkts); -IW_STATS_ENTRY(ipInBcastPkts); -IW_STATS_ENTRY(ipOutBcastPkts); -IW_STATS_ENTRY(tcpRtoAlgorithm); -IW_STATS_ENTRY(tcpRtoMin); -IW_STATS_ENTRY(tcpRtoMax); -IW_STATS_ENTRY(tcpMaxConn); -IW_STATS_ENTRY(tcpActiveOpens); -IW_STATS_ENTRY(tcpPassiveOpens); -IW_STATS_ENTRY(tcpAttemptFails); -IW_STATS_ENTRY(tcpEstabResets); -IW_STATS_ENTRY(tcpCurrEstab); -IW_STATS_ENTRY(tcpInSegs); -IW_STATS_ENTRY(tcpOutSegs); -IW_STATS_ENTRY(tcpRetransSegs); -IW_STATS_ENTRY(tcpInErrs); -IW_STATS_ENTRY(tcpOutRsts); - -static struct attribute *iw_proto_stats_attrs[] = { - &dev_attr_ipInReceives.attr, - &dev_attr_ipInHdrErrors.attr, - &dev_attr_ipInTooBigErrors.attr, - &dev_attr_ipInNoRoutes.attr, - &dev_attr_ipInAddrErrors.attr, - &dev_attr_ipInUnknownProtos.attr, - &dev_attr_ipInTruncatedPkts.attr, - &dev_attr_ipInDiscards.attr, - &dev_attr_ipInDelivers.attr, - &dev_attr_ipOutForwDatagrams.attr, - &dev_attr_ipOutRequests.attr, - &dev_attr_ipOutDiscards.attr, - &dev_attr_ipOutNoRoutes.attr, - &dev_attr_ipReasmTimeout.attr, - &dev_attr_ipReasmReqds.attr, - &dev_attr_ipReasmOKs.attr, - &dev_attr_ipReasmFails.attr, - &dev_attr_ipFragOKs.attr, - &dev_attr_ipFragFails.attr, - &dev_attr_ipFragCreates.attr, - &dev_attr_ipInMcastPkts.attr, - &dev_attr_ipOutMcastPkts.attr, - &dev_attr_ipInBcastPkts.attr, - &dev_attr_ipOutBcastPkts.attr, - &dev_attr_tcpRtoAlgorithm.attr, - &dev_attr_tcpRtoMin.attr, - &dev_attr_tcpRtoMax.attr, - &dev_attr_tcpMaxConn.attr, - &dev_attr_tcpActiveOpens.attr, - &dev_attr_tcpPassiveOpens.attr, - &dev_attr_tcpAttemptFails.attr, - &dev_attr_tcpEstabResets.attr, - &dev_attr_tcpCurrEstab.attr, - &dev_attr_tcpInSegs.attr, - &dev_attr_tcpOutSegs.attr, - &dev_attr_tcpRetransSegs.attr, - &dev_attr_tcpInErrs.attr, - &dev_attr_tcpOutRsts.attr, - NULL -}; - -static struct attribute_group iw_stats_group = { - .name = "proto_stats", - .attrs = iw_proto_stats_attrs, -}; - static void free_port_list_attributes(struct ib_device *device) { struct kobject *p, *t; @@ -1093,6 +1207,10 @@ static void free_port_list_attributes(struct ib_device *device) list_for_each_entry_safe(p, t, &device->port_list, entry) { struct ib_port *port = container_of(p, struct ib_port, kobj); list_del(&p->entry); + if (port->hw_stats) { + kfree(port->hw_stats); + free_hsag(&port->kobj, port->hw_stats_ag); + } sysfs_remove_group(p, port->pma_table); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); @@ -1149,11 +1267,8 @@ int ib_device_register_sysfs(struct ib_device *device, } } - if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) { - ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group); - if (ret) - goto err_put; - } + if (device->alloc_hw_stats) + setup_hw_stats(device, NULL, 0); return 0; @@ -1169,15 +1284,18 @@ err: void ib_device_unregister_sysfs(struct ib_device *device) { - /* Hold kobject until ib_dealloc_device() */ - struct kobject *kobj_dev = kobject_get(&device->dev.kobj); int i; - if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) - sysfs_remove_group(kobj_dev, &iw_stats_group); + /* Hold kobject until ib_dealloc_device() */ + kobject_get(&device->dev.kobj); free_port_list_attributes(device); + if (device->hw_stats) { + kfree(device->hw_stats); + free_hsag(&device->dev.kobj, device->hw_stats_ag); + } + for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) device_remove_file(&device->dev, ib_class_attributes[i]); diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index c7ad0a4c8b15..c0c7cf8af3f4 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ obj-$(CONFIG_INFINIBAND_NES) += nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ +obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index de1c61b417d6..ada2e5009c86 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -327,7 +327,7 @@ int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) kfree(cq->sw_queue); dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), (1UL << (cq->size_log2)) - * sizeof(struct t3_cqe), cq->queue, + * sizeof(struct t3_cqe) + 1, cq->queue, dma_unmap_addr(cq, mapping)); cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); return err; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 47cb927a0dd6..bb1a839d4d6d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1218,59 +1218,119 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, iwch_dev->rdev.rnic_info.pdev->device); } -static int iwch_get_mib(struct ib_device *ibdev, - union rdma_protocol_stats *stats) +enum counters { + IPINRECEIVES, + IPINHDRERRORS, + IPINADDRERRORS, + IPINUNKNOWNPROTOS, + IPINDISCARDS, + IPINDELIVERS, + IPOUTREQUESTS, + IPOUTDISCARDS, + IPOUTNOROUTES, + IPREASMTIMEOUT, + IPREASMREQDS, + IPREASMOKS, + IPREASMFAILS, + TCPACTIVEOPENS, + TCPPASSIVEOPENS, + TCPATTEMPTFAILS, + TCPESTABRESETS, + TCPCURRESTAB, + TCPINSEGS, + TCPOUTSEGS, + TCPRETRANSSEGS, + TCPINERRS, + TCPOUTRSTS, + TCPRTOMIN, + TCPRTOMAX, + NR_COUNTERS +}; + +static const char * const names[] = { + [IPINRECEIVES] = "ipInReceives", + [IPINHDRERRORS] = "ipInHdrErrors", + [IPINADDRERRORS] = "ipInAddrErrors", + [IPINUNKNOWNPROTOS] = "ipInUnknownProtos", + [IPINDISCARDS] = "ipInDiscards", + [IPINDELIVERS] = "ipInDelivers", + [IPOUTREQUESTS] = "ipOutRequests", + [IPOUTDISCARDS] = "ipOutDiscards", + [IPOUTNOROUTES] = "ipOutNoRoutes", + [IPREASMTIMEOUT] = "ipReasmTimeout", + [IPREASMREQDS] = "ipReasmReqds", + [IPREASMOKS] = "ipReasmOKs", + [IPREASMFAILS] = "ipReasmFails", + [TCPACTIVEOPENS] = "tcpActiveOpens", + [TCPPASSIVEOPENS] = "tcpPassiveOpens", + [TCPATTEMPTFAILS] = "tcpAttemptFails", + [TCPESTABRESETS] = "tcpEstabResets", + [TCPCURRESTAB] = "tcpCurrEstab", + [TCPINSEGS] = "tcpInSegs", + [TCPOUTSEGS] = "tcpOutSegs", + [TCPRETRANSSEGS] = "tcpRetransSegs", + [TCPINERRS] = "tcpInErrs", + [TCPOUTRSTS] = "tcpOutRsts", + [TCPRTOMIN] = "tcpRtoMin", + [TCPRTOMAX] = "tcpRtoMax", +}; + +static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev, + u8 port_num) +{ + BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); + + /* Our driver only supports device level stats */ + if (port_num != 0) + return NULL; + + return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u8 port, int index) { struct iwch_dev *dev; struct tp_mib_stats m; int ret; + if (port != 0 || !stats) + return -ENOSYS; + PDBG("%s ibdev %p\n", __func__, ibdev); dev = to_iwch_dev(ibdev); ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); if (ret) return -ENOSYS; - memset(stats, 0, sizeof *stats); - stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) + - m.ipInReceive_lo; - stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) + - m.ipInHdrErrors_lo; - stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) + - m.ipInAddrErrors_lo; - stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) + - m.ipInUnknownProtos_lo; - stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) + - m.ipInDiscards_lo; - stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) + - m.ipInDelivers_lo; - stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) + - m.ipOutRequests_lo; - stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) + - m.ipOutDiscards_lo; - stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) + - m.ipOutNoRoutes_lo; - stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout; - stats->iw.ipReasmReqds = (u64) m.ipReasmReqds; - stats->iw.ipReasmOKs = (u64) m.ipReasmOKs; - stats->iw.ipReasmFails = (u64) m.ipReasmFails; - stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens; - stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens; - stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails; - stats->iw.tcpEstabResets = (u64) m.tcpEstabResets; - stats->iw.tcpOutRsts = (u64) m.tcpOutRsts; - stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab; - stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) + - m.tcpInSegs_lo; - stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) + - m.tcpOutSegs_lo; - stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) + - m.tcpRetransSeg_lo; - stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) + - m.tcpInErrs_lo; - stats->iw.tcpRtoMin = (u64) m.tcpRtoMin; - stats->iw.tcpRtoMax = (u64) m.tcpRtoMax; - return 0; + stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo; + stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo; + stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo; + stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo; + stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo; + stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo; + stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo; + stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo; + stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo; + stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout; + stats->value[IPREASMREQDS] = m.ipReasmReqds; + stats->value[IPREASMOKS] = m.ipReasmOKs; + stats->value[IPREASMFAILS] = m.ipReasmFails; + stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens; + stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens; + stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails; + stats->value[TCPESTABRESETS] = m.tcpEstabResets; + stats->value[TCPCURRESTAB] = m.tcpOutRsts; + stats->value[TCPINSEGS] = m.tcpCurrEstab; + stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo; + stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo; + stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo, + stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo; + stats->value[TCPRTOMIN] = m.tcpRtoMin; + stats->value[TCPRTOMAX] = m.tcpRtoMax; + + return stats->num_counters; } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); @@ -1373,7 +1433,8 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.req_notify_cq = iwch_arm_cq; dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; - dev->ibdev.get_protocol_stats = iwch_get_mib; + dev->ibdev.alloc_hw_stats = iwch_alloc_stats; + dev->ibdev.get_hw_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = iwch_port_immutable; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 7574f394fdac..dd8a86b726d2 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -446,20 +446,59 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, c4iw_dev->rdev.lldi.pdev->device); } +enum counters { + IP4INSEGS, + IP4OUTSEGS, + IP4RETRANSSEGS, + IP4OUTRSTS, + IP6INSEGS, + IP6OUTSEGS, + IP6RETRANSSEGS, + IP6OUTRSTS, + NR_COUNTERS +}; + +static const char * const names[] = { + [IP4INSEGS] = "ip4InSegs", + [IP4OUTSEGS] = "ip4OutSegs", + [IP4RETRANSSEGS] = "ip4RetransSegs", + [IP4OUTRSTS] = "ip4OutRsts", + [IP6INSEGS] = "ip6InSegs", + [IP6OUTSEGS] = "ip6OutSegs", + [IP6RETRANSSEGS] = "ip6RetransSegs", + [IP6OUTRSTS] = "ip6OutRsts" +}; + +static struct rdma_hw_stats *c4iw_alloc_stats(struct ib_device *ibdev, + u8 port_num) +{ + BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); + + if (port_num != 0) + return NULL; + + return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + static int c4iw_get_mib(struct ib_device *ibdev, - union rdma_protocol_stats *stats) + struct rdma_hw_stats *stats, + u8 port, int index) { struct tp_tcp_stats v4, v6; struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev); cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6); - memset(stats, 0, sizeof *stats); - stats->iw.tcpInSegs = v4.tcp_in_segs + v6.tcp_in_segs; - stats->iw.tcpOutSegs = v4.tcp_out_segs + v6.tcp_out_segs; - stats->iw.tcpRetransSegs = v4.tcp_retrans_segs + v6.tcp_retrans_segs; - stats->iw.tcpOutRsts = v4.tcp_out_rsts + v6.tcp_out_rsts; - - return 0; + stats->value[IP4INSEGS] = v4.tcp_in_segs; + stats->value[IP4OUTSEGS] = v4.tcp_out_segs; + stats->value[IP4RETRANSSEGS] = v4.tcp_retrans_segs; + stats->value[IP4OUTRSTS] = v4.tcp_out_rsts; + stats->value[IP6INSEGS] = v6.tcp_in_segs; + stats->value[IP6OUTSEGS] = v6.tcp_out_segs; + stats->value[IP6RETRANSSEGS] = v6.tcp_retrans_segs; + stats->value[IP6OUTRSTS] = v6.tcp_out_rsts; + + return stats->num_counters; } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); @@ -562,7 +601,8 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.req_notify_cq = c4iw_arm_cq; dev->ibdev.post_send = c4iw_post_send; dev->ibdev.post_recv = c4iw_post_receive; - dev->ibdev.get_protocol_stats = c4iw_get_mib; + dev->ibdev.alloc_hw_stats = c4iw_alloc_stats; + dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = c4iw_port_immutable; dev->ibdev.drain_sq = c4iw_drain_sq; diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig index a925fb0db706..a925fb0db706 100644 --- a/drivers/staging/rdma/hfi1/Kconfig +++ b/drivers/infiniband/hw/hfi1/Kconfig diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 8dc59382ee96..9b5382c94b0c 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -7,7 +7,7 @@ # obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o -hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \ +hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ eprom.o file_ops.o firmware.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 6e7050ab9e16..6e7050ab9e16 100644 --- a/drivers/staging/rdma/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 20f52fe74091..20f52fe74091 100644 --- a/drivers/staging/rdma/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 0d58fe3b49b5..0d58fe3b49b5 100644 --- a/drivers/staging/rdma/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index dcae8e723f98..3b876da745a1 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1037,6 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *); static void dc_start(struct hfi1_devdata *); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); +static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd); /* * Error interrupt table entry. This is used as input to the interrupt @@ -6105,7 +6106,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) } /* this access is valid only when the link is up */ - if ((ppd->host_link_state & HLS_UP) == 0) { + if (ppd->host_link_state & HLS_DOWN) { dd_dev_info(dd, "%s: link state %s not up\n", __func__, link_state_name(ppd->host_link_state)); ret = -EBUSY; @@ -6961,6 +6962,8 @@ void handle_link_down(struct work_struct *work) } reset_neighbor_info(ppd); + if (ppd->mgmt_allowed) + remove_full_mgmt_pkey(ppd); /* disable the port */ clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); @@ -7069,6 +7072,12 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); } +static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd) +{ + ppd->pkeys[2] = 0; + (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); +} + /* * Convert the given link width to the OPA link width bitmask. */ @@ -7429,7 +7438,7 @@ void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths) retry: mutex_lock(&ppd->hls_lock); /* only apply if the link is up */ - if (!(ppd->host_link_state & HLS_UP)) { + if (ppd->host_link_state & HLS_DOWN) { /* still going up..wait and retry */ if (ppd->host_link_state & HLS_GOING_UP) { if (++tries < 1000) { @@ -9212,9 +9221,6 @@ void reset_qsfp(struct hfi1_pportdata *ppd) /* Reset the QSFP */ mask = (u64)QSFP_HFI0_RESET_N; - qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE); - qsfp_mask |= mask; - write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask); qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT); @@ -9252,6 +9258,12 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, dd_dev_info(dd, "%s: QSFP cable temperature too low\n", __func__); + /* + * The remaining alarms/warnings don't matter if the link is down. + */ + if (ppd->host_link_state & HLS_DOWN) + return 0; + if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) dd_dev_info(dd, "%s: QSFP supply voltage too high\n", @@ -9346,9 +9358,8 @@ void qsfp_event(struct work_struct *work) return; /* - * Turn DC back on after cables has been - * re-inserted. Up until now, the DC has been in - * reset to save power. + * Turn DC back on after cable has been re-inserted. Up until + * now, the DC has been in reset to save power. */ dc_start(dd); @@ -9480,7 +9491,15 @@ int bringup_serdes(struct hfi1_pportdata *ppd) return ret; } - /* tune the SERDES to a ballpark setting for + get_port_type(ppd); + if (ppd->port_type == PORT_TYPE_QSFP) { + set_qsfp_int_n(ppd, 0); + wait_for_qsfp_init(ppd); + set_qsfp_int_n(ppd, 1); + } + + /* + * Tune the SerDes to a ballpark setting for * optimal signal and bit error rate * Needs to be done before starting the link */ @@ -10074,7 +10093,7 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd) */ u32 driver_logical_state(struct hfi1_pportdata *ppd) { - if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP)) + if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN)) return IB_PORT_DOWN; switch (ppd->host_link_state & HLS_UP) { @@ -14578,7 +14597,7 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, (reason), (ret)) /* - * Initialize the Avago Thermal sensor. + * Initialize the thermal sensor. * * After initialization, enable polling of thermal sensor through * SBus interface. In order for this to work, the SBus Master diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 1948706fff1a..66a327978739 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -398,6 +398,12 @@ /* Lane ID for general configuration registers */ #define GENERAL_CONFIG 4 +/* LINK_TUNING_PARAMETERS fields */ +#define TUNING_METHOD_SHIFT 24 + +/* LINK_OPTIMIZATION_SETTINGS fields */ +#define ENABLE_EXT_DEV_CONFIG_SHIFT 24 + /* LOAD_DATA 8051 command shifts and fields */ #define LOAD_DATA_FIELD_ID_SHIFT 40 #define LOAD_DATA_FIELD_ID_MASK 0xfull diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 8744de6667c2..8744de6667c2 100644 --- a/drivers/staging/rdma/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index e9b6bb322025..fcc9c217a97a 100644 --- a/drivers/staging/rdma/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -178,7 +178,8 @@ HFI1_CAP_PKEY_CHECK | \ HFI1_CAP_NO_INTEGRITY) -#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << 16) | HFI1_USER_SWMINOR) +#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \ + HFI1_USER_SWMINOR) #ifndef HFI1_KERN_TYPE #define HFI1_KERN_TYPE 0 @@ -349,6 +350,8 @@ struct hfi1_message_header { #define HFI1_BECN_MASK 1 #define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT) +#define HFI1_PSM_IOC_BASE_SEQ 0x0 + static inline __u64 rhf_to_cpu(const __le32 *rbuf) { return __le64_to_cpu(*((__le64 *)rbuf)); diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index dbab9d9cc288..dbab9d9cc288 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h index b6fb6814f1b8..b6fb6814f1b8 100644 --- a/drivers/staging/rdma/hfi1/debugfs.h +++ b/drivers/infiniband/hw/hfi1/debugfs.h diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c index c05c39da83b1..bf64b5a7bfd7 100644 --- a/drivers/staging/rdma/hfi1/device.c +++ b/drivers/infiniband/hw/hfi1/device.c @@ -60,7 +60,8 @@ static dev_t hfi1_dev; int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, struct cdev *cdev, struct device **devp, - bool user_accessible) + bool user_accessible, + struct kobject *parent) { const dev_t dev = MKDEV(MAJOR(hfi1_dev), minor); struct device *device = NULL; @@ -68,6 +69,7 @@ int hfi1_cdev_init(int minor, const char *name, cdev_init(cdev, fops); cdev->owner = THIS_MODULE; + cdev->kobj.parent = parent; kobject_set_name(&cdev->kobj, name); ret = cdev_add(cdev, dev, 1); @@ -82,13 +84,13 @@ int hfi1_cdev_init(int minor, const char *name, else device = device_create(class, NULL, dev, NULL, "%s", name); - if (!IS_ERR(device)) - goto done; - ret = PTR_ERR(device); - device = NULL; - pr_err("Could not create device for minor %d, %s (err %d)\n", - minor, name, -ret); - cdev_del(cdev); + if (IS_ERR(device)) { + ret = PTR_ERR(device); + device = NULL; + pr_err("Could not create device for minor %d, %s (err %d)\n", + minor, name, -ret); + cdev_del(cdev); + } done: *devp = device; return ret; diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/infiniband/hw/hfi1/device.h index 5bb3e83cf2da..c3ec19cb0ac9 100644 --- a/drivers/staging/rdma/hfi1/device.h +++ b/drivers/infiniband/hw/hfi1/device.h @@ -50,7 +50,8 @@ int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, struct cdev *cdev, struct device **devp, - bool user_accessible); + bool user_accessible, + struct kobject *parent); void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp); const char *class_name(void); int __init dev_init(void); diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/infiniband/hw/hfi1/dma.c index 7e8dab892848..7e8dab892848 100644 --- a/drivers/staging/rdma/hfi1/dma.c +++ b/drivers/infiniband/hw/hfi1/dma.c diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 700c6fa3a633..c75b0ae688f8 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1161,7 +1161,7 @@ int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc) ppd->lmc = lmc; hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0); - dd_dev_info(dd, "IB%u:%u got a lid: 0x%x\n", dd->unit, ppd->port, lid); + dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid); return 0; } diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c index 106349fc1fb9..106349fc1fb9 100644 --- a/drivers/staging/rdma/hfi1/efivar.c +++ b/drivers/infiniband/hw/hfi1/efivar.c diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/infiniband/hw/hfi1/efivar.h index 94e9e70de568..94e9e70de568 100644 --- a/drivers/staging/rdma/hfi1/efivar.h +++ b/drivers/infiniband/hw/hfi1/efivar.h diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c new file mode 100644 index 000000000000..36b77943cbfd --- /dev/null +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -0,0 +1,102 @@ +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include <linux/delay.h> +#include "hfi.h" +#include "common.h" +#include "eprom.h" + +#define CMD_SHIFT 24 +#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT)) + +/* controller interface speeds */ +#define EP_SPEED_FULL 0x2 /* full speed */ + +/* + * How long to wait for the EPROM to become available, in ms. + * The spec 32 Mb EPROM takes around 40s to erase then write. + * Double it for safety. + */ +#define EPROM_TIMEOUT 80000 /* ms */ +/* + * Initialize the EPROM handler. + */ +int eprom_init(struct hfi1_devdata *dd) +{ + int ret = 0; + + /* only the discrete chip has an EPROM */ + if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0) + return 0; + + /* + * It is OK if both HFIs reset the EPROM as long as they don't + * do it at the same time. + */ + ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); + if (ret) { + dd_dev_err(dd, + "%s: unable to acquire EPROM resource, no EPROM support\n", + __func__); + goto done_asic; + } + + /* reset EPROM to be sure it is in a good state */ + + /* set reset */ + write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK); + /* clear reset, set speed */ + write_csr(dd, ASIC_EEP_CTL_STAT, + EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT); + + /* wake the device with command "release powerdown NoID" */ + write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID); + + dd->eprom_available = true; + release_chip_resource(dd, CR_EPROM); +done_asic: + return ret; +} diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h index d41f0b1afb15..d41f0b1afb15 100644 --- a/drivers/staging/rdma/hfi1/eprom.h +++ b/drivers/infiniband/hw/hfi1/eprom.h diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c1c5bf82addb..7a5b0e676cc7 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -72,8 +72,6 @@ */ static int hfi1_file_open(struct inode *, struct file *); static int hfi1_file_close(struct inode *, struct file *); -static ssize_t hfi1_file_write(struct file *, const char __user *, - size_t, loff_t *); static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *); static unsigned int hfi1_poll(struct file *, struct poll_table_struct *); static int hfi1_file_mmap(struct file *, struct vm_area_struct *); @@ -86,8 +84,7 @@ static int get_ctxt_info(struct file *, void __user *, __u32); static int get_base_info(struct file *, void __user *, __u32); static int setup_ctxt(struct file *); static int setup_subctxt(struct hfi1_ctxtdata *); -static int get_user_context(struct file *, struct hfi1_user_info *, - int, unsigned); +static int get_user_context(struct file *, struct hfi1_user_info *, int); static int find_shared_ctxt(struct file *, const struct hfi1_user_info *); static int allocate_ctxt(struct file *, struct hfi1_devdata *, struct hfi1_user_info *); @@ -97,13 +94,15 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long); static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16); static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int); static int vma_fault(struct vm_area_struct *, struct vm_fault *); +static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, + unsigned long arg); static const struct file_operations hfi1_file_ops = { .owner = THIS_MODULE, - .write = hfi1_file_write, .write_iter = hfi1_write_iter, .open = hfi1_file_open, .release = hfi1_file_close, + .unlocked_ioctl = hfi1_file_ioctl, .poll = hfi1_poll, .mmap = hfi1_file_mmap, .llseek = noop_llseek, @@ -169,6 +168,13 @@ static inline int is_valid_mmap(u64 token) static int hfi1_file_open(struct inode *inode, struct file *fp) { + struct hfi1_devdata *dd = container_of(inode->i_cdev, + struct hfi1_devdata, + user_cdev); + + /* Just take a ref now. Not all opens result in a context assign */ + kobject_get(&dd->kobj); + /* The real work is performed later in assign_ctxt() */ fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL); if (fp->private_data) /* no cpu affinity by default */ @@ -176,127 +182,59 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } -static ssize_t hfi1_file_write(struct file *fp, const char __user *data, - size_t count, loff_t *offset) +static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, + unsigned long arg) { - const struct hfi1_cmd __user *ucmd; struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_cmd cmd; struct hfi1_user_info uinfo; struct hfi1_tid_info tinfo; + int ret = 0; unsigned long addr; - ssize_t consumed = 0, copy = 0, ret = 0; - void *dest = NULL; - __u64 user_val = 0; - int uctxt_required = 1; - int must_be_root = 0; - - /* FIXME: This interface cannot continue out of staging */ - if (WARN_ON_ONCE(!ib_safe_file_access(fp))) - return -EACCES; - - if (count < sizeof(cmd)) { - ret = -EINVAL; - goto bail; - } - - ucmd = (const struct hfi1_cmd __user *)data; - if (copy_from_user(&cmd, ucmd, sizeof(cmd))) { - ret = -EFAULT; - goto bail; - } - - consumed = sizeof(cmd); - - switch (cmd.type) { - case HFI1_CMD_ASSIGN_CTXT: - uctxt_required = 0; /* assigned user context not required */ - copy = sizeof(uinfo); - dest = &uinfo; - break; - case HFI1_CMD_SDMA_STATUS_UPD: - case HFI1_CMD_CREDIT_UPD: - copy = 0; - break; - case HFI1_CMD_TID_UPDATE: - case HFI1_CMD_TID_FREE: - case HFI1_CMD_TID_INVAL_READ: - copy = sizeof(tinfo); - dest = &tinfo; - break; - case HFI1_CMD_USER_INFO: - case HFI1_CMD_RECV_CTRL: - case HFI1_CMD_POLL_TYPE: - case HFI1_CMD_ACK_EVENT: - case HFI1_CMD_CTXT_INFO: - case HFI1_CMD_SET_PKEY: - case HFI1_CMD_CTXT_RESET: - copy = 0; - user_val = cmd.addr; - break; - case HFI1_CMD_EP_INFO: - case HFI1_CMD_EP_ERASE_CHIP: - case HFI1_CMD_EP_ERASE_RANGE: - case HFI1_CMD_EP_READ_RANGE: - case HFI1_CMD_EP_WRITE_RANGE: - uctxt_required = 0; /* assigned user context not required */ - must_be_root = 1; /* validate user */ - copy = 0; - break; - default: - ret = -EINVAL; - goto bail; - } - - /* If the command comes with user data, copy it. */ - if (copy) { - if (copy_from_user(dest, (void __user *)cmd.addr, copy)) { - ret = -EFAULT; - goto bail; - } - consumed += copy; - } - - /* - * Make sure there is a uctxt when needed. - */ - if (uctxt_required && !uctxt) { - ret = -EINVAL; - goto bail; - } + int uval = 0; + unsigned long ul_uval = 0; + u16 uval16 = 0; + + hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd); + if (cmd != HFI1_IOCTL_ASSIGN_CTXT && + cmd != HFI1_IOCTL_GET_VERS && + !uctxt) + return -EINVAL; - /* only root can do these operations */ - if (must_be_root && !capable(CAP_SYS_ADMIN)) { - ret = -EPERM; - goto bail; - } + switch (cmd) { + case HFI1_IOCTL_ASSIGN_CTXT: + if (copy_from_user(&uinfo, + (struct hfi1_user_info __user *)arg, + sizeof(uinfo))) + return -EFAULT; - switch (cmd.type) { - case HFI1_CMD_ASSIGN_CTXT: ret = assign_ctxt(fp, &uinfo); if (ret < 0) - goto bail; - ret = setup_ctxt(fp); + return ret; + setup_ctxt(fp); if (ret) - goto bail; + return ret; ret = user_init(fp); break; - case HFI1_CMD_CTXT_INFO: - ret = get_ctxt_info(fp, (void __user *)(unsigned long) - user_val, cmd.len); - break; - case HFI1_CMD_USER_INFO: - ret = get_base_info(fp, (void __user *)(unsigned long) - user_val, cmd.len); + case HFI1_IOCTL_CTXT_INFO: + ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg, + sizeof(struct hfi1_ctxt_info)); break; - case HFI1_CMD_SDMA_STATUS_UPD: + case HFI1_IOCTL_USER_INFO: + ret = get_base_info(fp, (void __user *)(unsigned long)arg, + sizeof(struct hfi1_base_info)); break; - case HFI1_CMD_CREDIT_UPD: + case HFI1_IOCTL_CREDIT_UPD: if (uctxt && uctxt->sc) sc_return_credits(uctxt->sc); break; - case HFI1_CMD_TID_UPDATE: + + case HFI1_IOCTL_TID_UPDATE: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + ret = hfi1_user_exp_rcv_setup(fp, &tinfo); if (!ret) { /* @@ -305,57 +243,82 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, * These fields are adjacent in the structure so * we can copy them at the same time. */ - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, sizeof(tinfo.tidcnt) + sizeof(tinfo.length))) ret = -EFAULT; } break; - case HFI1_CMD_TID_INVAL_READ: - ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); + + case HFI1_IOCTL_TID_FREE: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_clear(fp, &tinfo); if (ret) break; - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, sizeof(tinfo.tidcnt))) ret = -EFAULT; break; - case HFI1_CMD_TID_FREE: - ret = hfi1_user_exp_rcv_clear(fp, &tinfo); + + case HFI1_IOCTL_TID_INVAL_READ: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); if (ret) break; - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, sizeof(tinfo.tidcnt))) ret = -EFAULT; break; - case HFI1_CMD_RECV_CTRL: - ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val); + + case HFI1_IOCTL_RECV_CTRL: + ret = get_user(uval, (int __user *)arg); + if (ret != 0) + return -EFAULT; + ret = manage_rcvq(uctxt, fd->subctxt, uval); break; - case HFI1_CMD_POLL_TYPE: - uctxt->poll_type = (typeof(uctxt->poll_type))user_val; + + case HFI1_IOCTL_POLL_TYPE: + ret = get_user(uval, (int __user *)arg); + if (ret != 0) + return -EFAULT; + uctxt->poll_type = (typeof(uctxt->poll_type))uval; break; - case HFI1_CMD_ACK_EVENT: - ret = user_event_ack(uctxt, fd->subctxt, user_val); + + case HFI1_IOCTL_ACK_EVENT: + ret = get_user(ul_uval, (unsigned long __user *)arg); + if (ret != 0) + return -EFAULT; + ret = user_event_ack(uctxt, fd->subctxt, ul_uval); break; - case HFI1_CMD_SET_PKEY: + + case HFI1_IOCTL_SET_PKEY: + ret = get_user(uval16, (u16 __user *)arg); + if (ret != 0) + return -EFAULT; if (HFI1_CAP_IS_USET(PKEY_CHECK)) - ret = set_ctxt_pkey(uctxt, fd->subctxt, user_val); + ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16); else - ret = -EPERM; + return -EPERM; break; - case HFI1_CMD_CTXT_RESET: { + + case HFI1_IOCTL_CTXT_RESET: { struct send_context *sc; struct hfi1_devdata *dd; - if (!uctxt || !uctxt->dd || !uctxt->sc) { - ret = -EINVAL; - break; - } + if (!uctxt || !uctxt->dd || !uctxt->sc) + return -EINVAL; + /* * There is no protection here. User level has to * guarantee that no one will be writing to the send @@ -373,10 +336,9 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, wait_event_interruptible_timeout( sc->halt_wait, (sc->flags & SCF_HALTED), msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (!(sc->flags & SCF_HALTED)) { - ret = -ENOLCK; - break; - } + if (!(sc->flags & SCF_HALTED)) + return -ENOLCK; + /* * If the send context was halted due to a Freeze, * wait until the device has been "unfrozen" before @@ -387,18 +349,16 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, dd->event_queue, !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN), msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (dd->flags & HFI1_FROZEN) { - ret = -ENOLCK; - break; - } - if (dd->flags & HFI1_FORCED_FREEZE) { + if (dd->flags & HFI1_FROZEN) + return -ENOLCK; + + if (dd->flags & HFI1_FORCED_FREEZE) /* * Don't allow context reset if we are into * forced freeze */ - ret = -ENODEV; - break; - } + return -ENODEV; + sc_disable(sc); ret = sc_enable(sc); hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, @@ -410,18 +370,17 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, sc_return_credits(sc); break; } - case HFI1_CMD_EP_INFO: - case HFI1_CMD_EP_ERASE_CHIP: - case HFI1_CMD_EP_ERASE_RANGE: - case HFI1_CMD_EP_READ_RANGE: - case HFI1_CMD_EP_WRITE_RANGE: - ret = handle_eprom_command(fp, &cmd); + + case HFI1_IOCTL_GET_VERS: + uval = HFI1_USER_SWVERSION; + if (put_user(uval, (int __user *)arg)) + return -EFAULT; break; + + default: + return -EINVAL; } - if (ret >= 0) - ret = consumed; -bail: return ret; } @@ -738,7 +697,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) { struct hfi1_filedata *fdata = fp->private_data; struct hfi1_ctxtdata *uctxt = fdata->uctxt; - struct hfi1_devdata *dd; + struct hfi1_devdata *dd = container_of(inode->i_cdev, + struct hfi1_devdata, + user_cdev); unsigned long flags, *ev; fp->private_data = NULL; @@ -747,7 +708,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) goto done; hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt); - dd = uctxt->dd; mutex_lock(&hfi1_mutex); flush_wc(); @@ -813,6 +773,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) mutex_unlock(&hfi1_mutex); hfi1_free_ctxtdata(dd, uctxt); done: + kobject_put(&dd->kobj); kfree(fdata); return 0; } @@ -836,7 +797,7 @@ static u64 kvirt_to_phys(void *addr) static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) { int i_minor, ret = 0; - unsigned swmajor, swminor, alg = HFI1_ALG_ACROSS; + unsigned int swmajor, swminor; swmajor = uinfo->userversion >> 16; if (swmajor != HFI1_USER_SWMAJOR) { @@ -846,9 +807,6 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) swminor = uinfo->userversion & 0xffff; - if (uinfo->hfi1_alg < HFI1_ALG_COUNT) - alg = uinfo->hfi1_alg; - mutex_lock(&hfi1_mutex); /* First, lets check if we need to setup a shared context? */ if (uinfo->subctxt_cnt) { @@ -868,7 +826,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) */ if (!ret) { i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - ret = get_user_context(fp, uinfo, i_minor - 1, alg); + ret = get_user_context(fp, uinfo, i_minor); } done_unlock: mutex_unlock(&hfi1_mutex); @@ -876,71 +834,26 @@ done: return ret; } -/* return true if the device available for general use */ -static int usable_device(struct hfi1_devdata *dd) -{ - struct hfi1_pportdata *ppd = dd->pport; - - return driver_lstate(ppd) == IB_PORT_ACTIVE; -} - static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo, - int devno, unsigned alg) + int devno) { struct hfi1_devdata *dd = NULL; - int ret = 0, devmax, npresent, nup, dev; + int devmax, npresent, nup; devmax = hfi1_count_units(&npresent, &nup); - if (!npresent) { - ret = -ENXIO; - goto done; - } - if (!nup) { - ret = -ENETDOWN; - goto done; - } - if (devno >= 0) { - dd = hfi1_lookup(devno); - if (!dd) - ret = -ENODEV; - else if (!dd->freectxts) - ret = -EBUSY; - } else { - struct hfi1_devdata *pdd; - - if (alg == HFI1_ALG_ACROSS) { - unsigned free = 0U; - - for (dev = 0; dev < devmax; dev++) { - pdd = hfi1_lookup(dev); - if (!pdd) - continue; - if (!usable_device(pdd)) - continue; - if (pdd->freectxts && - pdd->freectxts > free) { - dd = pdd; - free = pdd->freectxts; - } - } - } else { - for (dev = 0; dev < devmax; dev++) { - pdd = hfi1_lookup(dev); - if (!pdd) - continue; - if (!usable_device(pdd)) - continue; - if (pdd->freectxts) { - dd = pdd; - break; - } - } - } - if (!dd) - ret = -EBUSY; - } -done: - return ret ? ret : allocate_ctxt(fp, dd, uinfo); + if (!npresent) + return -ENXIO; + + if (!nup) + return -ENETDOWN; + + dd = hfi1_lookup(devno); + if (!dd) + return -ENODEV; + else if (!dd->freectxts) + return -EBUSY; + + return allocate_ctxt(fp, dd, uinfo); } static int find_shared_ctxt(struct file *fp, @@ -1546,170 +1459,10 @@ done: return ret; } -static int ui_open(struct inode *inode, struct file *filp) -{ - struct hfi1_devdata *dd; - - dd = container_of(inode->i_cdev, struct hfi1_devdata, ui_cdev); - filp->private_data = dd; /* for other methods */ - return 0; -} - -static int ui_release(struct inode *inode, struct file *filp) -{ - /* nothing to do */ - return 0; -} - -static loff_t ui_lseek(struct file *filp, loff_t offset, int whence) -{ - struct hfi1_devdata *dd = filp->private_data; - - return fixed_size_llseek(filp, offset, whence, - (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE); -} - -/* NOTE: assumes unsigned long is 8 bytes */ -static ssize_t ui_read(struct file *filp, char __user *buf, size_t count, - loff_t *f_pos) -{ - struct hfi1_devdata *dd = filp->private_data; - void __iomem *base = dd->kregbase; - unsigned long total, csr_off, - barlen = (dd->kregend - dd->kregbase); - u64 data; - - /* only read 8 byte quantities */ - if ((count % 8) != 0) - return -EINVAL; - /* offset must be 8-byte aligned */ - if ((*f_pos % 8) != 0) - return -EINVAL; - /* destination buffer must be 8-byte aligned */ - if ((unsigned long)buf % 8 != 0) - return -EINVAL; - /* must be in range */ - if (*f_pos + count > (barlen + DC8051_DATA_MEM_SIZE)) - return -EINVAL; - /* only set the base if we are not starting past the BAR */ - if (*f_pos < barlen) - base += *f_pos; - csr_off = *f_pos; - for (total = 0; total < count; total += 8, csr_off += 8) { - /* accessing LCB CSRs requires more checks */ - if (is_lcb_offset(csr_off)) { - if (read_lcb_csr(dd, csr_off, (u64 *)&data)) - break; /* failed */ - } - /* - * Cannot read ASIC GPIO/QSFP* clear and force CSRs without a - * false parity error. Avoid the whole issue by not reading - * them. These registers are defined as having a read value - * of 0. - */ - else if (csr_off == ASIC_GPIO_CLEAR || - csr_off == ASIC_GPIO_FORCE || - csr_off == ASIC_QSFP1_CLEAR || - csr_off == ASIC_QSFP1_FORCE || - csr_off == ASIC_QSFP2_CLEAR || - csr_off == ASIC_QSFP2_FORCE) - data = 0; - else if (csr_off >= barlen) { - /* - * read_8051_data can read more than just 8 bytes at - * a time. However, folding this into the loop and - * handling the reads in 8 byte increments allows us - * to smoothly transition from chip memory to 8051 - * memory. - */ - if (read_8051_data(dd, - (u32)(csr_off - barlen), - sizeof(data), &data)) - break; /* failed */ - } else - data = readq(base + total); - if (put_user(data, (unsigned long __user *)(buf + total))) - break; - } - *f_pos += total; - return total; -} - -/* NOTE: assumes unsigned long is 8 bytes */ -static ssize_t ui_write(struct file *filp, const char __user *buf, - size_t count, loff_t *f_pos) -{ - struct hfi1_devdata *dd = filp->private_data; - void __iomem *base; - unsigned long total, data, csr_off; - int in_lcb; - - /* only write 8 byte quantities */ - if ((count % 8) != 0) - return -EINVAL; - /* offset must be 8-byte aligned */ - if ((*f_pos % 8) != 0) - return -EINVAL; - /* source buffer must be 8-byte aligned */ - if ((unsigned long)buf % 8 != 0) - return -EINVAL; - /* must be in range */ - if (*f_pos + count > dd->kregend - dd->kregbase) - return -EINVAL; - - base = (void __iomem *)dd->kregbase + *f_pos; - csr_off = *f_pos; - in_lcb = 0; - for (total = 0; total < count; total += 8, csr_off += 8) { - if (get_user(data, (unsigned long __user *)(buf + total))) - break; - /* accessing LCB CSRs requires a special procedure */ - if (is_lcb_offset(csr_off)) { - if (!in_lcb) { - int ret = acquire_lcb_access(dd, 1); - - if (ret) - break; - in_lcb = 1; - } - } else { - if (in_lcb) { - release_lcb_access(dd, 1); - in_lcb = 0; - } - } - writeq(data, base + total); - } - if (in_lcb) - release_lcb_access(dd, 1); - *f_pos += total; - return total; -} - -static const struct file_operations ui_file_ops = { - .owner = THIS_MODULE, - .llseek = ui_lseek, - .read = ui_read, - .write = ui_write, - .open = ui_open, - .release = ui_release, -}; - -#define UI_OFFSET 192 /* device minor offset for UI devices */ -static int create_ui = 1; - -static struct cdev wildcard_cdev; -static struct device *wildcard_device; - -static atomic_t user_count = ATOMIC_INIT(0); - static void user_remove(struct hfi1_devdata *dd) { - if (atomic_dec_return(&user_count) == 0) - hfi1_cdev_cleanup(&wildcard_cdev, &wildcard_device); hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device); - hfi1_cdev_cleanup(&dd->ui_cdev, &dd->ui_device); } static int user_add(struct hfi1_devdata *dd) @@ -1717,34 +1470,13 @@ static int user_add(struct hfi1_devdata *dd) char name[10]; int ret; - if (atomic_inc_return(&user_count) == 1) { - ret = hfi1_cdev_init(0, class_name(), &hfi1_file_ops, - &wildcard_cdev, &wildcard_device, - true); - if (ret) - goto done; - } - snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit); - ret = hfi1_cdev_init(dd->unit + 1, name, &hfi1_file_ops, + ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops, &dd->user_cdev, &dd->user_device, - true); + true, &dd->kobj); if (ret) - goto done; + user_remove(dd); - if (create_ui) { - snprintf(name, sizeof(name), - "%s_ui%d", class_name(), dd->unit); - ret = hfi1_cdev_init(dd->unit + UI_OFFSET, name, &ui_file_ops, - &dd->ui_cdev, &dd->ui_device, - false); - if (ret) - goto done; - } - - return 0; -done: - user_remove(dd); return ret; } @@ -1753,13 +1485,7 @@ done: */ int hfi1_device_create(struct hfi1_devdata *dd) { - int r, ret; - - r = user_add(dd); - ret = hfi1_diag_add(dd); - if (r && !ret) - ret = r; - return ret; + return user_add(dd); } /* @@ -1769,5 +1495,4 @@ int hfi1_device_create(struct hfi1_devdata *dd) void hfi1_device_remove(struct hfi1_devdata *dd) { user_remove(dd); - hfi1_diag_remove(dd); } diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index ed680fda611d..ed680fda611d 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7b78d56de7f5..4417a0fd3ef9 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -453,6 +453,7 @@ struct rvt_sge_state; #define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP) #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) +#define HLS_DOWN ~(HLS_UP) /* use this MTU size if none other is given */ #define HFI1_DEFAULT_ACTIVE_MTU 10240 @@ -1168,6 +1169,7 @@ struct hfi1_devdata { atomic_t aspm_disabled_cnt; struct hfi1_affinity *affinity; + struct kobject kobj; }; /* 8051 firmware version helper */ @@ -1882,9 +1884,8 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) get_unit_name((dd)->unit), ##__VA_ARGS__) #define hfi1_dev_porterr(dd, port, fmt, ...) \ - dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ - get_unit_name((dd)->unit), (dd)->unit, (port), \ - ##__VA_ARGS__) + dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \ + get_unit_name((dd)->unit), (port), ##__VA_ARGS__) /* * this is used for formatting hw error messages... diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 502b7cf4647d..5cc492e5776d 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -732,12 +732,12 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) lastfail = hfi1_create_rcvhdrq(dd, rcd); if (!lastfail) lastfail = hfi1_setup_eagerbufs(rcd); - if (lastfail) + if (lastfail) { dd_dev_err(dd, "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); + ret = lastfail; + } } - if (lastfail) - ret = lastfail; /* Allocate enough memory for user event notification. */ len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS * @@ -989,8 +989,10 @@ static void release_asic_data(struct hfi1_devdata *dd) dd->asic_data = NULL; } -void hfi1_free_devdata(struct hfi1_devdata *dd) +static void __hfi1_free_devdata(struct kobject *kobj) { + struct hfi1_devdata *dd = + container_of(kobj, struct hfi1_devdata, kobj); unsigned long flags; spin_lock_irqsave(&hfi1_devs_lock, flags); @@ -1007,6 +1009,15 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) rvt_dealloc_device(&dd->verbs_dev.rdi); } +static struct kobj_type hfi1_devdata_type = { + .release = __hfi1_free_devdata, +}; + +void hfi1_free_devdata(struct hfi1_devdata *dd) +{ + kobject_put(&dd->kobj); +} + /* * Allocate our primary per-unit data structure. Must be done via verbs * allocator, because the verbs cleanup process both does cleanup and @@ -1102,6 +1113,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) &pdev->dev, "Could not alloc cpulist info, cpu affinity might be wrong\n"); } + kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; bail: @@ -1300,7 +1312,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) spin_lock(&ppd->cc_state_lock); cc_state = get_cc_state(ppd); - rcu_assign_pointer(ppd->cc_state, NULL); + RCU_INIT_POINTER(ppd->cc_state, NULL); spin_unlock(&ppd->cc_state_lock); if (cc_state) diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 65348d16ab2f..65348d16ab2f 100644 --- a/drivers/staging/rdma/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index 2ec6ef38d389..2ec6ef38d389 100644 --- a/drivers/staging/rdma/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index ed58cf21e790..219029576ba0 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1403,6 +1403,12 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) if (key == okey) continue; /* + * Don't update pkeys[2], if an HFI port without MgmtAllowed + * by neighbor is a switch. + */ + if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1) + continue; + /* * The SM gives us the complete PKey table. We have * to ensure that we put the PKeys in the matching * slots. @@ -3363,6 +3369,50 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, return reply((struct ib_mad_hdr *)smp); } +/* + * Apply congestion control information stored in the ppd to the + * active structure. + */ +static void apply_cc_state(struct hfi1_pportdata *ppd) +{ + struct cc_state *old_cc_state, *new_cc_state; + + new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL); + if (!new_cc_state) + return; + + /* + * Hold the lock for updating *and* to prevent ppd information + * from changing during the update. + */ + spin_lock(&ppd->cc_state_lock); + + old_cc_state = get_cc_state(ppd); + if (!old_cc_state) { + /* never active, or shutting down */ + spin_unlock(&ppd->cc_state_lock); + kfree(new_cc_state); + return; + } + + *new_cc_state = *old_cc_state; + + new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1; + memcpy(new_cc_state->cct.entries, ppd->ccti_entries, + ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)); + + new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED; + new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map; + memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries, + OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry)); + + rcu_assign_pointer(ppd->cc_state, new_cc_state); + + spin_unlock(&ppd->cc_state_lock); + + call_rcu(&old_cc_state->rcu, cc_state_reclaim); +} + static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) @@ -3374,6 +3424,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct opa_congestion_setting_entry_shadow *entries; int i; + /* + * Save details from packet into the ppd. Hold the cc_state_lock so + * our information is consistent with anyone trying to apply the state. + */ + spin_lock(&ppd->cc_state_lock); ppd->cc_sl_control_map = be32_to_cpu(p->control_map); entries = ppd->congestion_entries; @@ -3384,6 +3439,10 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, p->entries[i].trigger_threshold; entries[i].ccti_min = p->entries[i].ccti_min; } + spin_unlock(&ppd->cc_state_lock); + + /* now apply the information */ + apply_cc_state(ppd); return __subn_get_opa_cong_setting(smp, am, data, ibdev, port, resp_len); @@ -3526,7 +3585,6 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; u16 ccti_limit; - struct cc_state *old_cc_state, *new_cc_state; /* sanity check n_blocks, start_block */ if (n_blocks == 0 || @@ -3546,45 +3604,20 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL); - if (!new_cc_state) - goto getit; - + /* + * Save details from packet into the ppd. Hold the cc_state_lock so + * our information is consistent with anyone trying to apply the state. + */ spin_lock(&ppd->cc_state_lock); - - old_cc_state = get_cc_state(ppd); - - if (!old_cc_state) { - spin_unlock(&ppd->cc_state_lock); - kfree(new_cc_state); - return reply((struct ib_mad_hdr *)smp); - } - - *new_cc_state = *old_cc_state; - - new_cc_state->cct.ccti_limit = ccti_limit; - - entries = ppd->ccti_entries; ppd->total_cct_entry = ccti_limit + 1; - + entries = ppd->ccti_entries; for (j = 0, i = sentry; i < eentry; j++, i++) entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry); - - memcpy(new_cc_state->cct.entries, entries, - eentry * sizeof(struct ib_cc_table_entry)); - - new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED; - new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map; - memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries, - OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry)); - - rcu_assign_pointer(ppd->cc_state, new_cc_state); - spin_unlock(&ppd->cc_state_lock); - call_rcu(&old_cc_state->rcu, cc_state_reclaim); + /* now apply the information */ + apply_cc_state(ppd); -getit: return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len); } diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 55ee08675333..55ee08675333 100644 --- a/drivers/staging/rdma/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 2b0e91d3093d..b7a80aa1ae30 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -45,6 +45,7 @@ * */ #include <linux/list.h> +#include <linux/rculist.h> #include <linux/mmu_notifier.h> #include <linux/interval_tree_generic.h> @@ -97,7 +98,6 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node) int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) { struct mmu_rb_handler *handlr; - unsigned long flags; if (!ops->invalidate) return -EINVAL; @@ -111,9 +111,9 @@ int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) INIT_HLIST_NODE(&handlr->mn.hlist); spin_lock_init(&handlr->lock); handlr->mn.ops = &mn_opts; - spin_lock_irqsave(&mmu_rb_lock, flags); - list_add_tail(&handlr->list, &mmu_rb_handlers); - spin_unlock_irqrestore(&mmu_rb_lock, flags); + spin_lock(&mmu_rb_lock); + list_add_tail_rcu(&handlr->list, &mmu_rb_handlers); + spin_unlock(&mmu_rb_lock); return mmu_notifier_register(&handlr->mn, current->mm); } @@ -130,9 +130,10 @@ void hfi1_mmu_rb_unregister(struct rb_root *root) if (current->mm) mmu_notifier_unregister(&handler->mn, current->mm); - spin_lock_irqsave(&mmu_rb_lock, flags); - list_del(&handler->list); - spin_unlock_irqrestore(&mmu_rb_lock, flags); + spin_lock(&mmu_rb_lock); + list_del_rcu(&handler->list); + spin_unlock(&mmu_rb_lock); + synchronize_rcu(); spin_lock_irqsave(&handler->lock, flags); if (!RB_EMPTY_ROOT(root)) { @@ -271,16 +272,15 @@ void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) { struct mmu_rb_handler *handler; - unsigned long flags; - spin_lock_irqsave(&mmu_rb_lock, flags); - list_for_each_entry(handler, &mmu_rb_handlers, list) { + rcu_read_lock(); + list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) { if (handler->root == root) goto unlock; } handler = NULL; unlock: - spin_unlock_irqrestore(&mmu_rb_lock, flags); + rcu_read_unlock(); return handler; } diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index 7a57b9c49d27..7a57b9c49d27 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h index 6ef3c1cbdcd7..6ef3c1cbdcd7 100644 --- a/drivers/staging/rdma/hfi1/opa_compat.h +++ b/drivers/infiniband/hw/hfi1/opa_compat.h diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 0bac21e6a658..0bac21e6a658 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index c67b9ad3fcf4..d5edb1afbb8f 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1835,8 +1835,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) struct pio_vl_map *oldmap, *newmap; if (!vl_scontexts) { - /* send context 0 reserved for VL15 */ - for (i = 1; i < dd->num_send_contexts; i++) + for (i = 0; i < dd->num_send_contexts; i++) if (dd->send_contexts[i].type == SC_KERNEL) num_kernel_send_contexts++; /* truncate divide */ diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 53a08edb7f64..464cbd27b975 100644 --- a/drivers/staging/rdma/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -49,10 +49,10 @@ /* send context types */ #define SC_KERNEL 0 -#define SC_ACK 1 -#define SC_USER 2 -#define SC_VL15 3 -#define SC_MAX 4 +#define SC_VL15 1 +#define SC_ACK 2 +#define SC_USER 3 /* must be the last one: it may take all left */ +#define SC_MAX 4 /* count of send context types */ /* invalid send context index */ #define INVALID_SCI 0xff diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 8c25e1b58849..8c25e1b58849 100644 --- a/drivers/staging/rdma/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 8fe8a205b5bb..03df9322f862 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -87,6 +87,17 @@ void free_platform_config(struct hfi1_devdata *dd) */ } +void get_port_type(struct hfi1_pportdata *ppd) +{ + int ret; + + ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_PORT_TYPE, &ppd->port_type, + 4); + if (ret) + ppd->port_type = PORT_TYPE_UNKNOWN; +} + int set_qsfp_tx(struct hfi1_pportdata *ppd, int on) { u8 tx_ctrl_byte = on ? 0x0 : 0xF; @@ -529,7 +540,8 @@ static void apply_tunings( /* Enable external device config if channel is limiting active */ read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, GENERAL_CONFIG, &config_data); - config_data |= limiting_active; + config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT); + config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT); ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) @@ -542,7 +554,8 @@ static void apply_tunings( /* Pass tuning method to 8051 */ read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, &config_data); - config_data |= tuning_method; + config_data &= ~(0xff << TUNING_METHOD_SHIFT); + config_data |= ((u32)tuning_method << TUNING_METHOD_SHIFT); ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) @@ -564,8 +577,8 @@ static void apply_tunings( ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, &config_data); /* Clear, then set the external device config field */ - config_data &= ~(0xFF << 24); - config_data |= (external_device_config << 24); + config_data &= ~(u32)0xFF; + config_data |= external_device_config; ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) @@ -784,12 +797,6 @@ void tune_serdes(struct hfi1_pportdata *ppd) return; } - ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, - PORT_TABLE_PORT_TYPE, &ppd->port_type, - 4); - if (ret) - ppd->port_type = PORT_TYPE_UNKNOWN; - switch (ppd->port_type) { case PORT_TYPE_DISCONNECTED: ppd->offline_disabled_reason = diff --git a/drivers/staging/rdma/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h index 19620cf546d5..e2c21613c326 100644 --- a/drivers/staging/rdma/hfi1/platform.h +++ b/drivers/infiniband/hw/hfi1/platform.h @@ -298,6 +298,7 @@ enum link_tuning_encoding { /* platform.c */ void get_platform_config(struct hfi1_devdata *dd); void free_platform_config(struct hfi1_devdata *dd); +void get_port_type(struct hfi1_pportdata *ppd); int set_qsfp_tx(struct hfi1_pportdata *ppd, int on); void tune_serdes(struct hfi1_pportdata *ppd); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 91eb42316df9..1a942ffba4cb 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -49,7 +49,6 @@ #include <linux/vmalloc.h> #include <linux/hash.h> #include <linux/module.h> -#include <linux/random.h> #include <linux/seq_file.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_qp.h> @@ -161,9 +160,6 @@ static inline int opa_mtu_enum_to_int(int mtu) * This function is what we would push to the core layer if we wanted to be a * "first class citizen". Instead we hide this here and rely on Verbs ULPs * to blindly pass the MTU enum value from the PathRecord to us. - * - * The actual flag used to determine "8k MTU" will change and is currently - * unknown. */ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) { @@ -516,6 +512,7 @@ static void iowait_wakeup(struct iowait *wait, int reason) static void iowait_sdma_drained(struct iowait *wait) { struct rvt_qp *qp = iowait_to_qp(wait); + unsigned long flags; /* * This happens when the send engine notes @@ -523,12 +520,12 @@ static void iowait_sdma_drained(struct iowait *wait) * do the flush work until that QP's * sdma work has finished. */ - spin_lock(&qp->s_lock); + spin_lock_irqsave(&qp->s_lock, flags); if (qp->s_flags & RVT_S_WAIT_DMA) { qp->s_flags &= ~RVT_S_WAIT_DMA; hfi1_schedule_send(qp); } - spin_unlock(&qp->s_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); } /** diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index e7bc8d6cf681..e7bc8d6cf681 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 2441669f0817..2441669f0817 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h index dadc66c442b9..dadc66c442b9 100644 --- a/drivers/staging/rdma/hfi1/qsfp.h +++ b/drivers/infiniband/hw/hfi1/qsfp.h diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 792f15eb8efe..792f15eb8efe 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index a659aec3c3c6..a659aec3c3c6 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index abb8ebc1fcac..f9befc05b349 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -134,6 +134,7 @@ static const char * const sdma_state_names[] = { [sdma_state_s99_running] = "s99_Running", }; +#ifdef CONFIG_SDMA_VERBOSITY static const char * const sdma_event_names[] = { [sdma_event_e00_go_hw_down] = "e00_GoHwDown", [sdma_event_e10_go_hw_start] = "e10_GoHwStart", @@ -150,6 +151,7 @@ static const char * const sdma_event_names[] = { [sdma_event_e85_link_down] = "e85_LinkDown", [sdma_event_e90_sw_halted] = "e90_SwHalted", }; +#endif static const struct sdma_set_state_action sdma_action_table[] = { [sdma_state_s00_hw_down] = { @@ -376,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde, sdma_txclean(sde->dd, tx); if (complete) (*complete)(tx, res); - if (iowait_sdma_dec(wait) && wait) + if (wait && iowait_sdma_dec(wait)) iowait_drain_wakeup(wait); } diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 8f50c99fe711..8f50c99fe711 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h index bf7d777d756e..bf7d777d756e 100644 --- a/drivers/staging/rdma/hfi1/sdma_txreq.h +++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 8cd6df8634ad..91fc2aed6aed 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -721,8 +721,8 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, } dd_dev_info(dd, - "IB%u: Congestion Control Agent enabled for port %d\n", - dd->unit, port_num); + "Congestion Control Agent enabled for port %d\n", + port_num); return 0; diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 8b62fefcf903..79b2952c0dfb 100644 --- a/drivers/staging/rdma/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -66,6 +66,7 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) #define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x" #define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" #define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" +#define IETH_PRN "ieth rkey 0x%.8x" #define ATOMICACKETH_PRN "origdata %lld" #define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld" @@ -166,6 +167,12 @@ const char *parse_everbs_hdrs( be32_to_cpu(eh->ud.deth[0]), be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK); break; + /* ieth */ + case OP(RC, SEND_LAST_WITH_INVALIDATE): + case OP(RC, SEND_ONLY_WITH_INVALIDATE): + trace_seq_printf(p, IETH_PRN, + be32_to_cpu(eh->ieth)); + break; } trace_seq_putc(p, 0); return ret; @@ -233,3 +240,4 @@ __hfi1_trace_fn(FIRMWARE); __hfi1_trace_fn(RCVCTRL); __hfi1_trace_fn(TID); __hfi1_trace_fn(MMU); +__hfi1_trace_fn(IOCTL); diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h index 963dc948c38a..28c1d0832886 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/infiniband/hw/hfi1/trace.h @@ -74,8 +74,8 @@ __print_symbolic(etype, \ TRACE_EVENT(hfi1_rcvhdr, TP_PROTO(struct hfi1_devdata *dd, - u64 eflags, u32 ctxt, + u64 eflags, u32 etype, u32 hlen, u32 tlen, @@ -392,6 +392,8 @@ __print_symbolic(opcode, \ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ ib_opcode_name(RC_COMPARE_SWAP), \ ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \ + ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \ ib_opcode_name(UC_SEND_FIRST), \ ib_opcode_name(UC_SEND_MIDDLE), \ ib_opcode_name(UC_SEND_LAST), \ @@ -1341,6 +1343,7 @@ __hfi1_trace_def(FIRMWARE); __hfi1_trace_def(RCVCTRL); __hfi1_trace_def(TID); __hfi1_trace_def(MMU); +__hfi1_trace_def(IOCTL); #define hfi1_cdbg(which, fmt, ...) \ __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/infiniband/hw/hfi1/twsi.c index e82e52a63d35..e82e52a63d35 100644 --- a/drivers/staging/rdma/hfi1/twsi.c +++ b/drivers/infiniband/hw/hfi1/twsi.c diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/infiniband/hw/hfi1/twsi.h index 5b8a5b5e7eae..5b8a5b5e7eae 100644 --- a/drivers/staging/rdma/hfi1/twsi.h +++ b/drivers/infiniband/hw/hfi1/twsi.h diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index df773d433297..df773d433297 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 1e503ad0bebb..1e503ad0bebb 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 1b640a35b3fe..1b640a35b3fe 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 9bc8d9fba87e..9bc8d9fba87e 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 88e10b5f55f1..88e10b5f55f1 100644 --- a/drivers/staging/rdma/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 0014c9c0e967..29f4795f866c 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -166,6 +166,8 @@ static unsigned initial_pkt_count = 8; #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ +struct sdma_mmu_node; + struct user_sdma_iovec { struct list_head list; struct iovec iov; @@ -178,6 +180,7 @@ struct user_sdma_iovec { * which we last left off. */ u64 offset; + struct sdma_mmu_node *node; }; #define SDMA_CACHE_NODE_EVICT BIT(0) @@ -507,6 +510,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, struct sdma_req_info info; struct user_sdma_request *req; u8 opcode, sc, vl; + int req_queued = 0; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { hfi1_cdbg( @@ -703,6 +707,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); atomic_inc(&pq->n_reqs); + req_queued = 1; /* Send the first N packets in the request to buy us some time */ ret = user_sdma_send_pkts(req, pcount); if (unlikely(ret < 0 && ret != -EBUSY)) { @@ -747,7 +752,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, return 0; free_req: user_sdma_free_request(req, true); - pq_update(pq); + if (req_queued) + pq_update(pq); set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); return ret; } @@ -1153,6 +1159,7 @@ retry: } iovec->pages = node->pages; iovec->npages = npages; + iovec->node = node; ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); if (ret) { @@ -1519,18 +1526,13 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) } if (req->data_iovs) { struct sdma_mmu_node *node; - struct mmu_rb_node *mnode; int i; for (i = 0; i < req->data_iovs; i++) { - mnode = hfi1_mmu_rb_search( - &req->pq->sdma_rb_root, - (unsigned long)req->iovs[i].iov.iov_base, - req->iovs[i].iov.iov_len); - if (!mnode || IS_ERR(mnode)) + node = req->iovs[i].node; + if (!node) continue; - node = container_of(mnode, struct sdma_mmu_node, rb); if (unpin) hfi1_mmu_rb_remove(&req->pq->sdma_rb_root, &node->rb); diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index b9240e351161..b9240e351161 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 9cdc85fa366f..849c4b9399d4 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -52,7 +52,6 @@ #include <linux/utsname.h> #include <linux/rculist.h> #include <linux/mm.h> -#include <linux/random.h> #include <linux/vmalloc.h> #include "hfi.h" @@ -336,6 +335,8 @@ const u8 hdr_len_by_opcode[256] = { [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4, [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28, [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, + [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, + [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = 12 + 8, [IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8, @@ -946,7 +947,6 @@ static int pio_wait(struct rvt_qp *qp, dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); - dev->n_piowait++; qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 3ee223983b20..488356775627 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -152,6 +152,7 @@ union ib_ehdrs { } at; __be32 imm_data; __be32 aeth; + __be32 ieth; struct ib_atomic_eth atomic_eth; } __packed; diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index bc95c4112c61..bc95c4112c61 100644 --- a/drivers/staging/rdma/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 1cf69b2fe4a5..1cf69b2fe4a5 100644 --- a/drivers/staging/rdma/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 4a740f7a0519..02a735b64208 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2361,58 +2361,130 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static const char * const i40iw_hw_stat_names[] = { + // 32bit names + [I40IW_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards", + [I40IW_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts", + [I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes", + [I40IW_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards", + [I40IW_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts", + [I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes", + [I40IW_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs", + [I40IW_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors", + [I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors", + // 64bit names + [I40IW_HW_STAT_INDEX_IP4RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4InOctets", + [I40IW_HW_STAT_INDEX_IP4RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4InPkts", + [I40IW_HW_STAT_INDEX_IP4RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4InReasmRqd", + [I40IW_HW_STAT_INDEX_IP4RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4InMcastPkts", + [I40IW_HW_STAT_INDEX_IP4TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4OutOctets", + [I40IW_HW_STAT_INDEX_IP4TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4OutPkts", + [I40IW_HW_STAT_INDEX_IP4TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4OutSegRqd", + [I40IW_HW_STAT_INDEX_IP4TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4OutMcastPkts", + [I40IW_HW_STAT_INDEX_IP6RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6InOctets", + [I40IW_HW_STAT_INDEX_IP6RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6InPkts", + [I40IW_HW_STAT_INDEX_IP6RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6InReasmRqd", + [I40IW_HW_STAT_INDEX_IP6RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6InMcastPkts", + [I40IW_HW_STAT_INDEX_IP6TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6OutOctets", + [I40IW_HW_STAT_INDEX_IP6TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6OutPkts", + [I40IW_HW_STAT_INDEX_IP6TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6OutSegRqd", + [I40IW_HW_STAT_INDEX_IP6TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6OutMcastPkts", + [I40IW_HW_STAT_INDEX_TCPRXSEGS + I40IW_HW_STAT_INDEX_MAX_32] = + "tcpInSegs", + [I40IW_HW_STAT_INDEX_TCPTXSEG + I40IW_HW_STAT_INDEX_MAX_32] = + "tcpOutSegs", + [I40IW_HW_STAT_INDEX_RDMARXRDS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwInRdmaReads", + [I40IW_HW_STAT_INDEX_RDMARXSNDS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwInRdmaSends", + [I40IW_HW_STAT_INDEX_RDMARXWRS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwInRdmaWrites", + [I40IW_HW_STAT_INDEX_RDMATXRDS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwOutRdmaReads", + [I40IW_HW_STAT_INDEX_RDMATXSNDS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwOutRdmaSends", + [I40IW_HW_STAT_INDEX_RDMATXWRS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwOutRdmaWrites", + [I40IW_HW_STAT_INDEX_RDMAVBND + I40IW_HW_STAT_INDEX_MAX_32] = + "iwRdmaBnd", + [I40IW_HW_STAT_INDEX_RDMAVINV + I40IW_HW_STAT_INDEX_MAX_32] = + "iwRdmaInv" +}; + /** - * i40iw_get_protocol_stats - Populates the rdma_stats structure - * @ibdev: ib dev struct - * @stats: iw protocol stats struct + * i40iw_alloc_hw_stats - Allocate a hw stats structure + * @ibdev: device pointer from stack + * @port_num: port number */ -static int i40iw_get_protocol_stats(struct ib_device *ibdev, - union rdma_protocol_stats *stats) +static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + struct i40iw_device *iwdev = to_iwdev(ibdev); + struct i40iw_sc_dev *dev = &iwdev->sc_dev; + int num_counters = I40IW_HW_STAT_INDEX_MAX_32 + + I40IW_HW_STAT_INDEX_MAX_64; + unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN; + + BUILD_BUG_ON(ARRAY_SIZE(i40iw_hw_stat_names) != + (I40IW_HW_STAT_INDEX_MAX_32 + + I40IW_HW_STAT_INDEX_MAX_64)); + + /* + * PFs get the default update lifespan, but VFs only update once + * per second + */ + if (!dev->is_pf) + lifespan = 1000; + return rdma_alloc_hw_stats_struct(i40iw_hw_stat_names, num_counters, + lifespan); +} + +/** + * i40iw_get_hw_stats - Populates the rdma_hw_stats structure + * @ibdev: device pointer from stack + * @stats: stats pointer from stack + * @port_num: port number + * @index: which hw counter the stack is requesting we update + */ +static int i40iw_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port_num, int index) { struct i40iw_device *iwdev = to_iwdev(ibdev); struct i40iw_sc_dev *dev = &iwdev->sc_dev; struct i40iw_dev_pestat *devstat = &dev->dev_pestat; struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; - struct timespec curr_time; - static struct timespec last_rd_time = {0, 0}; unsigned long flags; - curr_time = current_kernel_time(); - memset(stats, 0, sizeof(*stats)); - if (dev->is_pf) { spin_lock_irqsave(&devstat->stats_lock, flags); devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats); spin_unlock_irqrestore(&devstat->stats_lock, flags); } else { - if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1) - if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats)) - return -ENOSYS; + if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats)) + return -ENOSYS; } - stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] + - hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXPKTS]; - stats->iw.ipInTruncatedPkts = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] + - hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC]; - stats->iw.ipInDiscards = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] + - hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD]; - stats->iw.ipOutNoRoutes = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] + - hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE]; - stats->iw.ipReasmReqds = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] + - hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS]; - stats->iw.ipFragCreates = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] + - hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS]; - stats->iw.ipInMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] + - hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS]; - stats->iw.ipOutMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] + - hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXMCPKTS]; - stats->iw.tcpOutSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPTXSEG]; - stats->iw.tcpInSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPRXSEGS]; - stats->iw.tcpRetransSegs = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_TCPRTXSEG]; - - last_rd_time = curr_time; - return 0; + memcpy(&stats->value[0], &hw_stats, sizeof(*hw_stats)); + + return stats->num_counters; } /** @@ -2551,7 +2623,8 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr; iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr; iwibdev->ibdev.dereg_mr = i40iw_dereg_mr; - iwibdev->ibdev.get_protocol_stats = i40iw_get_protocol_stats; + iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats; + iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats; iwibdev->ibdev.query_device = i40iw_query_device; iwibdev->ibdev.create_ah = i40iw_create_ah; iwibdev->ibdev.destroy_ah = i40iw_destroy_ah; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 82d7c4bf5970..ce4034071f9c 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1308,21 +1308,6 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { SYM_LSB(IntMask, fldname##17IntMask)), \ .msg = #fldname "_C", .sz = sizeof(#fldname "_C") } -static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = { - INTR_AUTO_P(SDmaInt), - INTR_AUTO_P(SDmaProgressInt), - INTR_AUTO_P(SDmaIdleInt), - INTR_AUTO_P(SDmaCleanupDone), - INTR_AUTO_C(RcvUrg), - INTR_AUTO_P(ErrInt), - INTR_AUTO(ErrInt), /* non-port-specific errs */ - INTR_AUTO(AssertGPIOInt), - INTR_AUTO_P(SendDoneInt), - INTR_AUTO(SendBufAvailInt), - INTR_AUTO_C(RcvAvail), - { .mask = 0, .sz = 0 } -}; - #define TXSYMPTOM_AUTO_P(fldname) \ { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \ .msg = #fldname, .sz = sizeof(#fldname) } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 0bd18375d7df..d2ac29861af5 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1172,11 +1172,13 @@ static int pma_get_classportinfo(struct ib_pma_mad *pmp, * Set the most significant bit of CM2 to indicate support for * congestion statistics */ - p->reserved[0] = dd->psxmitwait_supported << 7; + ib_set_cpi_capmask2(p, + dd->psxmitwait_supported << + (31 - IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE)); /* * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. */ - p->resp_time_value = 18; + ib_set_cpi_resp_time(p, 18); return reply((struct ib_smp *) pmp); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 6888f03c6d61..4f878151f81f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -159,6 +159,7 @@ struct qib_other_headers { } at; __be32 imm_data; __be32 aeth; + __be32 ieth; struct ib_atomic_eth atomic_eth; } u; } __packed; diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index b1ffc8b4a6c0..6ca6fa80dd6e 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -525,6 +525,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) return PTR_ERR(task); } + set_user_nice(task, MIN_NICE); cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); kthread_bind(task, cpu); wake_up_process(task); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 0ff765bfd619..0f4d4500f45e 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -124,11 +124,13 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, int count) { int m, i = 0; + struct rvt_dev_info *dev = ib_to_rvt(pd->device); mr->mapsz = 0; m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; for (; i < m; i++) { - mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL); + mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL, + dev->dparms.node); if (!mr->map[i]) { rvt_deinit_mregion(mr); return -ENOMEM; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0f12c211c385..5fa4d4d81ee0 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -397,6 +397,7 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) rvt_put_ss(&qp->s_rdma_read_sge); @@ -431,7 +432,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) if (qp->ibqp.qp_type != IB_QPT_RC) return; - for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + for (n = 0; n < rvt_max_atomic(rdi); n++) { struct rvt_ack_entry *e = &qp->s_ack_queue[n]; if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && @@ -569,7 +570,12 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_ssn = 1; qp->s_lsn = 0; qp->s_mig_state = IB_MIG_MIGRATED; - memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + if (qp->s_ack_queue) + memset( + qp->s_ack_queue, + 0, + rvt_max_atomic(rdi) * + sizeof(*qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; qp->s_num_rd_atomic = 0; @@ -653,9 +659,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (gfp == GFP_NOIO) swq = __vmalloc( (init_attr->cap.max_send_wr + 1) * sz, - gfp, PAGE_KERNEL); + gfp | __GFP_ZERO, PAGE_KERNEL); else - swq = vmalloc_node( + swq = vzalloc_node( (init_attr->cap.max_send_wr + 1) * sz, rdi->dparms.node); if (!swq) @@ -677,6 +683,16 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, goto bail_swq; RCU_INIT_POINTER(qp->next, NULL); + if (init_attr->qp_type == IB_QPT_RC) { + qp->s_ack_queue = + kzalloc_node( + sizeof(*qp->s_ack_queue) * + rvt_max_atomic(rdi), + gfp, + rdi->dparms.node); + if (!qp->s_ack_queue) + goto bail_qp; + } /* * Driver needs to set up it's private QP structure and do any @@ -704,9 +720,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.wq = __vmalloc( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, - gfp, PAGE_KERNEL); + gfp | __GFP_ZERO, PAGE_KERNEL); else - qp->r_rq.wq = vmalloc_node( + qp->r_rq.wq = vzalloc_node( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, rdi->dparms.node); @@ -857,6 +873,7 @@ bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); bail_qp: + kfree(qp->s_ack_queue); kfree(qp); bail_swq: @@ -1284,6 +1301,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp) vfree(qp->r_rq.wq); vfree(qp->s_wq); rdi->driver_f.qp_priv_free(rdi, qp); + kfree(qp->s_ack_queue); kfree(qp); return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index caec8e9c4666..bab7db6fa9ab 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -92,6 +92,8 @@ enum { IPOIB_FLAG_UMCAST = 10, IPOIB_STOP_NEIGH_GC = 11, IPOIB_NEIGH_TBL_FLUSH = 12, + IPOIB_FLAG_DEV_ADDR_SET = 13, + IPOIB_FLAG_DEV_ADDR_CTRL = 14, IPOIB_MAX_BACKOFF_SECONDS = 16, @@ -392,6 +394,7 @@ struct ipoib_dev_priv { struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; unsigned max_send_sge; + bool sm_fullmember_sendonly_support; }; struct ipoib_ah { @@ -476,6 +479,7 @@ void ipoib_reap_ah(struct work_struct *work); void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); +int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv); struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 418e5a1c8744..45c40a17d6a6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -997,6 +997,106 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv) return 0; } +/* + * returns true if the device address of the ipoib interface has changed and the + * new address is a valid one (i.e in the gid table), return false otherwise. + */ +static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) +{ + union ib_gid search_gid; + union ib_gid gid0; + union ib_gid *netdev_gid; + int err; + u16 index; + u8 port; + bool ret = false; + + netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4); + if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL)) + return false; + + netif_addr_lock(priv->dev); + + /* The subnet prefix may have changed, update it now so we won't have + * to do it later + */ + priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix; + netdev_gid->global.subnet_prefix = gid0.global.subnet_prefix; + search_gid.global.subnet_prefix = gid0.global.subnet_prefix; + + search_gid.global.interface_id = priv->local_gid.global.interface_id; + + netif_addr_unlock(priv->dev); + + err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB, + priv->dev, &port, &index); + + netif_addr_lock(priv->dev); + + if (search_gid.global.interface_id != + priv->local_gid.global.interface_id) + /* There was a change while we were looking up the gid, bail + * here and let the next work sort this out + */ + goto out; + + /* The next section of code needs some background: + * Per IB spec the port GUID can't change if the HCA is powered on. + * port GUID is the basis for GID at index 0 which is the basis for + * the default device address of a ipoib interface. + * + * so it seems the flow should be: + * if user_changed_dev_addr && gid in gid tbl + * set bit dev_addr_set + * return true + * else + * return false + * + * The issue is that there are devices that don't follow the spec, + * they change the port GUID when the HCA is powered, so in order + * not to break userspace applications, We need to check if the + * user wanted to control the device address and we assume that + * if he sets the device address back to be based on GID index 0, + * he no longer wishs to control it. + * + * If the user doesn't control the the device address, + * IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed it means + * the port GUID has changed and GID at index 0 has changed + * so we need to change priv->local_gid and priv->dev->dev_addr + * to reflect the new GID. + */ + if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { + if (!err && port == priv->port) { + set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); + if (index == 0) + clear_bit(IPOIB_FLAG_DEV_ADDR_CTRL, + &priv->flags); + else + set_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags); + ret = true; + } else { + ret = false; + } + } else { + if (!err && port == priv->port) { + ret = true; + } else { + if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) { + memcpy(&priv->local_gid, &gid0, + sizeof(priv->local_gid)); + memcpy(priv->dev->dev_addr + 4, &gid0, + sizeof(priv->local_gid)); + ret = true; + } + } + } + +out: + netif_addr_unlock(priv->dev); + + return ret; +} + static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, enum ipoib_flush_level level, int nesting) @@ -1018,6 +1118,9 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) && level != IPOIB_FLUSH_HEAVY) { + /* Make sure the dev_addr is set even if not flushing */ + if (level == IPOIB_FLUSH_LIGHT) + ipoib_dev_addr_changed_valid(priv); ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); return; } @@ -1029,7 +1132,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, update_parent_pkey(priv); else update_child_pkey(priv); - } + } else if (level == IPOIB_FLUSH_LIGHT) + ipoib_dev_addr_changed_valid(priv); ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n"); return; } @@ -1081,7 +1185,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { if (level >= IPOIB_FLUSH_NORMAL) ipoib_ib_dev_up(dev); - ipoib_mcast_restart_task(&priv->restart_task); + if (ipoib_dev_addr_changed_valid(priv)) + ipoib_mcast_restart_task(&priv->restart_task); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b940ef1c19c7..2d7c16346648 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -99,6 +99,7 @@ static struct net_device *ipoib_get_net_dev_by_params( struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr, void *client_data); +static int ipoib_set_mac(struct net_device *dev, void *addr); static struct ib_client ipoib_client = { .name = "ipoib", @@ -117,6 +118,8 @@ int ipoib_open(struct net_device *dev) set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); + priv->sm_fullmember_sendonly_support = false; + if (ipoib_ib_dev_open(dev)) { if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) return 0; @@ -629,6 +632,77 @@ void ipoib_mark_paths_invalid(struct net_device *dev) spin_unlock_irq(&priv->lock); } +struct classport_info_context { + struct ipoib_dev_priv *priv; + struct completion done; + struct ib_sa_query *sa_query; +}; + +static void classport_info_query_cb(int status, struct ib_class_port_info *rec, + void *context) +{ + struct classport_info_context *cb_ctx = context; + struct ipoib_dev_priv *priv; + + WARN_ON(!context); + + priv = cb_ctx->priv; + + if (status || !rec) { + pr_debug("device: %s failed query classport_info status: %d\n", + priv->dev->name, status); + /* keeps the default, will try next mcast_restart */ + priv->sm_fullmember_sendonly_support = false; + goto out; + } + + if (ib_get_cpi_capmask2(rec) & + IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) { + pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n", + priv->dev->name); + priv->sm_fullmember_sendonly_support = true; + } else { + pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n", + priv->dev->name); + priv->sm_fullmember_sendonly_support = false; + } + +out: + complete(&cb_ctx->done); +} + +int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv) +{ + struct classport_info_context *callback_context; + int ret; + + callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL); + if (!callback_context) + return -ENOMEM; + + callback_context->priv = priv; + init_completion(&callback_context->done); + + ret = ib_sa_classport_info_rec_query(&ipoib_sa_client, + priv->ca, priv->port, 3000, + GFP_KERNEL, + classport_info_query_cb, + callback_context, + &callback_context->sa_query); + if (ret < 0) { + pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n", + priv->dev->name, ret); + kfree(callback_context); + return ret; + } + + /* waiting for the callback to finish before returnning */ + wait_for_completion(&callback_context->done); + kfree(callback_context); + + return ret; +} + void ipoib_flush_paths(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -1649,6 +1723,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_get_vf_config = ipoib_get_vf_config, .ndo_get_vf_stats = ipoib_get_vf_stats, .ndo_set_vf_guid = ipoib_set_vf_guid, + .ndo_set_mac_address = ipoib_set_mac, }; static const struct net_device_ops ipoib_netdev_ops_vf = { @@ -1771,6 +1846,70 @@ int ipoib_add_umcast_attr(struct net_device *dev) return device_create_file(&dev->dev, &dev_attr_umcast); } +static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) +{ + struct ipoib_dev_priv *child_priv; + struct net_device *netdev = priv->dev; + + netif_addr_lock(netdev); + + memcpy(&priv->local_gid.global.interface_id, + &gid->global.interface_id, + sizeof(gid->global.interface_id)); + memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid)); + clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); + + netif_addr_unlock(netdev); + + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { + down_read(&priv->vlan_rwsem); + list_for_each_entry(child_priv, &priv->child_intfs, list) + set_base_guid(child_priv, gid); + up_read(&priv->vlan_rwsem); + } +} + +static int ipoib_check_lladdr(struct net_device *dev, + struct sockaddr_storage *ss) +{ + union ib_gid *gid = (union ib_gid *)(ss->__data + 4); + int ret = 0; + + netif_addr_lock(dev); + + /* Make sure the QPN, reserved and subnet prefix match the current + * lladdr, it also makes sure the lladdr is unicast. + */ + if (memcmp(dev->dev_addr, ss->__data, + 4 + sizeof(gid->global.subnet_prefix)) || + gid->global.interface_id == 0) + ret = -EINVAL; + + netif_addr_unlock(dev); + + return ret; +} + +static int ipoib_set_mac(struct net_device *dev, void *addr) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct sockaddr_storage *ss = addr; + int ret; + + if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) + return -EBUSY; + + ret = ipoib_check_lladdr(dev, ss); + if (ret) + return ret; + + set_base_guid(priv, (union ib_gid *)(ss->__data + 4)); + + queue_work(ipoib_workqueue, &priv->flush_light); + + return 0; +} + static ssize_t create_child(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -1894,6 +2033,7 @@ static struct net_device *ipoib_add_port(const char *format, goto device_init_failed; } else memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); + set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); result = ipoib_dev_init(priv->dev, hca, port); if (result < 0) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 25889311b1e9..82fbc9442608 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -64,6 +64,9 @@ struct ipoib_mcast_iter { unsigned int send_only; }; +/* join state that allows creating mcg with sendonly member request */ +#define SENDONLY_FULLMEMBER_JOIN 8 + /* * This should be called with the priv->lock held */ @@ -326,12 +329,23 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, carrier_on_task); struct ib_port_attr attr; + int ret; if (ib_query_port(priv->ca, priv->port, &attr) || attr.state != IB_PORT_ACTIVE) { ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); return; } + /* + * Check if can send sendonly MCG's with sendonly-fullmember join state. + * It done here after the successfully join to the broadcast group, + * because the broadcast group must always be joined first and is always + * re-joined if the SM changes substantially. + */ + ret = ipoib_check_sm_sendonly_fullmember_support(priv); + if (ret < 0) + pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n", + priv->dev->name, ret); /* * Take rtnl_lock to avoid racing with ipoib_stop() and @@ -515,22 +529,20 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) rec.hop_limit = priv->broadcast->mcmember.hop_limit; /* - * Send-only IB Multicast joins do not work at the core - * IB layer yet, so we can't use them here. However, - * we are emulating an Ethernet multicast send, which - * does not require a multicast subscription and will - * still send properly. The most appropriate thing to + * Send-only IB Multicast joins work at the core IB layer but + * require specific SM support. + * We can use such joins here only if the current SM supports that feature. + * However, if not, we emulate an Ethernet multicast send, + * which does not require a multicast subscription and will + * still send properly. The most appropriate thing to * do is to create the group if it doesn't exist as that * most closely emulates the behavior, from a user space - * application perspecitive, of Ethernet multicast - * operation. For now, we do a full join, maybe later - * when the core IB layers support send only joins we - * will use them. + * application perspective, of Ethernet multicast operation. */ -#if 0 - if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) - rec.join_state = 4; -#endif + if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && + priv->sm_fullmember_sendonly_support) + /* SM supports sendonly-fullmember, otherwise fallback to full-member */ + rec.join_state = SENDONLY_FULLMEMBER_JOIN; } spin_unlock_irq(&priv->lock); @@ -570,11 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } priv->local_lid = port_attr.lid; + netif_addr_lock(dev); - if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL)) - ipoib_warn(priv, "ib_query_gid() failed\n"); - else - memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); + if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { + netif_addr_unlock(dev); + return; + } + netif_addr_unlock(dev); spin_lock_irq(&priv->lock); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index b809c373e40e..1e7cbbaa15bd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -307,5 +307,8 @@ void ipoib_event(struct ib_event_handler *handler, queue_work(ipoib_workqueue, &priv->flush_normal); } else if (record->event == IB_EVENT_PKEY_CHANGE) { queue_work(ipoib_workqueue, &priv->flush_heavy); + } else if (record->event == IB_EVENT_GID_CHANGE && + !test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { + queue_work(ipoib_workqueue, &priv->flush_light); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index fca1a882de27..64a35595eab8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -68,6 +68,8 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, priv->pkey = pkey; memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); + memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); + set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); priv->dev->broadcast[8] = pkey >> 8; priv->dev->broadcast[9] = pkey & 0xff; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 897b5a4993e8..a990c04208c9 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2596,9 +2596,19 @@ static void isert_free_conn(struct iscsi_conn *conn) isert_put_conn(isert_conn); } +static void isert_get_rx_pdu(struct iscsi_conn *conn) +{ + struct completion comp; + + init_completion(&comp); + + wait_for_completion_interruptible(&comp); +} + static struct iscsit_transport iser_target_transport = { .name = "IB/iSER", .transport_type = ISCSI_INFINIBAND, + .rdma_shutdown = true, .priv_size = sizeof(struct isert_cmd), .owner = THIS_MODULE, .iscsit_setup_np = isert_setup_np, @@ -2614,6 +2624,7 @@ static struct iscsit_transport iser_target_transport = { .iscsit_queue_data_in = isert_put_datain, .iscsit_queue_status = isert_put_response, .iscsit_aborted_task = isert_aborted_task, + .iscsit_get_rx_pdu = isert_get_rx_pdu, .iscsit_get_sup_prot_ops = isert_get_sup_prot_ops, }; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 2843f1ae75bd..e68b20cba70b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -254,8 +254,8 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad) memset(cif, 0, sizeof(*cif)); cif->base_version = 1; cif->class_version = 1; - cif->resp_time_value = 20; + ib_set_cpi_resp_time(cif, 20); mad->mad_hdr.status = 0; } @@ -1767,14 +1767,6 @@ static void __srpt_close_all_ch(struct srpt_device *sdev) } } -/** - * srpt_shutdown_session() - Whether or not a session may be shut down. - */ -static int srpt_shutdown_session(struct se_session *se_sess) -{ - return 1; -} - static void srpt_free_ch(struct kref *kref) { struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref); @@ -3064,7 +3056,6 @@ static const struct target_core_fabric_ops srpt_template = { .tpg_get_inst_index = srpt_tpg_get_inst_index, .release_cmd = srpt_release_cmd, .check_stop_free = srpt_check_stop_free, - .shutdown_session = srpt_shutdown_session, .close_session = srpt_close_session, .sess_get_index = srpt_sess_get_index, .sess_get_initiator_sid = NULL, diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig index d03df4a60d05..76bdae1a93bb 100644 --- a/drivers/platform/chrome/Kconfig +++ b/drivers/platform/chrome/Kconfig @@ -64,4 +64,14 @@ config CROS_EC_PROTO help ChromeOS EC communication protocol helpers. +config CROS_KBD_LED_BACKLIGHT + tristate "Backlight LED support for Chrome OS keyboards" + depends on LEDS_CLASS && ACPI + help + This option enables support for the keyboard backlight LEDs on + select Chrome OS systems. + + To compile this driver as a module, choose M here: the + module will be called cros_kbd_led_backlight. + endif # CHROMEOS_PLATFORMS diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index bc498bda8211..4f3462783a3c 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -1,8 +1,9 @@ -obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o -obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o -cros_ec_devs-objs := cros_ec_dev.o cros_ec_sysfs.o \ - cros_ec_lightbar.o cros_ec_vbc.o -obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_devs.o -obj-$(CONFIG_CROS_EC_LPC) += cros_ec_lpc.o -obj-$(CONFIG_CROS_EC_PROTO) += cros_ec_proto.o +obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o +obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o +cros_ec_devs-objs := cros_ec_dev.o cros_ec_sysfs.o \ + cros_ec_lightbar.o cros_ec_vbc.o +obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_devs.o +obj-$(CONFIG_CROS_EC_LPC) += cros_ec_lpc.o +obj-$(CONFIG_CROS_EC_PROTO) += cros_ec_proto.o +obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT) += cros_kbd_led_backlight.o diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c index 2b441e9ae593..e8a44a9bc916 100644 --- a/drivers/platform/chrome/chromeos_laptop.c +++ b/drivers/platform/chrome/chromeos_laptop.c @@ -34,6 +34,7 @@ #define ATMEL_TS_I2C_ADDR 0x4a #define ATMEL_TS_I2C_BL_ADDR 0x26 #define CYAPA_TP_I2C_ADDR 0x67 +#define ELAN_TP_I2C_ADDR 0x15 #define ISL_ALS_I2C_ADDR 0x44 #define TAOS_ALS_I2C_ADDR 0x29 @@ -73,7 +74,7 @@ struct i2c_peripheral { int tries; }; -#define MAX_I2C_PERIPHERALS 3 +#define MAX_I2C_PERIPHERALS 4 struct chromeos_laptop { struct i2c_peripheral i2c_peripherals[MAX_I2C_PERIPHERALS]; @@ -86,6 +87,11 @@ static struct i2c_board_info cyapa_device = { .flags = I2C_CLIENT_WAKE, }; +static struct i2c_board_info elantech_device = { + I2C_BOARD_INFO("elan_i2c", ELAN_TP_I2C_ADDR), + .flags = I2C_CLIENT_WAKE, +}; + static struct i2c_board_info isl_als_device = { I2C_BOARD_INFO("isl29018", ISL_ALS_I2C_ADDR), }; @@ -306,6 +312,16 @@ static int setup_atmel_224s_tp(enum i2c_adapter_type type) return (!tp) ? -EAGAIN : 0; } +static int setup_elantech_tp(enum i2c_adapter_type type) +{ + if (tp) + return 0; + + /* add elantech touchpad */ + tp = add_i2c_device("trackpad", type, &elantech_device); + return (!tp) ? -EAGAIN : 0; +} + static int setup_atmel_1664s_ts(enum i2c_adapter_type type) { const unsigned short addr_list[] = { ATMEL_TS_I2C_BL_ADDR, @@ -445,6 +461,8 @@ static struct chromeos_laptop dell_chromebook_11 = { .i2c_peripherals = { /* Touchpad. */ { .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 }, + /* Elan Touchpad option. */ + { .add = setup_elantech_tp, I2C_ADAPTER_DESIGNWARE_0 }, }, }; @@ -475,6 +493,8 @@ static struct chromeos_laptop acer_c720 = { { .add = setup_atmel_1664s_ts, I2C_ADAPTER_DESIGNWARE_1 }, /* Touchpad. */ { .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 }, + /* Elan Touchpad option. */ + { .add = setup_elantech_tp, I2C_ADAPTER_DESIGNWARE_0 }, /* Light Sensor. */ { .add = setup_isl29018_als, I2C_ADAPTER_DESIGNWARE_1 }, }, diff --git a/drivers/platform/chrome/chromeos_pstore.c b/drivers/platform/chrome/chromeos_pstore.c index 34749200e4ab..308a853ac4f1 100644 --- a/drivers/platform/chrome/chromeos_pstore.c +++ b/drivers/platform/chrome/chromeos_pstore.c @@ -8,6 +8,7 @@ * the Free Software Foundation, version 2 of the License. */ +#include <linux/acpi.h> #include <linux/dmi.h> #include <linux/module.h> #include <linux/platform_device.h> @@ -58,7 +59,7 @@ MODULE_DEVICE_TABLE(dmi, chromeos_pstore_dmi_table); static struct ramoops_platform_data chromeos_ramoops_data = { .mem_size = 0x100000, .mem_address = 0xf00000, - .record_size = 0x20000, + .record_size = 0x40000, .console_size = 0x20000, .ftrace_size = 0x20000, .dump_oops = 1, @@ -71,9 +72,59 @@ static struct platform_device chromeos_ramoops = { }, }; +#ifdef CONFIG_ACPI +static const struct acpi_device_id cros_ramoops_acpi_match[] = { + { "GOOG9999", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, cros_ramoops_acpi_match); + +static struct platform_driver chromeos_ramoops_acpi = { + .driver = { + .name = "chromeos_pstore", + .acpi_match_table = ACPI_PTR(cros_ramoops_acpi_match), + }, +}; + +static int __init chromeos_probe_acpi(struct platform_device *pdev) +{ + struct resource *res; + resource_size_t len; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENOMEM; + + len = resource_size(res); + if (!res->start || !len) + return -ENOMEM; + + pr_info("chromeos ramoops using acpi device.\n"); + + chromeos_ramoops_data.mem_size = len; + chromeos_ramoops_data.mem_address = res->start; + + return 0; +} + +static bool __init chromeos_check_acpi(void) +{ + if (!platform_driver_probe(&chromeos_ramoops_acpi, chromeos_probe_acpi)) + return true; + return false; +} +#else +static inline bool chromeos_check_acpi(void) { return false; } +#endif + static int __init chromeos_pstore_init(void) { - if (dmi_check_system(chromeos_pstore_dmi_table)) + bool acpi_dev_found; + + /* First check ACPI for non-hardcoded values from firmware. */ + acpi_dev_found = chromeos_check_acpi(); + + if (acpi_dev_found || dmi_check_system(chromeos_pstore_dmi_table)) return platform_device_register(&chromeos_ramoops); return -ENODEV; diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c index d45cd254ed1c..6d8ee3b15872 100644 --- a/drivers/platform/chrome/cros_ec_dev.c +++ b/drivers/platform/chrome/cros_ec_dev.c @@ -137,6 +137,10 @@ static long ec_device_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg) if (copy_from_user(&u_cmd, arg, sizeof(u_cmd))) return -EFAULT; + if ((u_cmd.outsize > EC_MAX_MSG_BYTES) || + (u_cmd.insize > EC_MAX_MSG_BYTES)) + return -EINVAL; + s_cmd = kmalloc(sizeof(*s_cmd) + max(u_cmd.outsize, u_cmd.insize), GFP_KERNEL); if (!s_cmd) @@ -208,6 +212,9 @@ static const struct file_operations fops = { .release = ec_device_release, .read = ec_device_read, .unlocked_ioctl = ec_device_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ec_device_ioctl, +#endif }; static void __remove(struct device *dev) diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c index ff7640575c75..8df3d447cacf 100644 --- a/drivers/platform/chrome/cros_ec_lightbar.c +++ b/drivers/platform/chrome/cros_ec_lightbar.c @@ -412,9 +412,13 @@ static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj, struct device *dev = container_of(kobj, struct device, kobj); struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev, class_dev); - struct platform_device *pdev = container_of(ec->dev, - struct platform_device, dev); - if (pdev->id != 0) + struct platform_device *pdev = to_platform_device(ec->dev); + struct cros_ec_platform *pdata = pdev->dev.platform_data; + int is_cros_ec; + + is_cros_ec = strcmp(pdata->ec_name, CROS_EC_DEV_NAME); + + if (is_cros_ec != 0) return 0; /* Only instantiate this stuff if the EC has a lightbar */ diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 990308ca384f..b6e161f71b26 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -298,8 +298,8 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev) ec_dev->max_response = EC_PROTO2_MAX_PARAM_SIZE; ec_dev->max_passthru = 0; ec_dev->pkt_xfer = NULL; - ec_dev->din_size = EC_MSG_BYTES; - ec_dev->dout_size = EC_MSG_BYTES; + ec_dev->din_size = EC_PROTO2_MSG_BYTES; + ec_dev->dout_size = EC_PROTO2_MSG_BYTES; } else { /* * It's possible for a test to occur too early when diff --git a/drivers/platform/chrome/cros_kbd_led_backlight.c b/drivers/platform/chrome/cros_kbd_led_backlight.c new file mode 100644 index 000000000000..ca3e4da852b4 --- /dev/null +++ b/drivers/platform/chrome/cros_kbd_led_backlight.c @@ -0,0 +1,122 @@ +/* + * Keyboard backlight LED driver for Chrome OS. + * + * Copyright (C) 2012 Google, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/acpi.h> +#include <linux/leds.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +/* Keyboard LED ACPI Device must be defined in firmware */ +#define ACPI_KEYBOARD_BACKLIGHT_DEVICE "\\_SB.KBLT" +#define ACPI_KEYBOARD_BACKLIGHT_READ ACPI_KEYBOARD_BACKLIGHT_DEVICE ".KBQC" +#define ACPI_KEYBOARD_BACKLIGHT_WRITE ACPI_KEYBOARD_BACKLIGHT_DEVICE ".KBCM" + +#define ACPI_KEYBOARD_BACKLIGHT_MAX 100 + +static void keyboard_led_set_brightness(struct led_classdev *cdev, + enum led_brightness brightness) +{ + union acpi_object param; + struct acpi_object_list input; + acpi_status status; + + param.type = ACPI_TYPE_INTEGER; + param.integer.value = brightness; + input.count = 1; + input.pointer = ¶m; + + status = acpi_evaluate_object(NULL, ACPI_KEYBOARD_BACKLIGHT_WRITE, + &input, NULL); + if (ACPI_FAILURE(status)) + dev_err(cdev->dev, "Error setting keyboard LED value: %d\n", + status); +} + +static enum led_brightness +keyboard_led_get_brightness(struct led_classdev *cdev) +{ + unsigned long long brightness; + acpi_status status; + + status = acpi_evaluate_integer(NULL, ACPI_KEYBOARD_BACKLIGHT_READ, + NULL, &brightness); + if (ACPI_FAILURE(status)) { + dev_err(cdev->dev, "Error getting keyboard LED value: %d\n", + status); + return -EIO; + } + + return brightness; +} + +static int keyboard_led_probe(struct platform_device *pdev) +{ + struct led_classdev *cdev; + acpi_handle handle; + acpi_status status; + int error; + + /* Look for the keyboard LED ACPI Device */ + status = acpi_get_handle(ACPI_ROOT_OBJECT, + ACPI_KEYBOARD_BACKLIGHT_DEVICE, + &handle); + if (ACPI_FAILURE(status)) { + dev_err(&pdev->dev, "Unable to find ACPI device %s: %d\n", + ACPI_KEYBOARD_BACKLIGHT_DEVICE, status); + return -ENXIO; + } + + cdev = devm_kzalloc(&pdev->dev, sizeof(*cdev), GFP_KERNEL); + if (!cdev) + return -ENOMEM; + + cdev->name = "chromeos::kbd_backlight"; + cdev->max_brightness = ACPI_KEYBOARD_BACKLIGHT_MAX; + cdev->flags |= LED_CORE_SUSPENDRESUME; + cdev->brightness_set = keyboard_led_set_brightness; + cdev->brightness_get = keyboard_led_get_brightness; + + error = devm_led_classdev_register(&pdev->dev, cdev); + if (error) + return error; + + return 0; +} + +static const struct acpi_device_id keyboard_led_id[] = { + { "GOOG0002", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, keyboard_led_id); + +static struct platform_driver keyboard_led_driver = { + .driver = { + .name = "chromeos-keyboard-leds", + .acpi_match_table = ACPI_PTR(keyboard_led_id), + }, + .probe = keyboard_led_probe, +}; +module_platform_driver(keyboard_led_driver); + +MODULE_AUTHOR("Simon Que <sque@chromium.org>"); +MODULE_DESCRIPTION("ChromeOS Keyboard backlight LED Driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:chromeos-keyboard-leds"); diff --git a/drivers/scsi/qla2xxx/Kconfig b/drivers/scsi/qla2xxx/Kconfig index 10aa18ba05fd..67c0d5aa3212 100644 --- a/drivers/scsi/qla2xxx/Kconfig +++ b/drivers/scsi/qla2xxx/Kconfig @@ -36,3 +36,12 @@ config TCM_QLA2XXX default n ---help--- Say Y here to enable the TCM_QLA2XXX fabric module for QLogic 24xx+ series target mode HBAs + +if TCM_QLA2XXX +config TCM_QLA2XXX_DEBUG + bool "TCM_QLA2XXX fabric module DEBUG mode for QLogic 24xx+ series target mode HBAs" + default n + ---help--- + Say Y here to enable the TCM_QLA2XXX fabric module DEBUG for QLogic 24xx+ series target mode HBAs + This will include code to enable the SCSI command jammer +endif diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 8a44d1541eb4..ca39deb4ff5b 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -637,8 +637,10 @@ static void qlt_free_session_done(struct work_struct *work) } /* ha->tgt.sess_lock supposed to be held on entry */ -void qlt_unreg_sess(struct qla_tgt_sess *sess) +static void qlt_release_session(struct kref *kref) { + struct qla_tgt_sess *sess = + container_of(kref, struct qla_tgt_sess, sess_kref); struct scsi_qla_host *vha = sess->vha; if (sess->se_sess) @@ -651,8 +653,16 @@ void qlt_unreg_sess(struct qla_tgt_sess *sess) INIT_WORK(&sess->free_work, qlt_free_session_done); schedule_work(&sess->free_work); } -EXPORT_SYMBOL(qlt_unreg_sess); +void qlt_put_sess(struct qla_tgt_sess *sess) +{ + if (!sess) + return; + + assert_spin_locked(&sess->vha->hw->tgt.sess_lock); + kref_put(&sess->sess_kref, qlt_release_session); +} +EXPORT_SYMBOL(qlt_put_sess); static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) { @@ -857,12 +867,9 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) ql_dbg(ql_dbg_tgt_mgt, vha, 0xf004, "Timeout: sess %p about to be deleted\n", sess); - if (sess->se_sess) { + if (sess->se_sess) ha->tgt.tgt_ops->shutdown_sess(sess); - ha->tgt.tgt_ops->put_sess(sess); - } else { - qlt_unreg_sess(sess); - } + qlt_put_sess(sess); } else { schedule_delayed_work(&tgt->sess_del_work, sess->expires - elapsed); @@ -917,7 +924,7 @@ static struct qla_tgt_sess *qlt_create_sess( } } - kref_get(&sess->se_sess->sess_kref); + kref_get(&sess->sess_kref); ha->tgt.tgt_ops->update_sess(sess, fcport->d_id, fcport->loop_id, (fcport->flags & FCF_CONF_COMP_SUPPORTED)); @@ -947,6 +954,7 @@ static struct qla_tgt_sess *qlt_create_sess( sess->s_id = fcport->d_id; sess->loop_id = fcport->loop_id; sess->local = local; + kref_init(&sess->sess_kref); INIT_LIST_HEAD(&sess->del_list_entry); /* Under normal circumstances we want to logout from firmware when @@ -991,7 +999,7 @@ static struct qla_tgt_sess *qlt_create_sess( * Take an extra reference to ->sess_kref here to handle qla_tgt_sess * access across ->tgt.sess_lock reaquire. */ - kref_get(&sess->se_sess->sess_kref); + kref_get(&sess->sess_kref); } return sess; @@ -1035,7 +1043,7 @@ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return; } else { - kref_get(&sess->se_sess->sess_kref); + kref_get(&sess->sess_kref); if (sess->deleted) { qlt_undelete_sess(sess); @@ -1060,7 +1068,7 @@ void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) fcport->port_name, sess->loop_id); sess->local = 0; } - ha->tgt.tgt_ops->put_sess(sess); + qlt_put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } @@ -3817,7 +3825,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) * Drop extra session reference from qla_tgt_handle_cmd_for_atio*( */ spin_lock_irqsave(&ha->tgt.sess_lock, flags); - ha->tgt.tgt_ops->put_sess(sess); + qlt_put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return; @@ -3836,7 +3844,7 @@ out_term: spin_unlock_irqrestore(&ha->hardware_lock, flags); spin_lock_irqsave(&ha->tgt.sess_lock, flags); - ha->tgt.tgt_ops->put_sess(sess); + qlt_put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } @@ -3936,13 +3944,13 @@ static void qlt_create_sess_from_atio(struct work_struct *work) if (!cmd) { spin_lock_irqsave(&ha->hardware_lock, flags); qlt_send_busy(vha, &op->atio, SAM_STAT_BUSY); - ha->tgt.tgt_ops->put_sess(sess); + qlt_put_sess(sess); spin_unlock_irqrestore(&ha->hardware_lock, flags); kfree(op); return; } /* - * __qlt_do_work() will call ha->tgt.tgt_ops->put_sess() to release + * __qlt_do_work() will call qlt_put_sess() to release * the extra reference taken above by qlt_make_local_sess() */ __qlt_do_work(cmd); @@ -4003,13 +4011,13 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, /* * Do kref_get() before returning + dropping qla_hw_data->hardware_lock. */ - kref_get(&sess->se_sess->sess_kref); + kref_get(&sess->sess_kref); cmd = qlt_get_tag(vha, sess, atio); if (!cmd) { ql_dbg(ql_dbg_io, vha, 0x3062, "qla_target(%d): Allocation of cmd failed\n", vha->vp_idx); - ha->tgt.tgt_ops->put_sess(sess); + qlt_put_sess(sess); return -ENOMEM; } @@ -5911,7 +5919,7 @@ static void qlt_abort_work(struct qla_tgt *tgt, goto out_term2; } - kref_get(&sess->se_sess->sess_kref); + kref_get(&sess->sess_kref); } spin_lock_irqsave(&ha->hardware_lock, flags); @@ -5924,7 +5932,7 @@ static void qlt_abort_work(struct qla_tgt *tgt, goto out_term; spin_unlock_irqrestore(&ha->hardware_lock, flags); - ha->tgt.tgt_ops->put_sess(sess); + qlt_put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); return; @@ -5935,8 +5943,7 @@ out_term: qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false); spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (sess) - ha->tgt.tgt_ops->put_sess(sess); + qlt_put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); } @@ -5976,7 +5983,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, goto out_term; } - kref_get(&sess->se_sess->sess_kref); + kref_get(&sess->sess_kref); } iocb = a; @@ -5988,14 +5995,13 @@ static void qlt_tmr_work(struct qla_tgt *tgt, if (rc != 0) goto out_term; - ha->tgt.tgt_ops->put_sess(sess); + qlt_put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return; out_term: qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0); - if (sess) - ha->tgt.tgt_ops->put_sess(sess); + qlt_put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index d857feeb6514..f26c5f60eedd 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -738,7 +738,6 @@ struct qla_tgt_func_tmpl { struct qla_tgt_sess *(*find_sess_by_s_id)(struct scsi_qla_host *, const uint8_t *); void (*clear_nacl_from_fcport_map)(struct qla_tgt_sess *); - void (*put_sess)(struct qla_tgt_sess *); void (*shutdown_sess)(struct qla_tgt_sess *); }; @@ -930,6 +929,7 @@ struct qla_tgt_sess { int generation; struct se_session *se_sess; + struct kref sess_kref; struct scsi_qla_host *vha; struct qla_tgt *tgt; @@ -1101,7 +1101,7 @@ extern int qlt_remove_target(struct qla_hw_data *, struct scsi_qla_host *); extern int qlt_lport_register(void *, u64, u64, u64, int (*callback)(struct scsi_qla_host *, void *, u64, u64)); extern void qlt_lport_deregister(struct scsi_qla_host *); -extern void qlt_unreg_sess(struct qla_tgt_sess *); +void qlt_put_sess(struct qla_tgt_sess *sess); extern void qlt_fc_port_added(struct scsi_qla_host *, fc_port_t *); extern void qlt_fc_port_deleted(struct scsi_qla_host *, fc_port_t *, int); extern int __init qlt_init(void); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index c1461d225f08..6643f6fc7795 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -339,22 +339,6 @@ static void tcm_qla2xxx_release_cmd(struct se_cmd *se_cmd) qlt_free_cmd(cmd); } -static int tcm_qla2xxx_shutdown_session(struct se_session *se_sess) -{ - struct qla_tgt_sess *sess = se_sess->fabric_sess_ptr; - struct scsi_qla_host *vha; - unsigned long flags; - - BUG_ON(!sess); - vha = sess->vha; - - spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); - target_sess_cmd_list_set_waiting(se_sess); - spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - - return 1; -} - static void tcm_qla2xxx_close_session(struct se_session *se_sess) { struct qla_tgt_sess *sess = se_sess->fabric_sess_ptr; @@ -365,7 +349,8 @@ static void tcm_qla2xxx_close_session(struct se_session *se_sess) vha = sess->vha; spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); - qlt_unreg_sess(sess); + target_sess_cmd_list_set_waiting(se_sess); + qlt_put_sess(sess); spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); } @@ -457,6 +442,10 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, struct se_cmd *se_cmd = &cmd->se_cmd; struct se_session *se_sess; struct qla_tgt_sess *sess; +#ifdef CONFIG_TCM_QLA2XXX_DEBUG + struct se_portal_group *se_tpg; + struct tcm_qla2xxx_tpg *tpg; +#endif int flags = TARGET_SCF_ACK_KREF; if (bidi) @@ -477,6 +466,15 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, return -EINVAL; } +#ifdef CONFIG_TCM_QLA2XXX_DEBUG + se_tpg = se_sess->se_tpg; + tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); + if (unlikely(tpg->tpg_attrib.jam_host)) { + /* return, and dont run target_submit_cmd,discarding command */ + return 0; + } +#endif + cmd->vha->tgt_counters.qla_core_sbt_cmd++; return target_submit_cmd(se_cmd, se_sess, cdb, &cmd->sense_buffer[0], cmd->unpacked_lun, data_length, fcp_task_attr, @@ -758,23 +756,6 @@ static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct qla_tgt_sess *sess) tcm_qla2xxx_clear_sess_lookup(lport, nacl, sess); } -static void tcm_qla2xxx_release_session(struct kref *kref) -{ - struct se_session *se_sess = container_of(kref, - struct se_session, sess_kref); - - qlt_unreg_sess(se_sess->fabric_sess_ptr); -} - -static void tcm_qla2xxx_put_sess(struct qla_tgt_sess *sess) -{ - if (!sess) - return; - - assert_spin_locked(&sess->vha->hw->tgt.sess_lock); - kref_put(&sess->se_sess->sess_kref, tcm_qla2xxx_release_session); -} - static void tcm_qla2xxx_shutdown_sess(struct qla_tgt_sess *sess) { assert_spin_locked(&sess->vha->hw->tgt.sess_lock); @@ -844,6 +825,9 @@ DEF_QLA_TPG_ATTRIB(cache_dynamic_acls); DEF_QLA_TPG_ATTRIB(demo_mode_write_protect); DEF_QLA_TPG_ATTRIB(prod_mode_write_protect); DEF_QLA_TPG_ATTRIB(demo_mode_login_only); +#ifdef CONFIG_TCM_QLA2XXX_DEBUG +DEF_QLA_TPG_ATTRIB(jam_host); +#endif static struct configfs_attribute *tcm_qla2xxx_tpg_attrib_attrs[] = { &tcm_qla2xxx_tpg_attrib_attr_generate_node_acls, @@ -851,6 +835,9 @@ static struct configfs_attribute *tcm_qla2xxx_tpg_attrib_attrs[] = { &tcm_qla2xxx_tpg_attrib_attr_demo_mode_write_protect, &tcm_qla2xxx_tpg_attrib_attr_prod_mode_write_protect, &tcm_qla2xxx_tpg_attrib_attr_demo_mode_login_only, +#ifdef CONFIG_TCM_QLA2XXX_DEBUG + &tcm_qla2xxx_tpg_attrib_attr_jam_host, +#endif NULL, }; @@ -1023,6 +1010,7 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg( tpg->tpg_attrib.demo_mode_write_protect = 1; tpg->tpg_attrib.cache_dynamic_acls = 1; tpg->tpg_attrib.demo_mode_login_only = 1; + tpg->tpg_attrib.jam_host = 0; ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_FCP); if (ret < 0) { @@ -1579,7 +1567,6 @@ static struct qla_tgt_func_tmpl tcm_qla2xxx_template = { .find_sess_by_s_id = tcm_qla2xxx_find_sess_by_s_id, .find_sess_by_loop_id = tcm_qla2xxx_find_sess_by_loop_id, .clear_nacl_from_fcport_map = tcm_qla2xxx_clear_nacl_from_fcport_map, - .put_sess = tcm_qla2xxx_put_sess, .shutdown_sess = tcm_qla2xxx_shutdown_sess, }; @@ -1847,7 +1834,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, .check_stop_free = tcm_qla2xxx_check_stop_free, .release_cmd = tcm_qla2xxx_release_cmd, - .shutdown_session = tcm_qla2xxx_shutdown_session, .close_session = tcm_qla2xxx_close_session, .sess_get_index = tcm_qla2xxx_sess_get_index, .sess_get_initiator_sid = NULL, @@ -1890,7 +1876,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, .check_stop_free = tcm_qla2xxx_check_stop_free, .release_cmd = tcm_qla2xxx_release_cmd, - .shutdown_session = tcm_qla2xxx_shutdown_session, .close_session = tcm_qla2xxx_close_session, .sess_get_index = tcm_qla2xxx_sess_get_index, .sess_get_initiator_sid = NULL, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 3bbf4cb6fd97..37e026a4823d 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -34,6 +34,7 @@ struct tcm_qla2xxx_tpg_attrib { int prod_mode_write_protect; int demo_mode_login_only; int fabric_prot_type; + int jam_host; }; struct tcm_qla2xxx_tpg { diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c index bb00be8d1851..17a6387e20b5 100644 --- a/drivers/spi/spi-ep93xx.c +++ b/drivers/spi/spi-ep93xx.c @@ -567,7 +567,7 @@ static void ep93xx_spi_dma_transfer(struct ep93xx_spi *espi) txd = ep93xx_spi_dma_prepare(espi, DMA_MEM_TO_DEV); if (IS_ERR(txd)) { ep93xx_spi_dma_finish(espi, DMA_DEV_TO_MEM); - dev_err(&espi->pdev->dev, "DMA TX failed: %ld\n", PTR_ERR(rxd)); + dev_err(&espi->pdev->dev, "DMA TX failed: %ld\n", PTR_ERR(txd)); msg->status = PTR_ERR(txd); return; } diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 5bac28a3944e..7c197d1a1231 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -66,8 +66,6 @@ source "drivers/staging/nvec/Kconfig" source "drivers/staging/media/Kconfig" -source "drivers/staging/rdma/Kconfig" - source "drivers/staging/android/Kconfig" source "drivers/staging/board/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index a954242b0f2c..a470c7276142 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_FB_XGI) += xgifb/ obj-$(CONFIG_USB_EMXX) += emxx_udc/ obj-$(CONFIG_SPEAKUP) += speakup/ obj-$(CONFIG_MFD_NVEC) += nvec/ -obj-$(CONFIG_STAGING_RDMA) += rdma/ obj-$(CONFIG_ANDROID) += android/ obj-$(CONFIG_STAGING_BOARD) += board/ obj-$(CONFIG_LTE_GDM724X) += gdm724x/ diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig deleted file mode 100644 index f1f3ecadf0fb..000000000000 --- a/drivers/staging/rdma/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -menuconfig STAGING_RDMA - tristate "RDMA staging drivers" - depends on INFINIBAND - depends on PCI || BROKEN - depends on HAS_IOMEM - depends on NET - depends on INET - default n - ---help--- - This option allows you to select a number of RDMA drivers that - fall into one of two categories: deprecated drivers being held - here before finally being removed or new drivers that still need - some work before being moved to the normal RDMA driver area. - - If you wish to work on these drivers, to help improve them, or - to report problems you have with them, please use the - linux-rdma@vger.kernel.org mailing list. - - If in doubt, say N here. - - -# Please keep entries in alphabetic order -if STAGING_RDMA - -source "drivers/staging/rdma/hfi1/Kconfig" - -endif diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile deleted file mode 100644 index 8c7fc1de48a7..000000000000 --- a/drivers/staging/rdma/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# Entries for RDMA_STAGING tree -obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO deleted file mode 100644 index 4c6f1d7d2eaf..000000000000 --- a/drivers/staging/rdma/hfi1/TODO +++ /dev/null @@ -1,6 +0,0 @@ -July, 2015 - -- Remove unneeded file entries in sysfs -- Remove software processing of IB protocol and place in library for use - by qib, ipath (if still present), hfi1, and eventually soft-roce -- Replace incorrect uAPI diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c deleted file mode 100644 index bb2409ad891a..000000000000 --- a/drivers/staging/rdma/hfi1/diag.c +++ /dev/null @@ -1,1925 +0,0 @@ -/* - * Copyright(c) 2015, 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -/* - * This file contains support for diagnostic functions. It is accessed by - * opening the hfi1_diag device, normally minor number 129. Diagnostic use - * of the chip may render the chip or board unusable until the driver - * is unloaded, or in some cases, until the system is rebooted. - * - * Accesses to the chip through this interface are not similar to going - * through the /sys/bus/pci resource mmap interface. - */ - -#include <linux/io.h> -#include <linux/pci.h> -#include <linux/poll.h> -#include <linux/vmalloc.h> -#include <linux/export.h> -#include <linux/fs.h> -#include <linux/uaccess.h> -#include <linux/module.h> -#include <rdma/ib_smi.h> -#include "hfi.h" -#include "device.h" -#include "common.h" -#include "verbs_txreq.h" -#include "trace.h" - -#undef pr_fmt -#define pr_fmt(fmt) DRIVER_NAME ": " fmt -#define snoop_dbg(fmt, ...) \ - hfi1_cdbg(SNOOP, fmt, ##__VA_ARGS__) - -/* Snoop option mask */ -#define SNOOP_DROP_SEND BIT(0) -#define SNOOP_USE_METADATA BIT(1) -#define SNOOP_SET_VL0TOVL15 BIT(2) - -static u8 snoop_flags; - -/* - * Extract packet length from LRH header. - * This is in Dwords so multiply by 4 to get size in bytes - */ -#define HFI1_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2) - -enum hfi1_filter_status { - HFI1_FILTER_HIT, - HFI1_FILTER_ERR, - HFI1_FILTER_MISS -}; - -/* snoop processing functions */ -rhf_rcv_function_ptr snoop_rhf_rcv_functions[8] = { - [RHF_RCV_TYPE_EXPECTED] = snoop_recv_handler, - [RHF_RCV_TYPE_EAGER] = snoop_recv_handler, - [RHF_RCV_TYPE_IB] = snoop_recv_handler, - [RHF_RCV_TYPE_ERROR] = snoop_recv_handler, - [RHF_RCV_TYPE_BYPASS] = snoop_recv_handler, - [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, - [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, - [RHF_RCV_TYPE_INVALID7] = process_receive_invalid -}; - -/* Snoop packet structure */ -struct snoop_packet { - struct list_head list; - u32 total_len; - u8 data[]; -}; - -/* Do not make these an enum or it will blow up the capture_md */ -#define PKT_DIR_EGRESS 0x0 -#define PKT_DIR_INGRESS 0x1 - -/* Packet capture metadata returned to the user with the packet. */ -struct capture_md { - u8 port; - u8 dir; - u8 reserved[6]; - union { - u64 pbc; - u64 rhf; - } u; -}; - -static atomic_t diagpkt_count = ATOMIC_INIT(0); -static struct cdev diagpkt_cdev; -static struct device *diagpkt_device; - -static ssize_t diagpkt_write(struct file *fp, const char __user *data, - size_t count, loff_t *off); - -static const struct file_operations diagpkt_file_ops = { - .owner = THIS_MODULE, - .write = diagpkt_write, - .llseek = noop_llseek, -}; - -/* - * This is used for communication with user space for snoop extended IOCTLs - */ -struct hfi1_link_info { - __be64 node_guid; - u8 port_mode; - u8 port_state; - u16 link_speed_active; - u16 link_width_active; - u16 vl15_init; - u8 port_number; - /* - * Add padding to make this a full IB SMP payload. Note: changing the - * size of this structure will make the IOCTLs created with _IOWR - * change. - * Be sure to run tests on all IOCTLs when making changes to this - * structure. - */ - u8 res[47]; -}; - -/* - * This starts our ioctl sequence numbers *way* off from the ones - * defined in ib_core. - */ -#define SNOOP_CAPTURE_VERSION 0x1 - -#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl-number.txt */ -#define HFI1_SNOOP_IOC_MAGIC IB_IOCTL_MAGIC -#define HFI1_SNOOP_IOC_BASE_SEQ 0x80 - -#define HFI1_SNOOP_IOCGETLINKSTATE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ) -#define HFI1_SNOOP_IOCSETLINKSTATE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 1) -#define HFI1_SNOOP_IOCCLEARQUEUE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 2) -#define HFI1_SNOOP_IOCCLEARFILTER \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 3) -#define HFI1_SNOOP_IOCSETFILTER \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 4) -#define HFI1_SNOOP_IOCGETVERSION \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 5) -#define HFI1_SNOOP_IOCSET_OPTS \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6) - -/* - * These offsets +6/+7 could change, but these are already known and used - * IOCTL numbers so don't change them without a good reason. - */ -#define HFI1_SNOOP_IOCGETLINKSTATE_EXTRA \ - _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6, \ - struct hfi1_link_info) -#define HFI1_SNOOP_IOCSETLINKSTATE_EXTRA \ - _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 7, \ - struct hfi1_link_info) - -static int hfi1_snoop_open(struct inode *in, struct file *fp); -static ssize_t hfi1_snoop_read(struct file *fp, char __user *data, - size_t pkt_len, loff_t *off); -static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data, - size_t count, loff_t *off); -static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); -static unsigned int hfi1_snoop_poll(struct file *fp, - struct poll_table_struct *wait); -static int hfi1_snoop_release(struct inode *in, struct file *fp); - -struct hfi1_packet_filter_command { - int opcode; - int length; - void *value_ptr; -}; - -/* Can't re-use PKT_DIR_*GRESS here because 0 means no packets for this */ -#define HFI1_SNOOP_INGRESS 0x1 -#define HFI1_SNOOP_EGRESS 0x2 - -enum hfi1_packet_filter_opcodes { - FILTER_BY_LID, - FILTER_BY_DLID, - FILTER_BY_MAD_MGMT_CLASS, - FILTER_BY_QP_NUMBER, - FILTER_BY_PKT_TYPE, - FILTER_BY_SERVICE_LEVEL, - FILTER_BY_PKEY, - FILTER_BY_DIRECTION, -}; - -static const struct file_operations snoop_file_ops = { - .owner = THIS_MODULE, - .open = hfi1_snoop_open, - .read = hfi1_snoop_read, - .unlocked_ioctl = hfi1_ioctl, - .poll = hfi1_snoop_poll, - .write = hfi1_snoop_write, - .release = hfi1_snoop_release -}; - -struct hfi1_filter_array { - int (*filter)(void *, void *, void *); -}; - -static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value); - -static const struct hfi1_filter_array hfi1_filters[] = { - { hfi1_filter_lid }, - { hfi1_filter_dlid }, - { hfi1_filter_mad_mgmt_class }, - { hfi1_filter_qp_number }, - { hfi1_filter_ibpacket_type }, - { hfi1_filter_ib_service_level }, - { hfi1_filter_ib_pkey }, - { hfi1_filter_direction }, -}; - -#define HFI1_MAX_FILTERS ARRAY_SIZE(hfi1_filters) -#define HFI1_DIAG_MINOR_BASE 129 - -static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name); - -int hfi1_diag_add(struct hfi1_devdata *dd) -{ - char name[16]; - int ret = 0; - - snprintf(name, sizeof(name), "%s_diagpkt%d", class_name(), - dd->unit); - /* - * Do this for each device as opposed to the normal diagpkt - * interface which is one per host - */ - ret = hfi1_snoop_add(dd, name); - if (ret) - dd_dev_err(dd, "Unable to init snoop/capture device"); - - snprintf(name, sizeof(name), "%s_diagpkt", class_name()); - if (atomic_inc_return(&diagpkt_count) == 1) { - ret = hfi1_cdev_init(HFI1_DIAGPKT_MINOR, name, - &diagpkt_file_ops, &diagpkt_cdev, - &diagpkt_device, false); - } - - return ret; -} - -/* this must be called w/ dd->snoop_in_lock held */ -static void drain_snoop_list(struct list_head *queue) -{ - struct list_head *pos, *q; - struct snoop_packet *packet; - - list_for_each_safe(pos, q, queue) { - packet = list_entry(pos, struct snoop_packet, list); - list_del(pos); - kfree(packet); - } -} - -static void hfi1_snoop_remove(struct hfi1_devdata *dd) -{ - unsigned long flags = 0; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); -} - -void hfi1_diag_remove(struct hfi1_devdata *dd) -{ - hfi1_snoop_remove(dd); - if (atomic_dec_and_test(&diagpkt_count)) - hfi1_cdev_cleanup(&diagpkt_cdev, &diagpkt_device); - hfi1_cdev_cleanup(&dd->diag_cdev, &dd->diag_device); -} - -/* - * Allocated structure shared between the credit return mechanism and - * diagpkt_send(). - */ -struct diagpkt_wait { - struct completion credits_returned; - int code; - atomic_t count; -}; - -/* - * When each side is finished with the structure, they call this. - * The last user frees the structure. - */ -static void put_diagpkt_wait(struct diagpkt_wait *wait) -{ - if (atomic_dec_and_test(&wait->count)) - kfree(wait); -} - -/* - * Callback from the credit return code. Set the complete, which - * will let diapkt_send() continue. - */ -static void diagpkt_complete(void *arg, int code) -{ - struct diagpkt_wait *wait = (struct diagpkt_wait *)arg; - - wait->code = code; - complete(&wait->credits_returned); - put_diagpkt_wait(wait); /* finished with the structure */ -} - -/** - * diagpkt_send - send a packet - * @dp: diag packet descriptor - */ -static ssize_t diagpkt_send(struct diag_pkt *dp) -{ - struct hfi1_devdata *dd; - struct send_context *sc; - struct pio_buf *pbuf; - u32 *tmpbuf = NULL; - ssize_t ret = 0; - u32 pkt_len, total_len; - pio_release_cb credit_cb = NULL; - void *credit_arg = NULL; - struct diagpkt_wait *wait = NULL; - int trycount = 0; - - dd = hfi1_lookup(dp->unit); - if (!dd || !(dd->flags & HFI1_PRESENT) || !dd->kregbase) { - ret = -ENODEV; - goto bail; - } - if (!(dd->flags & HFI1_INITTED)) { - /* no hardware, freeze, etc. */ - ret = -ENODEV; - goto bail; - } - - if (dp->version != _DIAG_PKT_VERS) { - dd_dev_err(dd, "Invalid version %u for diagpkt_write\n", - dp->version); - ret = -EINVAL; - goto bail; - } - - /* send count must be an exact number of dwords */ - if (dp->len & 3) { - ret = -EINVAL; - goto bail; - } - - /* there is only port 1 */ - if (dp->port != 1) { - ret = -EINVAL; - goto bail; - } - - /* need a valid context */ - if (dp->sw_index >= dd->num_send_contexts) { - ret = -EINVAL; - goto bail; - } - /* can only use kernel contexts */ - if (dd->send_contexts[dp->sw_index].type != SC_KERNEL && - dd->send_contexts[dp->sw_index].type != SC_VL15) { - ret = -EINVAL; - goto bail; - } - /* must be allocated */ - sc = dd->send_contexts[dp->sw_index].sc; - if (!sc) { - ret = -EINVAL; - goto bail; - } - /* must be enabled */ - if (!(sc->flags & SCF_ENABLED)) { - ret = -EINVAL; - goto bail; - } - - /* allocate a buffer and copy the data in */ - tmpbuf = vmalloc(dp->len); - if (!tmpbuf) { - ret = -ENOMEM; - goto bail; - } - - if (copy_from_user(tmpbuf, - (const void __user *)(unsigned long)dp->data, - dp->len)) { - ret = -EFAULT; - goto bail; - } - - /* - * pkt_len is how much data we have to write, includes header and data. - * total_len is length of the packet in Dwords plus the PBC should not - * include the CRC. - */ - pkt_len = dp->len >> 2; - total_len = pkt_len + 2; /* PBC + packet */ - - /* if 0, fill in a default */ - if (dp->pbc == 0) { - struct hfi1_pportdata *ppd = dd->pport; - - hfi1_cdbg(PKT, "Generating PBC"); - dp->pbc = create_pbc(ppd, 0, 0, 0, total_len); - } else { - hfi1_cdbg(PKT, "Using passed in PBC"); - } - - hfi1_cdbg(PKT, "Egress PBC content is 0x%llx", dp->pbc); - - /* - * The caller wants to wait until the packet is sent and to - * check for errors. The best we can do is wait until - * the buffer credits are returned and check if any packet - * error has occurred. If there are any late errors, this - * could miss it. If there are other senders who generate - * an error, this may find it. However, in general, it - * should catch most. - */ - if (dp->flags & F_DIAGPKT_WAIT) { - /* always force a credit return */ - dp->pbc |= PBC_CREDIT_RETURN; - /* turn on credit return interrupts */ - sc_add_credit_return_intr(sc); - wait = kmalloc(sizeof(*wait), GFP_KERNEL); - if (!wait) { - ret = -ENOMEM; - goto bail; - } - init_completion(&wait->credits_returned); - atomic_set(&wait->count, 2); - wait->code = PRC_OK; - - credit_cb = diagpkt_complete; - credit_arg = wait; - } - -retry: - pbuf = sc_buffer_alloc(sc, total_len, credit_cb, credit_arg); - if (!pbuf) { - if (trycount == 0) { - /* force a credit return and try again */ - sc_return_credits(sc); - trycount = 1; - goto retry; - } - /* - * No send buffer means no credit callback. Undo - * the wait set-up that was done above. We free wait - * because the callback will never be called. - */ - if (dp->flags & F_DIAGPKT_WAIT) { - sc_del_credit_return_intr(sc); - kfree(wait); - wait = NULL; - } - ret = -ENOSPC; - goto bail; - } - - pio_copy(dd, pbuf, dp->pbc, tmpbuf, pkt_len); - /* no flush needed as the HW knows the packet size */ - - ret = sizeof(*dp); - - if (dp->flags & F_DIAGPKT_WAIT) { - /* wait for credit return */ - ret = wait_for_completion_interruptible( - &wait->credits_returned); - /* - * If the wait returns an error, the wait was interrupted, - * e.g. with a ^C in the user program. The callback is - * still pending. This is OK as the wait structure is - * kmalloc'ed and the structure will free itself when - * all users are done with it. - * - * A context disable occurs on a send context restart, so - * include that in the list of errors below to check for. - * NOTE: PRC_FILL_ERR is at best informational and cannot - * be depended on. - */ - if (!ret && (((wait->code & PRC_STATUS_ERR) || - (wait->code & PRC_FILL_ERR) || - (wait->code & PRC_SC_DISABLE)))) - ret = -EIO; - - put_diagpkt_wait(wait); /* finished with the structure */ - sc_del_credit_return_intr(sc); - } - -bail: - vfree(tmpbuf); - return ret; -} - -static ssize_t diagpkt_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - struct hfi1_devdata *dd; - struct send_context *sc; - u8 vl; - - struct diag_pkt dp; - - if (count != sizeof(dp)) - return -EINVAL; - - if (copy_from_user(&dp, data, sizeof(dp))) - return -EFAULT; - - /* - * The Send Context is derived from the PbcVL value - * if PBC is populated - */ - if (dp.pbc) { - dd = hfi1_lookup(dp.unit); - if (!dd) - return -ENODEV; - vl = (dp.pbc >> PBC_VL_SHIFT) & PBC_VL_MASK; - sc = dd->vld[vl].sc; - if (sc) { - dp.sw_index = sc->sw_index; - hfi1_cdbg( - PKT, - "Packet sent over VL %d via Send Context %u(%u)", - vl, sc->sw_index, sc->hw_context); - } - } - - return diagpkt_send(&dp); -} - -static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name) -{ - int ret = 0; - - dd->hfi1_snoop.mode_flag = 0; - spin_lock_init(&dd->hfi1_snoop.snoop_lock); - INIT_LIST_HEAD(&dd->hfi1_snoop.queue); - init_waitqueue_head(&dd->hfi1_snoop.waitq); - - ret = hfi1_cdev_init(HFI1_SNOOP_CAPTURE_BASE + dd->unit, name, - &snoop_file_ops, - &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev, - false); - - if (ret) { - dd_dev_err(dd, "Couldn't create %s device: %d", name, ret); - hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, - &dd->hfi1_snoop.class_dev); - } - - return ret; -} - -static struct hfi1_devdata *hfi1_dd_from_sc_inode(struct inode *in) -{ - int unit = iminor(in) - HFI1_SNOOP_CAPTURE_BASE; - struct hfi1_devdata *dd; - - dd = hfi1_lookup(unit); - return dd; -} - -/* clear or restore send context integrity checks */ -static void adjust_integrity_checks(struct hfi1_devdata *dd) -{ - struct send_context *sc; - unsigned long sc_flags; - int i; - - spin_lock_irqsave(&dd->sc_lock, sc_flags); - for (i = 0; i < dd->num_send_contexts; i++) { - int enable; - - sc = dd->send_contexts[i].sc; - - if (!sc) - continue; /* not allocated */ - - enable = likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) && - dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE; - - set_pio_integrity(sc); - - if (enable) /* take HFI_CAP_* flags into account */ - hfi1_init_ctxt(sc); - } - spin_unlock_irqrestore(&dd->sc_lock, sc_flags); -} - -static int hfi1_snoop_open(struct inode *in, struct file *fp) -{ - int ret; - int mode_flag = 0; - unsigned long flags = 0; - struct hfi1_devdata *dd; - struct list_head *queue; - - mutex_lock(&hfi1_mutex); - - dd = hfi1_dd_from_sc_inode(in); - if (!dd) { - ret = -ENODEV; - goto bail; - } - - /* - * File mode determines snoop or capture. Some existing user - * applications expect the capture device to be able to be opened RDWR - * because they expect a dedicated capture device. For this reason we - * support a module param to force capture mode even if the file open - * mode matches snoop. - */ - if ((fp->f_flags & O_ACCMODE) == O_RDONLY) { - snoop_dbg("Capture Enabled"); - mode_flag = HFI1_PORT_CAPTURE_MODE; - } else if ((fp->f_flags & O_ACCMODE) == O_RDWR) { - snoop_dbg("Snoop Enabled"); - mode_flag = HFI1_PORT_SNOOP_MODE; - } else { - snoop_dbg("Invalid"); - ret = -EINVAL; - goto bail; - } - queue = &dd->hfi1_snoop.queue; - - /* - * We are not supporting snoop and capture at the same time. - */ - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (dd->hfi1_snoop.mode_flag) { - ret = -EBUSY; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - goto bail; - } - - dd->hfi1_snoop.mode_flag = mode_flag; - drain_snoop_list(queue); - - dd->hfi1_snoop.filter_callback = NULL; - dd->hfi1_snoop.filter_value = NULL; - - /* - * Send side packet integrity checks are not helpful when snooping so - * disable and re-enable when we stop snooping. - */ - if (mode_flag == HFI1_PORT_SNOOP_MODE) { - /* clear after snoop mode is on */ - adjust_integrity_checks(dd); /* clear */ - - /* - * We also do not want to be doing the DLID LMC check for - * ingressed packets. - */ - dd->hfi1_snoop.dcc_cfg = read_csr(dd, DCC_CFG_PORT_CONFIG1); - write_csr(dd, DCC_CFG_PORT_CONFIG1, - (dd->hfi1_snoop.dcc_cfg >> 32) << 32); - } - - /* - * As soon as we set these function pointers the recv and send handlers - * are active. This is a race condition so we must make sure to drain - * the queue and init filter values above. Technically we should add - * locking here but all that will happen is on recv a packet will get - * allocated and get stuck on the snoop_lock before getting added to the - * queue. Same goes for send. - */ - dd->rhf_rcv_function_map = snoop_rhf_rcv_functions; - dd->process_pio_send = snoop_send_pio_handler; - dd->process_dma_send = snoop_send_pio_handler; - dd->pio_inline_send = snoop_inline_pio_send; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - ret = 0; - -bail: - mutex_unlock(&hfi1_mutex); - - return ret; -} - -static int hfi1_snoop_release(struct inode *in, struct file *fp) -{ - unsigned long flags = 0; - struct hfi1_devdata *dd; - int mode_flag; - - dd = hfi1_dd_from_sc_inode(in); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - /* clear the snoop mode before re-adjusting send context CSRs */ - mode_flag = dd->hfi1_snoop.mode_flag; - dd->hfi1_snoop.mode_flag = 0; - - /* - * Drain the queue and clear the filters we are done with it. Don't - * forget to restore the packet integrity checks - */ - drain_snoop_list(&dd->hfi1_snoop.queue); - if (mode_flag == HFI1_PORT_SNOOP_MODE) { - /* restore after snoop mode is clear */ - adjust_integrity_checks(dd); /* restore */ - - /* - * Also should probably reset the DCC_CONFIG1 register for DLID - * checking on incoming packets again. Use the value saved when - * opening the snoop device. - */ - write_csr(dd, DCC_CFG_PORT_CONFIG1, dd->hfi1_snoop.dcc_cfg); - } - - dd->hfi1_snoop.filter_callback = NULL; - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = NULL; - - /* - * User is done snooping and capturing, return control to the normal - * handler. Re-enable SDMA handling. - */ - dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions; - dd->process_pio_send = hfi1_verbs_send_pio; - dd->process_dma_send = hfi1_verbs_send_dma; - dd->pio_inline_send = pio_copy; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - - snoop_dbg("snoop/capture device released"); - - return 0; -} - -static unsigned int hfi1_snoop_poll(struct file *fp, - struct poll_table_struct *wait) -{ - int ret = 0; - unsigned long flags = 0; - - struct hfi1_devdata *dd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - poll_wait(fp, &dd->hfi1_snoop.waitq, wait); - if (!list_empty(&dd->hfi1_snoop.queue)) - ret |= POLLIN | POLLRDNORM; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - return ret; -} - -static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - struct diag_pkt dpkt; - struct hfi1_devdata *dd; - size_t ret; - u8 byte_two, sl, sc5, sc4, vl, byte_one; - struct send_context *sc; - u32 len; - u64 pbc; - struct hfi1_ibport *ibp; - struct hfi1_pportdata *ppd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - ppd = dd->pport; - snoop_dbg("received %lu bytes from user", count); - - memset(&dpkt, 0, sizeof(struct diag_pkt)); - dpkt.version = _DIAG_PKT_VERS; - dpkt.unit = dd->unit; - dpkt.port = 1; - - if (likely(!(snoop_flags & SNOOP_USE_METADATA))) { - /* - * We need to generate the PBC and not let diagpkt_send do it, - * to do this we need the VL and the length in dwords. - * The VL can be determined by using the SL and looking up the - * SC. Then the SC can be converted into VL. The exception to - * this is those packets which are from an SMI queue pair. - * Since we can't detect anything about the QP here we have to - * rely on the SC. If its 0xF then we assume its SMI and - * do not look at the SL. - */ - if (copy_from_user(&byte_one, data, 1)) - return -EINVAL; - - if (copy_from_user(&byte_two, data + 1, 1)) - return -EINVAL; - - sc4 = (byte_one >> 4) & 0xf; - if (sc4 == 0xF) { - snoop_dbg("Detected VL15 packet ignoring SL in packet"); - vl = sc4; - } else { - sl = (byte_two >> 4) & 0xf; - ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1); - sc5 = ibp->sl_to_sc[sl]; - vl = sc_to_vlt(dd, sc5); - if (vl != sc4) { - snoop_dbg("VL %d does not match SC %d of packet", - vl, sc4); - return -EINVAL; - } - } - - sc = dd->vld[vl].sc; /* Look up the context based on VL */ - if (sc) { - dpkt.sw_index = sc->sw_index; - snoop_dbg("Sending on context %u(%u)", sc->sw_index, - sc->hw_context); - } else { - snoop_dbg("Could not find context for vl %d", vl); - return -EINVAL; - } - - len = (count >> 2) + 2; /* Add in PBC */ - pbc = create_pbc(ppd, 0, 0, vl, len); - } else { - if (copy_from_user(&pbc, data, sizeof(pbc))) - return -EINVAL; - vl = (pbc >> PBC_VL_SHIFT) & PBC_VL_MASK; - sc = dd->vld[vl].sc; /* Look up the context based on VL */ - if (sc) { - dpkt.sw_index = sc->sw_index; - } else { - snoop_dbg("Could not find context for vl %d", vl); - return -EINVAL; - } - data += sizeof(pbc); - count -= sizeof(pbc); - } - dpkt.len = count; - dpkt.data = (unsigned long)data; - - snoop_dbg("PBC: vl=0x%llx Length=0x%llx", - (pbc >> 12) & 0xf, - (pbc & 0xfff)); - - dpkt.pbc = pbc; - ret = diagpkt_send(&dpkt); - /* - * diagpkt_send only returns number of bytes in the diagpkt so patch - * that up here before returning. - */ - if (ret == sizeof(dpkt)) - return count; - - return ret; -} - -static ssize_t hfi1_snoop_read(struct file *fp, char __user *data, - size_t pkt_len, loff_t *off) -{ - ssize_t ret = 0; - unsigned long flags = 0; - struct snoop_packet *packet = NULL; - struct hfi1_devdata *dd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - while (list_empty(&dd->hfi1_snoop.queue)) { - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - - if (fp->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (wait_event_interruptible( - dd->hfi1_snoop.waitq, - !list_empty(&dd->hfi1_snoop.queue))) - return -EINTR; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - } - - if (!list_empty(&dd->hfi1_snoop.queue)) { - packet = list_entry(dd->hfi1_snoop.queue.next, - struct snoop_packet, list); - list_del(&packet->list); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - if (pkt_len >= packet->total_len) { - if (copy_to_user(data, packet->data, - packet->total_len)) - ret = -EFAULT; - else - ret = packet->total_len; - } else { - ret = -EINVAL; - } - - kfree(packet); - } else { - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - } - - return ret; -} - -/** - * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others - * @ppd : ptr to hfi1 port data - * @value : options from user space - * - * Assumes the rest of the CM credit registers are zero from a - * previous global or credit reset. - * Leave shared count at zero for both global and all vls. - * In snoop mode ideally we don't use shared credits - * Reserve 8.5k for VL15 - * If total credits less than 8.5kbytes return error. - * Divide the rest of the credits across VL0 to VL7 and if - * each of these levels has less than 34 credits (at least 2048 + 128 bytes) - * return with an error. - * The credit registers will be reset to zero on link negotiation or link up - * so this function should be activated from user space only if the port has - * gone past link negotiation and link up. - * - * Return -- 0 if successful else error condition - * - */ -static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd, - int value) -{ -#define OPA_MIN_PER_VL_CREDITS 34 /* 2048 + 128 bytes */ - struct buffer_control t; - int i; - struct hfi1_devdata *dd = ppd->dd; - u16 total_credits = (value >> 16) & 0xffff; - u16 vl15_credits = dd->vl15_init / 2; - u16 per_vl_credits; - __be16 be_per_vl_credits; - - if (!(ppd->host_link_state & HLS_UP)) - goto err_exit; - if (total_credits < vl15_credits) - goto err_exit; - - per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL; - - if (per_vl_credits < OPA_MIN_PER_VL_CREDITS) - goto err_exit; - - memset(&t, 0, sizeof(t)); - be_per_vl_credits = cpu_to_be16(per_vl_credits); - - for (i = 0; i < TXE_NUM_DATA_VL; i++) - t.vl[i].dedicated = be_per_vl_credits; - - t.vl[15].dedicated = cpu_to_be16(vl15_credits); - return set_buffer_control(ppd, &t); - -err_exit: - snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d", - ppd->host_link_state, total_credits, vl15_credits); - - return -EINVAL; -} - -static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) -{ - struct hfi1_devdata *dd; - void *filter_value = NULL; - long ret = 0; - int value = 0; - u8 phys_state = 0; - u8 link_state = 0; - u16 dev_state = 0; - unsigned long flags = 0; - unsigned long *argp = NULL; - struct hfi1_packet_filter_command filter_cmd = {0}; - int mode_flag = 0; - struct hfi1_pportdata *ppd = NULL; - unsigned int index; - struct hfi1_link_info link_info; - int read_cmd, write_cmd, read_ok, write_ok; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - mode_flag = dd->hfi1_snoop.mode_flag; - read_cmd = _IOC_DIR(cmd) & _IOC_READ; - write_cmd = _IOC_DIR(cmd) & _IOC_WRITE; - write_ok = access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd)); - read_ok = access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd)); - - if ((read_cmd && !write_ok) || (write_cmd && !read_ok)) - return -EFAULT; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if ((mode_flag & HFI1_PORT_CAPTURE_MODE) && - (cmd != HFI1_SNOOP_IOCCLEARQUEUE) && - (cmd != HFI1_SNOOP_IOCCLEARFILTER) && - (cmd != HFI1_SNOOP_IOCSETFILTER)) - /* Capture devices are allowed only 3 operations - * 1.Clear capture queue - * 2.Clear capture filter - * 3.Set capture filter - * Other are invalid. - */ - return -EINVAL; - - switch (cmd) { - case HFI1_SNOOP_IOCSETLINKSTATE_EXTRA: - memset(&link_info, 0, sizeof(link_info)); - - if (copy_from_user(&link_info, - (struct hfi1_link_info __user *)arg, - sizeof(link_info))) - return -EFAULT; - - value = link_info.port_state; - index = link_info.port_number; - if (index > dd->num_pports - 1) - return -EINVAL; - - ppd = &dd->pport[index]; - if (!ppd) - return -EINVAL; - - /* What we want to transition to */ - phys_state = (value >> 4) & 0xF; - link_state = value & 0xF; - snoop_dbg("Setting link state 0x%x", value); - - switch (link_state) { - case IB_PORT_NOP: - if (phys_state == 0) - break; - /* fall through */ - case IB_PORT_DOWN: - switch (phys_state) { - case 0: - dev_state = HLS_DN_DOWNDEF; - break; - case 2: - dev_state = HLS_DN_POLL; - break; - case 3: - dev_state = HLS_DN_DISABLE; - break; - default: - return -EINVAL; - } - ret = set_link_state(ppd, dev_state); - break; - case IB_PORT_ARMED: - ret = set_link_state(ppd, HLS_UP_ARMED); - if (!ret) - send_idle_sma(dd, SMA_IDLE_ARM); - break; - case IB_PORT_ACTIVE: - ret = set_link_state(ppd, HLS_UP_ACTIVE); - if (!ret) - send_idle_sma(dd, SMA_IDLE_ACTIVE); - break; - default: - return -EINVAL; - } - - if (ret) - break; - /* fall through */ - case HFI1_SNOOP_IOCGETLINKSTATE: - case HFI1_SNOOP_IOCGETLINKSTATE_EXTRA: - if (cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) { - memset(&link_info, 0, sizeof(link_info)); - if (copy_from_user(&link_info, - (struct hfi1_link_info __user *)arg, - sizeof(link_info))) - return -EFAULT; - index = link_info.port_number; - } else { - ret = __get_user(index, (int __user *)arg); - if (ret != 0) - break; - } - - if (index > dd->num_pports - 1) - return -EINVAL; - - ppd = &dd->pport[index]; - if (!ppd) - return -EINVAL; - - value = hfi1_ibphys_portstate(ppd); - value <<= 4; - value |= driver_lstate(ppd); - - snoop_dbg("Link port | Link State: %d", value); - - if ((cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) || - (cmd == HFI1_SNOOP_IOCSETLINKSTATE_EXTRA)) { - link_info.port_state = value; - link_info.node_guid = cpu_to_be64(ppd->guid); - link_info.link_speed_active = - ppd->link_speed_active; - link_info.link_width_active = - ppd->link_width_active; - if (copy_to_user((struct hfi1_link_info __user *)arg, - &link_info, sizeof(link_info))) - return -EFAULT; - } else { - ret = __put_user(value, (int __user *)arg); - } - break; - - case HFI1_SNOOP_IOCCLEARQUEUE: - snoop_dbg("Clearing snoop queue"); - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - - case HFI1_SNOOP_IOCCLEARFILTER: - snoop_dbg("Clearing filter"); - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (dd->hfi1_snoop.filter_callback) { - /* Drain packets first */ - drain_snoop_list(&dd->hfi1_snoop.queue); - dd->hfi1_snoop.filter_callback = NULL; - } - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = NULL; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - - case HFI1_SNOOP_IOCSETFILTER: - snoop_dbg("Setting filter"); - /* just copy command structure */ - argp = (unsigned long *)arg; - if (copy_from_user(&filter_cmd, (void __user *)argp, - sizeof(filter_cmd))) - return -EFAULT; - - if (filter_cmd.opcode >= HFI1_MAX_FILTERS) { - pr_alert("Invalid opcode in request\n"); - return -EINVAL; - } - - snoop_dbg("Opcode %d Len %d Ptr %p", - filter_cmd.opcode, filter_cmd.length, - filter_cmd.value_ptr); - - filter_value = kcalloc(filter_cmd.length, sizeof(u8), - GFP_KERNEL); - if (!filter_value) - return -ENOMEM; - - /* copy remaining data from userspace */ - if (copy_from_user((u8 *)filter_value, - (void __user *)filter_cmd.value_ptr, - filter_cmd.length)) { - kfree(filter_value); - return -EFAULT; - } - /* Drain packets first */ - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - dd->hfi1_snoop.filter_callback = - hfi1_filters[filter_cmd.opcode].filter; - /* just in case we see back to back sets */ - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = filter_value; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - case HFI1_SNOOP_IOCGETVERSION: - value = SNOOP_CAPTURE_VERSION; - snoop_dbg("Getting version: %d", value); - ret = __put_user(value, (int __user *)arg); - break; - case HFI1_SNOOP_IOCSET_OPTS: - snoop_flags = 0; - ret = __get_user(value, (int __user *)arg); - if (ret != 0) - break; - - snoop_dbg("Setting snoop option %d", value); - if (value & SNOOP_DROP_SEND) - snoop_flags |= SNOOP_DROP_SEND; - if (value & SNOOP_USE_METADATA) - snoop_flags |= SNOOP_USE_METADATA; - if (value & (SNOOP_SET_VL0TOVL15)) { - ppd = &dd->pport[0]; /* first port will do */ - ret = hfi1_assign_snoop_link_credits(ppd, value); - } - break; - default: - return -ENOTTY; - } - - return ret; -} - -static void snoop_list_add_tail(struct snoop_packet *packet, - struct hfi1_devdata *dd) -{ - unsigned long flags = 0; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (likely((dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) || - (dd->hfi1_snoop.mode_flag & HFI1_PORT_CAPTURE_MODE))) { - list_add_tail(&packet->list, &dd->hfi1_snoop.queue); - snoop_dbg("Added packet to list"); - } - - /* - * Technically we can could have closed the snoop device while waiting - * on the above lock and it is gone now. The snoop mode_flag will - * prevent us from adding the packet to the queue though. - */ - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - wake_up_interruptible(&dd->hfi1_snoop.waitq); -} - -static inline int hfi1_filter_check(void *val, const char *msg) -{ - if (!val) { - snoop_dbg("Error invalid %s value for filter", msg); - return HFI1_FILTER_ERR; - } - return 0; -} - -static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - hdr = (struct hfi1_ib_header *)ibhdr; - - if (*((u16 *)value) == be16_to_cpu(hdr->lrh[3])) /* matches slid */ - return HFI1_FILTER_HIT; /* matched */ - - return HFI1_FILTER_MISS; /* Not matched */ -} - -static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - if (*((u16 *)value) == be16_to_cpu(hdr->lrh[1])) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -/* Not valid for outgoing packets, send handler passes null for data*/ -static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data, - void *value) -{ - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - struct ib_smp *smp = NULL; - u32 qpn = 0; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(packet_data, "packet_data"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - /* Check for GRH */ - if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; /* LRH + BTH + DETH */ - else - ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */ - - qpn = be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF; - if (qpn <= 1) { - smp = (struct ib_smp *)packet_data; - if (*((u8 *)value) == smp->mgmt_class) - return HFI1_FILTER_HIT; - else - return HFI1_FILTER_MISS; - } - return HFI1_FILTER_ERR; -} - -static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - /* Check for GRH */ - if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; /* LRH + BTH + DETH */ - else - ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */ - if (*((u32 *)value) == (be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data, - void *value) -{ - u32 lnh = 0; - u8 opcode = 0; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - lnh = (be16_to_cpu(hdr->lrh[0]) & 3); - - if (lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else - return HFI1_FILTER_ERR; - - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - - if (*((u8 *)value) == ((opcode >> 5) & 0x7)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data, - void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - if ((*((u8 *)value)) == ((be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value) -{ - u32 lnh = 0; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - lnh = (be16_to_cpu(hdr->lrh[0]) & 3); - if (lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else - return HFI1_FILTER_ERR; - - /* P_key is 16-bit entity, however top most bit indicates - * type of membership. 0 for limited and 1 for Full. - * Limited members cannot accept information from other - * Limited members, but communication is allowed between - * every other combination of membership. - * Hence we'll omit comparing top-most bit while filtering - */ - - if ((*(u16 *)value & 0x7FFF) == - ((be32_to_cpu(ohdr->bth[0])) & 0x7FFF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -/* - * If packet_data is NULL then this is coming from one of the send functions. - * Thus we know if its an ingressed or egressed packet. - */ -static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value) -{ - u8 user_dir = *(u8 *)value; - int ret; - - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - if (packet_data) { - /* Incoming packet */ - if (user_dir & HFI1_SNOOP_INGRESS) - return HFI1_FILTER_HIT; - } else { - /* Outgoing packet */ - if (user_dir & HFI1_SNOOP_EGRESS) - return HFI1_FILTER_HIT; - } - - return HFI1_FILTER_MISS; -} - -/* - * Allocate a snoop packet. The structure that is stored in the ring buffer, not - * to be confused with an hfi packet type. - */ -static struct snoop_packet *allocate_snoop_packet(u32 hdr_len, - u32 data_len, - u32 md_len) -{ - struct snoop_packet *packet; - - packet = kzalloc(sizeof(*packet) + hdr_len + data_len - + md_len, - GFP_ATOMIC | __GFP_NOWARN); - if (likely(packet)) - INIT_LIST_HEAD(&packet->list); - - return packet; -} - -/* - * Instead of having snoop and capture code intermixed with the recv functions, - * both the interrupt handler and hfi1_ib_rcv() we are going to hijack the call - * and land in here for snoop/capture but if not enabled the call will go - * through as before. This gives us a single point to constrain all of the snoop - * snoop recv logic. There is nothing special that needs to happen for bypass - * packets. This routine should not try to look into the packet. It just copied - * it. There is no guarantee for filters when it comes to bypass packets as - * there is no specific support. Bottom line is this routine does now even know - * what a bypass packet is. - */ -int snoop_recv_handler(struct hfi1_packet *packet) -{ - struct hfi1_pportdata *ppd = packet->rcd->ppd; - struct hfi1_ib_header *hdr = packet->hdr; - int header_size = packet->hlen; - void *data = packet->ebuf; - u32 tlen = packet->tlen; - struct snoop_packet *s_packet = NULL; - int ret; - int snoop_mode = 0; - u32 md_len = 0; - struct capture_md md; - - snoop_dbg("PACKET IN: hdr size %d tlen %d data %p", header_size, tlen, - data); - - trace_snoop_capture(ppd->dd, header_size, hdr, tlen - header_size, - data); - - if (!ppd->dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set"); - ret = HFI1_FILTER_HIT; - } else { - ret = ppd->dd->hfi1_snoop.filter_callback(hdr, data, - ppd->dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - break; - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - break; - case HFI1_FILTER_HIT: - - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - s_packet = allocate_snoop_packet(header_size, - tlen - header_size, - md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n"); - break; - } - - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_INGRESS; - md.u.rhf = packet->rhf; - memcpy(s_packet->data, &md, md_len); - } - - /* We should always have a header */ - if (hdr) { - memcpy(s_packet->data + md_len, hdr, header_size); - } else { - dd_dev_err(ppd->dd, "Unable to copy header to snoop/capture packet\n"); - kfree(s_packet); - break; - } - - /* - * Packets with no data are possible. If there is no data needed - * to take care of the last 4 bytes which are normally included - * with data buffers and are included in tlen. Since we kzalloc - * the buffer we do not need to set any values but if we decide - * not to use kzalloc we should zero them. - */ - if (data) - memcpy(s_packet->data + header_size + md_len, data, - tlen - header_size); - - s_packet->total_len = tlen + md_len; - snoop_list_add_tail(s_packet, ppd->dd); - - /* - * If we are snooping the packet not capturing then throw away - * after adding to the list. - */ - snoop_dbg("Capturing packet"); - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) { - snoop_dbg("Throwing packet away"); - /* - * If we are dropping the packet we still may need to - * handle the case where error flags are set, this is - * normally done by the type specific handler but that - * won't be called in this case. - */ - if (unlikely(rhf_err_flags(packet->rhf))) - handle_eflags(packet); - - /* throw the packet on the floor */ - return RHF_RCV_CONTINUE; - } - break; - default: - break; - } - - /* - * We do not care what type of packet came in here - just pass it off - * to the normal handler. - */ - return ppd->dd->normal_rhf_rcv_functions[rhf_rcv_type(packet->rhf)] - (packet); -} - -/* - * Handle snooping and capturing packets when sdma is being used. - */ -int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc) -{ - pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n"); - snoop_dbg("Unsupported Operation"); - return hfi1_verbs_send_dma(qp, ps, 0); -} - -/* - * Handle snooping and capturing packets when pio is being used. Does not handle - * bypass packets. The only way to send a bypass packet currently is to use the - * diagpkt interface. When that interface is enable snoop/capture is not. - */ -int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc) -{ - u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; - u32 dwords = (len + 3) >> 2; - u32 plen = hdrwords + dwords + 2; /* includes pbc */ - struct hfi1_pportdata *ppd = ps->ppd; - struct snoop_packet *s_packet = NULL; - u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; - u32 length = 0; - struct rvt_sge_state temp_ss; - void *data = NULL; - void *data_start = NULL; - int ret; - int snoop_mode = 0; - int md_len = 0; - struct capture_md md; - u32 vl; - u32 hdr_len = hdrwords << 2; - u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr); - - md.u.pbc = 0; - - snoop_dbg("PACKET OUT: hdrword %u len %u plen %u dwords %u tlen %u", - hdrwords, len, plen, dwords, tlen); - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - /* not using ss->total_len as arg 2 b/c that does not count CRC */ - s_packet = allocate_snoop_packet(hdr_len, tlen - hdr_len, md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n"); - goto out; - } - - s_packet->total_len = tlen + md_len; - - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_EGRESS; - if (likely(pbc == 0)) { - vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12; - md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen); - } else { - md.u.pbc = 0; - } - memcpy(s_packet->data, &md, md_len); - } else { - md.u.pbc = pbc; - } - - /* Copy header */ - if (likely(hdr)) { - memcpy(s_packet->data + md_len, hdr, hdr_len); - } else { - dd_dev_err(ppd->dd, - "Unable to copy header to snoop/capture packet\n"); - kfree(s_packet); - goto out; - } - - if (ss) { - data = s_packet->data + hdr_len + md_len; - data_start = data; - - /* - * Copy SGE State - * The update_sge() function below will not modify the - * individual SGEs in the array. It will make a copy each time - * and operate on that. So we only need to copy this instance - * and it won't impact PIO. - */ - temp_ss = *ss; - length = len; - - snoop_dbg("Need to copy %d bytes", length); - while (length) { - void *addr = temp_ss.sge.vaddr; - u32 slen = temp_ss.sge.length; - - if (slen > length) { - slen = length; - snoop_dbg("slen %d > len %d", slen, length); - } - snoop_dbg("copy %d to %p", slen, addr); - memcpy(data, addr, slen); - update_sge(&temp_ss, slen); - length -= slen; - data += slen; - snoop_dbg("data is now %p bytes left %d", data, length); - } - snoop_dbg("Completed SGE copy"); - } - - /* - * Why do the filter check down here? Because the event tracing has its - * own filtering and we need to have the walked the SGE list. - */ - if (!ppd->dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set\n"); - ret = HFI1_FILTER_HIT; - } else { - ret = ppd->dd->hfi1_snoop.filter_callback( - &ps->s_txreq->phdr.hdr, - NULL, - ppd->dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - /* fall through */ - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - kfree(s_packet); - break; - case HFI1_FILTER_HIT: - snoop_dbg("Capturing packet"); - snoop_list_add_tail(s_packet, ppd->dd); - - if (unlikely((snoop_flags & SNOOP_DROP_SEND) && - (ppd->dd->hfi1_snoop.mode_flag & - HFI1_PORT_SNOOP_MODE))) { - unsigned long flags; - - snoop_dbg("Dropping packet"); - if (qp->s_wqe) { - spin_lock_irqsave(&qp->s_lock, flags); - hfi1_send_complete( - qp, - qp->s_wqe, - IB_WC_SUCCESS); - spin_unlock_irqrestore(&qp->s_lock, flags); - } else if (qp->ibqp.qp_type == IB_QPT_RC) { - spin_lock_irqsave(&qp->s_lock, flags); - hfi1_rc_send_complete(qp, - &ps->s_txreq->phdr.hdr); - spin_unlock_irqrestore(&qp->s_lock, flags); - } - - /* - * If snoop is dropping the packet we need to put the - * txreq back because no one else will. - */ - hfi1_put_txreq(ps->s_txreq); - return 0; - } - break; - default: - kfree(s_packet); - break; - } -out: - return hfi1_verbs_send_pio(qp, ps, md.u.pbc); -} - -/* - * Callers of this must pass a hfi1_ib_header type for the from ptr. Currently - * this can be used anywhere, but the intention is for inline ACKs for RC and - * CCA packets. We don't restrict this usage though. - */ -void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf, - u64 pbc, const void *from, size_t count) -{ - int snoop_mode = 0; - int md_len = 0; - struct capture_md md; - struct snoop_packet *s_packet = NULL; - - /* - * count is in dwords so we need to convert to bytes. - * We also need to account for CRC which would be tacked on by hardware. - */ - int packet_len = (count << 2) + 4; - int ret; - - snoop_dbg("ACK OUT: len %d", packet_len); - - if (!dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set"); - ret = HFI1_FILTER_HIT; - } else { - ret = dd->hfi1_snoop.filter_callback( - (struct hfi1_ib_header *)from, - NULL, - dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - /* fall through */ - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - break; - case HFI1_FILTER_HIT: - snoop_dbg("Capturing packet"); - if (dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - s_packet = allocate_snoop_packet(packet_len, 0, md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(dd, "Unable to allocate snoop/capture packet\n"); - goto inline_pio_out; - } - - s_packet->total_len = packet_len + md_len; - - /* Fill in the metadata for the packet */ - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_EGRESS; - md.u.pbc = pbc; - memcpy(s_packet->data, &md, md_len); - } - - /* Add the packet data which is a single buffer */ - memcpy(s_packet->data + md_len, from, packet_len); - - snoop_list_add_tail(s_packet, dd); - - if (unlikely((snoop_flags & SNOOP_DROP_SEND) && snoop_mode)) { - snoop_dbg("Dropping packet"); - return; - } - break; - default: - break; - } - -inline_pio_out: - pio_copy(dd, pbuf, pbc, from, count); -} diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c deleted file mode 100644 index bd8771570f81..000000000000 --- a/drivers/staging/rdma/hfi1/eprom.c +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Copyright(c) 2015, 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ -#include <linux/delay.h> -#include "hfi.h" -#include "common.h" -#include "eprom.h" - -/* - * The EPROM is logically divided into three partitions: - * partition 0: the first 128K, visible from PCI ROM BAR - * partition 1: 4K config file (sector size) - * partition 2: the rest - */ -#define P0_SIZE (128 * 1024) -#define P1_SIZE (4 * 1024) -#define P1_START P0_SIZE -#define P2_START (P0_SIZE + P1_SIZE) - -/* erase sizes supported by the controller */ -#define SIZE_4KB (4 * 1024) -#define MASK_4KB (SIZE_4KB - 1) - -#define SIZE_32KB (32 * 1024) -#define MASK_32KB (SIZE_32KB - 1) - -#define SIZE_64KB (64 * 1024) -#define MASK_64KB (SIZE_64KB - 1) - -/* controller page size, in bytes */ -#define EP_PAGE_SIZE 256 -#define EEP_PAGE_MASK (EP_PAGE_SIZE - 1) - -/* controller commands */ -#define CMD_SHIFT 24 -#define CMD_NOP (0) -#define CMD_PAGE_PROGRAM(addr) ((0x02 << CMD_SHIFT) | addr) -#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr) -#define CMD_READ_SR1 ((0x05 << CMD_SHIFT)) -#define CMD_WRITE_ENABLE ((0x06 << CMD_SHIFT)) -#define CMD_SECTOR_ERASE_4KB(addr) ((0x20 << CMD_SHIFT) | addr) -#define CMD_SECTOR_ERASE_32KB(addr) ((0x52 << CMD_SHIFT) | addr) -#define CMD_CHIP_ERASE ((0x60 << CMD_SHIFT)) -#define CMD_READ_MANUF_DEV_ID ((0x90 << CMD_SHIFT)) -#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT)) -#define CMD_SECTOR_ERASE_64KB(addr) ((0xd8 << CMD_SHIFT) | addr) - -/* controller interface speeds */ -#define EP_SPEED_FULL 0x2 /* full speed */ - -/* controller status register 1 bits */ -#define SR1_BUSY 0x1ull /* the BUSY bit in SR1 */ - -/* sleep length while waiting for controller */ -#define WAIT_SLEEP_US 100 /* must be larger than 5 (see usage) */ -#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US)) - -/* GPIO pins */ -#define EPROM_WP_N BIT_ULL(14) /* EPROM write line */ - -/* - * How long to wait for the EPROM to become available, in ms. - * The spec 32 Mb EPROM takes around 40s to erase then write. - * Double it for safety. - */ -#define EPROM_TIMEOUT 80000 /* ms */ - -/* - * Turn on external enable line that allows writing on the flash. - */ -static void write_enable(struct hfi1_devdata *dd) -{ - /* raise signal */ - write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N); - /* raise enable */ - write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N); -} - -/* - * Turn off external enable line that allows writing on the flash. - */ -static void write_disable(struct hfi1_devdata *dd) -{ - /* lower signal */ - write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N); - /* lower enable */ - write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N); -} - -/* - * Wait for the device to become not busy. Must be called after all - * write or erase operations. - */ -static int wait_for_not_busy(struct hfi1_devdata *dd) -{ - unsigned long count = 0; - u64 reg; - int ret = 0; - - /* starts page mode */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_SR1); - while (1) { - udelay(WAIT_SLEEP_US); - usleep_range(WAIT_SLEEP_US - 5, WAIT_SLEEP_US + 5); - count++; - reg = read_csr(dd, ASIC_EEP_DATA); - if ((reg & SR1_BUSY) == 0) - break; - /* 200s is the largest time for a 128Mb device */ - if (count > COUNT_DELAY_SEC(200)) { - dd_dev_err(dd, "waited too long for SPI FLASH busy to clear - failing\n"); - ret = -ETIMEDOUT; - break; /* break, not goto - must stop page mode */ - } - } - - /* stop page mode with a NOP */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); - - return ret; -} - -/* - * Read the device ID from the SPI controller. - */ -static u32 read_device_id(struct hfi1_devdata *dd) -{ - /* read the Manufacture Device ID */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_MANUF_DEV_ID); - return (u32)read_csr(dd, ASIC_EEP_DATA); -} - -/* - * Erase the whole flash. - */ -static int erase_chip(struct hfi1_devdata *dd) -{ - int ret; - - write_enable(dd); - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_CHIP_ERASE); - ret = wait_for_not_busy(dd); - - write_disable(dd); - - return ret; -} - -/* - * Erase a range. - */ -static int erase_range(struct hfi1_devdata *dd, u32 start, u32 len) -{ - u32 end = start + len; - int ret = 0; - - if (end < start) - return -EINVAL; - - /* check the end points for the minimum erase */ - if ((start & MASK_4KB) || (end & MASK_4KB)) { - dd_dev_err(dd, - "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n", - __func__, start, end); - return -EINVAL; - } - - write_enable(dd); - - while (start < end) { - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - /* check in order of largest to smallest */ - if (((start & MASK_64KB) == 0) && (start + SIZE_64KB <= end)) { - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_64KB(start)); - start += SIZE_64KB; - } else if (((start & MASK_32KB) == 0) && - (start + SIZE_32KB <= end)) { - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_32KB(start)); - start += SIZE_32KB; - } else { /* 4KB will work */ - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_4KB(start)); - start += SIZE_4KB; - } - ret = wait_for_not_busy(dd); - if (ret) - goto done; - } - -done: - write_disable(dd); - - return ret; -} - -/* - * Read a 256 byte (64 dword) EPROM page. - * All callers have verified the offset is at a page boundary. - */ -static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result) -{ - int i; - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset)); - for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++) - result[i] = (u32)read_csr(dd, ASIC_EEP_DATA); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */ -} - -/* - * Read length bytes starting at offset. Copy to user address addr. - */ -static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) -{ - u32 offset; - u32 buffer[EP_PAGE_SIZE / sizeof(u32)]; - int ret = 0; - - /* reject anything not on an EPROM page boundary */ - if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK)) - return -EINVAL; - - for (offset = 0; offset < len; offset += EP_PAGE_SIZE) { - read_page(dd, start + offset, buffer); - if (copy_to_user((void __user *)(addr + offset), - buffer, EP_PAGE_SIZE)) { - ret = -EFAULT; - goto done; - } - } - -done: - return ret; -} - -/* - * Write a 256 byte (64 dword) EPROM page. - * All callers have verified the offset is at a page boundary. - */ -static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data) -{ - int i; - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - write_csr(dd, ASIC_EEP_DATA, data[0]); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset)); - for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++) - write_csr(dd, ASIC_EEP_DATA, data[i]); - /* will close the open page */ - return wait_for_not_busy(dd); -} - -/* - * Write length bytes starting at offset. Read from user address addr. - */ -static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) -{ - u32 offset; - u32 buffer[EP_PAGE_SIZE / sizeof(u32)]; - int ret = 0; - - /* reject anything not on an EPROM page boundary */ - if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK)) - return -EINVAL; - - write_enable(dd); - - for (offset = 0; offset < len; offset += EP_PAGE_SIZE) { - if (copy_from_user(buffer, (void __user *)(addr + offset), - EP_PAGE_SIZE)) { - ret = -EFAULT; - goto done; - } - ret = write_page(dd, start + offset, buffer); - if (ret) - goto done; - } - -done: - write_disable(dd); - return ret; -} - -/* convert an range composite to a length, in bytes */ -static inline u32 extract_rlen(u32 composite) -{ - return (composite & 0xffff) * EP_PAGE_SIZE; -} - -/* convert an range composite to a start, in bytes */ -static inline u32 extract_rstart(u32 composite) -{ - return (composite >> 16) * EP_PAGE_SIZE; -} - -/* - * Perform the given operation on the EPROM. Called from user space. The - * user credentials have already been checked. - * - * Return 0 on success, -ERRNO on error - */ -int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd) -{ - struct hfi1_devdata *dd; - u32 dev_id; - u32 rlen; /* range length */ - u32 rstart; /* range start */ - int i_minor; - int ret = 0; - - /* - * Map the device file to device data using the relative minor. - * The device file minor number is the unit number + 1. 0 is - * the generic device file - reject it. - */ - i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - if (i_minor <= 0) - return -EINVAL; - dd = hfi1_lookup(i_minor - 1); - if (!dd) { - pr_err("%s: cannot find unit %d!\n", __func__, i_minor); - return -EINVAL; - } - - /* some devices do not have an EPROM */ - if (!dd->eprom_available) - return -EOPNOTSUPP; - - ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); - if (ret) { - dd_dev_err(dd, "%s: unable to acquire EPROM resource\n", - __func__); - goto done_asic; - } - - dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n", - __func__, cmd->type, cmd->len, cmd->addr); - - switch (cmd->type) { - case HFI1_CMD_EP_INFO: - if (cmd->len != sizeof(u32)) { - ret = -ERANGE; - break; - } - dev_id = read_device_id(dd); - /* addr points to a u32 user buffer */ - if (copy_to_user((void __user *)cmd->addr, &dev_id, - sizeof(u32))) - ret = -EFAULT; - break; - - case HFI1_CMD_EP_ERASE_CHIP: - ret = erase_chip(dd); - break; - - case HFI1_CMD_EP_ERASE_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = erase_range(dd, rstart, rlen); - break; - - case HFI1_CMD_EP_READ_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = read_length(dd, rstart, rlen, cmd->addr); - break; - - case HFI1_CMD_EP_WRITE_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = write_length(dd, rstart, rlen, cmd->addr); - break; - - default: - dd_dev_err(dd, "%s: unexpected command %d\n", - __func__, cmd->type); - ret = -EINVAL; - break; - } - - release_chip_resource(dd, CR_EPROM); -done_asic: - return ret; -} - -/* - * Initialize the EPROM handler. - */ -int eprom_init(struct hfi1_devdata *dd) -{ - int ret = 0; - - /* only the discrete chip has an EPROM */ - if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0) - return 0; - - /* - * It is OK if both HFIs reset the EPROM as long as they don't - * do it at the same time. - */ - ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); - if (ret) { - dd_dev_err(dd, - "%s: unable to acquire EPROM resource, no EPROM support\n", - __func__); - goto done_asic; - } - - /* reset EPROM to be sure it is in a good state */ - - /* set reset */ - write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK); - /* clear reset, set speed */ - write_csr(dd, ASIC_EEP_CTL_STAT, - EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT); - - /* wake the device with command "release powerdown NoID" */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID); - - dd->eprom_available = true; - release_chip_resource(dd, CR_EPROM); -done_asic: - return ret; -} diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig index 8345fb457a40..bbdbf9c4e93a 100644 --- a/drivers/target/iscsi/Kconfig +++ b/drivers/target/iscsi/Kconfig @@ -7,3 +7,5 @@ config ISCSI_TARGET help Say M here to enable the ConfigFS enabled Linux-iSCSI.org iSCSI Target Mode Stack. + +source "drivers/target/iscsi/cxgbit/Kconfig" diff --git a/drivers/target/iscsi/Makefile b/drivers/target/iscsi/Makefile index 0f43be9c3453..0f18295e05bc 100644 --- a/drivers/target/iscsi/Makefile +++ b/drivers/target/iscsi/Makefile @@ -18,3 +18,4 @@ iscsi_target_mod-y += iscsi_target_parameters.o \ iscsi_target_transport.o obj-$(CONFIG_ISCSI_TARGET) += iscsi_target_mod.o +obj-$(CONFIG_ISCSI_TARGET_CXGB4) += cxgbit/ diff --git a/drivers/target/iscsi/cxgbit/Kconfig b/drivers/target/iscsi/cxgbit/Kconfig new file mode 100644 index 000000000000..c9b6a3c758b1 --- /dev/null +++ b/drivers/target/iscsi/cxgbit/Kconfig @@ -0,0 +1,7 @@ +config ISCSI_TARGET_CXGB4 + tristate "Chelsio iSCSI target offload driver" + depends on ISCSI_TARGET && CHELSIO_T4 && INET + select CHELSIO_T4_UWIRE + ---help--- + To compile this driver as module, choose M here: the module + will be called cxgbit. diff --git a/drivers/target/iscsi/cxgbit/Makefile b/drivers/target/iscsi/cxgbit/Makefile new file mode 100644 index 000000000000..bd56c073dff6 --- /dev/null +++ b/drivers/target/iscsi/cxgbit/Makefile @@ -0,0 +1,6 @@ +ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 +ccflags-y += -Idrivers/target/iscsi + +obj-$(CONFIG_ISCSI_TARGET_CXGB4) += cxgbit.o + +cxgbit-y := cxgbit_main.o cxgbit_cm.o cxgbit_target.o cxgbit_ddp.o diff --git a/drivers/target/iscsi/cxgbit/cxgbit.h b/drivers/target/iscsi/cxgbit/cxgbit.h new file mode 100644 index 000000000000..625c7f6de6b2 --- /dev/null +++ b/drivers/target/iscsi/cxgbit/cxgbit.h @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __CXGBIT_H__ +#define __CXGBIT_H__ + +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/idr.h> +#include <linux/completion.h> +#include <linux/netdevice.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/inet.h> +#include <linux/wait.h> +#include <linux/kref.h> +#include <linux/timer.h> +#include <linux/io.h> + +#include <asm/byteorder.h> + +#include <net/net_namespace.h> + +#include <target/iscsi/iscsi_transport.h> +#include <iscsi_target_parameters.h> +#include <iscsi_target_login.h> + +#include "t4_regs.h" +#include "t4_msg.h" +#include "cxgb4.h" +#include "cxgb4_uld.h" +#include "l2t.h" +#include "cxgb4_ppm.h" +#include "cxgbit_lro.h" + +extern struct mutex cdev_list_lock; +extern struct list_head cdev_list_head; +struct cxgbit_np; + +struct cxgbit_sock; + +struct cxgbit_cmd { + struct scatterlist sg; + struct cxgbi_task_tag_info ttinfo; + bool setup_ddp; + bool release; +}; + +#define CXGBIT_MAX_ISO_PAYLOAD \ + min_t(u32, MAX_SKB_FRAGS * PAGE_SIZE, 65535) + +struct cxgbit_iso_info { + u8 flags; + u32 mpdu; + u32 len; + u32 burst_len; +}; + +enum cxgbit_skcb_flags { + SKCBF_TX_NEED_HDR = (1 << 0), /* packet needs a header */ + SKCBF_TX_FLAG_COMPL = (1 << 1), /* wr completion flag */ + SKCBF_TX_ISO = (1 << 2), /* iso cpl in tx skb */ + SKCBF_RX_LRO = (1 << 3), /* lro skb */ +}; + +struct cxgbit_skb_rx_cb { + u8 opcode; + void *pdu_cb; + void (*backlog_fn)(struct cxgbit_sock *, struct sk_buff *); +}; + +struct cxgbit_skb_tx_cb { + u8 submode; + u32 extra_len; +}; + +union cxgbit_skb_cb { + struct { + u8 flags; + union { + struct cxgbit_skb_tx_cb tx; + struct cxgbit_skb_rx_cb rx; + }; + }; + + struct { + /* This member must be first. */ + struct l2t_skb_cb l2t; + struct sk_buff *wr_next; + }; +}; + +#define CXGBIT_SKB_CB(skb) ((union cxgbit_skb_cb *)&((skb)->cb[0])) +#define cxgbit_skcb_flags(skb) (CXGBIT_SKB_CB(skb)->flags) +#define cxgbit_skcb_submode(skb) (CXGBIT_SKB_CB(skb)->tx.submode) +#define cxgbit_skcb_tx_wr_next(skb) (CXGBIT_SKB_CB(skb)->wr_next) +#define cxgbit_skcb_tx_extralen(skb) (CXGBIT_SKB_CB(skb)->tx.extra_len) +#define cxgbit_skcb_rx_opcode(skb) (CXGBIT_SKB_CB(skb)->rx.opcode) +#define cxgbit_skcb_rx_backlog_fn(skb) (CXGBIT_SKB_CB(skb)->rx.backlog_fn) +#define cxgbit_rx_pdu_cb(skb) (CXGBIT_SKB_CB(skb)->rx.pdu_cb) + +static inline void *cplhdr(struct sk_buff *skb) +{ + return skb->data; +} + +enum cxgbit_cdev_flags { + CDEV_STATE_UP = 0, + CDEV_ISO_ENABLE, + CDEV_DDP_ENABLE, +}; + +#define NP_INFO_HASH_SIZE 32 + +struct np_info { + struct np_info *next; + struct cxgbit_np *cnp; + unsigned int stid; +}; + +struct cxgbit_list_head { + struct list_head list; + /* device lock */ + spinlock_t lock; +}; + +struct cxgbit_device { + struct list_head list; + struct cxgb4_lld_info lldi; + struct np_info *np_hash_tab[NP_INFO_HASH_SIZE]; + /* np lock */ + spinlock_t np_lock; + u8 selectq[MAX_NPORTS][2]; + struct cxgbit_list_head cskq; + u32 mdsl; + struct kref kref; + unsigned long flags; +}; + +struct cxgbit_wr_wait { + struct completion completion; + int ret; +}; + +enum cxgbit_csk_state { + CSK_STATE_IDLE = 0, + CSK_STATE_LISTEN, + CSK_STATE_CONNECTING, + CSK_STATE_ESTABLISHED, + CSK_STATE_ABORTING, + CSK_STATE_CLOSING, + CSK_STATE_MORIBUND, + CSK_STATE_DEAD, +}; + +enum cxgbit_csk_flags { + CSK_TX_DATA_SENT = 0, + CSK_LOGIN_PDU_DONE, + CSK_LOGIN_DONE, + CSK_DDP_ENABLE, +}; + +struct cxgbit_sock_common { + struct cxgbit_device *cdev; + struct sockaddr_storage local_addr; + struct sockaddr_storage remote_addr; + struct cxgbit_wr_wait wr_wait; + enum cxgbit_csk_state state; + unsigned long flags; +}; + +struct cxgbit_np { + struct cxgbit_sock_common com; + wait_queue_head_t accept_wait; + struct iscsi_np *np; + struct completion accept_comp; + struct list_head np_accept_list; + /* np accept lock */ + spinlock_t np_accept_lock; + struct kref kref; + unsigned int stid; +}; + +struct cxgbit_sock { + struct cxgbit_sock_common com; + struct cxgbit_np *cnp; + struct iscsi_conn *conn; + struct l2t_entry *l2t; + struct dst_entry *dst; + struct list_head list; + struct sk_buff_head rxq; + struct sk_buff_head txq; + struct sk_buff_head ppodq; + struct sk_buff_head backlogq; + struct sk_buff_head skbq; + struct sk_buff *wr_pending_head; + struct sk_buff *wr_pending_tail; + struct sk_buff *skb; + struct sk_buff *lro_skb; + struct sk_buff *lro_hskb; + struct list_head accept_node; + /* socket lock */ + spinlock_t lock; + wait_queue_head_t waitq; + wait_queue_head_t ack_waitq; + bool lock_owner; + struct kref kref; + u32 max_iso_npdu; + u32 wr_cred; + u32 wr_una_cred; + u32 wr_max_cred; + u32 snd_una; + u32 tid; + u32 snd_nxt; + u32 rcv_nxt; + u32 smac_idx; + u32 tx_chan; + u32 mtu; + u32 write_seq; + u32 rx_credits; + u32 snd_win; + u32 rcv_win; + u16 mss; + u16 emss; + u16 plen; + u16 rss_qid; + u16 txq_idx; + u16 ctrlq_idx; + u8 tos; + u8 port_id; +#define CXGBIT_SUBMODE_HCRC 0x1 +#define CXGBIT_SUBMODE_DCRC 0x2 + u8 submode; +#ifdef CONFIG_CHELSIO_T4_DCB + u8 dcb_priority; +#endif + u8 snd_wscale; +}; + +void _cxgbit_free_cdev(struct kref *kref); +void _cxgbit_free_csk(struct kref *kref); +void _cxgbit_free_cnp(struct kref *kref); + +static inline void cxgbit_get_cdev(struct cxgbit_device *cdev) +{ + kref_get(&cdev->kref); +} + +static inline void cxgbit_put_cdev(struct cxgbit_device *cdev) +{ + kref_put(&cdev->kref, _cxgbit_free_cdev); +} + +static inline void cxgbit_get_csk(struct cxgbit_sock *csk) +{ + kref_get(&csk->kref); +} + +static inline void cxgbit_put_csk(struct cxgbit_sock *csk) +{ + kref_put(&csk->kref, _cxgbit_free_csk); +} + +static inline void cxgbit_get_cnp(struct cxgbit_np *cnp) +{ + kref_get(&cnp->kref); +} + +static inline void cxgbit_put_cnp(struct cxgbit_np *cnp) +{ + kref_put(&cnp->kref, _cxgbit_free_cnp); +} + +static inline void cxgbit_sock_reset_wr_list(struct cxgbit_sock *csk) +{ + csk->wr_pending_tail = NULL; + csk->wr_pending_head = NULL; +} + +static inline struct sk_buff *cxgbit_sock_peek_wr(const struct cxgbit_sock *csk) +{ + return csk->wr_pending_head; +} + +static inline void +cxgbit_sock_enqueue_wr(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + cxgbit_skcb_tx_wr_next(skb) = NULL; + + skb_get(skb); + + if (!csk->wr_pending_head) + csk->wr_pending_head = skb; + else + cxgbit_skcb_tx_wr_next(csk->wr_pending_tail) = skb; + csk->wr_pending_tail = skb; +} + +static inline struct sk_buff *cxgbit_sock_dequeue_wr(struct cxgbit_sock *csk) +{ + struct sk_buff *skb = csk->wr_pending_head; + + if (likely(skb)) { + csk->wr_pending_head = cxgbit_skcb_tx_wr_next(skb); + cxgbit_skcb_tx_wr_next(skb) = NULL; + } + return skb; +} + +typedef void (*cxgbit_cplhandler_func)(struct cxgbit_device *, + struct sk_buff *); + +int cxgbit_setup_np(struct iscsi_np *, struct sockaddr_storage *); +int cxgbit_setup_conn_digest(struct cxgbit_sock *); +int cxgbit_accept_np(struct iscsi_np *, struct iscsi_conn *); +void cxgbit_free_np(struct iscsi_np *); +void cxgbit_free_conn(struct iscsi_conn *); +extern cxgbit_cplhandler_func cxgbit_cplhandlers[NUM_CPL_CMDS]; +int cxgbit_get_login_rx(struct iscsi_conn *, struct iscsi_login *); +int cxgbit_rx_data_ack(struct cxgbit_sock *); +int cxgbit_l2t_send(struct cxgbit_device *, struct sk_buff *, + struct l2t_entry *); +void cxgbit_push_tx_frames(struct cxgbit_sock *); +int cxgbit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32); +int cxgbit_xmit_pdu(struct iscsi_conn *, struct iscsi_cmd *, + struct iscsi_datain_req *, const void *, u32); +void cxgbit_get_r2t_ttt(struct iscsi_conn *, struct iscsi_cmd *, + struct iscsi_r2t *); +u32 cxgbit_send_tx_flowc_wr(struct cxgbit_sock *); +int cxgbit_ofld_send(struct cxgbit_device *, struct sk_buff *); +void cxgbit_get_rx_pdu(struct iscsi_conn *); +int cxgbit_validate_params(struct iscsi_conn *); +struct cxgbit_device *cxgbit_find_device(struct net_device *, u8 *); + +/* DDP */ +int cxgbit_ddp_init(struct cxgbit_device *); +int cxgbit_setup_conn_pgidx(struct cxgbit_sock *, u32); +int cxgbit_reserve_ttt(struct cxgbit_sock *, struct iscsi_cmd *); +void cxgbit_release_cmd(struct iscsi_conn *, struct iscsi_cmd *); + +static inline +struct cxgbi_ppm *cdev2ppm(struct cxgbit_device *cdev) +{ + return (struct cxgbi_ppm *)(*cdev->lldi.iscsi_ppm); +} +#endif /* __CXGBIT_H__ */ diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c new file mode 100644 index 000000000000..0ae0b131abfc --- /dev/null +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -0,0 +1,2086 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/skbuff.h> +#include <linux/timer.h> +#include <linux/notifier.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/if_vlan.h> + +#include <net/neighbour.h> +#include <net/netevent.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> + +#include "cxgbit.h" +#include "clip_tbl.h" + +static void cxgbit_init_wr_wait(struct cxgbit_wr_wait *wr_waitp) +{ + wr_waitp->ret = 0; + reinit_completion(&wr_waitp->completion); +} + +static void +cxgbit_wake_up(struct cxgbit_wr_wait *wr_waitp, const char *func, u8 ret) +{ + if (ret == CPL_ERR_NONE) + wr_waitp->ret = 0; + else + wr_waitp->ret = -EIO; + + if (wr_waitp->ret) + pr_err("%s: err:%u", func, ret); + + complete(&wr_waitp->completion); +} + +static int +cxgbit_wait_for_reply(struct cxgbit_device *cdev, + struct cxgbit_wr_wait *wr_waitp, u32 tid, u32 timeout, + const char *func) +{ + int ret; + + if (!test_bit(CDEV_STATE_UP, &cdev->flags)) { + wr_waitp->ret = -EIO; + goto out; + } + + ret = wait_for_completion_timeout(&wr_waitp->completion, timeout * HZ); + if (!ret) { + pr_info("%s - Device %s not responding tid %u\n", + func, pci_name(cdev->lldi.pdev), tid); + wr_waitp->ret = -ETIMEDOUT; + } +out: + if (wr_waitp->ret) + pr_info("%s: FW reply %d tid %u\n", + pci_name(cdev->lldi.pdev), wr_waitp->ret, tid); + return wr_waitp->ret; +} + +/* Returns whether a CPL status conveys negative advice. + */ +static int cxgbit_is_neg_adv(unsigned int status) +{ + return status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE || + status == CPL_ERR_KEEPALV_NEG_ADVICE; +} + +static int cxgbit_np_hashfn(const struct cxgbit_np *cnp) +{ + return ((unsigned long)cnp >> 10) & (NP_INFO_HASH_SIZE - 1); +} + +static struct np_info * +cxgbit_np_hash_add(struct cxgbit_device *cdev, struct cxgbit_np *cnp, + unsigned int stid) +{ + struct np_info *p = kzalloc(sizeof(*p), GFP_KERNEL); + + if (p) { + int bucket = cxgbit_np_hashfn(cnp); + + p->cnp = cnp; + p->stid = stid; + spin_lock(&cdev->np_lock); + p->next = cdev->np_hash_tab[bucket]; + cdev->np_hash_tab[bucket] = p; + spin_unlock(&cdev->np_lock); + } + + return p; +} + +static int +cxgbit_np_hash_find(struct cxgbit_device *cdev, struct cxgbit_np *cnp) +{ + int stid = -1, bucket = cxgbit_np_hashfn(cnp); + struct np_info *p; + + spin_lock(&cdev->np_lock); + for (p = cdev->np_hash_tab[bucket]; p; p = p->next) { + if (p->cnp == cnp) { + stid = p->stid; + break; + } + } + spin_unlock(&cdev->np_lock); + + return stid; +} + +static int cxgbit_np_hash_del(struct cxgbit_device *cdev, struct cxgbit_np *cnp) +{ + int stid = -1, bucket = cxgbit_np_hashfn(cnp); + struct np_info *p, **prev = &cdev->np_hash_tab[bucket]; + + spin_lock(&cdev->np_lock); + for (p = *prev; p; prev = &p->next, p = p->next) { + if (p->cnp == cnp) { + stid = p->stid; + *prev = p->next; + kfree(p); + break; + } + } + spin_unlock(&cdev->np_lock); + + return stid; +} + +void _cxgbit_free_cnp(struct kref *kref) +{ + struct cxgbit_np *cnp; + + cnp = container_of(kref, struct cxgbit_np, kref); + kfree(cnp); +} + +static int +cxgbit_create_server6(struct cxgbit_device *cdev, unsigned int stid, + struct cxgbit_np *cnp) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) + &cnp->com.local_addr; + int addr_type; + int ret; + + pr_debug("%s: dev = %s; stid = %u; sin6_port = %u\n", + __func__, cdev->lldi.ports[0]->name, stid, sin6->sin6_port); + + addr_type = ipv6_addr_type((const struct in6_addr *) + &sin6->sin6_addr); + if (addr_type != IPV6_ADDR_ANY) { + ret = cxgb4_clip_get(cdev->lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + if (ret) { + pr_err("Unable to find clip table entry. laddr %pI6. Error:%d.\n", + sin6->sin6_addr.s6_addr, ret); + return -ENOMEM; + } + } + + cxgbit_get_cnp(cnp); + cxgbit_init_wr_wait(&cnp->com.wr_wait); + + ret = cxgb4_create_server6(cdev->lldi.ports[0], + stid, &sin6->sin6_addr, + sin6->sin6_port, + cdev->lldi.rxq_ids[0]); + if (!ret) + ret = cxgbit_wait_for_reply(cdev, &cnp->com.wr_wait, + 0, 10, __func__); + else if (ret > 0) + ret = net_xmit_errno(ret); + else + cxgbit_put_cnp(cnp); + + if (ret) { + if (ret != -ETIMEDOUT) + cxgb4_clip_release(cdev->lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, 1); + + pr_err("create server6 err %d stid %d laddr %pI6 lport %d\n", + ret, stid, sin6->sin6_addr.s6_addr, + ntohs(sin6->sin6_port)); + } + + return ret; +} + +static int +cxgbit_create_server4(struct cxgbit_device *cdev, unsigned int stid, + struct cxgbit_np *cnp) +{ + struct sockaddr_in *sin = (struct sockaddr_in *) + &cnp->com.local_addr; + int ret; + + pr_debug("%s: dev = %s; stid = %u; sin_port = %u\n", + __func__, cdev->lldi.ports[0]->name, stid, sin->sin_port); + + cxgbit_get_cnp(cnp); + cxgbit_init_wr_wait(&cnp->com.wr_wait); + + ret = cxgb4_create_server(cdev->lldi.ports[0], + stid, sin->sin_addr.s_addr, + sin->sin_port, 0, + cdev->lldi.rxq_ids[0]); + if (!ret) + ret = cxgbit_wait_for_reply(cdev, + &cnp->com.wr_wait, + 0, 10, __func__); + else if (ret > 0) + ret = net_xmit_errno(ret); + else + cxgbit_put_cnp(cnp); + + if (ret) + pr_err("create server failed err %d stid %d laddr %pI4 lport %d\n", + ret, stid, &sin->sin_addr, ntohs(sin->sin_port)); + return ret; +} + +struct cxgbit_device *cxgbit_find_device(struct net_device *ndev, u8 *port_id) +{ + struct cxgbit_device *cdev; + u8 i; + + list_for_each_entry(cdev, &cdev_list_head, list) { + struct cxgb4_lld_info *lldi = &cdev->lldi; + + for (i = 0; i < lldi->nports; i++) { + if (lldi->ports[i] == ndev) { + if (port_id) + *port_id = i; + return cdev; + } + } + } + + return NULL; +} + +static struct net_device *cxgbit_get_real_dev(struct net_device *ndev) +{ + if (ndev->priv_flags & IFF_BONDING) { + pr_err("Bond devices are not supported. Interface:%s\n", + ndev->name); + return NULL; + } + + if (is_vlan_dev(ndev)) + return vlan_dev_real_dev(ndev); + + return ndev; +} + +static struct net_device *cxgbit_ipv4_netdev(__be32 saddr) +{ + struct net_device *ndev; + + ndev = __ip_dev_find(&init_net, saddr, false); + if (!ndev) + return NULL; + + return cxgbit_get_real_dev(ndev); +} + +static struct net_device *cxgbit_ipv6_netdev(struct in6_addr *addr6) +{ + struct net_device *ndev = NULL; + bool found = false; + + if (IS_ENABLED(CONFIG_IPV6)) { + for_each_netdev_rcu(&init_net, ndev) + if (ipv6_chk_addr(&init_net, addr6, ndev, 1)) { + found = true; + break; + } + } + if (!found) + return NULL; + return cxgbit_get_real_dev(ndev); +} + +static struct cxgbit_device *cxgbit_find_np_cdev(struct cxgbit_np *cnp) +{ + struct sockaddr_storage *sockaddr = &cnp->com.local_addr; + int ss_family = sockaddr->ss_family; + struct net_device *ndev = NULL; + struct cxgbit_device *cdev = NULL; + + rcu_read_lock(); + if (ss_family == AF_INET) { + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)sockaddr; + ndev = cxgbit_ipv4_netdev(sin->sin_addr.s_addr); + } else if (ss_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)sockaddr; + ndev = cxgbit_ipv6_netdev(&sin6->sin6_addr); + } + if (!ndev) + goto out; + + cdev = cxgbit_find_device(ndev, NULL); +out: + rcu_read_unlock(); + return cdev; +} + +static bool cxgbit_inaddr_any(struct cxgbit_np *cnp) +{ + struct sockaddr_storage *sockaddr = &cnp->com.local_addr; + int ss_family = sockaddr->ss_family; + int addr_type; + + if (ss_family == AF_INET) { + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)sockaddr; + if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) + return true; + } else if (ss_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)sockaddr; + addr_type = ipv6_addr_type((const struct in6_addr *) + &sin6->sin6_addr); + if (addr_type == IPV6_ADDR_ANY) + return true; + } + return false; +} + +static int +__cxgbit_setup_cdev_np(struct cxgbit_device *cdev, struct cxgbit_np *cnp) +{ + int stid, ret; + int ss_family = cnp->com.local_addr.ss_family; + + if (!test_bit(CDEV_STATE_UP, &cdev->flags)) + return -EINVAL; + + stid = cxgb4_alloc_stid(cdev->lldi.tids, ss_family, cnp); + if (stid < 0) + return -EINVAL; + + if (!cxgbit_np_hash_add(cdev, cnp, stid)) { + cxgb4_free_stid(cdev->lldi.tids, stid, ss_family); + return -EINVAL; + } + + if (ss_family == AF_INET) + ret = cxgbit_create_server4(cdev, stid, cnp); + else + ret = cxgbit_create_server6(cdev, stid, cnp); + + if (ret) { + if (ret != -ETIMEDOUT) + cxgb4_free_stid(cdev->lldi.tids, stid, + ss_family); + cxgbit_np_hash_del(cdev, cnp); + return ret; + } + return ret; +} + +static int cxgbit_setup_cdev_np(struct cxgbit_np *cnp) +{ + struct cxgbit_device *cdev; + int ret = -1; + + mutex_lock(&cdev_list_lock); + cdev = cxgbit_find_np_cdev(cnp); + if (!cdev) + goto out; + + if (cxgbit_np_hash_find(cdev, cnp) >= 0) + goto out; + + if (__cxgbit_setup_cdev_np(cdev, cnp)) + goto out; + + cnp->com.cdev = cdev; + ret = 0; +out: + mutex_unlock(&cdev_list_lock); + return ret; +} + +static int cxgbit_setup_all_np(struct cxgbit_np *cnp) +{ + struct cxgbit_device *cdev; + int ret; + u32 count = 0; + + mutex_lock(&cdev_list_lock); + list_for_each_entry(cdev, &cdev_list_head, list) { + if (cxgbit_np_hash_find(cdev, cnp) >= 0) { + mutex_unlock(&cdev_list_lock); + return -1; + } + } + + list_for_each_entry(cdev, &cdev_list_head, list) { + ret = __cxgbit_setup_cdev_np(cdev, cnp); + if (ret == -ETIMEDOUT) + break; + if (ret != 0) + continue; + count++; + } + mutex_unlock(&cdev_list_lock); + + return count ? 0 : -1; +} + +int cxgbit_setup_np(struct iscsi_np *np, struct sockaddr_storage *ksockaddr) +{ + struct cxgbit_np *cnp; + int ret; + + if ((ksockaddr->ss_family != AF_INET) && + (ksockaddr->ss_family != AF_INET6)) + return -EINVAL; + + cnp = kzalloc(sizeof(*cnp), GFP_KERNEL); + if (!cnp) + return -ENOMEM; + + init_waitqueue_head(&cnp->accept_wait); + init_completion(&cnp->com.wr_wait.completion); + init_completion(&cnp->accept_comp); + INIT_LIST_HEAD(&cnp->np_accept_list); + spin_lock_init(&cnp->np_accept_lock); + kref_init(&cnp->kref); + memcpy(&np->np_sockaddr, ksockaddr, + sizeof(struct sockaddr_storage)); + memcpy(&cnp->com.local_addr, &np->np_sockaddr, + sizeof(cnp->com.local_addr)); + + cnp->np = np; + cnp->com.cdev = NULL; + + if (cxgbit_inaddr_any(cnp)) + ret = cxgbit_setup_all_np(cnp); + else + ret = cxgbit_setup_cdev_np(cnp); + + if (ret) { + cxgbit_put_cnp(cnp); + return -EINVAL; + } + + np->np_context = cnp; + cnp->com.state = CSK_STATE_LISTEN; + return 0; +} + +static void +cxgbit_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn, + struct cxgbit_sock *csk) +{ + conn->login_family = np->np_sockaddr.ss_family; + conn->login_sockaddr = csk->com.remote_addr; + conn->local_sockaddr = csk->com.local_addr; +} + +int cxgbit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) +{ + struct cxgbit_np *cnp = np->np_context; + struct cxgbit_sock *csk; + int ret = 0; + +accept_wait: + ret = wait_for_completion_interruptible(&cnp->accept_comp); + if (ret) + return -ENODEV; + + spin_lock_bh(&np->np_thread_lock); + if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) { + spin_unlock_bh(&np->np_thread_lock); + /** + * No point in stalling here when np_thread + * is in state RESET/SHUTDOWN/EXIT - bail + **/ + return -ENODEV; + } + spin_unlock_bh(&np->np_thread_lock); + + spin_lock_bh(&cnp->np_accept_lock); + if (list_empty(&cnp->np_accept_list)) { + spin_unlock_bh(&cnp->np_accept_lock); + goto accept_wait; + } + + csk = list_first_entry(&cnp->np_accept_list, + struct cxgbit_sock, + accept_node); + + list_del_init(&csk->accept_node); + spin_unlock_bh(&cnp->np_accept_lock); + conn->context = csk; + csk->conn = conn; + + cxgbit_set_conn_info(np, conn, csk); + return 0; +} + +static int +__cxgbit_free_cdev_np(struct cxgbit_device *cdev, struct cxgbit_np *cnp) +{ + int stid, ret; + bool ipv6 = false; + + stid = cxgbit_np_hash_del(cdev, cnp); + if (stid < 0) + return -EINVAL; + if (!test_bit(CDEV_STATE_UP, &cdev->flags)) + return -EINVAL; + + if (cnp->np->np_sockaddr.ss_family == AF_INET6) + ipv6 = true; + + cxgbit_get_cnp(cnp); + cxgbit_init_wr_wait(&cnp->com.wr_wait); + ret = cxgb4_remove_server(cdev->lldi.ports[0], stid, + cdev->lldi.rxq_ids[0], ipv6); + + if (ret > 0) + ret = net_xmit_errno(ret); + + if (ret) { + cxgbit_put_cnp(cnp); + return ret; + } + + ret = cxgbit_wait_for_reply(cdev, &cnp->com.wr_wait, + 0, 10, __func__); + if (ret == -ETIMEDOUT) + return ret; + + if (ipv6 && cnp->com.cdev) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)&cnp->com.local_addr; + cxgb4_clip_release(cdev->lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, + 1); + } + + cxgb4_free_stid(cdev->lldi.tids, stid, + cnp->com.local_addr.ss_family); + return 0; +} + +static void cxgbit_free_all_np(struct cxgbit_np *cnp) +{ + struct cxgbit_device *cdev; + int ret; + + mutex_lock(&cdev_list_lock); + list_for_each_entry(cdev, &cdev_list_head, list) { + ret = __cxgbit_free_cdev_np(cdev, cnp); + if (ret == -ETIMEDOUT) + break; + } + mutex_unlock(&cdev_list_lock); +} + +static void cxgbit_free_cdev_np(struct cxgbit_np *cnp) +{ + struct cxgbit_device *cdev; + bool found = false; + + mutex_lock(&cdev_list_lock); + list_for_each_entry(cdev, &cdev_list_head, list) { + if (cdev == cnp->com.cdev) { + found = true; + break; + } + } + if (!found) + goto out; + + __cxgbit_free_cdev_np(cdev, cnp); +out: + mutex_unlock(&cdev_list_lock); +} + +void cxgbit_free_np(struct iscsi_np *np) +{ + struct cxgbit_np *cnp = np->np_context; + + cnp->com.state = CSK_STATE_DEAD; + if (cnp->com.cdev) + cxgbit_free_cdev_np(cnp); + else + cxgbit_free_all_np(cnp); + + np->np_context = NULL; + cxgbit_put_cnp(cnp); +} + +static void cxgbit_send_halfclose(struct cxgbit_sock *csk) +{ + struct sk_buff *skb; + struct cpl_close_con_req *req; + unsigned int len = roundup(sizeof(struct cpl_close_con_req), 16); + + skb = alloc_skb(len, GFP_ATOMIC); + if (!skb) + return; + + req = (struct cpl_close_con_req *)__skb_put(skb, len); + memset(req, 0, len); + + set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); + INIT_TP_WR(req, csk->tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, + csk->tid)); + req->rsvd = 0; + + cxgbit_skcb_flags(skb) |= SKCBF_TX_FLAG_COMPL; + __skb_queue_tail(&csk->txq, skb); + cxgbit_push_tx_frames(csk); +} + +static void cxgbit_arp_failure_discard(void *handle, struct sk_buff *skb) +{ + pr_debug("%s cxgbit_device %p\n", __func__, handle); + kfree_skb(skb); +} + +static void cxgbit_abort_arp_failure(void *handle, struct sk_buff *skb) +{ + struct cxgbit_device *cdev = handle; + struct cpl_abort_req *req = cplhdr(skb); + + pr_debug("%s cdev %p\n", __func__, cdev); + req->cmd = CPL_ABORT_NO_RST; + cxgbit_ofld_send(cdev, skb); +} + +static int cxgbit_send_abort_req(struct cxgbit_sock *csk) +{ + struct cpl_abort_req *req; + unsigned int len = roundup(sizeof(*req), 16); + struct sk_buff *skb; + + pr_debug("%s: csk %p tid %u; state %d\n", + __func__, csk, csk->tid, csk->com.state); + + __skb_queue_purge(&csk->txq); + + if (!test_and_set_bit(CSK_TX_DATA_SENT, &csk->com.flags)) + cxgbit_send_tx_flowc_wr(csk); + + skb = __skb_dequeue(&csk->skbq); + req = (struct cpl_abort_req *)__skb_put(skb, len); + memset(req, 0, len); + + set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); + t4_set_arp_err_handler(skb, csk->com.cdev, cxgbit_abort_arp_failure); + INIT_TP_WR(req, csk->tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, + csk->tid)); + req->cmd = CPL_ABORT_SEND_RST; + return cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t); +} + +void cxgbit_free_conn(struct iscsi_conn *conn) +{ + struct cxgbit_sock *csk = conn->context; + bool release = false; + + pr_debug("%s: state %d\n", + __func__, csk->com.state); + + spin_lock_bh(&csk->lock); + switch (csk->com.state) { + case CSK_STATE_ESTABLISHED: + if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) { + csk->com.state = CSK_STATE_CLOSING; + cxgbit_send_halfclose(csk); + } else { + csk->com.state = CSK_STATE_ABORTING; + cxgbit_send_abort_req(csk); + } + break; + case CSK_STATE_CLOSING: + csk->com.state = CSK_STATE_MORIBUND; + cxgbit_send_halfclose(csk); + break; + case CSK_STATE_DEAD: + release = true; + break; + default: + pr_err("%s: csk %p; state %d\n", + __func__, csk, csk->com.state); + } + spin_unlock_bh(&csk->lock); + + if (release) + cxgbit_put_csk(csk); +} + +static void cxgbit_set_emss(struct cxgbit_sock *csk, u16 opt) +{ + csk->emss = csk->com.cdev->lldi.mtus[TCPOPT_MSS_G(opt)] - + ((csk->com.remote_addr.ss_family == AF_INET) ? + sizeof(struct iphdr) : sizeof(struct ipv6hdr)) - + sizeof(struct tcphdr); + csk->mss = csk->emss; + if (TCPOPT_TSTAMP_G(opt)) + csk->emss -= round_up(TCPOLEN_TIMESTAMP, 4); + if (csk->emss < 128) + csk->emss = 128; + if (csk->emss & 7) + pr_info("Warning: misaligned mtu idx %u mss %u emss=%u\n", + TCPOPT_MSS_G(opt), csk->mss, csk->emss); + pr_debug("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt), + csk->mss, csk->emss); +} + +static void cxgbit_free_skb(struct cxgbit_sock *csk) +{ + struct sk_buff *skb; + + __skb_queue_purge(&csk->txq); + __skb_queue_purge(&csk->rxq); + __skb_queue_purge(&csk->backlogq); + __skb_queue_purge(&csk->ppodq); + __skb_queue_purge(&csk->skbq); + + while ((skb = cxgbit_sock_dequeue_wr(csk))) + kfree_skb(skb); + + __kfree_skb(csk->lro_hskb); +} + +void _cxgbit_free_csk(struct kref *kref) +{ + struct cxgbit_sock *csk; + struct cxgbit_device *cdev; + + csk = container_of(kref, struct cxgbit_sock, kref); + + pr_debug("%s csk %p state %d\n", __func__, csk, csk->com.state); + + if (csk->com.local_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) + &csk->com.local_addr; + cxgb4_clip_release(csk->com.cdev->lldi.ports[0], + (const u32 *) + &sin6->sin6_addr.s6_addr, 1); + } + + cxgb4_remove_tid(csk->com.cdev->lldi.tids, 0, csk->tid); + dst_release(csk->dst); + cxgb4_l2t_release(csk->l2t); + + cdev = csk->com.cdev; + spin_lock_bh(&cdev->cskq.lock); + list_del(&csk->list); + spin_unlock_bh(&cdev->cskq.lock); + + cxgbit_free_skb(csk); + cxgbit_put_cdev(cdev); + + kfree(csk); +} + +static void +cxgbit_get_tuple_info(struct cpl_pass_accept_req *req, int *iptype, + __u8 *local_ip, __u8 *peer_ip, __be16 *local_port, + __be16 *peer_port) +{ + u32 eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + u32 ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); + struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); + struct tcphdr *tcp = (struct tcphdr *) + ((u8 *)(req + 1) + eth_len + ip_len); + + if (ip->version == 4) { + pr_debug("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", + __func__, + ntohl(ip->saddr), ntohl(ip->daddr), + ntohs(tcp->source), + ntohs(tcp->dest)); + *iptype = 4; + memcpy(peer_ip, &ip->saddr, 4); + memcpy(local_ip, &ip->daddr, 4); + } else { + pr_debug("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", + __func__, + ip6->saddr.s6_addr, ip6->daddr.s6_addr, + ntohs(tcp->source), + ntohs(tcp->dest)); + *iptype = 6; + memcpy(peer_ip, ip6->saddr.s6_addr, 16); + memcpy(local_ip, ip6->daddr.s6_addr, 16); + } + + *peer_port = tcp->source; + *local_port = tcp->dest; +} + +static int +cxgbit_our_interface(struct cxgbit_device *cdev, struct net_device *egress_dev) +{ + u8 i; + + egress_dev = cxgbit_get_real_dev(egress_dev); + for (i = 0; i < cdev->lldi.nports; i++) + if (cdev->lldi.ports[i] == egress_dev) + return 1; + return 0; +} + +static struct dst_entry * +cxgbit_find_route6(struct cxgbit_device *cdev, __u8 *local_ip, __u8 *peer_ip, + __be16 local_port, __be16 peer_port, u8 tos, + __u32 sin6_scope_id) +{ + struct dst_entry *dst = NULL; + + if (IS_ENABLED(CONFIG_IPV6)) { + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + memcpy(&fl6.daddr, peer_ip, 16); + memcpy(&fl6.saddr, local_ip, 16); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = sin6_scope_id; + dst = ip6_route_output(&init_net, NULL, &fl6); + if (!dst) + goto out; + if (!cxgbit_our_interface(cdev, ip6_dst_idev(dst)->dev) && + !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { + dst_release(dst); + dst = NULL; + } + } +out: + return dst; +} + +static struct dst_entry * +cxgbit_find_route(struct cxgbit_device *cdev, __be32 local_ip, __be32 peer_ip, + __be16 local_port, __be16 peer_port, u8 tos) +{ + struct rtable *rt; + struct flowi4 fl4; + struct neighbour *n; + + rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, + local_ip, + peer_port, local_port, IPPROTO_TCP, + tos, 0); + if (IS_ERR(rt)) + return NULL; + n = dst_neigh_lookup(&rt->dst, &peer_ip); + if (!n) + return NULL; + if (!cxgbit_our_interface(cdev, n->dev) && + !(n->dev->flags & IFF_LOOPBACK)) { + neigh_release(n); + dst_release(&rt->dst); + return NULL; + } + neigh_release(n); + return &rt->dst; +} + +static void cxgbit_set_tcp_window(struct cxgbit_sock *csk, struct port_info *pi) +{ + unsigned int linkspeed; + u8 scale; + + linkspeed = pi->link_cfg.speed; + scale = linkspeed / SPEED_10000; + +#define CXGBIT_10G_RCV_WIN (256 * 1024) + csk->rcv_win = CXGBIT_10G_RCV_WIN; + if (scale) + csk->rcv_win *= scale; + +#define CXGBIT_10G_SND_WIN (256 * 1024) + csk->snd_win = CXGBIT_10G_SND_WIN; + if (scale) + csk->snd_win *= scale; + + pr_debug("%s snd_win %d rcv_win %d\n", + __func__, csk->snd_win, csk->rcv_win); +} + +#ifdef CONFIG_CHELSIO_T4_DCB +static u8 cxgbit_get_iscsi_dcb_state(struct net_device *ndev) +{ + return ndev->dcbnl_ops->getstate(ndev); +} + +static int cxgbit_select_priority(int pri_mask) +{ + if (!pri_mask) + return 0; + + return (ffs(pri_mask) - 1); +} + +static u8 cxgbit_get_iscsi_dcb_priority(struct net_device *ndev, u16 local_port) +{ + int ret; + u8 caps; + + struct dcb_app iscsi_dcb_app = { + .protocol = local_port + }; + + ret = (int)ndev->dcbnl_ops->getcap(ndev, DCB_CAP_ATTR_DCBX, &caps); + + if (ret) + return 0; + + if (caps & DCB_CAP_DCBX_VER_IEEE) { + iscsi_dcb_app.selector = IEEE_8021QAZ_APP_SEL_ANY; + + ret = dcb_ieee_getapp_mask(ndev, &iscsi_dcb_app); + + } else if (caps & DCB_CAP_DCBX_VER_CEE) { + iscsi_dcb_app.selector = DCB_APP_IDTYPE_PORTNUM; + + ret = dcb_getapp(ndev, &iscsi_dcb_app); + } + + pr_info("iSCSI priority is set to %u\n", cxgbit_select_priority(ret)); + + return cxgbit_select_priority(ret); +} +#endif + +static int +cxgbit_offload_init(struct cxgbit_sock *csk, int iptype, __u8 *peer_ip, + u16 local_port, struct dst_entry *dst, + struct cxgbit_device *cdev) +{ + struct neighbour *n; + int ret, step; + struct net_device *ndev; + u16 rxq_idx, port_id; +#ifdef CONFIG_CHELSIO_T4_DCB + u8 priority = 0; +#endif + + n = dst_neigh_lookup(dst, peer_ip); + if (!n) + return -ENODEV; + + rcu_read_lock(); + ret = -ENOMEM; + if (n->dev->flags & IFF_LOOPBACK) { + if (iptype == 4) + ndev = cxgbit_ipv4_netdev(*(__be32 *)peer_ip); + else if (IS_ENABLED(CONFIG_IPV6)) + ndev = cxgbit_ipv6_netdev((struct in6_addr *)peer_ip); + else + ndev = NULL; + + if (!ndev) { + ret = -ENODEV; + goto out; + } + + csk->l2t = cxgb4_l2t_get(cdev->lldi.l2t, + n, ndev, 0); + if (!csk->l2t) + goto out; + csk->mtu = ndev->mtu; + csk->tx_chan = cxgb4_port_chan(ndev); + csk->smac_idx = (cxgb4_port_viid(ndev) & 0x7F) << 1; + step = cdev->lldi.ntxq / + cdev->lldi.nchan; + csk->txq_idx = cxgb4_port_idx(ndev) * step; + step = cdev->lldi.nrxq / + cdev->lldi.nchan; + csk->ctrlq_idx = cxgb4_port_idx(ndev); + csk->rss_qid = cdev->lldi.rxq_ids[ + cxgb4_port_idx(ndev) * step]; + csk->port_id = cxgb4_port_idx(ndev); + cxgbit_set_tcp_window(csk, + (struct port_info *)netdev_priv(ndev)); + } else { + ndev = cxgbit_get_real_dev(n->dev); + if (!ndev) { + ret = -ENODEV; + goto out; + } + +#ifdef CONFIG_CHELSIO_T4_DCB + if (cxgbit_get_iscsi_dcb_state(ndev)) + priority = cxgbit_get_iscsi_dcb_priority(ndev, + local_port); + + csk->dcb_priority = priority; + + csk->l2t = cxgb4_l2t_get(cdev->lldi.l2t, n, ndev, priority); +#else + csk->l2t = cxgb4_l2t_get(cdev->lldi.l2t, n, ndev, 0); +#endif + if (!csk->l2t) + goto out; + port_id = cxgb4_port_idx(ndev); + csk->mtu = dst_mtu(dst); + csk->tx_chan = cxgb4_port_chan(ndev); + csk->smac_idx = (cxgb4_port_viid(ndev) & 0x7F) << 1; + step = cdev->lldi.ntxq / + cdev->lldi.nports; + csk->txq_idx = (port_id * step) + + (cdev->selectq[port_id][0]++ % step); + csk->ctrlq_idx = cxgb4_port_idx(ndev); + step = cdev->lldi.nrxq / + cdev->lldi.nports; + rxq_idx = (port_id * step) + + (cdev->selectq[port_id][1]++ % step); + csk->rss_qid = cdev->lldi.rxq_ids[rxq_idx]; + csk->port_id = port_id; + cxgbit_set_tcp_window(csk, + (struct port_info *)netdev_priv(ndev)); + } + ret = 0; +out: + rcu_read_unlock(); + neigh_release(n); + return ret; +} + +int cxgbit_ofld_send(struct cxgbit_device *cdev, struct sk_buff *skb) +{ + int ret = 0; + + if (!test_bit(CDEV_STATE_UP, &cdev->flags)) { + kfree_skb(skb); + pr_err("%s - device not up - dropping\n", __func__); + return -EIO; + } + + ret = cxgb4_ofld_send(cdev->lldi.ports[0], skb); + if (ret < 0) + kfree_skb(skb); + return ret < 0 ? ret : 0; +} + +static void cxgbit_release_tid(struct cxgbit_device *cdev, u32 tid) +{ + struct cpl_tid_release *req; + unsigned int len = roundup(sizeof(*req), 16); + struct sk_buff *skb; + + skb = alloc_skb(len, GFP_ATOMIC); + if (!skb) + return; + + req = (struct cpl_tid_release *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID( + CPL_TID_RELEASE, tid)); + set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + cxgbit_ofld_send(cdev, skb); +} + +int +cxgbit_l2t_send(struct cxgbit_device *cdev, struct sk_buff *skb, + struct l2t_entry *l2e) +{ + int ret = 0; + + if (!test_bit(CDEV_STATE_UP, &cdev->flags)) { + kfree_skb(skb); + pr_err("%s - device not up - dropping\n", __func__); + return -EIO; + } + + ret = cxgb4_l2t_send(cdev->lldi.ports[0], skb, l2e); + if (ret < 0) + kfree_skb(skb); + return ret < 0 ? ret : 0; +} + +static void +cxgbit_best_mtu(const unsigned short *mtus, unsigned short mtu, + unsigned int *idx, int use_ts, int ipv6) +{ + unsigned short hdr_size = (ipv6 ? sizeof(struct ipv6hdr) : + sizeof(struct iphdr)) + + sizeof(struct tcphdr) + + (use_ts ? round_up(TCPOLEN_TIMESTAMP, + 4) : 0); + unsigned short data_size = mtu - hdr_size; + + cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); +} + +static void cxgbit_send_rx_credits(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + if (csk->com.state != CSK_STATE_ESTABLISHED) { + __kfree_skb(skb); + return; + } + + cxgbit_ofld_send(csk->com.cdev, skb); +} + +/* + * CPL connection rx data ack: host -> + * Send RX credits through an RX_DATA_ACK CPL message. + * Returns the number of credits sent. + */ +int cxgbit_rx_data_ack(struct cxgbit_sock *csk) +{ + struct sk_buff *skb; + struct cpl_rx_data_ack *req; + unsigned int len = roundup(sizeof(*req), 16); + + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + return -1; + + req = (struct cpl_rx_data_ack *)__skb_put(skb, len); + memset(req, 0, len); + + set_wr_txq(skb, CPL_PRIORITY_ACK, csk->ctrlq_idx); + INIT_TP_WR(req, csk->tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, + csk->tid)); + req->credit_dack = cpu_to_be32(RX_DACK_CHANGE_F | RX_DACK_MODE_V(1) | + RX_CREDITS_V(csk->rx_credits)); + + csk->rx_credits = 0; + + spin_lock_bh(&csk->lock); + if (csk->lock_owner) { + cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_send_rx_credits; + __skb_queue_tail(&csk->backlogq, skb); + spin_unlock_bh(&csk->lock); + return 0; + } + + cxgbit_send_rx_credits(csk, skb); + spin_unlock_bh(&csk->lock); + + return 0; +} + +#define FLOWC_WR_NPARAMS_MIN 9 +#define FLOWC_WR_NPARAMS_MAX 11 +static int cxgbit_alloc_csk_skb(struct cxgbit_sock *csk) +{ + struct sk_buff *skb; + u32 len, flowclen; + u8 i; + + flowclen = offsetof(struct fw_flowc_wr, + mnemval[FLOWC_WR_NPARAMS_MAX]); + + len = max_t(u32, sizeof(struct cpl_abort_req), + sizeof(struct cpl_abort_rpl)); + + len = max(len, flowclen); + len = roundup(len, 16); + + for (i = 0; i < 3; i++) { + skb = alloc_skb(len, GFP_ATOMIC); + if (!skb) + goto out; + __skb_queue_tail(&csk->skbq, skb); + } + + skb = alloc_skb(LRO_SKB_MIN_HEADROOM, GFP_ATOMIC); + if (!skb) + goto out; + + memset(skb->data, 0, LRO_SKB_MIN_HEADROOM); + csk->lro_hskb = skb; + + return 0; +out: + __skb_queue_purge(&csk->skbq); + return -ENOMEM; +} + +static u32 cxgbit_compute_wscale(u32 win) +{ + u32 wscale = 0; + + while (wscale < 14 && (65535 << wscale) < win) + wscale++; + return wscale; +} + +static void +cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) +{ + struct sk_buff *skb; + const struct tcphdr *tcph; + struct cpl_t5_pass_accept_rpl *rpl5; + unsigned int len = roundup(sizeof(*rpl5), 16); + unsigned int mtu_idx; + u64 opt0; + u32 opt2, hlen; + u32 wscale; + u32 win; + + pr_debug("%s csk %p tid %u\n", __func__, csk, csk->tid); + + skb = alloc_skb(len, GFP_ATOMIC); + if (!skb) { + cxgbit_put_csk(csk); + return; + } + + rpl5 = (struct cpl_t5_pass_accept_rpl *)__skb_put(skb, len); + memset(rpl5, 0, len); + + INIT_TP_WR(rpl5, csk->tid); + OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + csk->tid)); + cxgbit_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx, + req->tcpopt.tstamp, + (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgbit_compute_wscale(csk->rcv_win); + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = csk->rcv_win >> 10; + if (win > RCV_BUFSIZ_M) + win = RCV_BUFSIZ_M; + opt0 = TCAM_BYPASS_F | + WND_SCALE_V(wscale) | + MSS_IDX_V(mtu_idx) | + L2T_IDX_V(csk->l2t->idx) | + TX_CHAN_V(csk->tx_chan) | + SMAC_SEL_V(csk->smac_idx) | + DSCP_V(csk->tos >> 2) | + ULP_MODE_V(ULP_MODE_ISCSI) | + RCV_BUFSIZ_V(win); + + opt2 = RX_CHANNEL_V(0) | + RSS_QUEUE_VALID_F | RSS_QUEUE_V(csk->rss_qid); + + if (req->tcpopt.tstamp) + opt2 |= TSTAMPS_EN_F; + if (req->tcpopt.sack) + opt2 |= SACK_EN_F; + if (wscale) + opt2 |= WND_SCALE_EN_F; + + hlen = ntohl(req->hdr_len); + tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + + IP_HDR_LEN_G(hlen); + + if (tcph->ece && tcph->cwr) + opt2 |= CCTRL_ECN_V(1); + + opt2 |= RX_COALESCE_V(3); + opt2 |= CONG_CNTRL_V(CONG_ALG_NEWRENO); + + opt2 |= T5_ISS_F; + rpl5->iss = cpu_to_be32((prandom_u32() & ~7UL) - 1); + + opt2 |= T5_OPT_2_VALID_F; + + rpl5->opt0 = cpu_to_be64(opt0); + rpl5->opt2 = cpu_to_be32(opt2); + set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->ctrlq_idx); + t4_set_arp_err_handler(skb, NULL, cxgbit_arp_failure_discard); + cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t); +} + +static void +cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb) +{ + struct cxgbit_sock *csk = NULL; + struct cxgbit_np *cnp; + struct cpl_pass_accept_req *req = cplhdr(skb); + unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid)); + struct tid_info *t = cdev->lldi.tids; + unsigned int tid = GET_TID(req); + u16 peer_mss = ntohs(req->tcpopt.mss); + unsigned short hdrs; + + struct dst_entry *dst; + __u8 local_ip[16], peer_ip[16]; + __be16 local_port, peer_port; + int ret; + int iptype; + + pr_debug("%s: cdev = %p; stid = %u; tid = %u\n", + __func__, cdev, stid, tid); + + cnp = lookup_stid(t, stid); + if (!cnp) { + pr_err("%s connect request on invalid stid %d\n", + __func__, stid); + goto rel_skb; + } + + if (cnp->com.state != CSK_STATE_LISTEN) { + pr_err("%s - listening parent not in CSK_STATE_LISTEN\n", + __func__); + goto reject; + } + + csk = lookup_tid(t, tid); + if (csk) { + pr_err("%s csk not null tid %u\n", + __func__, tid); + goto rel_skb; + } + + cxgbit_get_tuple_info(req, &iptype, local_ip, peer_ip, + &local_port, &peer_port); + + /* Find output route */ + if (iptype == 4) { + pr_debug("%s parent sock %p tid %u laddr %pI4 raddr %pI4 " + "lport %d rport %d peer_mss %d\n" + , __func__, cnp, tid, + local_ip, peer_ip, ntohs(local_port), + ntohs(peer_port), peer_mss); + dst = cxgbit_find_route(cdev, *(__be32 *)local_ip, + *(__be32 *)peer_ip, + local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid))); + } else { + pr_debug("%s parent sock %p tid %u laddr %pI6 raddr %pI6 " + "lport %d rport %d peer_mss %d\n" + , __func__, cnp, tid, + local_ip, peer_ip, ntohs(local_port), + ntohs(peer_port), peer_mss); + dst = cxgbit_find_route6(cdev, local_ip, peer_ip, + local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &cnp->com.local_addr)->sin6_scope_id); + } + if (!dst) { + pr_err("%s - failed to find dst entry!\n", + __func__); + goto reject; + } + + csk = kzalloc(sizeof(*csk), GFP_ATOMIC); + if (!csk) { + dst_release(dst); + goto rel_skb; + } + + ret = cxgbit_offload_init(csk, iptype, peer_ip, ntohs(local_port), + dst, cdev); + if (ret) { + pr_err("%s - failed to allocate l2t entry!\n", + __func__); + dst_release(dst); + kfree(csk); + goto reject; + } + + kref_init(&csk->kref); + init_completion(&csk->com.wr_wait.completion); + + INIT_LIST_HEAD(&csk->accept_node); + + hdrs = (iptype == 4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) + + sizeof(struct tcphdr) + (req->tcpopt.tstamp ? 12 : 0); + if (peer_mss && csk->mtu > (peer_mss + hdrs)) + csk->mtu = peer_mss + hdrs; + + csk->com.state = CSK_STATE_CONNECTING; + csk->com.cdev = cdev; + csk->cnp = cnp; + csk->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); + csk->dst = dst; + csk->tid = tid; + csk->wr_cred = cdev->lldi.wr_cred - + DIV_ROUND_UP(sizeof(struct cpl_abort_req), 16); + csk->wr_max_cred = csk->wr_cred; + csk->wr_una_cred = 0; + + if (iptype == 4) { + struct sockaddr_in *sin = (struct sockaddr_in *) + &csk->com.local_addr; + sin->sin_family = AF_INET; + sin->sin_port = local_port; + sin->sin_addr.s_addr = *(__be32 *)local_ip; + + sin = (struct sockaddr_in *)&csk->com.remote_addr; + sin->sin_family = AF_INET; + sin->sin_port = peer_port; + sin->sin_addr.s_addr = *(__be32 *)peer_ip; + } else { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) + &csk->com.local_addr; + + sin6->sin6_family = PF_INET6; + sin6->sin6_port = local_port; + memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); + cxgb4_clip_get(cdev->lldi.ports[0], + (const u32 *)&sin6->sin6_addr.s6_addr, + 1); + + sin6 = (struct sockaddr_in6 *)&csk->com.remote_addr; + sin6->sin6_family = PF_INET6; + sin6->sin6_port = peer_port; + memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); + } + + skb_queue_head_init(&csk->rxq); + skb_queue_head_init(&csk->txq); + skb_queue_head_init(&csk->ppodq); + skb_queue_head_init(&csk->backlogq); + skb_queue_head_init(&csk->skbq); + cxgbit_sock_reset_wr_list(csk); + spin_lock_init(&csk->lock); + init_waitqueue_head(&csk->waitq); + init_waitqueue_head(&csk->ack_waitq); + csk->lock_owner = false; + + if (cxgbit_alloc_csk_skb(csk)) { + dst_release(dst); + kfree(csk); + goto rel_skb; + } + + cxgbit_get_cdev(cdev); + + spin_lock(&cdev->cskq.lock); + list_add_tail(&csk->list, &cdev->cskq.list); + spin_unlock(&cdev->cskq.lock); + + cxgb4_insert_tid(t, csk, tid); + cxgbit_pass_accept_rpl(csk, req); + goto rel_skb; + +reject: + cxgbit_release_tid(cdev, tid); +rel_skb: + __kfree_skb(skb); +} + +static u32 +cxgbit_tx_flowc_wr_credits(struct cxgbit_sock *csk, u32 *nparamsp, + u32 *flowclenp) +{ + u32 nparams, flowclen16, flowclen; + + nparams = FLOWC_WR_NPARAMS_MIN; + + if (csk->snd_wscale) + nparams++; + +#ifdef CONFIG_CHELSIO_T4_DCB + nparams++; +#endif + flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); + flowclen16 = DIV_ROUND_UP(flowclen, 16); + flowclen = flowclen16 * 16; + /* + * Return the number of 16-byte credits used by the flowc request. + * Pass back the nparams and actual flowc length if requested. + */ + if (nparamsp) + *nparamsp = nparams; + if (flowclenp) + *flowclenp = flowclen; + return flowclen16; +} + +u32 cxgbit_send_tx_flowc_wr(struct cxgbit_sock *csk) +{ + struct cxgbit_device *cdev = csk->com.cdev; + struct fw_flowc_wr *flowc; + u32 nparams, flowclen16, flowclen; + struct sk_buff *skb; + u8 index; + +#ifdef CONFIG_CHELSIO_T4_DCB + u16 vlan = ((struct l2t_entry *)csk->l2t)->vlan; +#endif + + flowclen16 = cxgbit_tx_flowc_wr_credits(csk, &nparams, &flowclen); + + skb = __skb_dequeue(&csk->skbq); + flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen); + memset(flowc, 0, flowclen); + + flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | + FW_FLOWC_WR_NPARAMS_V(nparams)); + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | + FW_WR_FLOWID_V(csk->tid)); + flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; + flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V + (csk->com.cdev->lldi.pf)); + flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; + flowc->mnemval[1].val = cpu_to_be32(csk->tx_chan); + flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; + flowc->mnemval[2].val = cpu_to_be32(csk->tx_chan); + flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; + flowc->mnemval[3].val = cpu_to_be32(csk->rss_qid); + flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; + flowc->mnemval[4].val = cpu_to_be32(csk->snd_nxt); + flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; + flowc->mnemval[5].val = cpu_to_be32(csk->rcv_nxt); + flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; + flowc->mnemval[6].val = cpu_to_be32(csk->snd_win); + flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; + flowc->mnemval[7].val = cpu_to_be32(csk->emss); + + flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX; + if (test_bit(CDEV_ISO_ENABLE, &cdev->flags)) + flowc->mnemval[8].val = cpu_to_be32(CXGBIT_MAX_ISO_PAYLOAD); + else + flowc->mnemval[8].val = cpu_to_be32(16384); + + index = 9; + + if (csk->snd_wscale) { + flowc->mnemval[index].mnemonic = FW_FLOWC_MNEM_RCV_SCALE; + flowc->mnemval[index].val = cpu_to_be32(csk->snd_wscale); + index++; + } + +#ifdef CONFIG_CHELSIO_T4_DCB + flowc->mnemval[index].mnemonic = FW_FLOWC_MNEM_DCBPRIO; + if (vlan == VLAN_NONE) { + pr_warn("csk %u without VLAN Tag on DCB Link\n", csk->tid); + flowc->mnemval[index].val = cpu_to_be32(0); + } else + flowc->mnemval[index].val = cpu_to_be32( + (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT); +#endif + + pr_debug("%s: csk %p; tx_chan = %u; rss_qid = %u; snd_seq = %u;" + " rcv_seq = %u; snd_win = %u; emss = %u\n", + __func__, csk, csk->tx_chan, csk->rss_qid, csk->snd_nxt, + csk->rcv_nxt, csk->snd_win, csk->emss); + set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); + cxgbit_ofld_send(csk->com.cdev, skb); + return flowclen16; +} + +int cxgbit_setup_conn_digest(struct cxgbit_sock *csk) +{ + struct sk_buff *skb; + struct cpl_set_tcb_field *req; + u8 hcrc = csk->submode & CXGBIT_SUBMODE_HCRC; + u8 dcrc = csk->submode & CXGBIT_SUBMODE_DCRC; + unsigned int len = roundup(sizeof(*req), 16); + int ret; + + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + /* set up ulp submode */ + req = (struct cpl_set_tcb_field *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, csk->tid); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, csk->tid)); + req->reply_ctrl = htons(NO_REPLY_V(0) | QUEUENO_V(csk->rss_qid)); + req->word_cookie = htons(0); + req->mask = cpu_to_be64(0x3 << 4); + req->val = cpu_to_be64(((hcrc ? ULP_CRC_HEADER : 0) | + (dcrc ? ULP_CRC_DATA : 0)) << 4); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->ctrlq_idx); + + cxgbit_get_csk(csk); + cxgbit_init_wr_wait(&csk->com.wr_wait); + + cxgbit_ofld_send(csk->com.cdev, skb); + + ret = cxgbit_wait_for_reply(csk->com.cdev, + &csk->com.wr_wait, + csk->tid, 5, __func__); + if (ret) + return -1; + + return 0; +} + +int cxgbit_setup_conn_pgidx(struct cxgbit_sock *csk, u32 pg_idx) +{ + struct sk_buff *skb; + struct cpl_set_tcb_field *req; + unsigned int len = roundup(sizeof(*req), 16); + int ret; + + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + req = (struct cpl_set_tcb_field *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, csk->tid); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, csk->tid)); + req->reply_ctrl = htons(NO_REPLY_V(0) | QUEUENO_V(csk->rss_qid)); + req->word_cookie = htons(0); + req->mask = cpu_to_be64(0x3 << 8); + req->val = cpu_to_be64(pg_idx << 8); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->ctrlq_idx); + + cxgbit_get_csk(csk); + cxgbit_init_wr_wait(&csk->com.wr_wait); + + cxgbit_ofld_send(csk->com.cdev, skb); + + ret = cxgbit_wait_for_reply(csk->com.cdev, + &csk->com.wr_wait, + csk->tid, 5, __func__); + if (ret) + return -1; + + return 0; +} + +static void +cxgbit_pass_open_rpl(struct cxgbit_device *cdev, struct sk_buff *skb) +{ + struct cpl_pass_open_rpl *rpl = cplhdr(skb); + struct tid_info *t = cdev->lldi.tids; + unsigned int stid = GET_TID(rpl); + struct cxgbit_np *cnp = lookup_stid(t, stid); + + pr_debug("%s: cnp = %p; stid = %u; status = %d\n", + __func__, cnp, stid, rpl->status); + + if (!cnp) { + pr_info("%s stid %d lookup failure\n", __func__, stid); + return; + } + + cxgbit_wake_up(&cnp->com.wr_wait, __func__, rpl->status); + cxgbit_put_cnp(cnp); +} + +static void +cxgbit_close_listsrv_rpl(struct cxgbit_device *cdev, struct sk_buff *skb) +{ + struct cpl_close_listsvr_rpl *rpl = cplhdr(skb); + struct tid_info *t = cdev->lldi.tids; + unsigned int stid = GET_TID(rpl); + struct cxgbit_np *cnp = lookup_stid(t, stid); + + pr_debug("%s: cnp = %p; stid = %u; status = %d\n", + __func__, cnp, stid, rpl->status); + + if (!cnp) { + pr_info("%s stid %d lookup failure\n", __func__, stid); + return; + } + + cxgbit_wake_up(&cnp->com.wr_wait, __func__, rpl->status); + cxgbit_put_cnp(cnp); +} + +static void +cxgbit_pass_establish(struct cxgbit_device *cdev, struct sk_buff *skb) +{ + struct cpl_pass_establish *req = cplhdr(skb); + struct tid_info *t = cdev->lldi.tids; + unsigned int tid = GET_TID(req); + struct cxgbit_sock *csk; + struct cxgbit_np *cnp; + u16 tcp_opt = be16_to_cpu(req->tcp_opt); + u32 snd_isn = be32_to_cpu(req->snd_isn); + u32 rcv_isn = be32_to_cpu(req->rcv_isn); + + csk = lookup_tid(t, tid); + if (unlikely(!csk)) { + pr_err("can't find connection for tid %u.\n", tid); + goto rel_skb; + } + cnp = csk->cnp; + + pr_debug("%s: csk %p; tid %u; cnp %p\n", + __func__, csk, tid, cnp); + + csk->write_seq = snd_isn; + csk->snd_una = snd_isn; + csk->snd_nxt = snd_isn; + + csk->rcv_nxt = rcv_isn; + + if (csk->rcv_win > (RCV_BUFSIZ_M << 10)) + csk->rx_credits = (csk->rcv_win - (RCV_BUFSIZ_M << 10)); + + csk->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); + cxgbit_set_emss(csk, tcp_opt); + dst_confirm(csk->dst); + csk->com.state = CSK_STATE_ESTABLISHED; + spin_lock_bh(&cnp->np_accept_lock); + list_add_tail(&csk->accept_node, &cnp->np_accept_list); + spin_unlock_bh(&cnp->np_accept_lock); + complete(&cnp->accept_comp); +rel_skb: + __kfree_skb(skb); +} + +static void cxgbit_queue_rx_skb(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + cxgbit_skcb_flags(skb) = 0; + spin_lock_bh(&csk->rxq.lock); + __skb_queue_tail(&csk->rxq, skb); + spin_unlock_bh(&csk->rxq.lock); + wake_up(&csk->waitq); +} + +static void cxgbit_peer_close(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + pr_debug("%s: csk %p; tid %u; state %d\n", + __func__, csk, csk->tid, csk->com.state); + + switch (csk->com.state) { + case CSK_STATE_ESTABLISHED: + csk->com.state = CSK_STATE_CLOSING; + cxgbit_queue_rx_skb(csk, skb); + return; + case CSK_STATE_CLOSING: + /* simultaneous close */ + csk->com.state = CSK_STATE_MORIBUND; + break; + case CSK_STATE_MORIBUND: + csk->com.state = CSK_STATE_DEAD; + cxgbit_put_csk(csk); + break; + case CSK_STATE_ABORTING: + break; + default: + pr_info("%s: cpl_peer_close in bad state %d\n", + __func__, csk->com.state); + } + + __kfree_skb(skb); +} + +static void cxgbit_close_con_rpl(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + pr_debug("%s: csk %p; tid %u; state %d\n", + __func__, csk, csk->tid, csk->com.state); + + switch (csk->com.state) { + case CSK_STATE_CLOSING: + csk->com.state = CSK_STATE_MORIBUND; + break; + case CSK_STATE_MORIBUND: + csk->com.state = CSK_STATE_DEAD; + cxgbit_put_csk(csk); + break; + case CSK_STATE_ABORTING: + case CSK_STATE_DEAD: + break; + default: + pr_info("%s: cpl_close_con_rpl in bad state %d\n", + __func__, csk->com.state); + } + + __kfree_skb(skb); +} + +static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + struct cpl_abort_req_rss *hdr = cplhdr(skb); + unsigned int tid = GET_TID(hdr); + struct cpl_abort_rpl *rpl; + struct sk_buff *rpl_skb; + bool release = false; + bool wakeup_thread = false; + unsigned int len = roundup(sizeof(*rpl), 16); + + pr_debug("%s: csk %p; tid %u; state %d\n", + __func__, csk, tid, csk->com.state); + + if (cxgbit_is_neg_adv(hdr->status)) { + pr_err("%s: got neg advise %d on tid %u\n", + __func__, hdr->status, tid); + goto rel_skb; + } + + switch (csk->com.state) { + case CSK_STATE_CONNECTING: + case CSK_STATE_MORIBUND: + csk->com.state = CSK_STATE_DEAD; + release = true; + break; + case CSK_STATE_ESTABLISHED: + csk->com.state = CSK_STATE_DEAD; + wakeup_thread = true; + break; + case CSK_STATE_CLOSING: + csk->com.state = CSK_STATE_DEAD; + if (!csk->conn) + release = true; + break; + case CSK_STATE_ABORTING: + break; + default: + pr_info("%s: cpl_abort_req_rss in bad state %d\n", + __func__, csk->com.state); + csk->com.state = CSK_STATE_DEAD; + } + + __skb_queue_purge(&csk->txq); + + if (!test_and_set_bit(CSK_TX_DATA_SENT, &csk->com.flags)) + cxgbit_send_tx_flowc_wr(csk); + + rpl_skb = __skb_dequeue(&csk->skbq); + set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); + + rpl = (struct cpl_abort_rpl *)__skb_put(rpl_skb, len); + memset(rpl, 0, len); + + INIT_TP_WR(rpl, csk->tid); + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); + rpl->cmd = CPL_ABORT_NO_RST; + cxgbit_ofld_send(csk->com.cdev, rpl_skb); + + if (wakeup_thread) { + cxgbit_queue_rx_skb(csk, skb); + return; + } + + if (release) + cxgbit_put_csk(csk); +rel_skb: + __kfree_skb(skb); +} + +static void cxgbit_abort_rpl_rss(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + pr_debug("%s: csk %p; tid %u; state %d\n", + __func__, csk, csk->tid, csk->com.state); + + switch (csk->com.state) { + case CSK_STATE_ABORTING: + csk->com.state = CSK_STATE_DEAD; + cxgbit_put_csk(csk); + break; + default: + pr_info("%s: cpl_abort_rpl_rss in state %d\n", + __func__, csk->com.state); + } + + __kfree_skb(skb); +} + +static bool cxgbit_credit_err(const struct cxgbit_sock *csk) +{ + const struct sk_buff *skb = csk->wr_pending_head; + u32 credit = 0; + + if (unlikely(csk->wr_cred > csk->wr_max_cred)) { + pr_err("csk 0x%p, tid %u, credit %u > %u\n", + csk, csk->tid, csk->wr_cred, csk->wr_max_cred); + return true; + } + + while (skb) { + credit += skb->csum; + skb = cxgbit_skcb_tx_wr_next(skb); + } + + if (unlikely((csk->wr_cred + credit) != csk->wr_max_cred)) { + pr_err("csk 0x%p, tid %u, credit %u + %u != %u.\n", + csk, csk->tid, csk->wr_cred, + credit, csk->wr_max_cred); + + return true; + } + + return false; +} + +static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + struct cpl_fw4_ack *rpl = (struct cpl_fw4_ack *)cplhdr(skb); + u32 credits = rpl->credits; + u32 snd_una = ntohl(rpl->snd_una); + + csk->wr_cred += credits; + if (csk->wr_una_cred > (csk->wr_max_cred - csk->wr_cred)) + csk->wr_una_cred = csk->wr_max_cred - csk->wr_cred; + + while (credits) { + struct sk_buff *p = cxgbit_sock_peek_wr(csk); + + if (unlikely(!p)) { + pr_err("csk 0x%p,%u, cr %u,%u+%u, empty.\n", + csk, csk->tid, credits, + csk->wr_cred, csk->wr_una_cred); + break; + } + + if (unlikely(credits < p->csum)) { + pr_warn("csk 0x%p,%u, cr %u,%u+%u, < %u.\n", + csk, csk->tid, + credits, csk->wr_cred, csk->wr_una_cred, + p->csum); + p->csum -= credits; + break; + } + + cxgbit_sock_dequeue_wr(csk); + credits -= p->csum; + kfree_skb(p); + } + + if (unlikely(cxgbit_credit_err(csk))) { + cxgbit_queue_rx_skb(csk, skb); + return; + } + + if (rpl->seq_vld & CPL_FW4_ACK_FLAGS_SEQVAL) { + if (unlikely(before(snd_una, csk->snd_una))) { + pr_warn("csk 0x%p,%u, snd_una %u/%u.", + csk, csk->tid, snd_una, + csk->snd_una); + goto rel_skb; + } + + if (csk->snd_una != snd_una) { + csk->snd_una = snd_una; + dst_confirm(csk->dst); + wake_up(&csk->ack_waitq); + } + } + + if (skb_queue_len(&csk->txq)) + cxgbit_push_tx_frames(csk); + +rel_skb: + __kfree_skb(skb); +} + +static void cxgbit_set_tcb_rpl(struct cxgbit_device *cdev, struct sk_buff *skb) +{ + struct cxgbit_sock *csk; + struct cpl_set_tcb_rpl *rpl = (struct cpl_set_tcb_rpl *)skb->data; + unsigned int tid = GET_TID(rpl); + struct cxgb4_lld_info *lldi = &cdev->lldi; + struct tid_info *t = lldi->tids; + + csk = lookup_tid(t, tid); + if (unlikely(!csk)) + pr_err("can't find connection for tid %u.\n", tid); + else + cxgbit_wake_up(&csk->com.wr_wait, __func__, rpl->status); + + cxgbit_put_csk(csk); +} + +static void cxgbit_rx_data(struct cxgbit_device *cdev, struct sk_buff *skb) +{ + struct cxgbit_sock *csk; + struct cpl_rx_data *cpl = cplhdr(skb); + unsigned int tid = GET_TID(cpl); + struct cxgb4_lld_info *lldi = &cdev->lldi; + struct tid_info *t = lldi->tids; + + csk = lookup_tid(t, tid); + if (unlikely(!csk)) { + pr_err("can't find conn. for tid %u.\n", tid); + goto rel_skb; + } + + cxgbit_queue_rx_skb(csk, skb); + return; +rel_skb: + __kfree_skb(skb); +} + +static void +__cxgbit_process_rx_cpl(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + spin_lock(&csk->lock); + if (csk->lock_owner) { + __skb_queue_tail(&csk->backlogq, skb); + spin_unlock(&csk->lock); + return; + } + + cxgbit_skcb_rx_backlog_fn(skb)(csk, skb); + spin_unlock(&csk->lock); +} + +static void cxgbit_process_rx_cpl(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + cxgbit_get_csk(csk); + __cxgbit_process_rx_cpl(csk, skb); + cxgbit_put_csk(csk); +} + +static void cxgbit_rx_cpl(struct cxgbit_device *cdev, struct sk_buff *skb) +{ + struct cxgbit_sock *csk; + struct cpl_tx_data *cpl = cplhdr(skb); + struct cxgb4_lld_info *lldi = &cdev->lldi; + struct tid_info *t = lldi->tids; + unsigned int tid = GET_TID(cpl); + u8 opcode = cxgbit_skcb_rx_opcode(skb); + bool ref = true; + + switch (opcode) { + case CPL_FW4_ACK: + cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_fw4_ack; + ref = false; + break; + case CPL_PEER_CLOSE: + cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_peer_close; + break; + case CPL_CLOSE_CON_RPL: + cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_close_con_rpl; + break; + case CPL_ABORT_REQ_RSS: + cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_abort_req_rss; + break; + case CPL_ABORT_RPL_RSS: + cxgbit_skcb_rx_backlog_fn(skb) = cxgbit_abort_rpl_rss; + break; + default: + goto rel_skb; + } + + csk = lookup_tid(t, tid); + if (unlikely(!csk)) { + pr_err("can't find conn. for tid %u.\n", tid); + goto rel_skb; + } + + if (ref) + cxgbit_process_rx_cpl(csk, skb); + else + __cxgbit_process_rx_cpl(csk, skb); + + return; +rel_skb: + __kfree_skb(skb); +} + +cxgbit_cplhandler_func cxgbit_cplhandlers[NUM_CPL_CMDS] = { + [CPL_PASS_OPEN_RPL] = cxgbit_pass_open_rpl, + [CPL_CLOSE_LISTSRV_RPL] = cxgbit_close_listsrv_rpl, + [CPL_PASS_ACCEPT_REQ] = cxgbit_pass_accept_req, + [CPL_PASS_ESTABLISH] = cxgbit_pass_establish, + [CPL_SET_TCB_RPL] = cxgbit_set_tcb_rpl, + [CPL_RX_DATA] = cxgbit_rx_data, + [CPL_FW4_ACK] = cxgbit_rx_cpl, + [CPL_PEER_CLOSE] = cxgbit_rx_cpl, + [CPL_CLOSE_CON_RPL] = cxgbit_rx_cpl, + [CPL_ABORT_REQ_RSS] = cxgbit_rx_cpl, + [CPL_ABORT_RPL_RSS] = cxgbit_rx_cpl, +}; diff --git a/drivers/target/iscsi/cxgbit/cxgbit_ddp.c b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c new file mode 100644 index 000000000000..5d78bdb7fc64 --- /dev/null +++ b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "cxgbit.h" + +static void +cxgbit_set_one_ppod(struct cxgbi_pagepod *ppod, + struct cxgbi_task_tag_info *ttinfo, + struct scatterlist **sg_pp, unsigned int *sg_off) +{ + struct scatterlist *sg = sg_pp ? *sg_pp : NULL; + unsigned int offset = sg_off ? *sg_off : 0; + dma_addr_t addr = 0UL; + unsigned int len = 0; + int i; + + memcpy(ppod, &ttinfo->hdr, sizeof(struct cxgbi_pagepod_hdr)); + + if (sg) { + addr = sg_dma_address(sg); + len = sg_dma_len(sg); + } + + for (i = 0; i < PPOD_PAGES_MAX; i++) { + if (sg) { + ppod->addr[i] = cpu_to_be64(addr + offset); + offset += PAGE_SIZE; + if (offset == (len + sg->offset)) { + offset = 0; + sg = sg_next(sg); + if (sg) { + addr = sg_dma_address(sg); + len = sg_dma_len(sg); + } + } + } else { + ppod->addr[i] = 0ULL; + } + } + + /* + * the fifth address needs to be repeated in the next ppod, so do + * not move sg + */ + if (sg_pp) { + *sg_pp = sg; + *sg_off = offset; + } + + if (offset == len) { + offset = 0; + if (sg) { + sg = sg_next(sg); + if (sg) + addr = sg_dma_address(sg); + } + } + ppod->addr[i] = sg ? cpu_to_be64(addr + offset) : 0ULL; +} + +static struct sk_buff * +cxgbit_ppod_init_idata(struct cxgbit_device *cdev, struct cxgbi_ppm *ppm, + unsigned int idx, unsigned int npods, unsigned int tid) +{ + struct ulp_mem_io *req; + struct ulptx_idata *idata; + unsigned int pm_addr = (idx << PPOD_SIZE_SHIFT) + ppm->llimit; + unsigned int dlen = npods << PPOD_SIZE_SHIFT; + unsigned int wr_len = roundup(sizeof(struct ulp_mem_io) + + sizeof(struct ulptx_idata) + dlen, 16); + struct sk_buff *skb; + + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) + return NULL; + + req = (struct ulp_mem_io *)__skb_put(skb, wr_len); + INIT_ULPTX_WR(req, wr_len, 0, tid); + req->wr.wr_hi = htonl(FW_WR_OP_V(FW_ULPTX_WR) | + FW_WR_ATOMIC_V(0)); + req->cmd = htonl(ULPTX_CMD_V(ULP_TX_MEM_WRITE) | + ULP_MEMIO_ORDER_V(0) | + T5_ULP_MEMIO_IMM_V(1)); + req->dlen = htonl(ULP_MEMIO_DATA_LEN_V(dlen >> 5)); + req->lock_addr = htonl(ULP_MEMIO_ADDR_V(pm_addr >> 5)); + req->len16 = htonl(DIV_ROUND_UP(wr_len - sizeof(req->wr), 16)); + + idata = (struct ulptx_idata *)(req + 1); + idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM)); + idata->len = htonl(dlen); + + return skb; +} + +static int +cxgbit_ppod_write_idata(struct cxgbi_ppm *ppm, struct cxgbit_sock *csk, + struct cxgbi_task_tag_info *ttinfo, unsigned int idx, + unsigned int npods, struct scatterlist **sg_pp, + unsigned int *sg_off) +{ + struct cxgbit_device *cdev = csk->com.cdev; + struct sk_buff *skb; + struct ulp_mem_io *req; + struct ulptx_idata *idata; + struct cxgbi_pagepod *ppod; + unsigned int i; + + skb = cxgbit_ppod_init_idata(cdev, ppm, idx, npods, csk->tid); + if (!skb) + return -ENOMEM; + + req = (struct ulp_mem_io *)skb->data; + idata = (struct ulptx_idata *)(req + 1); + ppod = (struct cxgbi_pagepod *)(idata + 1); + + for (i = 0; i < npods; i++, ppod++) + cxgbit_set_one_ppod(ppod, ttinfo, sg_pp, sg_off); + + __skb_queue_tail(&csk->ppodq, skb); + + return 0; +} + +static int +cxgbit_ddp_set_map(struct cxgbi_ppm *ppm, struct cxgbit_sock *csk, + struct cxgbi_task_tag_info *ttinfo) +{ + unsigned int pidx = ttinfo->idx; + unsigned int npods = ttinfo->npods; + unsigned int i, cnt; + struct scatterlist *sg = ttinfo->sgl; + unsigned int offset = 0; + int ret = 0; + + for (i = 0; i < npods; i += cnt, pidx += cnt) { + cnt = npods - i; + + if (cnt > ULPMEM_IDATA_MAX_NPPODS) + cnt = ULPMEM_IDATA_MAX_NPPODS; + + ret = cxgbit_ppod_write_idata(ppm, csk, ttinfo, pidx, cnt, + &sg, &offset); + if (ret < 0) + break; + } + + return ret; +} + +static int cxgbit_ddp_sgl_check(struct scatterlist *sg, + unsigned int nents) +{ + unsigned int last_sgidx = nents - 1; + unsigned int i; + + for (i = 0; i < nents; i++, sg = sg_next(sg)) { + unsigned int len = sg->length + sg->offset; + + if ((sg->offset & 0x3) || (i && sg->offset) || + ((i != last_sgidx) && (len != PAGE_SIZE))) { + return -EINVAL; + } + } + + return 0; +} + +static int +cxgbit_ddp_reserve(struct cxgbit_sock *csk, struct cxgbi_task_tag_info *ttinfo, + unsigned int xferlen) +{ + struct cxgbit_device *cdev = csk->com.cdev; + struct cxgbi_ppm *ppm = cdev2ppm(cdev); + struct scatterlist *sgl = ttinfo->sgl; + unsigned int sgcnt = ttinfo->nents; + unsigned int sg_offset = sgl->offset; + int ret; + + if ((xferlen < DDP_THRESHOLD) || (!sgcnt)) { + pr_debug("ppm 0x%p, pgidx %u, xfer %u, sgcnt %u, NO ddp.\n", + ppm, ppm->tformat.pgsz_idx_dflt, + xferlen, ttinfo->nents); + return -EINVAL; + } + + if (cxgbit_ddp_sgl_check(sgl, sgcnt) < 0) + return -EINVAL; + + ttinfo->nr_pages = (xferlen + sgl->offset + + (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT; + + /* + * the ddp tag will be used for the ttt in the outgoing r2t pdu + */ + ret = cxgbi_ppm_ppods_reserve(ppm, ttinfo->nr_pages, 0, &ttinfo->idx, + &ttinfo->tag, 0); + if (ret < 0) + return ret; + ttinfo->npods = ret; + + sgl->offset = 0; + ret = dma_map_sg(&ppm->pdev->dev, sgl, sgcnt, DMA_FROM_DEVICE); + sgl->offset = sg_offset; + if (!ret) { + pr_info("%s: 0x%x, xfer %u, sgl %u dma mapping err.\n", + __func__, 0, xferlen, sgcnt); + goto rel_ppods; + } + + cxgbi_ppm_make_ppod_hdr(ppm, ttinfo->tag, csk->tid, sgl->offset, + xferlen, &ttinfo->hdr); + + ret = cxgbit_ddp_set_map(ppm, csk, ttinfo); + if (ret < 0) { + __skb_queue_purge(&csk->ppodq); + dma_unmap_sg(&ppm->pdev->dev, sgl, sgcnt, DMA_FROM_DEVICE); + goto rel_ppods; + } + + return 0; + +rel_ppods: + cxgbi_ppm_ppod_release(ppm, ttinfo->idx); + return -EINVAL; +} + +void +cxgbit_get_r2t_ttt(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct iscsi_r2t *r2t) +{ + struct cxgbit_sock *csk = conn->context; + struct cxgbit_device *cdev = csk->com.cdev; + struct cxgbit_cmd *ccmd = iscsit_priv_cmd(cmd); + struct cxgbi_task_tag_info *ttinfo = &ccmd->ttinfo; + int ret = -EINVAL; + + if ((!ccmd->setup_ddp) || + (!test_bit(CSK_DDP_ENABLE, &csk->com.flags))) + goto out; + + ccmd->setup_ddp = false; + + ttinfo->sgl = cmd->se_cmd.t_data_sg; + ttinfo->nents = cmd->se_cmd.t_data_nents; + + ret = cxgbit_ddp_reserve(csk, ttinfo, cmd->se_cmd.data_length); + if (ret < 0) { + pr_info("csk 0x%p, cmd 0x%p, xfer len %u, sgcnt %u no ddp.\n", + csk, cmd, cmd->se_cmd.data_length, ttinfo->nents); + + ttinfo->sgl = NULL; + ttinfo->nents = 0; + } else { + ccmd->release = true; + } +out: + pr_debug("cdev 0x%p, cmd 0x%p, tag 0x%x\n", cdev, cmd, ttinfo->tag); + r2t->targ_xfer_tag = ttinfo->tag; +} + +void cxgbit_release_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +{ + struct cxgbit_cmd *ccmd = iscsit_priv_cmd(cmd); + + if (ccmd->release) { + struct cxgbi_task_tag_info *ttinfo = &ccmd->ttinfo; + + if (ttinfo->sgl) { + struct cxgbit_sock *csk = conn->context; + struct cxgbit_device *cdev = csk->com.cdev; + struct cxgbi_ppm *ppm = cdev2ppm(cdev); + + cxgbi_ppm_ppod_release(ppm, ttinfo->idx); + + dma_unmap_sg(&ppm->pdev->dev, ttinfo->sgl, + ttinfo->nents, DMA_FROM_DEVICE); + } else { + put_page(sg_page(&ccmd->sg)); + } + + ccmd->release = false; + } +} + +int cxgbit_ddp_init(struct cxgbit_device *cdev) +{ + struct cxgb4_lld_info *lldi = &cdev->lldi; + struct net_device *ndev = cdev->lldi.ports[0]; + struct cxgbi_tag_format tformat; + unsigned int ppmax; + int ret, i; + + if (!lldi->vr->iscsi.size) { + pr_warn("%s, iscsi NOT enabled, check config!\n", ndev->name); + return -EACCES; + } + + ppmax = lldi->vr->iscsi.size >> PPOD_SIZE_SHIFT; + + memset(&tformat, 0, sizeof(struct cxgbi_tag_format)); + for (i = 0; i < 4; i++) + tformat.pgsz_order[i] = (lldi->iscsi_pgsz_order >> (i << 3)) + & 0xF; + cxgbi_tagmask_check(lldi->iscsi_tagmask, &tformat); + + ret = cxgbi_ppm_init(lldi->iscsi_ppm, cdev->lldi.ports[0], + cdev->lldi.pdev, &cdev->lldi, &tformat, + ppmax, lldi->iscsi_llimit, + lldi->vr->iscsi.start, 2); + if (ret >= 0) { + struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*lldi->iscsi_ppm); + + if ((ppm->tformat.pgsz_idx_dflt < DDP_PGIDX_MAX) && + (ppm->ppmax >= 1024)) + set_bit(CDEV_DDP_ENABLE, &cdev->flags); + ret = 0; + } + + return ret; +} diff --git a/drivers/target/iscsi/cxgbit/cxgbit_lro.h b/drivers/target/iscsi/cxgbit/cxgbit_lro.h new file mode 100644 index 000000000000..28c11bd1b930 --- /dev/null +++ b/drivers/target/iscsi/cxgbit/cxgbit_lro.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + * + */ + +#ifndef __CXGBIT_LRO_H__ +#define __CXGBIT_LRO_H__ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/skbuff.h> + +#define LRO_FLUSH_LEN_MAX 65535 + +struct cxgbit_lro_cb { + struct cxgbit_sock *csk; + u32 pdu_totallen; + u32 offset; + u8 pdu_idx; + bool complete; +}; + +enum cxgbit_pducb_flags { + PDUCBF_RX_HDR = (1 << 0), /* received pdu header */ + PDUCBF_RX_DATA = (1 << 1), /* received pdu payload */ + PDUCBF_RX_STATUS = (1 << 2), /* received ddp status */ + PDUCBF_RX_DATA_DDPD = (1 << 3), /* pdu payload ddp'd */ + PDUCBF_RX_HCRC_ERR = (1 << 4), /* header digest error */ + PDUCBF_RX_DCRC_ERR = (1 << 5), /* data digest error */ +}; + +struct cxgbit_lro_pdu_cb { + u8 flags; + u8 frags; + u8 hfrag_idx; + u8 nr_dfrags; + u8 dfrag_idx; + bool complete; + u32 seq; + u32 pdulen; + u32 hlen; + u32 dlen; + u32 doffset; + u32 ddigest; + void *hdr; +}; + +#define LRO_SKB_MAX_HEADROOM \ + (sizeof(struct cxgbit_lro_cb) + \ + (MAX_SKB_FRAGS * sizeof(struct cxgbit_lro_pdu_cb))) + +#define LRO_SKB_MIN_HEADROOM \ + (sizeof(struct cxgbit_lro_cb) + \ + sizeof(struct cxgbit_lro_pdu_cb)) + +#define cxgbit_skb_lro_cb(skb) ((struct cxgbit_lro_cb *)skb->data) +#define cxgbit_skb_lro_pdu_cb(skb, i) \ + ((struct cxgbit_lro_pdu_cb *)(skb->data + sizeof(struct cxgbit_lro_cb) \ + + (i * sizeof(struct cxgbit_lro_pdu_cb)))) + +#define CPL_RX_ISCSI_DDP_STATUS_DDP_SHIFT 16 /* ddp'able */ +#define CPL_RX_ISCSI_DDP_STATUS_PAD_SHIFT 19 /* pad error */ +#define CPL_RX_ISCSI_DDP_STATUS_HCRC_SHIFT 20 /* hcrc error */ +#define CPL_RX_ISCSI_DDP_STATUS_DCRC_SHIFT 21 /* dcrc error */ + +#endif /*__CXGBIT_LRO_H_*/ diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c new file mode 100644 index 000000000000..60dccd02bd85 --- /dev/null +++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c @@ -0,0 +1,702 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define DRV_NAME "cxgbit" +#define DRV_VERSION "1.0.0-ko" +#define pr_fmt(fmt) DRV_NAME ": " fmt + +#include "cxgbit.h" + +#ifdef CONFIG_CHELSIO_T4_DCB +#include <net/dcbevent.h> +#include "cxgb4_dcb.h" +#endif + +LIST_HEAD(cdev_list_head); +/* cdev list lock */ +DEFINE_MUTEX(cdev_list_lock); + +void _cxgbit_free_cdev(struct kref *kref) +{ + struct cxgbit_device *cdev; + + cdev = container_of(kref, struct cxgbit_device, kref); + kfree(cdev); +} + +static void cxgbit_set_mdsl(struct cxgbit_device *cdev) +{ + struct cxgb4_lld_info *lldi = &cdev->lldi; + u32 mdsl; + +#define ULP2_MAX_PKT_LEN 16224 +#define ISCSI_PDU_NONPAYLOAD_LEN 312 + mdsl = min_t(u32, lldi->iscsi_iolen - ISCSI_PDU_NONPAYLOAD_LEN, + ULP2_MAX_PKT_LEN - ISCSI_PDU_NONPAYLOAD_LEN); + mdsl = min_t(u32, mdsl, 8192); + mdsl = min_t(u32, mdsl, (MAX_SKB_FRAGS - 1) * PAGE_SIZE); + + cdev->mdsl = mdsl; +} + +static void *cxgbit_uld_add(const struct cxgb4_lld_info *lldi) +{ + struct cxgbit_device *cdev; + + if (is_t4(lldi->adapter_type)) + return ERR_PTR(-ENODEV); + + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) + return ERR_PTR(-ENOMEM); + + kref_init(&cdev->kref); + + cdev->lldi = *lldi; + + cxgbit_set_mdsl(cdev); + + if (cxgbit_ddp_init(cdev) < 0) { + kfree(cdev); + return ERR_PTR(-EINVAL); + } + + if (!test_bit(CDEV_DDP_ENABLE, &cdev->flags)) + pr_info("cdev %s ddp init failed\n", + pci_name(lldi->pdev)); + + if (lldi->fw_vers >= 0x10d2b00) + set_bit(CDEV_ISO_ENABLE, &cdev->flags); + + spin_lock_init(&cdev->cskq.lock); + INIT_LIST_HEAD(&cdev->cskq.list); + + mutex_lock(&cdev_list_lock); + list_add_tail(&cdev->list, &cdev_list_head); + mutex_unlock(&cdev_list_lock); + + pr_info("cdev %s added for iSCSI target transport\n", + pci_name(lldi->pdev)); + + return cdev; +} + +static void cxgbit_close_conn(struct cxgbit_device *cdev) +{ + struct cxgbit_sock *csk; + struct sk_buff *skb; + bool wakeup_thread = false; + + spin_lock_bh(&cdev->cskq.lock); + list_for_each_entry(csk, &cdev->cskq.list, list) { + skb = alloc_skb(0, GFP_ATOMIC); + if (!skb) + continue; + + spin_lock_bh(&csk->rxq.lock); + __skb_queue_tail(&csk->rxq, skb); + if (skb_queue_len(&csk->rxq) == 1) + wakeup_thread = true; + spin_unlock_bh(&csk->rxq.lock); + + if (wakeup_thread) { + wake_up(&csk->waitq); + wakeup_thread = false; + } + } + spin_unlock_bh(&cdev->cskq.lock); +} + +static void cxgbit_detach_cdev(struct cxgbit_device *cdev) +{ + bool free_cdev = false; + + spin_lock_bh(&cdev->cskq.lock); + if (list_empty(&cdev->cskq.list)) + free_cdev = true; + spin_unlock_bh(&cdev->cskq.lock); + + if (free_cdev) { + mutex_lock(&cdev_list_lock); + list_del(&cdev->list); + mutex_unlock(&cdev_list_lock); + + cxgbit_put_cdev(cdev); + } else { + cxgbit_close_conn(cdev); + } +} + +static int cxgbit_uld_state_change(void *handle, enum cxgb4_state state) +{ + struct cxgbit_device *cdev = handle; + + switch (state) { + case CXGB4_STATE_UP: + set_bit(CDEV_STATE_UP, &cdev->flags); + pr_info("cdev %s state UP.\n", pci_name(cdev->lldi.pdev)); + break; + case CXGB4_STATE_START_RECOVERY: + clear_bit(CDEV_STATE_UP, &cdev->flags); + cxgbit_close_conn(cdev); + pr_info("cdev %s state RECOVERY.\n", pci_name(cdev->lldi.pdev)); + break; + case CXGB4_STATE_DOWN: + pr_info("cdev %s state DOWN.\n", pci_name(cdev->lldi.pdev)); + break; + case CXGB4_STATE_DETACH: + clear_bit(CDEV_STATE_UP, &cdev->flags); + pr_info("cdev %s state DETACH.\n", pci_name(cdev->lldi.pdev)); + cxgbit_detach_cdev(cdev); + break; + default: + pr_info("cdev %s unknown state %d.\n", + pci_name(cdev->lldi.pdev), state); + break; + } + return 0; +} + +static void +cxgbit_proc_ddp_status(unsigned int tid, struct cpl_rx_data_ddp *cpl, + struct cxgbit_lro_pdu_cb *pdu_cb) +{ + unsigned int status = ntohl(cpl->ddpvld); + + pdu_cb->flags |= PDUCBF_RX_STATUS; + pdu_cb->ddigest = ntohl(cpl->ulp_crc); + pdu_cb->pdulen = ntohs(cpl->len); + + if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_HCRC_SHIFT)) { + pr_info("tid 0x%x, status 0x%x, hcrc bad.\n", tid, status); + pdu_cb->flags |= PDUCBF_RX_HCRC_ERR; + } + + if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_DCRC_SHIFT)) { + pr_info("tid 0x%x, status 0x%x, dcrc bad.\n", tid, status); + pdu_cb->flags |= PDUCBF_RX_DCRC_ERR; + } + + if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_PAD_SHIFT)) + pr_info("tid 0x%x, status 0x%x, pad bad.\n", tid, status); + + if ((status & (1 << CPL_RX_ISCSI_DDP_STATUS_DDP_SHIFT)) && + (!(pdu_cb->flags & PDUCBF_RX_DATA))) { + pdu_cb->flags |= PDUCBF_RX_DATA_DDPD; + } +} + +static void +cxgbit_lro_add_packet_rsp(struct sk_buff *skb, u8 op, const __be64 *rsp) +{ + struct cxgbit_lro_cb *lro_cb = cxgbit_skb_lro_cb(skb); + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_skb_lro_pdu_cb(skb, + lro_cb->pdu_idx); + struct cpl_rx_iscsi_ddp *cpl = (struct cpl_rx_iscsi_ddp *)(rsp + 1); + + cxgbit_proc_ddp_status(lro_cb->csk->tid, cpl, pdu_cb); + + if (pdu_cb->flags & PDUCBF_RX_HDR) + pdu_cb->complete = true; + + lro_cb->complete = true; + lro_cb->pdu_totallen += pdu_cb->pdulen; + lro_cb->pdu_idx++; +} + +static void +cxgbit_copy_frags(struct sk_buff *skb, const struct pkt_gl *gl, + unsigned int offset) +{ + u8 skb_frag_idx = skb_shinfo(skb)->nr_frags; + u8 i; + + /* usually there's just one frag */ + __skb_fill_page_desc(skb, skb_frag_idx, gl->frags[0].page, + gl->frags[0].offset + offset, + gl->frags[0].size - offset); + for (i = 1; i < gl->nfrags; i++) + __skb_fill_page_desc(skb, skb_frag_idx + i, + gl->frags[i].page, + gl->frags[i].offset, + gl->frags[i].size); + + skb_shinfo(skb)->nr_frags += gl->nfrags; + + /* get a reference to the last page, we don't own it */ + get_page(gl->frags[gl->nfrags - 1].page); +} + +static void +cxgbit_lro_add_packet_gl(struct sk_buff *skb, u8 op, const struct pkt_gl *gl) +{ + struct cxgbit_lro_cb *lro_cb = cxgbit_skb_lro_cb(skb); + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_skb_lro_pdu_cb(skb, + lro_cb->pdu_idx); + u32 len, offset; + + if (op == CPL_ISCSI_HDR) { + struct cpl_iscsi_hdr *cpl = (struct cpl_iscsi_hdr *)gl->va; + + offset = sizeof(struct cpl_iscsi_hdr); + pdu_cb->flags |= PDUCBF_RX_HDR; + pdu_cb->seq = ntohl(cpl->seq); + len = ntohs(cpl->len); + pdu_cb->hdr = gl->va + offset; + pdu_cb->hlen = len; + pdu_cb->hfrag_idx = skb_shinfo(skb)->nr_frags; + + if (unlikely(gl->nfrags > 1)) + cxgbit_skcb_flags(skb) = 0; + + lro_cb->complete = false; + } else { + struct cpl_iscsi_data *cpl = (struct cpl_iscsi_data *)gl->va; + + offset = sizeof(struct cpl_iscsi_data); + pdu_cb->flags |= PDUCBF_RX_DATA; + len = ntohs(cpl->len); + pdu_cb->dlen = len; + pdu_cb->doffset = lro_cb->offset; + pdu_cb->nr_dfrags = gl->nfrags; + pdu_cb->dfrag_idx = skb_shinfo(skb)->nr_frags; + } + + cxgbit_copy_frags(skb, gl, offset); + + pdu_cb->frags += gl->nfrags; + lro_cb->offset += len; + skb->len += len; + skb->data_len += len; + skb->truesize += len; +} + +static struct sk_buff * +cxgbit_lro_init_skb(struct cxgbit_sock *csk, u8 op, const struct pkt_gl *gl, + const __be64 *rsp, struct napi_struct *napi) +{ + struct sk_buff *skb; + struct cxgbit_lro_cb *lro_cb; + + skb = napi_alloc_skb(napi, LRO_SKB_MAX_HEADROOM); + + if (unlikely(!skb)) + return NULL; + + memset(skb->data, 0, LRO_SKB_MAX_HEADROOM); + + cxgbit_skcb_flags(skb) |= SKCBF_RX_LRO; + + lro_cb = cxgbit_skb_lro_cb(skb); + + cxgbit_get_csk(csk); + + lro_cb->csk = csk; + + return skb; +} + +static void cxgbit_queue_lro_skb(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + bool wakeup_thread = false; + + spin_lock(&csk->rxq.lock); + __skb_queue_tail(&csk->rxq, skb); + if (skb_queue_len(&csk->rxq) == 1) + wakeup_thread = true; + spin_unlock(&csk->rxq.lock); + + if (wakeup_thread) + wake_up(&csk->waitq); +} + +static void cxgbit_lro_flush(struct t4_lro_mgr *lro_mgr, struct sk_buff *skb) +{ + struct cxgbit_lro_cb *lro_cb = cxgbit_skb_lro_cb(skb); + struct cxgbit_sock *csk = lro_cb->csk; + + csk->lro_skb = NULL; + + __skb_unlink(skb, &lro_mgr->lroq); + cxgbit_queue_lro_skb(csk, skb); + + cxgbit_put_csk(csk); + + lro_mgr->lro_pkts++; + lro_mgr->lro_session_cnt--; +} + +static void cxgbit_uld_lro_flush(struct t4_lro_mgr *lro_mgr) +{ + struct sk_buff *skb; + + while ((skb = skb_peek(&lro_mgr->lroq))) + cxgbit_lro_flush(lro_mgr, skb); +} + +static int +cxgbit_lro_receive(struct cxgbit_sock *csk, u8 op, const __be64 *rsp, + const struct pkt_gl *gl, struct t4_lro_mgr *lro_mgr, + struct napi_struct *napi) +{ + struct sk_buff *skb; + struct cxgbit_lro_cb *lro_cb; + + if (!csk) { + pr_err("%s: csk NULL, op 0x%x.\n", __func__, op); + goto out; + } + + if (csk->lro_skb) + goto add_packet; + +start_lro: + if (lro_mgr->lro_session_cnt >= MAX_LRO_SESSIONS) { + cxgbit_uld_lro_flush(lro_mgr); + goto start_lro; + } + + skb = cxgbit_lro_init_skb(csk, op, gl, rsp, napi); + if (unlikely(!skb)) + goto out; + + csk->lro_skb = skb; + + __skb_queue_tail(&lro_mgr->lroq, skb); + lro_mgr->lro_session_cnt++; + +add_packet: + skb = csk->lro_skb; + lro_cb = cxgbit_skb_lro_cb(skb); + + if ((gl && (((skb_shinfo(skb)->nr_frags + gl->nfrags) > + MAX_SKB_FRAGS) || (lro_cb->pdu_totallen >= LRO_FLUSH_LEN_MAX))) || + (lro_cb->pdu_idx >= MAX_SKB_FRAGS)) { + cxgbit_lro_flush(lro_mgr, skb); + goto start_lro; + } + + if (gl) + cxgbit_lro_add_packet_gl(skb, op, gl); + else + cxgbit_lro_add_packet_rsp(skb, op, rsp); + + lro_mgr->lro_merged++; + + return 0; + +out: + return -1; +} + +static int +cxgbit_uld_lro_rx_handler(void *hndl, const __be64 *rsp, + const struct pkt_gl *gl, struct t4_lro_mgr *lro_mgr, + struct napi_struct *napi) +{ + struct cxgbit_device *cdev = hndl; + struct cxgb4_lld_info *lldi = &cdev->lldi; + struct cpl_tx_data *rpl = NULL; + struct cxgbit_sock *csk = NULL; + unsigned int tid = 0; + struct sk_buff *skb; + unsigned int op = *(u8 *)rsp; + bool lro_flush = true; + + switch (op) { + case CPL_ISCSI_HDR: + case CPL_ISCSI_DATA: + case CPL_RX_ISCSI_DDP: + case CPL_FW4_ACK: + lro_flush = false; + case CPL_ABORT_RPL_RSS: + case CPL_PASS_ESTABLISH: + case CPL_PEER_CLOSE: + case CPL_CLOSE_CON_RPL: + case CPL_ABORT_REQ_RSS: + case CPL_SET_TCB_RPL: + case CPL_RX_DATA: + rpl = gl ? (struct cpl_tx_data *)gl->va : + (struct cpl_tx_data *)(rsp + 1); + tid = GET_TID(rpl); + csk = lookup_tid(lldi->tids, tid); + break; + default: + break; + } + + if (csk && csk->lro_skb && lro_flush) + cxgbit_lro_flush(lro_mgr, csk->lro_skb); + + if (!gl) { + unsigned int len; + + if (op == CPL_RX_ISCSI_DDP) { + if (!cxgbit_lro_receive(csk, op, rsp, NULL, lro_mgr, + napi)) + return 0; + } + + len = 64 - sizeof(struct rsp_ctrl) - 8; + skb = napi_alloc_skb(napi, len); + if (!skb) + goto nomem; + __skb_put(skb, len); + skb_copy_to_linear_data(skb, &rsp[1], len); + } else { + if (unlikely(op != *(u8 *)gl->va)) { + pr_info("? FL 0x%p,RSS%#llx,FL %#llx,len %u.\n", + gl->va, be64_to_cpu(*rsp), + be64_to_cpu(*(u64 *)gl->va), + gl->tot_len); + return 0; + } + + if (op == CPL_ISCSI_HDR || op == CPL_ISCSI_DATA) { + if (!cxgbit_lro_receive(csk, op, rsp, gl, lro_mgr, + napi)) + return 0; + } + +#define RX_PULL_LEN 128 + skb = cxgb4_pktgl_to_skb(gl, RX_PULL_LEN, RX_PULL_LEN); + if (unlikely(!skb)) + goto nomem; + } + + rpl = (struct cpl_tx_data *)skb->data; + op = rpl->ot.opcode; + cxgbit_skcb_rx_opcode(skb) = op; + + pr_debug("cdev %p, opcode 0x%x(0x%x,0x%x), skb %p.\n", + cdev, op, rpl->ot.opcode_tid, + ntohl(rpl->ot.opcode_tid), skb); + + if (op < NUM_CPL_CMDS && cxgbit_cplhandlers[op]) { + cxgbit_cplhandlers[op](cdev, skb); + } else { + pr_err("No handler for opcode 0x%x.\n", op); + __kfree_skb(skb); + } + return 0; +nomem: + pr_err("%s OOM bailing out.\n", __func__); + return 1; +} + +#ifdef CONFIG_CHELSIO_T4_DCB +struct cxgbit_dcb_work { + struct dcb_app_type dcb_app; + struct work_struct work; +}; + +static void +cxgbit_update_dcb_priority(struct cxgbit_device *cdev, u8 port_id, + u8 dcb_priority, u16 port_num) +{ + struct cxgbit_sock *csk; + struct sk_buff *skb; + u16 local_port; + bool wakeup_thread = false; + + spin_lock_bh(&cdev->cskq.lock); + list_for_each_entry(csk, &cdev->cskq.list, list) { + if (csk->port_id != port_id) + continue; + + if (csk->com.local_addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sock_in6; + + sock_in6 = (struct sockaddr_in6 *)&csk->com.local_addr; + local_port = ntohs(sock_in6->sin6_port); + } else { + struct sockaddr_in *sock_in; + + sock_in = (struct sockaddr_in *)&csk->com.local_addr; + local_port = ntohs(sock_in->sin_port); + } + + if (local_port != port_num) + continue; + + if (csk->dcb_priority == dcb_priority) + continue; + + skb = alloc_skb(0, GFP_ATOMIC); + if (!skb) + continue; + + spin_lock(&csk->rxq.lock); + __skb_queue_tail(&csk->rxq, skb); + if (skb_queue_len(&csk->rxq) == 1) + wakeup_thread = true; + spin_unlock(&csk->rxq.lock); + + if (wakeup_thread) { + wake_up(&csk->waitq); + wakeup_thread = false; + } + } + spin_unlock_bh(&cdev->cskq.lock); +} + +static void cxgbit_dcb_workfn(struct work_struct *work) +{ + struct cxgbit_dcb_work *dcb_work; + struct net_device *ndev; + struct cxgbit_device *cdev = NULL; + struct dcb_app_type *iscsi_app; + u8 priority, port_id = 0xff; + + dcb_work = container_of(work, struct cxgbit_dcb_work, work); + iscsi_app = &dcb_work->dcb_app; + + if (iscsi_app->dcbx & DCB_CAP_DCBX_VER_IEEE) { + if (iscsi_app->app.selector != IEEE_8021QAZ_APP_SEL_ANY) + goto out; + + priority = iscsi_app->app.priority; + + } else if (iscsi_app->dcbx & DCB_CAP_DCBX_VER_CEE) { + if (iscsi_app->app.selector != DCB_APP_IDTYPE_PORTNUM) + goto out; + + if (!iscsi_app->app.priority) + goto out; + + priority = ffs(iscsi_app->app.priority) - 1; + } else { + goto out; + } + + pr_debug("priority for ifid %d is %u\n", + iscsi_app->ifindex, priority); + + ndev = dev_get_by_index(&init_net, iscsi_app->ifindex); + + if (!ndev) + goto out; + + mutex_lock(&cdev_list_lock); + cdev = cxgbit_find_device(ndev, &port_id); + + dev_put(ndev); + + if (!cdev) { + mutex_unlock(&cdev_list_lock); + goto out; + } + + cxgbit_update_dcb_priority(cdev, port_id, priority, + iscsi_app->app.protocol); + mutex_unlock(&cdev_list_lock); +out: + kfree(dcb_work); +} + +static int +cxgbit_dcbevent_notify(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct cxgbit_dcb_work *dcb_work; + struct dcb_app_type *dcb_app = data; + + dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC); + if (!dcb_work) + return NOTIFY_DONE; + + dcb_work->dcb_app = *dcb_app; + INIT_WORK(&dcb_work->work, cxgbit_dcb_workfn); + schedule_work(&dcb_work->work); + return NOTIFY_OK; +} +#endif + +static enum target_prot_op cxgbit_get_sup_prot_ops(struct iscsi_conn *conn) +{ + return TARGET_PROT_NORMAL; +} + +static struct iscsit_transport cxgbit_transport = { + .name = DRV_NAME, + .transport_type = ISCSI_CXGBIT, + .rdma_shutdown = false, + .priv_size = sizeof(struct cxgbit_cmd), + .owner = THIS_MODULE, + .iscsit_setup_np = cxgbit_setup_np, + .iscsit_accept_np = cxgbit_accept_np, + .iscsit_free_np = cxgbit_free_np, + .iscsit_free_conn = cxgbit_free_conn, + .iscsit_get_login_rx = cxgbit_get_login_rx, + .iscsit_put_login_tx = cxgbit_put_login_tx, + .iscsit_immediate_queue = iscsit_immediate_queue, + .iscsit_response_queue = iscsit_response_queue, + .iscsit_get_dataout = iscsit_build_r2ts_for_cmd, + .iscsit_queue_data_in = iscsit_queue_rsp, + .iscsit_queue_status = iscsit_queue_rsp, + .iscsit_xmit_pdu = cxgbit_xmit_pdu, + .iscsit_get_r2t_ttt = cxgbit_get_r2t_ttt, + .iscsit_get_rx_pdu = cxgbit_get_rx_pdu, + .iscsit_validate_params = cxgbit_validate_params, + .iscsit_release_cmd = cxgbit_release_cmd, + .iscsit_aborted_task = iscsit_aborted_task, + .iscsit_get_sup_prot_ops = cxgbit_get_sup_prot_ops, +}; + +static struct cxgb4_uld_info cxgbit_uld_info = { + .name = DRV_NAME, + .add = cxgbit_uld_add, + .state_change = cxgbit_uld_state_change, + .lro_rx_handler = cxgbit_uld_lro_rx_handler, + .lro_flush = cxgbit_uld_lro_flush, +}; + +#ifdef CONFIG_CHELSIO_T4_DCB +static struct notifier_block cxgbit_dcbevent_nb = { + .notifier_call = cxgbit_dcbevent_notify, +}; +#endif + +static int __init cxgbit_init(void) +{ + cxgb4_register_uld(CXGB4_ULD_ISCSIT, &cxgbit_uld_info); + iscsit_register_transport(&cxgbit_transport); + +#ifdef CONFIG_CHELSIO_T4_DCB + pr_info("%s dcb enabled.\n", DRV_NAME); + register_dcbevent_notifier(&cxgbit_dcbevent_nb); +#endif + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, cb) < + sizeof(union cxgbit_skb_cb)); + return 0; +} + +static void __exit cxgbit_exit(void) +{ + struct cxgbit_device *cdev, *tmp; + +#ifdef CONFIG_CHELSIO_T4_DCB + unregister_dcbevent_notifier(&cxgbit_dcbevent_nb); +#endif + mutex_lock(&cdev_list_lock); + list_for_each_entry_safe(cdev, tmp, &cdev_list_head, list) { + list_del(&cdev->list); + cxgbit_put_cdev(cdev); + } + mutex_unlock(&cdev_list_lock); + iscsit_unregister_transport(&cxgbit_transport); + cxgb4_unregister_uld(CXGB4_ULD_ISCSIT); +} + +module_init(cxgbit_init); +module_exit(cxgbit_exit); + +MODULE_DESCRIPTION("Chelsio iSCSI target offload driver"); +MODULE_AUTHOR("Chelsio Communications"); +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL"); diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c new file mode 100644 index 000000000000..d02bf58aea6d --- /dev/null +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -0,0 +1,1561 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/workqueue.h> +#include <linux/kthread.h> +#include <asm/unaligned.h> +#include <target/target_core_base.h> +#include <target/target_core_fabric.h> +#include "cxgbit.h" + +struct sge_opaque_hdr { + void *dev; + dma_addr_t addr[MAX_SKB_FRAGS + 1]; +}; + +static const u8 cxgbit_digest_len[] = {0, 4, 4, 8}; + +#define TX_HDR_LEN (sizeof(struct sge_opaque_hdr) + \ + sizeof(struct fw_ofld_tx_data_wr)) + +static struct sk_buff * +__cxgbit_alloc_skb(struct cxgbit_sock *csk, u32 len, bool iso) +{ + struct sk_buff *skb = NULL; + u8 submode = 0; + int errcode; + static const u32 hdr_len = TX_HDR_LEN + ISCSI_HDR_LEN; + + if (len) { + skb = alloc_skb_with_frags(hdr_len, len, + 0, &errcode, + GFP_KERNEL); + if (!skb) + return NULL; + + skb_reserve(skb, TX_HDR_LEN); + skb_reset_transport_header(skb); + __skb_put(skb, ISCSI_HDR_LEN); + skb->data_len = len; + skb->len += len; + submode |= (csk->submode & CXGBIT_SUBMODE_DCRC); + + } else { + u32 iso_len = iso ? sizeof(struct cpl_tx_data_iso) : 0; + + skb = alloc_skb(hdr_len + iso_len, GFP_KERNEL); + if (!skb) + return NULL; + + skb_reserve(skb, TX_HDR_LEN + iso_len); + skb_reset_transport_header(skb); + __skb_put(skb, ISCSI_HDR_LEN); + } + + submode |= (csk->submode & CXGBIT_SUBMODE_HCRC); + cxgbit_skcb_submode(skb) = submode; + cxgbit_skcb_tx_extralen(skb) = cxgbit_digest_len[submode]; + cxgbit_skcb_flags(skb) |= SKCBF_TX_NEED_HDR; + return skb; +} + +static struct sk_buff *cxgbit_alloc_skb(struct cxgbit_sock *csk, u32 len) +{ + return __cxgbit_alloc_skb(csk, len, false); +} + +/* + * cxgbit_is_ofld_imm - check whether a packet can be sent as immediate data + * @skb: the packet + * + * Returns true if a packet can be sent as an offload WR with immediate + * data. We currently use the same limit as for Ethernet packets. + */ +static int cxgbit_is_ofld_imm(const struct sk_buff *skb) +{ + int length = skb->len; + + if (likely(cxgbit_skcb_flags(skb) & SKCBF_TX_NEED_HDR)) + length += sizeof(struct fw_ofld_tx_data_wr); + + if (likely(cxgbit_skcb_flags(skb) & SKCBF_TX_ISO)) + length += sizeof(struct cpl_tx_data_iso); + +#define MAX_IMM_TX_PKT_LEN 256 + return length <= MAX_IMM_TX_PKT_LEN; +} + +/* + * cxgbit_sgl_len - calculates the size of an SGL of the given capacity + * @n: the number of SGL entries + * Calculates the number of flits needed for a scatter/gather list that + * can hold the given number of entries. + */ +static inline unsigned int cxgbit_sgl_len(unsigned int n) +{ + n--; + return (3 * n) / 2 + (n & 1) + 2; +} + +/* + * cxgbit_calc_tx_flits_ofld - calculate # of flits for an offload packet + * @skb: the packet + * + * Returns the number of flits needed for the given offload packet. + * These packets are already fully constructed and no additional headers + * will be added. + */ +static unsigned int cxgbit_calc_tx_flits_ofld(const struct sk_buff *skb) +{ + unsigned int flits, cnt; + + if (cxgbit_is_ofld_imm(skb)) + return DIV_ROUND_UP(skb->len, 8); + flits = skb_transport_offset(skb) / 8; + cnt = skb_shinfo(skb)->nr_frags; + if (skb_tail_pointer(skb) != skb_transport_header(skb)) + cnt++; + return flits + cxgbit_sgl_len(cnt); +} + +#define CXGBIT_ISO_FSLICE 0x1 +#define CXGBIT_ISO_LSLICE 0x2 +static void +cxgbit_cpl_tx_data_iso(struct sk_buff *skb, struct cxgbit_iso_info *iso_info) +{ + struct cpl_tx_data_iso *cpl; + unsigned int submode = cxgbit_skcb_submode(skb); + unsigned int fslice = !!(iso_info->flags & CXGBIT_ISO_FSLICE); + unsigned int lslice = !!(iso_info->flags & CXGBIT_ISO_LSLICE); + + cpl = (struct cpl_tx_data_iso *)__skb_push(skb, sizeof(*cpl)); + + cpl->op_to_scsi = htonl(CPL_TX_DATA_ISO_OP_V(CPL_TX_DATA_ISO) | + CPL_TX_DATA_ISO_FIRST_V(fslice) | + CPL_TX_DATA_ISO_LAST_V(lslice) | + CPL_TX_DATA_ISO_CPLHDRLEN_V(0) | + CPL_TX_DATA_ISO_HDRCRC_V(submode & 1) | + CPL_TX_DATA_ISO_PLDCRC_V(((submode >> 1) & 1)) | + CPL_TX_DATA_ISO_IMMEDIATE_V(0) | + CPL_TX_DATA_ISO_SCSI_V(2)); + + cpl->ahs_len = 0; + cpl->mpdu = htons(DIV_ROUND_UP(iso_info->mpdu, 4)); + cpl->burst_size = htonl(DIV_ROUND_UP(iso_info->burst_len, 4)); + cpl->len = htonl(iso_info->len); + cpl->reserved2_seglen_offset = htonl(0); + cpl->datasn_offset = htonl(0); + cpl->buffer_offset = htonl(0); + cpl->reserved3 = 0; + + __skb_pull(skb, sizeof(*cpl)); +} + +static void +cxgbit_tx_data_wr(struct cxgbit_sock *csk, struct sk_buff *skb, u32 dlen, + u32 len, u32 credits, u32 compl) +{ + struct fw_ofld_tx_data_wr *req; + u32 submode = cxgbit_skcb_submode(skb); + u32 wr_ulp_mode = 0; + u32 hdr_size = sizeof(*req); + u32 opcode = FW_OFLD_TX_DATA_WR; + u32 immlen = 0; + u32 force = TX_FORCE_V(!submode); + + if (cxgbit_skcb_flags(skb) & SKCBF_TX_ISO) { + opcode = FW_ISCSI_TX_DATA_WR; + immlen += sizeof(struct cpl_tx_data_iso); + hdr_size += sizeof(struct cpl_tx_data_iso); + submode |= 8; + } + + if (cxgbit_is_ofld_imm(skb)) + immlen += dlen; + + req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, + hdr_size); + req->op_to_immdlen = cpu_to_be32(FW_WR_OP_V(opcode) | + FW_WR_COMPL_V(compl) | + FW_WR_IMMDLEN_V(immlen)); + req->flowid_len16 = cpu_to_be32(FW_WR_FLOWID_V(csk->tid) | + FW_WR_LEN16_V(credits)); + req->plen = htonl(len); + wr_ulp_mode = FW_OFLD_TX_DATA_WR_ULPMODE_V(ULP_MODE_ISCSI) | + FW_OFLD_TX_DATA_WR_ULPSUBMODE_V(submode); + + req->tunnel_to_proxy = htonl((wr_ulp_mode) | force | + FW_OFLD_TX_DATA_WR_SHOVE_V(skb_peek(&csk->txq) ? 0 : 1)); +} + +static void cxgbit_arp_failure_skb_discard(void *handle, struct sk_buff *skb) +{ + kfree_skb(skb); +} + +void cxgbit_push_tx_frames(struct cxgbit_sock *csk) +{ + struct sk_buff *skb; + + while (csk->wr_cred && ((skb = skb_peek(&csk->txq)) != NULL)) { + u32 dlen = skb->len; + u32 len = skb->len; + u32 credits_needed; + u32 compl = 0; + u32 flowclen16 = 0; + u32 iso_cpl_len = 0; + + if (cxgbit_skcb_flags(skb) & SKCBF_TX_ISO) + iso_cpl_len = sizeof(struct cpl_tx_data_iso); + + if (cxgbit_is_ofld_imm(skb)) + credits_needed = DIV_ROUND_UP(dlen + iso_cpl_len, 16); + else + credits_needed = DIV_ROUND_UP((8 * + cxgbit_calc_tx_flits_ofld(skb)) + + iso_cpl_len, 16); + + if (likely(cxgbit_skcb_flags(skb) & SKCBF_TX_NEED_HDR)) + credits_needed += DIV_ROUND_UP( + sizeof(struct fw_ofld_tx_data_wr), 16); + /* + * Assumes the initial credits is large enough to support + * fw_flowc_wr plus largest possible first payload + */ + + if (!test_and_set_bit(CSK_TX_DATA_SENT, &csk->com.flags)) { + flowclen16 = cxgbit_send_tx_flowc_wr(csk); + csk->wr_cred -= flowclen16; + csk->wr_una_cred += flowclen16; + } + + if (csk->wr_cred < credits_needed) { + pr_debug("csk 0x%p, skb %u/%u, wr %d < %u.\n", + csk, skb->len, skb->data_len, + credits_needed, csk->wr_cred); + break; + } + __skb_unlink(skb, &csk->txq); + set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); + skb->csum = credits_needed + flowclen16; + csk->wr_cred -= credits_needed; + csk->wr_una_cred += credits_needed; + + pr_debug("csk 0x%p, skb %u/%u, wr %d, left %u, unack %u.\n", + csk, skb->len, skb->data_len, credits_needed, + csk->wr_cred, csk->wr_una_cred); + + if (likely(cxgbit_skcb_flags(skb) & SKCBF_TX_NEED_HDR)) { + len += cxgbit_skcb_tx_extralen(skb); + + if ((csk->wr_una_cred >= (csk->wr_max_cred / 2)) || + (!before(csk->write_seq, + csk->snd_una + csk->snd_win))) { + compl = 1; + csk->wr_una_cred = 0; + } + + cxgbit_tx_data_wr(csk, skb, dlen, len, credits_needed, + compl); + csk->snd_nxt += len; + + } else if ((cxgbit_skcb_flags(skb) & SKCBF_TX_FLAG_COMPL) || + (csk->wr_una_cred >= (csk->wr_max_cred / 2))) { + struct cpl_close_con_req *req = + (struct cpl_close_con_req *)skb->data; + req->wr.wr_hi |= htonl(FW_WR_COMPL_F); + csk->wr_una_cred = 0; + } + + cxgbit_sock_enqueue_wr(csk, skb); + t4_set_arp_err_handler(skb, csk, + cxgbit_arp_failure_skb_discard); + + pr_debug("csk 0x%p,%u, skb 0x%p, %u.\n", + csk, csk->tid, skb, len); + + cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t); + } +} + +static bool cxgbit_lock_sock(struct cxgbit_sock *csk) +{ + spin_lock_bh(&csk->lock); + + if (before(csk->write_seq, csk->snd_una + csk->snd_win)) + csk->lock_owner = true; + + spin_unlock_bh(&csk->lock); + + return csk->lock_owner; +} + +static void cxgbit_unlock_sock(struct cxgbit_sock *csk) +{ + struct sk_buff_head backlogq; + struct sk_buff *skb; + void (*fn)(struct cxgbit_sock *, struct sk_buff *); + + skb_queue_head_init(&backlogq); + + spin_lock_bh(&csk->lock); + while (skb_queue_len(&csk->backlogq)) { + skb_queue_splice_init(&csk->backlogq, &backlogq); + spin_unlock_bh(&csk->lock); + + while ((skb = __skb_dequeue(&backlogq))) { + fn = cxgbit_skcb_rx_backlog_fn(skb); + fn(csk, skb); + } + + spin_lock_bh(&csk->lock); + } + + csk->lock_owner = false; + spin_unlock_bh(&csk->lock); +} + +static int cxgbit_queue_skb(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + int ret = 0; + + wait_event_interruptible(csk->ack_waitq, cxgbit_lock_sock(csk)); + + if (unlikely((csk->com.state != CSK_STATE_ESTABLISHED) || + signal_pending(current))) { + __kfree_skb(skb); + __skb_queue_purge(&csk->ppodq); + ret = -1; + spin_lock_bh(&csk->lock); + if (csk->lock_owner) { + spin_unlock_bh(&csk->lock); + goto unlock; + } + spin_unlock_bh(&csk->lock); + return ret; + } + + csk->write_seq += skb->len + + cxgbit_skcb_tx_extralen(skb); + + skb_queue_splice_tail_init(&csk->ppodq, &csk->txq); + __skb_queue_tail(&csk->txq, skb); + cxgbit_push_tx_frames(csk); + +unlock: + cxgbit_unlock_sock(csk); + return ret; +} + +static int +cxgbit_map_skb(struct iscsi_cmd *cmd, struct sk_buff *skb, u32 data_offset, + u32 data_length) +{ + u32 i = 0, nr_frags = MAX_SKB_FRAGS; + u32 padding = ((-data_length) & 3); + struct scatterlist *sg; + struct page *page; + unsigned int page_off; + + if (padding) + nr_frags--; + + /* + * We know each entry in t_data_sg contains a page. + */ + sg = &cmd->se_cmd.t_data_sg[data_offset / PAGE_SIZE]; + page_off = (data_offset % PAGE_SIZE); + + while (data_length && (i < nr_frags)) { + u32 cur_len = min_t(u32, data_length, sg->length - page_off); + + page = sg_page(sg); + + get_page(page); + skb_fill_page_desc(skb, i, page, sg->offset + page_off, + cur_len); + skb->data_len += cur_len; + skb->len += cur_len; + skb->truesize += cur_len; + + data_length -= cur_len; + page_off = 0; + sg = sg_next(sg); + i++; + } + + if (data_length) + return -1; + + if (padding) { + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return -1; + skb_fill_page_desc(skb, i, page, 0, padding); + skb->data_len += padding; + skb->len += padding; + skb->truesize += padding; + } + + return 0; +} + +static int +cxgbit_tx_datain_iso(struct cxgbit_sock *csk, struct iscsi_cmd *cmd, + struct iscsi_datain_req *dr) +{ + struct iscsi_conn *conn = csk->conn; + struct sk_buff *skb; + struct iscsi_datain datain; + struct cxgbit_iso_info iso_info; + u32 data_length = cmd->se_cmd.data_length; + u32 mrdsl = conn->conn_ops->MaxRecvDataSegmentLength; + u32 num_pdu, plen, tx_data = 0; + bool task_sense = !!(cmd->se_cmd.se_cmd_flags & + SCF_TRANSPORT_TASK_SENSE); + bool set_statsn = false; + int ret = -1; + + while (data_length) { + num_pdu = (data_length + mrdsl - 1) / mrdsl; + if (num_pdu > csk->max_iso_npdu) + num_pdu = csk->max_iso_npdu; + + plen = num_pdu * mrdsl; + if (plen > data_length) + plen = data_length; + + skb = __cxgbit_alloc_skb(csk, 0, true); + if (unlikely(!skb)) + return -ENOMEM; + + memset(skb->data, 0, ISCSI_HDR_LEN); + cxgbit_skcb_flags(skb) |= SKCBF_TX_ISO; + cxgbit_skcb_submode(skb) |= (csk->submode & + CXGBIT_SUBMODE_DCRC); + cxgbit_skcb_tx_extralen(skb) = (num_pdu * + cxgbit_digest_len[cxgbit_skcb_submode(skb)]) + + ((num_pdu - 1) * ISCSI_HDR_LEN); + + memset(&datain, 0, sizeof(struct iscsi_datain)); + memset(&iso_info, 0, sizeof(iso_info)); + + if (!tx_data) + iso_info.flags |= CXGBIT_ISO_FSLICE; + + if (!(data_length - plen)) { + iso_info.flags |= CXGBIT_ISO_LSLICE; + if (!task_sense) { + datain.flags = ISCSI_FLAG_DATA_STATUS; + iscsit_increment_maxcmdsn(cmd, conn->sess); + cmd->stat_sn = conn->stat_sn++; + set_statsn = true; + } + } + + iso_info.burst_len = num_pdu * mrdsl; + iso_info.mpdu = mrdsl; + iso_info.len = ISCSI_HDR_LEN + plen; + + cxgbit_cpl_tx_data_iso(skb, &iso_info); + + datain.offset = tx_data; + datain.data_sn = cmd->data_sn - 1; + + iscsit_build_datain_pdu(cmd, conn, &datain, + (struct iscsi_data_rsp *)skb->data, + set_statsn); + + ret = cxgbit_map_skb(cmd, skb, tx_data, plen); + if (unlikely(ret)) { + __kfree_skb(skb); + goto out; + } + + ret = cxgbit_queue_skb(csk, skb); + if (unlikely(ret)) + goto out; + + tx_data += plen; + data_length -= plen; + + cmd->read_data_done += plen; + cmd->data_sn += num_pdu; + } + + dr->dr_complete = DATAIN_COMPLETE_NORMAL; + + return 0; + +out: + return ret; +} + +static int +cxgbit_tx_datain(struct cxgbit_sock *csk, struct iscsi_cmd *cmd, + const struct iscsi_datain *datain) +{ + struct sk_buff *skb; + int ret = 0; + + skb = cxgbit_alloc_skb(csk, 0); + if (unlikely(!skb)) + return -ENOMEM; + + memcpy(skb->data, cmd->pdu, ISCSI_HDR_LEN); + + if (datain->length) { + cxgbit_skcb_submode(skb) |= (csk->submode & + CXGBIT_SUBMODE_DCRC); + cxgbit_skcb_tx_extralen(skb) = + cxgbit_digest_len[cxgbit_skcb_submode(skb)]; + } + + ret = cxgbit_map_skb(cmd, skb, datain->offset, datain->length); + if (ret < 0) { + __kfree_skb(skb); + return ret; + } + + return cxgbit_queue_skb(csk, skb); +} + +static int +cxgbit_xmit_datain_pdu(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct iscsi_datain_req *dr, + const struct iscsi_datain *datain) +{ + struct cxgbit_sock *csk = conn->context; + u32 data_length = cmd->se_cmd.data_length; + u32 padding = ((-data_length) & 3); + u32 mrdsl = conn->conn_ops->MaxRecvDataSegmentLength; + + if ((data_length > mrdsl) && (!dr->recovery) && + (!padding) && (!datain->offset) && csk->max_iso_npdu) { + atomic_long_add(data_length - datain->length, + &conn->sess->tx_data_octets); + return cxgbit_tx_datain_iso(csk, cmd, dr); + } + + return cxgbit_tx_datain(csk, cmd, datain); +} + +static int +cxgbit_xmit_nondatain_pdu(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + const void *data_buf, u32 data_buf_len) +{ + struct cxgbit_sock *csk = conn->context; + struct sk_buff *skb; + u32 padding = ((-data_buf_len) & 3); + + skb = cxgbit_alloc_skb(csk, data_buf_len + padding); + if (unlikely(!skb)) + return -ENOMEM; + + memcpy(skb->data, cmd->pdu, ISCSI_HDR_LEN); + + if (data_buf_len) { + u32 pad_bytes = 0; + + skb_store_bits(skb, ISCSI_HDR_LEN, data_buf, data_buf_len); + + if (padding) + skb_store_bits(skb, ISCSI_HDR_LEN + data_buf_len, + &pad_bytes, padding); + } + + cxgbit_skcb_tx_extralen(skb) = cxgbit_digest_len[ + cxgbit_skcb_submode(skb)]; + + return cxgbit_queue_skb(csk, skb); +} + +int +cxgbit_xmit_pdu(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct iscsi_datain_req *dr, const void *buf, u32 buf_len) +{ + if (dr) + return cxgbit_xmit_datain_pdu(conn, cmd, dr, buf); + else + return cxgbit_xmit_nondatain_pdu(conn, cmd, buf, buf_len); +} + +int cxgbit_validate_params(struct iscsi_conn *conn) +{ + struct cxgbit_sock *csk = conn->context; + struct cxgbit_device *cdev = csk->com.cdev; + struct iscsi_param *param; + u32 max_xmitdsl; + + param = iscsi_find_param_from_key(MAXXMITDATASEGMENTLENGTH, + conn->param_list); + if (!param) + return -1; + + if (kstrtou32(param->value, 0, &max_xmitdsl) < 0) + return -1; + + if (max_xmitdsl > cdev->mdsl) { + if (iscsi_change_param_sprintf( + conn, "MaxXmitDataSegmentLength=%u", cdev->mdsl)) + return -1; + } + + return 0; +} + +static int cxgbit_set_digest(struct cxgbit_sock *csk) +{ + struct iscsi_conn *conn = csk->conn; + struct iscsi_param *param; + + param = iscsi_find_param_from_key(HEADERDIGEST, conn->param_list); + if (!param) { + pr_err("param not found key %s\n", HEADERDIGEST); + return -1; + } + + if (!strcmp(param->value, CRC32C)) + csk->submode |= CXGBIT_SUBMODE_HCRC; + + param = iscsi_find_param_from_key(DATADIGEST, conn->param_list); + if (!param) { + csk->submode = 0; + pr_err("param not found key %s\n", DATADIGEST); + return -1; + } + + if (!strcmp(param->value, CRC32C)) + csk->submode |= CXGBIT_SUBMODE_DCRC; + + if (cxgbit_setup_conn_digest(csk)) { + csk->submode = 0; + return -1; + } + + return 0; +} + +static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk) +{ + struct iscsi_conn *conn = csk->conn; + struct iscsi_conn_ops *conn_ops = conn->conn_ops; + struct iscsi_param *param; + u32 mrdsl, mbl; + u32 max_npdu, max_iso_npdu; + + if (conn->login->leading_connection) { + param = iscsi_find_param_from_key(DATASEQUENCEINORDER, + conn->param_list); + if (!param) { + pr_err("param not found key %s\n", DATASEQUENCEINORDER); + return -1; + } + + if (strcmp(param->value, YES)) + return 0; + + param = iscsi_find_param_from_key(DATAPDUINORDER, + conn->param_list); + if (!param) { + pr_err("param not found key %s\n", DATAPDUINORDER); + return -1; + } + + if (strcmp(param->value, YES)) + return 0; + + param = iscsi_find_param_from_key(MAXBURSTLENGTH, + conn->param_list); + if (!param) { + pr_err("param not found key %s\n", MAXBURSTLENGTH); + return -1; + } + + if (kstrtou32(param->value, 0, &mbl) < 0) + return -1; + } else { + if (!conn->sess->sess_ops->DataSequenceInOrder) + return 0; + if (!conn->sess->sess_ops->DataPDUInOrder) + return 0; + + mbl = conn->sess->sess_ops->MaxBurstLength; + } + + mrdsl = conn_ops->MaxRecvDataSegmentLength; + max_npdu = mbl / mrdsl; + + max_iso_npdu = CXGBIT_MAX_ISO_PAYLOAD / + (ISCSI_HDR_LEN + mrdsl + + cxgbit_digest_len[csk->submode]); + + csk->max_iso_npdu = min(max_npdu, max_iso_npdu); + + if (csk->max_iso_npdu <= 1) + csk->max_iso_npdu = 0; + + return 0; +} + +static int cxgbit_set_params(struct iscsi_conn *conn) +{ + struct cxgbit_sock *csk = conn->context; + struct cxgbit_device *cdev = csk->com.cdev; + struct cxgbi_ppm *ppm = *csk->com.cdev->lldi.iscsi_ppm; + struct iscsi_conn_ops *conn_ops = conn->conn_ops; + struct iscsi_param *param; + u8 erl; + + if (conn_ops->MaxRecvDataSegmentLength > cdev->mdsl) + conn_ops->MaxRecvDataSegmentLength = cdev->mdsl; + + if (conn->login->leading_connection) { + param = iscsi_find_param_from_key(ERRORRECOVERYLEVEL, + conn->param_list); + if (!param) { + pr_err("param not found key %s\n", ERRORRECOVERYLEVEL); + return -1; + } + if (kstrtou8(param->value, 0, &erl) < 0) + return -1; + } else { + erl = conn->sess->sess_ops->ErrorRecoveryLevel; + } + + if (!erl) { + if (test_bit(CDEV_ISO_ENABLE, &cdev->flags)) { + if (cxgbit_set_iso_npdu(csk)) + return -1; + } + + if (test_bit(CDEV_DDP_ENABLE, &cdev->flags)) { + if (cxgbit_setup_conn_pgidx(csk, + ppm->tformat.pgsz_idx_dflt)) + return -1; + set_bit(CSK_DDP_ENABLE, &csk->com.flags); + } + } + + if (cxgbit_set_digest(csk)) + return -1; + + return 0; +} + +int +cxgbit_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, + u32 length) +{ + struct cxgbit_sock *csk = conn->context; + struct sk_buff *skb; + u32 padding_buf = 0; + u8 padding = ((-length) & 3); + + skb = cxgbit_alloc_skb(csk, length + padding); + if (!skb) + return -ENOMEM; + skb_store_bits(skb, 0, login->rsp, ISCSI_HDR_LEN); + skb_store_bits(skb, ISCSI_HDR_LEN, login->rsp_buf, length); + + if (padding) + skb_store_bits(skb, ISCSI_HDR_LEN + length, + &padding_buf, padding); + + if (login->login_complete) { + if (cxgbit_set_params(conn)) { + kfree_skb(skb); + return -1; + } + + set_bit(CSK_LOGIN_DONE, &csk->com.flags); + } + + if (cxgbit_queue_skb(csk, skb)) + return -1; + + if ((!login->login_complete) && (!login->login_failed)) + schedule_delayed_work(&conn->login_work, 0); + + return 0; +} + +static void +cxgbit_skb_copy_to_sg(struct sk_buff *skb, struct scatterlist *sg, + unsigned int nents) +{ + struct skb_seq_state st; + const u8 *buf; + unsigned int consumed = 0, buf_len; + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_rx_pdu_cb(skb); + + skb_prepare_seq_read(skb, pdu_cb->doffset, + pdu_cb->doffset + pdu_cb->dlen, + &st); + + while (true) { + buf_len = skb_seq_read(consumed, &buf, &st); + if (!buf_len) { + skb_abort_seq_read(&st); + break; + } + + consumed += sg_pcopy_from_buffer(sg, nents, (void *)buf, + buf_len, consumed); + } +} + +static struct iscsi_cmd *cxgbit_allocate_cmd(struct cxgbit_sock *csk) +{ + struct iscsi_conn *conn = csk->conn; + struct cxgbi_ppm *ppm = cdev2ppm(csk->com.cdev); + struct cxgbit_cmd *ccmd; + struct iscsi_cmd *cmd; + + cmd = iscsit_allocate_cmd(conn, TASK_INTERRUPTIBLE); + if (!cmd) { + pr_err("Unable to allocate iscsi_cmd + cxgbit_cmd\n"); + return NULL; + } + + ccmd = iscsit_priv_cmd(cmd); + ccmd->ttinfo.tag = ppm->tformat.no_ddp_mask; + ccmd->setup_ddp = true; + + return cmd; +} + +static int +cxgbit_handle_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, + u32 length) +{ + struct iscsi_conn *conn = cmd->conn; + struct cxgbit_sock *csk = conn->context; + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_rx_pdu_cb(csk->skb); + + if (pdu_cb->flags & PDUCBF_RX_DCRC_ERR) { + pr_err("ImmediateData CRC32C DataDigest error\n"); + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Unable to recover from" + " Immediate Data digest failure while" + " in ERL=0.\n"); + iscsit_reject_cmd(cmd, ISCSI_REASON_DATA_DIGEST_ERROR, + (unsigned char *)hdr); + return IMMEDIATE_DATA_CANNOT_RECOVER; + } + + iscsit_reject_cmd(cmd, ISCSI_REASON_DATA_DIGEST_ERROR, + (unsigned char *)hdr); + return IMMEDIATE_DATA_ERL1_CRC_FAILURE; + } + + if (cmd->se_cmd.se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) { + struct cxgbit_cmd *ccmd = iscsit_priv_cmd(cmd); + struct skb_shared_info *ssi = skb_shinfo(csk->skb); + skb_frag_t *dfrag = &ssi->frags[pdu_cb->dfrag_idx]; + + sg_init_table(&ccmd->sg, 1); + sg_set_page(&ccmd->sg, dfrag->page.p, skb_frag_size(dfrag), + dfrag->page_offset); + get_page(dfrag->page.p); + + cmd->se_cmd.t_data_sg = &ccmd->sg; + cmd->se_cmd.t_data_nents = 1; + + ccmd->release = true; + } else { + struct scatterlist *sg = &cmd->se_cmd.t_data_sg[0]; + u32 sg_nents = max(1UL, DIV_ROUND_UP(pdu_cb->dlen, PAGE_SIZE)); + + cxgbit_skb_copy_to_sg(csk->skb, sg, sg_nents); + } + + cmd->write_data_done += pdu_cb->dlen; + + if (cmd->write_data_done == cmd->se_cmd.data_length) { + spin_lock_bh(&cmd->istate_lock); + cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT; + cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; + spin_unlock_bh(&cmd->istate_lock); + } + + return IMMEDIATE_DATA_NORMAL_OPERATION; +} + +static int +cxgbit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, + bool dump_payload) +{ + struct iscsi_conn *conn = cmd->conn; + int cmdsn_ret = 0, immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; + /* + * Special case for Unsupported SAM WRITE Opcodes and ImmediateData=Yes. + */ + if (dump_payload) + goto after_immediate_data; + + immed_ret = cxgbit_handle_immediate_data(cmd, hdr, + cmd->first_burst_len); +after_immediate_data: + if (immed_ret == IMMEDIATE_DATA_NORMAL_OPERATION) { + /* + * A PDU/CmdSN carrying Immediate Data passed + * DataCRC, check against ExpCmdSN/MaxCmdSN if + * Immediate Bit is not set. + */ + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, + (unsigned char *)hdr, + hdr->cmdsn); + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return -1; + + if (cmd->sense_reason || cmdsn_ret == CMDSN_LOWER_THAN_EXP) { + target_put_sess_cmd(&cmd->se_cmd); + return 0; + } else if (cmd->unsolicited_data) { + iscsit_set_unsoliticed_dataout(cmd); + } + + } else if (immed_ret == IMMEDIATE_DATA_ERL1_CRC_FAILURE) { + /* + * Immediate Data failed DataCRC and ERL>=1, + * silently drop this PDU and let the initiator + * plug the CmdSN gap. + * + * FIXME: Send Unsolicited NOPIN with reserved + * TTT here to help the initiator figure out + * the missing CmdSN, although they should be + * intelligent enough to determine the missing + * CmdSN and issue a retry to plug the sequence. + */ + cmd->i_state = ISTATE_REMOVE; + iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); + } else /* immed_ret == IMMEDIATE_DATA_CANNOT_RECOVER */ + return -1; + + return 0; +} + +static int +cxgbit_handle_scsi_cmd(struct cxgbit_sock *csk, struct iscsi_cmd *cmd) +{ + struct iscsi_conn *conn = csk->conn; + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_rx_pdu_cb(csk->skb); + struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)pdu_cb->hdr; + int rc; + bool dump_payload = false; + + rc = iscsit_setup_scsi_cmd(conn, cmd, (unsigned char *)hdr); + if (rc < 0) + return rc; + + if (pdu_cb->dlen && (pdu_cb->dlen == cmd->se_cmd.data_length) && + (pdu_cb->nr_dfrags == 1)) + cmd->se_cmd.se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; + + rc = iscsit_process_scsi_cmd(conn, cmd, hdr); + if (rc < 0) + return 0; + else if (rc > 0) + dump_payload = true; + + if (!pdu_cb->dlen) + return 0; + + return cxgbit_get_immediate_data(cmd, hdr, dump_payload); +} + +static int cxgbit_handle_iscsi_dataout(struct cxgbit_sock *csk) +{ + struct scatterlist *sg_start; + struct iscsi_conn *conn = csk->conn; + struct iscsi_cmd *cmd = NULL; + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_rx_pdu_cb(csk->skb); + struct iscsi_data *hdr = (struct iscsi_data *)pdu_cb->hdr; + u32 data_offset = be32_to_cpu(hdr->offset); + u32 data_len = pdu_cb->dlen; + int rc, sg_nents, sg_off; + bool dcrc_err = false; + + rc = iscsit_check_dataout_hdr(conn, (unsigned char *)hdr, &cmd); + if (rc < 0) + return rc; + else if (!cmd) + return 0; + + if (pdu_cb->flags & PDUCBF_RX_DCRC_ERR) { + pr_err("ITT: 0x%08x, Offset: %u, Length: %u," + " DataSN: 0x%08x\n", + hdr->itt, hdr->offset, data_len, + hdr->datasn); + + dcrc_err = true; + goto check_payload; + } + + pr_debug("DataOut data_len: %u, " + "write_data_done: %u, data_length: %u\n", + data_len, cmd->write_data_done, + cmd->se_cmd.data_length); + + if (!(pdu_cb->flags & PDUCBF_RX_DATA_DDPD)) { + sg_off = data_offset / PAGE_SIZE; + sg_start = &cmd->se_cmd.t_data_sg[sg_off]; + sg_nents = max(1UL, DIV_ROUND_UP(data_len, PAGE_SIZE)); + + cxgbit_skb_copy_to_sg(csk->skb, sg_start, sg_nents); + } + +check_payload: + + rc = iscsit_check_dataout_payload(cmd, hdr, dcrc_err); + if (rc < 0) + return rc; + + return 0; +} + +static int cxgbit_handle_nop_out(struct cxgbit_sock *csk, struct iscsi_cmd *cmd) +{ + struct iscsi_conn *conn = csk->conn; + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_rx_pdu_cb(csk->skb); + struct iscsi_nopout *hdr = (struct iscsi_nopout *)pdu_cb->hdr; + unsigned char *ping_data = NULL; + u32 payload_length = pdu_cb->dlen; + int ret; + + ret = iscsit_setup_nop_out(conn, cmd, hdr); + if (ret < 0) + return 0; + + if (pdu_cb->flags & PDUCBF_RX_DCRC_ERR) { + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Unable to recover from" + " NOPOUT Ping DataCRC failure while in" + " ERL=0.\n"); + ret = -1; + goto out; + } else { + /* + * drop this PDU and let the + * initiator plug the CmdSN gap. + */ + pr_info("Dropping NOPOUT" + " Command CmdSN: 0x%08x due to" + " DataCRC error.\n", hdr->cmdsn); + ret = 0; + goto out; + } + } + + /* + * Handle NOP-OUT payload for traditional iSCSI sockets + */ + if (payload_length && hdr->ttt == cpu_to_be32(0xFFFFFFFF)) { + ping_data = kzalloc(payload_length + 1, GFP_KERNEL); + if (!ping_data) { + pr_err("Unable to allocate memory for" + " NOPOUT ping data.\n"); + ret = -1; + goto out; + } + + skb_copy_bits(csk->skb, pdu_cb->doffset, + ping_data, payload_length); + + ping_data[payload_length] = '\0'; + /* + * Attach ping data to struct iscsi_cmd->buf_ptr. + */ + cmd->buf_ptr = ping_data; + cmd->buf_ptr_size = payload_length; + + pr_debug("Got %u bytes of NOPOUT ping" + " data.\n", payload_length); + pr_debug("Ping Data: \"%s\"\n", ping_data); + } + + return iscsit_process_nop_out(conn, cmd, hdr); +out: + if (cmd) + iscsit_free_cmd(cmd, false); + return ret; +} + +static int +cxgbit_handle_text_cmd(struct cxgbit_sock *csk, struct iscsi_cmd *cmd) +{ + struct iscsi_conn *conn = csk->conn; + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_rx_pdu_cb(csk->skb); + struct iscsi_text *hdr = (struct iscsi_text *)pdu_cb->hdr; + u32 payload_length = pdu_cb->dlen; + int rc; + unsigned char *text_in = NULL; + + rc = iscsit_setup_text_cmd(conn, cmd, hdr); + if (rc < 0) + return rc; + + if (pdu_cb->flags & PDUCBF_RX_DCRC_ERR) { + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Unable to recover from" + " Text Data digest failure while in" + " ERL=0.\n"); + goto reject; + } else { + /* + * drop this PDU and let the + * initiator plug the CmdSN gap. + */ + pr_info("Dropping Text" + " Command CmdSN: 0x%08x due to" + " DataCRC error.\n", hdr->cmdsn); + return 0; + } + } + + if (payload_length) { + text_in = kzalloc(payload_length, GFP_KERNEL); + if (!text_in) { + pr_err("Unable to allocate text_in of payload_length: %u\n", + payload_length); + return -ENOMEM; + } + skb_copy_bits(csk->skb, pdu_cb->doffset, + text_in, payload_length); + + text_in[payload_length - 1] = '\0'; + + cmd->text_in_ptr = text_in; + } + + return iscsit_process_text_cmd(conn, cmd, hdr); + +reject: + return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, + pdu_cb->hdr); +} + +static int cxgbit_target_rx_opcode(struct cxgbit_sock *csk) +{ + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_rx_pdu_cb(csk->skb); + struct iscsi_hdr *hdr = (struct iscsi_hdr *)pdu_cb->hdr; + struct iscsi_conn *conn = csk->conn; + struct iscsi_cmd *cmd = NULL; + u8 opcode = (hdr->opcode & ISCSI_OPCODE_MASK); + int ret = -EINVAL; + + switch (opcode) { + case ISCSI_OP_SCSI_CMD: + cmd = cxgbit_allocate_cmd(csk); + if (!cmd) + goto reject; + + ret = cxgbit_handle_scsi_cmd(csk, cmd); + break; + case ISCSI_OP_SCSI_DATA_OUT: + ret = cxgbit_handle_iscsi_dataout(csk); + break; + case ISCSI_OP_NOOP_OUT: + if (hdr->ttt == cpu_to_be32(0xFFFFFFFF)) { + cmd = cxgbit_allocate_cmd(csk); + if (!cmd) + goto reject; + } + + ret = cxgbit_handle_nop_out(csk, cmd); + break; + case ISCSI_OP_SCSI_TMFUNC: + cmd = cxgbit_allocate_cmd(csk); + if (!cmd) + goto reject; + + ret = iscsit_handle_task_mgt_cmd(conn, cmd, + (unsigned char *)hdr); + break; + case ISCSI_OP_TEXT: + if (hdr->ttt != cpu_to_be32(0xFFFFFFFF)) { + cmd = iscsit_find_cmd_from_itt(conn, hdr->itt); + if (!cmd) + goto reject; + } else { + cmd = cxgbit_allocate_cmd(csk); + if (!cmd) + goto reject; + } + + ret = cxgbit_handle_text_cmd(csk, cmd); + break; + case ISCSI_OP_LOGOUT: + cmd = cxgbit_allocate_cmd(csk); + if (!cmd) + goto reject; + + ret = iscsit_handle_logout_cmd(conn, cmd, (unsigned char *)hdr); + if (ret > 0) + wait_for_completion_timeout(&conn->conn_logout_comp, + SECONDS_FOR_LOGOUT_COMP + * HZ); + break; + case ISCSI_OP_SNACK: + ret = iscsit_handle_snack(conn, (unsigned char *)hdr); + break; + default: + pr_err("Got unknown iSCSI OpCode: 0x%02x\n", opcode); + dump_stack(); + break; + } + + return ret; + +reject: + return iscsit_add_reject(conn, ISCSI_REASON_BOOKMARK_NO_RESOURCES, + (unsigned char *)hdr); + return ret; +} + +static int cxgbit_rx_opcode(struct cxgbit_sock *csk) +{ + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_rx_pdu_cb(csk->skb); + struct iscsi_conn *conn = csk->conn; + struct iscsi_hdr *hdr = pdu_cb->hdr; + u8 opcode; + + if (pdu_cb->flags & PDUCBF_RX_HCRC_ERR) { + atomic_long_inc(&conn->sess->conn_digest_errors); + goto transport_err; + } + + if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) + goto transport_err; + + opcode = hdr->opcode & ISCSI_OPCODE_MASK; + + if (conn->sess->sess_ops->SessionType && + ((!(opcode & ISCSI_OP_TEXT)) || + (!(opcode & ISCSI_OP_LOGOUT)))) { + pr_err("Received illegal iSCSI Opcode: 0x%02x" + " while in Discovery Session, rejecting.\n", opcode); + iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + (unsigned char *)hdr); + goto transport_err; + } + + if (cxgbit_target_rx_opcode(csk) < 0) + goto transport_err; + + return 0; + +transport_err: + return -1; +} + +static int cxgbit_rx_login_pdu(struct cxgbit_sock *csk) +{ + struct iscsi_conn *conn = csk->conn; + struct iscsi_login *login = conn->login; + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_rx_pdu_cb(csk->skb); + struct iscsi_login_req *login_req; + + login_req = (struct iscsi_login_req *)login->req; + memcpy(login_req, pdu_cb->hdr, sizeof(*login_req)); + + pr_debug("Got Login Command, Flags 0x%02x, ITT: 0x%08x," + " CmdSN: 0x%08x, ExpStatSN: 0x%08x, CID: %hu, Length: %u\n", + login_req->flags, login_req->itt, login_req->cmdsn, + login_req->exp_statsn, login_req->cid, pdu_cb->dlen); + /* + * Setup the initial iscsi_login values from the leading + * login request PDU. + */ + if (login->first_request) { + login_req = (struct iscsi_login_req *)login->req; + login->leading_connection = (!login_req->tsih) ? 1 : 0; + login->current_stage = ISCSI_LOGIN_CURRENT_STAGE( + login_req->flags); + login->version_min = login_req->min_version; + login->version_max = login_req->max_version; + memcpy(login->isid, login_req->isid, 6); + login->cmd_sn = be32_to_cpu(login_req->cmdsn); + login->init_task_tag = login_req->itt; + login->initial_exp_statsn = be32_to_cpu(login_req->exp_statsn); + login->cid = be16_to_cpu(login_req->cid); + login->tsih = be16_to_cpu(login_req->tsih); + } + + if (iscsi_target_check_login_request(conn, login) < 0) + return -1; + + memset(login->req_buf, 0, MAX_KEY_VALUE_PAIRS); + skb_copy_bits(csk->skb, pdu_cb->doffset, login->req_buf, pdu_cb->dlen); + + return 0; +} + +static int +cxgbit_process_iscsi_pdu(struct cxgbit_sock *csk, struct sk_buff *skb, int idx) +{ + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_skb_lro_pdu_cb(skb, idx); + int ret; + + cxgbit_rx_pdu_cb(skb) = pdu_cb; + + csk->skb = skb; + + if (!test_bit(CSK_LOGIN_DONE, &csk->com.flags)) { + ret = cxgbit_rx_login_pdu(csk); + set_bit(CSK_LOGIN_PDU_DONE, &csk->com.flags); + } else { + ret = cxgbit_rx_opcode(csk); + } + + return ret; +} + +static void cxgbit_lro_skb_dump(struct sk_buff *skb) +{ + struct skb_shared_info *ssi = skb_shinfo(skb); + struct cxgbit_lro_cb *lro_cb = cxgbit_skb_lro_cb(skb); + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_skb_lro_pdu_cb(skb, 0); + u8 i; + + pr_info("skb 0x%p, head 0x%p, 0x%p, len %u,%u, frags %u.\n", + skb, skb->head, skb->data, skb->len, skb->data_len, + ssi->nr_frags); + pr_info("skb 0x%p, lro_cb, csk 0x%p, pdu %u, %u.\n", + skb, lro_cb->csk, lro_cb->pdu_idx, lro_cb->pdu_totallen); + + for (i = 0; i < lro_cb->pdu_idx; i++, pdu_cb++) + pr_info("skb 0x%p, pdu %d, %u, f 0x%x, seq 0x%x, dcrc 0x%x, " + "frags %u.\n", + skb, i, pdu_cb->pdulen, pdu_cb->flags, pdu_cb->seq, + pdu_cb->ddigest, pdu_cb->frags); + for (i = 0; i < ssi->nr_frags; i++) + pr_info("skb 0x%p, frag %d, off %u, sz %u.\n", + skb, i, ssi->frags[i].page_offset, ssi->frags[i].size); +} + +static void cxgbit_lro_hskb_reset(struct cxgbit_sock *csk) +{ + struct sk_buff *skb = csk->lro_hskb; + struct skb_shared_info *ssi = skb_shinfo(skb); + u8 i; + + memset(skb->data, 0, LRO_SKB_MIN_HEADROOM); + for (i = 0; i < ssi->nr_frags; i++) + put_page(skb_frag_page(&ssi->frags[i])); + ssi->nr_frags = 0; +} + +static void +cxgbit_lro_skb_merge(struct cxgbit_sock *csk, struct sk_buff *skb, u8 pdu_idx) +{ + struct sk_buff *hskb = csk->lro_hskb; + struct cxgbit_lro_pdu_cb *hpdu_cb = cxgbit_skb_lro_pdu_cb(hskb, 0); + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_skb_lro_pdu_cb(skb, pdu_idx); + struct skb_shared_info *hssi = skb_shinfo(hskb); + struct skb_shared_info *ssi = skb_shinfo(skb); + unsigned int len = 0; + + if (pdu_cb->flags & PDUCBF_RX_HDR) { + hpdu_cb->flags = pdu_cb->flags; + hpdu_cb->seq = pdu_cb->seq; + hpdu_cb->hdr = pdu_cb->hdr; + hpdu_cb->hlen = pdu_cb->hlen; + + memcpy(&hssi->frags[0], &ssi->frags[pdu_cb->hfrag_idx], + sizeof(skb_frag_t)); + + get_page(skb_frag_page(&hssi->frags[0])); + hssi->nr_frags = 1; + hpdu_cb->frags = 1; + hpdu_cb->hfrag_idx = 0; + + len = hssi->frags[0].size; + hskb->len = len; + hskb->data_len = len; + hskb->truesize = len; + } + + if (pdu_cb->flags & PDUCBF_RX_DATA) { + u8 hfrag_idx = 1, i; + + hpdu_cb->flags |= pdu_cb->flags; + + len = 0; + for (i = 0; i < pdu_cb->nr_dfrags; hfrag_idx++, i++) { + memcpy(&hssi->frags[hfrag_idx], + &ssi->frags[pdu_cb->dfrag_idx + i], + sizeof(skb_frag_t)); + + get_page(skb_frag_page(&hssi->frags[hfrag_idx])); + + len += hssi->frags[hfrag_idx].size; + + hssi->nr_frags++; + hpdu_cb->frags++; + } + + hpdu_cb->dlen = pdu_cb->dlen; + hpdu_cb->doffset = hpdu_cb->hlen; + hpdu_cb->nr_dfrags = pdu_cb->nr_dfrags; + hpdu_cb->dfrag_idx = 1; + hskb->len += len; + hskb->data_len += len; + hskb->truesize += len; + } + + if (pdu_cb->flags & PDUCBF_RX_STATUS) { + hpdu_cb->flags |= pdu_cb->flags; + + if (hpdu_cb->flags & PDUCBF_RX_DATA) + hpdu_cb->flags &= ~PDUCBF_RX_DATA_DDPD; + + hpdu_cb->ddigest = pdu_cb->ddigest; + hpdu_cb->pdulen = pdu_cb->pdulen; + } +} + +static int cxgbit_process_lro_skb(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + struct cxgbit_lro_cb *lro_cb = cxgbit_skb_lro_cb(skb); + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_skb_lro_pdu_cb(skb, 0); + u8 pdu_idx = 0, last_idx = 0; + int ret = 0; + + if (!pdu_cb->complete) { + cxgbit_lro_skb_merge(csk, skb, 0); + + if (pdu_cb->flags & PDUCBF_RX_STATUS) { + struct sk_buff *hskb = csk->lro_hskb; + + ret = cxgbit_process_iscsi_pdu(csk, hskb, 0); + + cxgbit_lro_hskb_reset(csk); + + if (ret < 0) + goto out; + } + + pdu_idx = 1; + } + + if (lro_cb->pdu_idx) + last_idx = lro_cb->pdu_idx - 1; + + for (; pdu_idx <= last_idx; pdu_idx++) { + ret = cxgbit_process_iscsi_pdu(csk, skb, pdu_idx); + if (ret < 0) + goto out; + } + + if ((!lro_cb->complete) && lro_cb->pdu_idx) + cxgbit_lro_skb_merge(csk, skb, lro_cb->pdu_idx); + +out: + return ret; +} + +static int cxgbit_rx_lro_skb(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + struct cxgbit_lro_cb *lro_cb = cxgbit_skb_lro_cb(skb); + struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_skb_lro_pdu_cb(skb, 0); + int ret = -1; + + if ((pdu_cb->flags & PDUCBF_RX_HDR) && + (pdu_cb->seq != csk->rcv_nxt)) { + pr_info("csk 0x%p, tid 0x%x, seq 0x%x != 0x%x.\n", + csk, csk->tid, pdu_cb->seq, csk->rcv_nxt); + cxgbit_lro_skb_dump(skb); + return ret; + } + + csk->rcv_nxt += lro_cb->pdu_totallen; + + ret = cxgbit_process_lro_skb(csk, skb); + + csk->rx_credits += lro_cb->pdu_totallen; + + if (csk->rx_credits >= (csk->rcv_win / 4)) + cxgbit_rx_data_ack(csk); + + return ret; +} + +static int cxgbit_rx_skb(struct cxgbit_sock *csk, struct sk_buff *skb) +{ + int ret = -1; + + if (likely(cxgbit_skcb_flags(skb) & SKCBF_RX_LRO)) + ret = cxgbit_rx_lro_skb(csk, skb); + + __kfree_skb(skb); + return ret; +} + +static bool cxgbit_rxq_len(struct cxgbit_sock *csk, struct sk_buff_head *rxq) +{ + spin_lock_bh(&csk->rxq.lock); + if (skb_queue_len(&csk->rxq)) { + skb_queue_splice_init(&csk->rxq, rxq); + spin_unlock_bh(&csk->rxq.lock); + return true; + } + spin_unlock_bh(&csk->rxq.lock); + return false; +} + +static int cxgbit_wait_rxq(struct cxgbit_sock *csk) +{ + struct sk_buff *skb; + struct sk_buff_head rxq; + + skb_queue_head_init(&rxq); + + wait_event_interruptible(csk->waitq, cxgbit_rxq_len(csk, &rxq)); + + if (signal_pending(current)) + goto out; + + while ((skb = __skb_dequeue(&rxq))) { + if (cxgbit_rx_skb(csk, skb)) + goto out; + } + + return 0; +out: + __skb_queue_purge(&rxq); + return -1; +} + +int cxgbit_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) +{ + struct cxgbit_sock *csk = conn->context; + int ret = -1; + + while (!test_and_clear_bit(CSK_LOGIN_PDU_DONE, &csk->com.flags)) { + ret = cxgbit_wait_rxq(csk); + if (ret) { + clear_bit(CSK_LOGIN_PDU_DONE, &csk->com.flags); + break; + } + } + + return ret; +} + +void cxgbit_get_rx_pdu(struct iscsi_conn *conn) +{ + struct cxgbit_sock *csk = conn->context; + + while (!kthread_should_stop()) { + iscsit_thread_check_cpumask(conn, current, 0); + if (cxgbit_wait_rxq(csk)) + return; + } +} diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 961202f4e9aa..50f3d3a0dd7b 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -478,16 +478,16 @@ int iscsit_del_np(struct iscsi_np *np) return 0; } -static int iscsit_immediate_queue(struct iscsi_conn *, struct iscsi_cmd *, int); -static int iscsit_response_queue(struct iscsi_conn *, struct iscsi_cmd *, int); +static void iscsit_get_rx_pdu(struct iscsi_conn *); -static int iscsit_queue_rsp(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +int iscsit_queue_rsp(struct iscsi_conn *conn, struct iscsi_cmd *cmd) { iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); return 0; } +EXPORT_SYMBOL(iscsit_queue_rsp); -static void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) { bool scsi_cmd = (cmd->iscsi_opcode == ISCSI_OP_SCSI_CMD); @@ -498,6 +498,169 @@ static void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) __iscsit_free_cmd(cmd, scsi_cmd, true); } +EXPORT_SYMBOL(iscsit_aborted_task); + +static void iscsit_do_crypto_hash_buf(struct ahash_request *, const void *, + u32, u32, u8 *, u8 *); +static void iscsit_tx_thread_wait_for_tcp(struct iscsi_conn *); + +static int +iscsit_xmit_nondatain_pdu(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + const void *data_buf, u32 data_buf_len) +{ + struct iscsi_hdr *hdr = (struct iscsi_hdr *)cmd->pdu; + struct kvec *iov; + u32 niov = 0, tx_size = ISCSI_HDR_LEN; + int ret; + + iov = &cmd->iov_misc[0]; + iov[niov].iov_base = cmd->pdu; + iov[niov++].iov_len = ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(conn->conn_tx_hash, hdr, + ISCSI_HDR_LEN, 0, NULL, + (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32C HeaderDigest" + " to opcode 0x%x 0x%08x\n", + hdr->opcode, *header_digest); + } + + if (data_buf_len) { + u32 padding = ((-data_buf_len) & 3); + + iov[niov].iov_base = (void *)data_buf; + iov[niov++].iov_len = data_buf_len; + tx_size += data_buf_len; + + if (padding != 0) { + iov[niov].iov_base = &cmd->pad_bytes; + iov[niov++].iov_len = padding; + tx_size += padding; + pr_debug("Attaching %u additional" + " padding bytes.\n", padding); + } + + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(conn->conn_tx_hash, + data_buf, data_buf_len, + padding, + (u8 *)&cmd->pad_bytes, + (u8 *)&cmd->data_crc); + + iov[niov].iov_base = &cmd->data_crc; + iov[niov++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attached DataDigest for %u" + " bytes opcode 0x%x, CRC 0x%08x\n", + data_buf_len, hdr->opcode, cmd->data_crc); + } + } + + cmd->iov_misc_count = niov; + cmd->tx_size = tx_size; + + ret = iscsit_send_tx_data(cmd, conn, 1); + if (ret < 0) { + iscsit_tx_thread_wait_for_tcp(conn); + return ret; + } + + return 0; +} + +static int iscsit_map_iovec(struct iscsi_cmd *, struct kvec *, u32, u32); +static void iscsit_unmap_iovec(struct iscsi_cmd *); +static u32 iscsit_do_crypto_hash_sg(struct ahash_request *, struct iscsi_cmd *, + u32, u32, u32, u8 *); +static int +iscsit_xmit_datain_pdu(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + const struct iscsi_datain *datain) +{ + struct kvec *iov; + u32 iov_count = 0, tx_size = 0; + int ret, iov_ret; + + iov = &cmd->iov_data[0]; + iov[iov_count].iov_base = cmd->pdu; + iov[iov_count++].iov_len = ISCSI_HDR_LEN; + tx_size += ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(conn->conn_tx_hash, cmd->pdu, + ISCSI_HDR_LEN, 0, NULL, + (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + + pr_debug("Attaching CRC32 HeaderDigest for DataIN PDU 0x%08x\n", + *header_digest); + } + + iov_ret = iscsit_map_iovec(cmd, &cmd->iov_data[1], + datain->offset, datain->length); + if (iov_ret < 0) + return -1; + + iov_count += iov_ret; + tx_size += datain->length; + + cmd->padding = ((-datain->length) & 3); + if (cmd->padding) { + iov[iov_count].iov_base = cmd->pad_bytes; + iov[iov_count++].iov_len = cmd->padding; + tx_size += cmd->padding; + + pr_debug("Attaching %u padding bytes\n", cmd->padding); + } + + if (conn->conn_ops->DataDigest) { + cmd->data_crc = iscsit_do_crypto_hash_sg(conn->conn_tx_hash, + cmd, datain->offset, + datain->length, + cmd->padding, + cmd->pad_bytes); + + iov[iov_count].iov_base = &cmd->data_crc; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + + pr_debug("Attached CRC32C DataDigest %d bytes, crc 0x%08x\n", + datain->length + cmd->padding, cmd->data_crc); + } + + cmd->iov_data_count = iov_count; + cmd->tx_size = tx_size; + + ret = iscsit_fe_sendpage_sg(cmd, conn); + + iscsit_unmap_iovec(cmd); + + if (ret < 0) { + iscsit_tx_thread_wait_for_tcp(conn); + return ret; + } + + return 0; +} + +static int iscsit_xmit_pdu(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct iscsi_datain_req *dr, const void *buf, + u32 buf_len) +{ + if (dr) + return iscsit_xmit_datain_pdu(conn, cmd, buf); + else + return iscsit_xmit_nondatain_pdu(conn, cmd, buf, buf_len); +} static enum target_prot_op iscsit_get_sup_prot_ops(struct iscsi_conn *conn) { @@ -507,6 +670,7 @@ static enum target_prot_op iscsit_get_sup_prot_ops(struct iscsi_conn *conn) static struct iscsit_transport iscsi_target_transport = { .name = "iSCSI/TCP", .transport_type = ISCSI_TCP, + .rdma_shutdown = false, .owner = NULL, .iscsit_setup_np = iscsit_setup_np, .iscsit_accept_np = iscsit_accept_np, @@ -519,6 +683,8 @@ static struct iscsit_transport iscsi_target_transport = { .iscsit_queue_data_in = iscsit_queue_rsp, .iscsit_queue_status = iscsit_queue_rsp, .iscsit_aborted_task = iscsit_aborted_task, + .iscsit_xmit_pdu = iscsit_xmit_pdu, + .iscsit_get_rx_pdu = iscsit_get_rx_pdu, .iscsit_get_sup_prot_ops = iscsit_get_sup_prot_ops, }; @@ -634,7 +800,7 @@ static void __exit iscsi_target_cleanup_module(void) kfree(iscsit_global); } -static int iscsit_add_reject( +int iscsit_add_reject( struct iscsi_conn *conn, u8 reason, unsigned char *buf) @@ -664,6 +830,7 @@ static int iscsit_add_reject( return -1; } +EXPORT_SYMBOL(iscsit_add_reject); static int iscsit_add_reject_from_cmd( struct iscsi_cmd *cmd, @@ -719,6 +886,7 @@ int iscsit_reject_cmd(struct iscsi_cmd *cmd, u8 reason, unsigned char *buf) { return iscsit_add_reject_from_cmd(cmd, reason, false, buf); } +EXPORT_SYMBOL(iscsit_reject_cmd); /* * Map some portion of the allocated scatterlist to an iovec, suitable for @@ -737,7 +905,14 @@ static int iscsit_map_iovec( /* * We know each entry in t_data_sg contains a page. */ - sg = &cmd->se_cmd.t_data_sg[data_offset / PAGE_SIZE]; + u32 ent = data_offset / PAGE_SIZE; + + if (ent >= cmd->se_cmd.t_data_nents) { + pr_err("Initial page entry out-of-bounds\n"); + return -1; + } + + sg = &cmd->se_cmd.t_data_sg[ent]; page_off = (data_offset % PAGE_SIZE); cmd->first_data_sg = sg; @@ -2335,7 +2510,7 @@ iscsit_handle_logout_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, } EXPORT_SYMBOL(iscsit_handle_logout_cmd); -static int iscsit_handle_snack( +int iscsit_handle_snack( struct iscsi_conn *conn, unsigned char *buf) { @@ -2388,6 +2563,7 @@ static int iscsit_handle_snack( return 0; } +EXPORT_SYMBOL(iscsit_handle_snack); static void iscsit_rx_thread_wait_for_tcp(struct iscsi_conn *conn) { @@ -2534,7 +2710,6 @@ static int iscsit_send_conn_drop_async_message( { struct iscsi_async *hdr; - cmd->tx_size = ISCSI_HDR_LEN; cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT; hdr = (struct iscsi_async *) cmd->pdu; @@ -2552,25 +2727,11 @@ static int iscsit_send_conn_drop_async_message( hdr->param2 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Wait); hdr->param3 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Retain); - if (conn->conn_ops->HeaderDigest) { - u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, hdr, - ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); - - cmd->tx_size += ISCSI_CRC_LEN; - pr_debug("Attaching CRC32C HeaderDigest to" - " Async Message 0x%08x\n", *header_digest); - } - - cmd->iov_misc[0].iov_base = cmd->pdu; - cmd->iov_misc[0].iov_len = cmd->tx_size; - cmd->iov_misc_count = 1; - pr_debug("Sending Connection Dropped Async Message StatSN:" " 0x%08x, for CID: %hu on CID: %hu\n", cmd->stat_sn, cmd->logout_cid, conn->cid); - return 0; + + return conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, NULL, 0); } static void iscsit_tx_thread_wait_for_tcp(struct iscsi_conn *conn) @@ -2583,7 +2744,7 @@ static void iscsit_tx_thread_wait_for_tcp(struct iscsi_conn *conn) } } -static void +void iscsit_build_datain_pdu(struct iscsi_cmd *cmd, struct iscsi_conn *conn, struct iscsi_datain *datain, struct iscsi_data_rsp *hdr, bool set_statsn) @@ -2627,15 +2788,14 @@ iscsit_build_datain_pdu(struct iscsi_cmd *cmd, struct iscsi_conn *conn, cmd->init_task_tag, ntohl(hdr->statsn), ntohl(hdr->datasn), ntohl(hdr->offset), datain->length, conn->cid); } +EXPORT_SYMBOL(iscsit_build_datain_pdu); static int iscsit_send_datain(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { struct iscsi_data_rsp *hdr = (struct iscsi_data_rsp *)&cmd->pdu[0]; struct iscsi_datain datain; struct iscsi_datain_req *dr; - struct kvec *iov; - u32 iov_count = 0, tx_size = 0; - int eodr = 0, ret, iov_ret; + int eodr = 0, ret; bool set_statsn = false; memset(&datain, 0, sizeof(struct iscsi_datain)); @@ -2677,64 +2837,9 @@ static int iscsit_send_datain(struct iscsi_cmd *cmd, struct iscsi_conn *conn) iscsit_build_datain_pdu(cmd, conn, &datain, hdr, set_statsn); - iov = &cmd->iov_data[0]; - iov[iov_count].iov_base = cmd->pdu; - iov[iov_count++].iov_len = ISCSI_HDR_LEN; - tx_size += ISCSI_HDR_LEN; - - if (conn->conn_ops->HeaderDigest) { - u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, cmd->pdu, - ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); - - iov[0].iov_len += ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - - pr_debug("Attaching CRC32 HeaderDigest" - " for DataIN PDU 0x%08x\n", *header_digest); - } - - iov_ret = iscsit_map_iovec(cmd, &cmd->iov_data[1], - datain.offset, datain.length); - if (iov_ret < 0) - return -1; - - iov_count += iov_ret; - tx_size += datain.length; - - cmd->padding = ((-datain.length) & 3); - if (cmd->padding) { - iov[iov_count].iov_base = cmd->pad_bytes; - iov[iov_count++].iov_len = cmd->padding; - tx_size += cmd->padding; - - pr_debug("Attaching %u padding bytes\n", - cmd->padding); - } - if (conn->conn_ops->DataDigest) { - cmd->data_crc = iscsit_do_crypto_hash_sg(conn->conn_tx_hash, cmd, - datain.offset, datain.length, cmd->padding, cmd->pad_bytes); - - iov[iov_count].iov_base = &cmd->data_crc; - iov[iov_count++].iov_len = ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - - pr_debug("Attached CRC32C DataDigest %d bytes, crc" - " 0x%08x\n", datain.length+cmd->padding, cmd->data_crc); - } - - cmd->iov_data_count = iov_count; - cmd->tx_size = tx_size; - - ret = iscsit_fe_sendpage_sg(cmd, conn); - - iscsit_unmap_iovec(cmd); - - if (ret < 0) { - iscsit_tx_thread_wait_for_tcp(conn); + ret = conn->conn_transport->iscsit_xmit_pdu(conn, cmd, dr, &datain, 0); + if (ret < 0) return ret; - } if (dr->dr_complete) { eodr = (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ? @@ -2843,34 +2948,14 @@ EXPORT_SYMBOL(iscsit_build_logout_rsp); static int iscsit_send_logout(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct kvec *iov; - int niov = 0, tx_size, rc; + int rc; rc = iscsit_build_logout_rsp(cmd, conn, (struct iscsi_logout_rsp *)&cmd->pdu[0]); if (rc < 0) return rc; - tx_size = ISCSI_HDR_LEN; - iov = &cmd->iov_misc[0]; - iov[niov].iov_base = cmd->pdu; - iov[niov++].iov_len = ISCSI_HDR_LEN; - - if (conn->conn_ops->HeaderDigest) { - u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, &cmd->pdu[0], - ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); - - iov[0].iov_len += ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - pr_debug("Attaching CRC32C HeaderDigest to" - " Logout Response 0x%08x\n", *header_digest); - } - cmd->iov_misc_count = niov; - cmd->tx_size = tx_size; - - return 0; + return conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, NULL, 0); } void @@ -2910,34 +2995,16 @@ static int iscsit_send_unsolicited_nopin( int want_response) { struct iscsi_nopin *hdr = (struct iscsi_nopin *)&cmd->pdu[0]; - int tx_size = ISCSI_HDR_LEN, ret; + int ret; iscsit_build_nopin_rsp(cmd, conn, hdr, false); - if (conn->conn_ops->HeaderDigest) { - u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, hdr, - ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); - - tx_size += ISCSI_CRC_LEN; - pr_debug("Attaching CRC32C HeaderDigest to" - " NopIN 0x%08x\n", *header_digest); - } - - cmd->iov_misc[0].iov_base = cmd->pdu; - cmd->iov_misc[0].iov_len = tx_size; - cmd->iov_misc_count = 1; - cmd->tx_size = tx_size; - pr_debug("Sending Unsolicited NOPIN TTT: 0x%08x StatSN:" " 0x%08x CID: %hu\n", hdr->ttt, cmd->stat_sn, conn->cid); - ret = iscsit_send_tx_data(cmd, conn, 1); - if (ret < 0) { - iscsit_tx_thread_wait_for_tcp(conn); + ret = conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, NULL, 0); + if (ret < 0) return ret; - } spin_lock_bh(&cmd->istate_lock); cmd->i_state = want_response ? @@ -2951,75 +3018,24 @@ static int iscsit_send_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { struct iscsi_nopin *hdr = (struct iscsi_nopin *)&cmd->pdu[0]; - struct kvec *iov; - u32 padding = 0; - int niov = 0, tx_size; iscsit_build_nopin_rsp(cmd, conn, hdr, true); - tx_size = ISCSI_HDR_LEN; - iov = &cmd->iov_misc[0]; - iov[niov].iov_base = cmd->pdu; - iov[niov++].iov_len = ISCSI_HDR_LEN; - - if (conn->conn_ops->HeaderDigest) { - u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, hdr, - ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); - - iov[0].iov_len += ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - pr_debug("Attaching CRC32C HeaderDigest" - " to NopIn 0x%08x\n", *header_digest); - } - /* * NOPOUT Ping Data is attached to struct iscsi_cmd->buf_ptr. * NOPOUT DataSegmentLength is at struct iscsi_cmd->buf_ptr_size. */ - if (cmd->buf_ptr_size) { - iov[niov].iov_base = cmd->buf_ptr; - iov[niov++].iov_len = cmd->buf_ptr_size; - tx_size += cmd->buf_ptr_size; - - pr_debug("Echoing back %u bytes of ping" - " data.\n", cmd->buf_ptr_size); - - padding = ((-cmd->buf_ptr_size) & 3); - if (padding != 0) { - iov[niov].iov_base = &cmd->pad_bytes; - iov[niov++].iov_len = padding; - tx_size += padding; - pr_debug("Attaching %u additional" - " padding bytes.\n", padding); - } - if (conn->conn_ops->DataDigest) { - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, - cmd->buf_ptr, cmd->buf_ptr_size, - padding, (u8 *)&cmd->pad_bytes, - (u8 *)&cmd->data_crc); - - iov[niov].iov_base = &cmd->data_crc; - iov[niov++].iov_len = ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - pr_debug("Attached DataDigest for %u" - " bytes of ping data, CRC 0x%08x\n", - cmd->buf_ptr_size, cmd->data_crc); - } - } + pr_debug("Echoing back %u bytes of ping data.\n", cmd->buf_ptr_size); - cmd->iov_misc_count = niov; - cmd->tx_size = tx_size; - - return 0; + return conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, + cmd->buf_ptr, + cmd->buf_ptr_size); } static int iscsit_send_r2t( struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - int tx_size = 0; struct iscsi_r2t *r2t; struct iscsi_r2t_rsp *hdr; int ret; @@ -3035,7 +3051,10 @@ static int iscsit_send_r2t( int_to_scsilun(cmd->se_cmd.orig_fe_lun, (struct scsi_lun *)&hdr->lun); hdr->itt = cmd->init_task_tag; - r2t->targ_xfer_tag = session_get_next_ttt(conn->sess); + if (conn->conn_transport->iscsit_get_r2t_ttt) + conn->conn_transport->iscsit_get_r2t_ttt(conn, cmd, r2t); + else + r2t->targ_xfer_tag = session_get_next_ttt(conn->sess); hdr->ttt = cpu_to_be32(r2t->targ_xfer_tag); hdr->statsn = cpu_to_be32(conn->stat_sn); hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); @@ -3044,38 +3063,18 @@ static int iscsit_send_r2t( hdr->data_offset = cpu_to_be32(r2t->offset); hdr->data_length = cpu_to_be32(r2t->xfer_len); - cmd->iov_misc[0].iov_base = cmd->pdu; - cmd->iov_misc[0].iov_len = ISCSI_HDR_LEN; - tx_size += ISCSI_HDR_LEN; - - if (conn->conn_ops->HeaderDigest) { - u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, hdr, - ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); - - cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - pr_debug("Attaching CRC32 HeaderDigest for R2T" - " PDU 0x%08x\n", *header_digest); - } - pr_debug("Built %sR2T, ITT: 0x%08x, TTT: 0x%08x, StatSN:" " 0x%08x, R2TSN: 0x%08x, Offset: %u, DDTL: %u, CID: %hu\n", (!r2t->recovery_r2t) ? "" : "Recovery ", cmd->init_task_tag, r2t->targ_xfer_tag, ntohl(hdr->statsn), r2t->r2t_sn, r2t->offset, r2t->xfer_len, conn->cid); - cmd->iov_misc_count = 1; - cmd->tx_size = tx_size; - spin_lock_bh(&cmd->r2t_lock); r2t->sent_r2t = 1; spin_unlock_bh(&cmd->r2t_lock); - ret = iscsit_send_tx_data(cmd, conn, 1); + ret = conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, NULL, 0); if (ret < 0) { - iscsit_tx_thread_wait_for_tcp(conn); return ret; } @@ -3166,6 +3165,7 @@ int iscsit_build_r2ts_for_cmd( return 0; } +EXPORT_SYMBOL(iscsit_build_r2ts_for_cmd); void iscsit_build_rsp_pdu(struct iscsi_cmd *cmd, struct iscsi_conn *conn, bool inc_stat_sn, struct iscsi_scsi_rsp *hdr) @@ -3204,18 +3204,12 @@ EXPORT_SYMBOL(iscsit_build_rsp_pdu); static int iscsit_send_response(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { struct iscsi_scsi_rsp *hdr = (struct iscsi_scsi_rsp *)&cmd->pdu[0]; - struct kvec *iov; - u32 padding = 0, tx_size = 0; - int iov_count = 0; bool inc_stat_sn = (cmd->i_state == ISTATE_SEND_STATUS); + void *data_buf = NULL; + u32 padding = 0, data_buf_len = 0; iscsit_build_rsp_pdu(cmd, conn, inc_stat_sn, hdr); - iov = &cmd->iov_misc[0]; - iov[iov_count].iov_base = cmd->pdu; - iov[iov_count++].iov_len = ISCSI_HDR_LEN; - tx_size += ISCSI_HDR_LEN; - /* * Attach SENSE DATA payload to iSCSI Response PDU */ @@ -3227,56 +3221,23 @@ static int iscsit_send_response(struct iscsi_cmd *cmd, struct iscsi_conn *conn) padding = -(cmd->se_cmd.scsi_sense_length) & 3; hton24(hdr->dlength, (u32)cmd->se_cmd.scsi_sense_length); - iov[iov_count].iov_base = cmd->sense_buffer; - iov[iov_count++].iov_len = - (cmd->se_cmd.scsi_sense_length + padding); - tx_size += cmd->se_cmd.scsi_sense_length; + data_buf = cmd->sense_buffer; + data_buf_len = cmd->se_cmd.scsi_sense_length + padding; if (padding) { memset(cmd->sense_buffer + cmd->se_cmd.scsi_sense_length, 0, padding); - tx_size += padding; pr_debug("Adding %u bytes of padding to" " SENSE.\n", padding); } - if (conn->conn_ops->DataDigest) { - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, - cmd->sense_buffer, - (cmd->se_cmd.scsi_sense_length + padding), - 0, NULL, (u8 *)&cmd->data_crc); - - iov[iov_count].iov_base = &cmd->data_crc; - iov[iov_count++].iov_len = ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - - pr_debug("Attaching CRC32 DataDigest for" - " SENSE, %u bytes CRC 0x%08x\n", - (cmd->se_cmd.scsi_sense_length + padding), - cmd->data_crc); - } - pr_debug("Attaching SENSE DATA: %u bytes to iSCSI" " Response PDU\n", cmd->se_cmd.scsi_sense_length); } - if (conn->conn_ops->HeaderDigest) { - u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, cmd->pdu, - ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); - - iov[0].iov_len += ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - pr_debug("Attaching CRC32 HeaderDigest for Response" - " PDU 0x%08x\n", *header_digest); - } - - cmd->iov_misc_count = iov_count; - cmd->tx_size = tx_size; - - return 0; + return conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, data_buf, + data_buf_len); } static u8 iscsit_convert_tcm_tmr_rsp(struct se_tmr_req *se_tmr) @@ -3323,30 +3284,10 @@ static int iscsit_send_task_mgt_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { struct iscsi_tm_rsp *hdr = (struct iscsi_tm_rsp *)&cmd->pdu[0]; - u32 tx_size = 0; iscsit_build_task_mgt_rsp(cmd, conn, hdr); - cmd->iov_misc[0].iov_base = cmd->pdu; - cmd->iov_misc[0].iov_len = ISCSI_HDR_LEN; - tx_size += ISCSI_HDR_LEN; - - if (conn->conn_ops->HeaderDigest) { - u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, hdr, - ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); - - cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - pr_debug("Attaching CRC32 HeaderDigest for Task" - " Mgmt Response PDU 0x%08x\n", *header_digest); - } - - cmd->iov_misc_count = 1; - cmd->tx_size = tx_size; - - return 0; + return conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, NULL, 0); } static bool iscsit_check_inaddr_any(struct iscsi_np *np) @@ -3583,53 +3524,16 @@ static int iscsit_send_text_rsp( struct iscsi_conn *conn) { struct iscsi_text_rsp *hdr = (struct iscsi_text_rsp *)cmd->pdu; - struct kvec *iov; - u32 tx_size = 0; - int text_length, iov_count = 0, rc; - - rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_TCP); - if (rc < 0) - return rc; - - text_length = rc; - iov = &cmd->iov_misc[0]; - iov[iov_count].iov_base = cmd->pdu; - iov[iov_count++].iov_len = ISCSI_HDR_LEN; - iov[iov_count].iov_base = cmd->buf_ptr; - iov[iov_count++].iov_len = text_length; - - tx_size += (ISCSI_HDR_LEN + text_length); - - if (conn->conn_ops->HeaderDigest) { - u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, hdr, - ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); - - iov[0].iov_len += ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - pr_debug("Attaching CRC32 HeaderDigest for" - " Text Response PDU 0x%08x\n", *header_digest); - } - - if (conn->conn_ops->DataDigest) { - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, - cmd->buf_ptr, text_length, - 0, NULL, (u8 *)&cmd->data_crc); - - iov[iov_count].iov_base = &cmd->data_crc; - iov[iov_count++].iov_len = ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - - pr_debug("Attaching DataDigest for %u bytes of text" - " data, CRC 0x%08x\n", text_length, - cmd->data_crc); - } + int text_length; - cmd->iov_misc_count = iov_count; - cmd->tx_size = tx_size; + text_length = iscsit_build_text_rsp(cmd, conn, hdr, + conn->conn_transport->transport_type); + if (text_length < 0) + return text_length; - return 0; + return conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, + cmd->buf_ptr, + text_length); } void @@ -3654,49 +3558,15 @@ static int iscsit_send_reject( struct iscsi_conn *conn) { struct iscsi_reject *hdr = (struct iscsi_reject *)&cmd->pdu[0]; - struct kvec *iov; - u32 iov_count = 0, tx_size; iscsit_build_reject(cmd, conn, hdr); - iov = &cmd->iov_misc[0]; - iov[iov_count].iov_base = cmd->pdu; - iov[iov_count++].iov_len = ISCSI_HDR_LEN; - iov[iov_count].iov_base = cmd->buf_ptr; - iov[iov_count++].iov_len = ISCSI_HDR_LEN; - - tx_size = (ISCSI_HDR_LEN + ISCSI_HDR_LEN); - - if (conn->conn_ops->HeaderDigest) { - u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, hdr, - ISCSI_HDR_LEN, 0, NULL, (u8 *)header_digest); - - iov[0].iov_len += ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - pr_debug("Attaching CRC32 HeaderDigest for" - " REJECT PDU 0x%08x\n", *header_digest); - } - - if (conn->conn_ops->DataDigest) { - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, cmd->buf_ptr, - ISCSI_HDR_LEN, 0, NULL, (u8 *)&cmd->data_crc); - - iov[iov_count].iov_base = &cmd->data_crc; - iov[iov_count++].iov_len = ISCSI_CRC_LEN; - tx_size += ISCSI_CRC_LEN; - pr_debug("Attaching CRC32 DataDigest for REJECT" - " PDU 0x%08x\n", cmd->data_crc); - } - - cmd->iov_misc_count = iov_count; - cmd->tx_size = tx_size; - pr_debug("Built Reject PDU StatSN: 0x%08x, Reason: 0x%02x," " CID: %hu\n", ntohl(hdr->statsn), hdr->reason, conn->cid); - return 0; + return conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, + cmd->buf_ptr, + ISCSI_HDR_LEN); } void iscsit_thread_get_cpumask(struct iscsi_conn *conn) @@ -3724,33 +3594,7 @@ void iscsit_thread_get_cpumask(struct iscsi_conn *conn) cpumask_setall(conn->conn_cpumask); } -static inline void iscsit_thread_check_cpumask( - struct iscsi_conn *conn, - struct task_struct *p, - int mode) -{ - /* - * mode == 1 signals iscsi_target_tx_thread() usage. - * mode == 0 signals iscsi_target_rx_thread() usage. - */ - if (mode == 1) { - if (!conn->conn_tx_reset_cpumask) - return; - conn->conn_tx_reset_cpumask = 0; - } else { - if (!conn->conn_rx_reset_cpumask) - return; - conn->conn_rx_reset_cpumask = 0; - } - /* - * Update the CPU mask for this single kthread so that - * both TX and RX kthreads are scheduled to run on the - * same CPU. - */ - set_cpus_allowed_ptr(p, conn->conn_cpumask); -} - -static int +int iscsit_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) { int ret; @@ -3792,6 +3636,7 @@ iscsit_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state err: return -1; } +EXPORT_SYMBOL(iscsit_immediate_queue); static int iscsit_handle_immediate_queue(struct iscsi_conn *conn) @@ -3816,7 +3661,7 @@ iscsit_handle_immediate_queue(struct iscsi_conn *conn) return 0; } -static int +int iscsit_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) { int ret; @@ -3889,13 +3734,6 @@ check_rsp_state: if (ret < 0) goto err; - if (iscsit_send_tx_data(cmd, conn, 1) < 0) { - iscsit_tx_thread_wait_for_tcp(conn); - iscsit_unmap_iovec(cmd); - goto err; - } - iscsit_unmap_iovec(cmd); - switch (state) { case ISTATE_SEND_LOGOUTRSP: if (!iscsit_logout_post_handler(cmd, conn)) @@ -3928,6 +3766,7 @@ check_rsp_state: err: return -1; } +EXPORT_SYMBOL(iscsit_response_queue); static int iscsit_handle_response_queue(struct iscsi_conn *conn) { @@ -4087,36 +3926,12 @@ static bool iscsi_target_check_conn_state(struct iscsi_conn *conn) return ret; } -int iscsi_target_rx_thread(void *arg) +static void iscsit_get_rx_pdu(struct iscsi_conn *conn) { - int ret, rc; + int ret; u8 buffer[ISCSI_HDR_LEN], opcode; u32 checksum = 0, digest = 0; - struct iscsi_conn *conn = arg; struct kvec iov; - /* - * Allow ourselves to be interrupted by SIGINT so that a - * connection recovery / failure event can be triggered externally. - */ - allow_signal(SIGINT); - /* - * Wait for iscsi_post_login_handler() to complete before allowing - * incoming iscsi/tcp socket I/O, and/or failing the connection. - */ - rc = wait_for_completion_interruptible(&conn->rx_login_comp); - if (rc < 0 || iscsi_target_check_conn_state(conn)) - return 0; - - if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) { - struct completion comp; - - init_completion(&comp); - rc = wait_for_completion_interruptible(&comp); - if (rc < 0) - goto transport_err; - - goto transport_err; - } while (!kthread_should_stop()) { /* @@ -4134,7 +3949,7 @@ int iscsi_target_rx_thread(void *arg) ret = rx_data(conn, &iov, 1, ISCSI_HDR_LEN); if (ret != ISCSI_HDR_LEN) { iscsit_rx_thread_wait_for_tcp(conn); - goto transport_err; + return; } if (conn->conn_ops->HeaderDigest) { @@ -4144,7 +3959,7 @@ int iscsi_target_rx_thread(void *arg) ret = rx_data(conn, &iov, 1, ISCSI_CRC_LEN); if (ret != ISCSI_CRC_LEN) { iscsit_rx_thread_wait_for_tcp(conn); - goto transport_err; + return; } iscsit_do_crypto_hash_buf(conn->conn_rx_hash, @@ -4168,7 +3983,7 @@ int iscsi_target_rx_thread(void *arg) } if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) - goto transport_err; + return; opcode = buffer[0] & ISCSI_OPCODE_MASK; @@ -4179,15 +3994,38 @@ int iscsi_target_rx_thread(void *arg) " while in Discovery Session, rejecting.\n", opcode); iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, buffer); - goto transport_err; + return; } ret = iscsi_target_rx_opcode(conn, buffer); if (ret < 0) - goto transport_err; + return; } +} + +int iscsi_target_rx_thread(void *arg) +{ + int rc; + struct iscsi_conn *conn = arg; + + /* + * Allow ourselves to be interrupted by SIGINT so that a + * connection recovery / failure event can be triggered externally. + */ + allow_signal(SIGINT); + /* + * Wait for iscsi_post_login_handler() to complete before allowing + * incoming iscsi/tcp socket I/O, and/or failing the connection. + */ + rc = wait_for_completion_interruptible(&conn->rx_login_comp); + if (rc < 0 || iscsi_target_check_conn_state(conn)) + return 0; + + if (!conn->conn_transport->iscsit_get_rx_pdu) + return 0; + + conn->conn_transport->iscsit_get_rx_pdu(conn); -transport_err: if (!signal_pending(current)) atomic_set(&conn->transport_failed, 1); iscsit_take_action_for_connection_exit(conn); @@ -4240,16 +4078,17 @@ int iscsit_close_connection( pr_debug("Closing iSCSI connection CID %hu on SID:" " %u\n", conn->cid, sess->sid); /* - * Always up conn_logout_comp for the traditional TCP case just in case - * the RX Thread in iscsi_target_rx_opcode() is sleeping and the logout - * response never got sent because the connection failed. + * Always up conn_logout_comp for the traditional TCP and HW_OFFLOAD + * case just in case the RX Thread in iscsi_target_rx_opcode() is + * sleeping and the logout response never got sent because the + * connection failed. * * However for iser-target, isert_wait4logout() is using conn_logout_comp * to signal logout response TX interrupt completion. Go ahead and skip * this for iser since isert_rx_opcode() does not wait on logout failure, * and to avoid iscsi_conn pointer dereference in iser-target code. */ - if (conn->conn_transport->transport_type == ISCSI_TCP) + if (!conn->conn_transport->rdma_shutdown) complete(&conn->conn_logout_comp); if (!strcmp(current->comm, ISCSI_RX_THREAD_NAME)) { @@ -4438,7 +4277,7 @@ int iscsit_close_connection( if (!atomic_read(&sess->session_reinstatement) && atomic_read(&sess->session_fall_back_to_erl0)) { spin_unlock_bh(&sess->conn_lock); - target_put_session(sess->se_sess); + iscsit_close_session(sess); return 0; } else if (atomic_read(&sess->session_logout)) { @@ -4467,6 +4306,10 @@ int iscsit_close_connection( } } +/* + * If the iSCSI Session for the iSCSI Initiator Node exists, + * forcefully shutdown the iSCSI NEXUS. + */ int iscsit_close_session(struct iscsi_session *sess) { struct iscsi_portal_group *tpg = sess->tpg; @@ -4556,7 +4399,7 @@ static void iscsit_logout_post_handler_closesession( * always sleep waiting for RX/TX thread shutdown to complete * within iscsit_close_connection(). */ - if (conn->conn_transport->transport_type == ISCSI_TCP) + if (!conn->conn_transport->rdma_shutdown) sleep = cmpxchg(&conn->tx_thread_active, true, false); atomic_set(&conn->conn_logout_remove, 0); @@ -4565,7 +4408,7 @@ static void iscsit_logout_post_handler_closesession( iscsit_dec_conn_usage_count(conn); iscsit_stop_session(sess, sleep, sleep); iscsit_dec_session_usage_count(sess); - target_put_session(sess->se_sess); + iscsit_close_session(sess); } static void iscsit_logout_post_handler_samecid( @@ -4573,7 +4416,7 @@ static void iscsit_logout_post_handler_samecid( { int sleep = 1; - if (conn->conn_transport->transport_type == ISCSI_TCP) + if (!conn->conn_transport->rdma_shutdown) sleep = cmpxchg(&conn->tx_thread_active, true, false); atomic_set(&conn->conn_logout_remove, 0); @@ -4736,7 +4579,7 @@ int iscsit_free_session(struct iscsi_session *sess) } else spin_unlock_bh(&sess->conn_lock); - target_put_session(sess->se_sess); + iscsit_close_session(sess); return 0; } diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 97e5b69e0668..923c032f0b95 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -43,14 +43,15 @@ static inline struct iscsi_tpg_np *to_iscsi_tpg_np(struct config_item *item) return container_of(to_tpg_np(item), struct iscsi_tpg_np, se_tpg_np); } -static ssize_t lio_target_np_sctp_show(struct config_item *item, char *page) +static ssize_t lio_target_np_driver_show(struct config_item *item, char *page, + enum iscsit_transport_type type) { struct iscsi_tpg_np *tpg_np = to_iscsi_tpg_np(item); - struct iscsi_tpg_np *tpg_np_sctp; + struct iscsi_tpg_np *tpg_np_new; ssize_t rb; - tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP); - if (tpg_np_sctp) + tpg_np_new = iscsit_tpg_locate_child_np(tpg_np, type); + if (tpg_np_new) rb = sprintf(page, "1\n"); else rb = sprintf(page, "0\n"); @@ -58,19 +59,20 @@ static ssize_t lio_target_np_sctp_show(struct config_item *item, char *page) return rb; } -static ssize_t lio_target_np_sctp_store(struct config_item *item, - const char *page, size_t count) +static ssize_t lio_target_np_driver_store(struct config_item *item, + const char *page, size_t count, enum iscsit_transport_type type, + const char *mod_name) { struct iscsi_tpg_np *tpg_np = to_iscsi_tpg_np(item); struct iscsi_np *np; struct iscsi_portal_group *tpg; - struct iscsi_tpg_np *tpg_np_sctp = NULL; + struct iscsi_tpg_np *tpg_np_new = NULL; u32 op; - int ret; + int rc; - ret = kstrtou32(page, 0, &op); - if (ret) - return ret; + rc = kstrtou32(page, 0, &op); + if (rc) + return rc; if ((op != 1) && (op != 0)) { pr_err("Illegal value for tpg_enable: %u\n", op); return -EINVAL; @@ -87,107 +89,64 @@ static ssize_t lio_target_np_sctp_store(struct config_item *item, return -EINVAL; if (op) { - /* - * Use existing np->np_sockaddr for SCTP network portal reference - */ - tpg_np_sctp = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, - tpg_np, ISCSI_SCTP_TCP); - if (!tpg_np_sctp || IS_ERR(tpg_np_sctp)) - goto out; - } else { - tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP); - if (!tpg_np_sctp) - goto out; + if (strlen(mod_name)) { + rc = request_module(mod_name); + if (rc != 0) { + pr_warn("Unable to request_module for %s\n", + mod_name); + rc = 0; + } + } - ret = iscsit_tpg_del_network_portal(tpg, tpg_np_sctp); - if (ret < 0) + tpg_np_new = iscsit_tpg_add_network_portal(tpg, + &np->np_sockaddr, tpg_np, type); + if (IS_ERR(tpg_np_new)) goto out; + } else { + tpg_np_new = iscsit_tpg_locate_child_np(tpg_np, type); + if (tpg_np_new) { + rc = iscsit_tpg_del_network_portal(tpg, tpg_np_new); + if (rc < 0) + goto out; + } } iscsit_put_tpg(tpg); return count; out: iscsit_put_tpg(tpg); - return -EINVAL; + return rc; } static ssize_t lio_target_np_iser_show(struct config_item *item, char *page) { - struct iscsi_tpg_np *tpg_np = to_iscsi_tpg_np(item); - struct iscsi_tpg_np *tpg_np_iser; - ssize_t rb; - - tpg_np_iser = iscsit_tpg_locate_child_np(tpg_np, ISCSI_INFINIBAND); - if (tpg_np_iser) - rb = sprintf(page, "1\n"); - else - rb = sprintf(page, "0\n"); - - return rb; + return lio_target_np_driver_show(item, page, ISCSI_INFINIBAND); } static ssize_t lio_target_np_iser_store(struct config_item *item, - const char *page, size_t count) + const char *page, size_t count) { - struct iscsi_tpg_np *tpg_np = to_iscsi_tpg_np(item); - struct iscsi_np *np; - struct iscsi_portal_group *tpg; - struct iscsi_tpg_np *tpg_np_iser = NULL; - char *endptr; - u32 op; - int rc = 0; - - op = simple_strtoul(page, &endptr, 0); - if ((op != 1) && (op != 0)) { - pr_err("Illegal value for tpg_enable: %u\n", op); - return -EINVAL; - } - np = tpg_np->tpg_np; - if (!np) { - pr_err("Unable to locate struct iscsi_np from" - " struct iscsi_tpg_np\n"); - return -EINVAL; - } - - tpg = tpg_np->tpg; - if (iscsit_get_tpg(tpg) < 0) - return -EINVAL; - - if (op) { - rc = request_module("ib_isert"); - if (rc != 0) { - pr_warn("Unable to request_module for ib_isert\n"); - rc = 0; - } - - tpg_np_iser = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, - tpg_np, ISCSI_INFINIBAND); - if (IS_ERR(tpg_np_iser)) { - rc = PTR_ERR(tpg_np_iser); - goto out; - } - } else { - tpg_np_iser = iscsit_tpg_locate_child_np(tpg_np, ISCSI_INFINIBAND); - if (tpg_np_iser) { - rc = iscsit_tpg_del_network_portal(tpg, tpg_np_iser); - if (rc < 0) - goto out; - } - } + return lio_target_np_driver_store(item, page, count, + ISCSI_INFINIBAND, "ib_isert"); +} +CONFIGFS_ATTR(lio_target_np_, iser); - iscsit_put_tpg(tpg); - return count; -out: - iscsit_put_tpg(tpg); - return rc; +static ssize_t lio_target_np_cxgbit_show(struct config_item *item, char *page) +{ + return lio_target_np_driver_show(item, page, ISCSI_CXGBIT); } -CONFIGFS_ATTR(lio_target_np_, sctp); -CONFIGFS_ATTR(lio_target_np_, iser); +static ssize_t lio_target_np_cxgbit_store(struct config_item *item, + const char *page, size_t count) +{ + return lio_target_np_driver_store(item, page, count, + ISCSI_CXGBIT, "cxgbit"); +} +CONFIGFS_ATTR(lio_target_np_, cxgbit); static struct configfs_attribute *lio_target_portal_attrs[] = { - &lio_target_np_attr_sctp, &lio_target_np_attr_iser, + &lio_target_np_attr_cxgbit, NULL, }; @@ -1554,7 +1513,7 @@ static int lio_tpg_check_prot_fabric_only( * This function calls iscsit_inc_session_usage_count() on the * struct iscsi_session in question. */ -static int lio_tpg_shutdown_session(struct se_session *se_sess) +static void lio_tpg_close_session(struct se_session *se_sess) { struct iscsi_session *sess = se_sess->fabric_sess_ptr; struct se_portal_group *se_tpg = &sess->tpg->tpg_se_tpg; @@ -1566,7 +1525,7 @@ static int lio_tpg_shutdown_session(struct se_session *se_sess) (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { spin_unlock(&sess->conn_lock); spin_unlock_bh(&se_tpg->session_lock); - return 0; + return; } atomic_set(&sess->session_reinstatement, 1); spin_unlock(&sess->conn_lock); @@ -1575,20 +1534,6 @@ static int lio_tpg_shutdown_session(struct se_session *se_sess) spin_unlock_bh(&se_tpg->session_lock); iscsit_stop_session(sess, 1, 1); - return 1; -} - -/* - * Calls iscsit_dec_session_usage_count() as inverse of - * lio_tpg_shutdown_session() - */ -static void lio_tpg_close_session(struct se_session *se_sess) -{ - struct iscsi_session *sess = se_sess->fabric_sess_ptr; - /* - * If the iSCSI Session for the iSCSI Initiator Node exists, - * forcefully shutdown the iSCSI NEXUS. - */ iscsit_close_session(sess); } @@ -1640,7 +1585,6 @@ const struct target_core_fabric_ops iscsi_ops = { .tpg_get_inst_index = lio_tpg_get_inst_index, .check_stop_free = lio_check_stop_free, .release_cmd = lio_release_cmd, - .shutdown_session = lio_tpg_shutdown_session, .close_session = lio_tpg_close_session, .sess_get_index = lio_sess_get_index, .sess_get_initiator_sid = lio_sess_get_initiator_sid, diff --git a/drivers/target/iscsi/iscsi_target_datain_values.c b/drivers/target/iscsi/iscsi_target_datain_values.c index fb3b52b124ac..647d4a5dca52 100644 --- a/drivers/target/iscsi/iscsi_target_datain_values.c +++ b/drivers/target/iscsi/iscsi_target_datain_values.c @@ -524,3 +524,4 @@ struct iscsi_datain_req *iscsit_get_datain_values( return NULL; } +EXPORT_SYMBOL(iscsit_get_datain_values); diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 210f6e4830e3..b54e72c7ab0f 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -786,7 +786,7 @@ static void iscsit_handle_time2retain_timeout(unsigned long data) } spin_unlock_bh(&se_tpg->session_lock); - target_put_session(sess->se_sess); + iscsit_close_session(sess); } void iscsit_start_time2retain_handler(struct iscsi_session *sess) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 8436d56c5f0c..b5212f0f9571 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -228,7 +228,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) if (sess->session_state == TARG_SESS_STATE_FAILED) { spin_unlock_bh(&sess->conn_lock); iscsit_dec_session_usage_count(sess); - target_put_session(sess->se_sess); + iscsit_close_session(sess); return 0; } spin_unlock_bh(&sess->conn_lock); @@ -236,7 +236,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) iscsit_stop_session(sess, 1, 1); iscsit_dec_session_usage_count(sess); - target_put_session(sess->se_sess); + iscsit_close_session(sess); return 0; } @@ -258,7 +258,7 @@ static void iscsi_login_set_conn_values( mutex_unlock(&auth_id_lock); } -static __printf(2, 3) int iscsi_change_param_sprintf( +__printf(2, 3) int iscsi_change_param_sprintf( struct iscsi_conn *conn, const char *fmt, ...) { @@ -279,6 +279,7 @@ static __printf(2, 3) int iscsi_change_param_sprintf( return 0; } +EXPORT_SYMBOL(iscsi_change_param_sprintf); /* * This is the leading connection of a new session, @@ -1387,6 +1388,16 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) goto old_sess_out; } + if (conn->conn_transport->iscsit_validate_params) { + ret = conn->conn_transport->iscsit_validate_params(conn); + if (ret < 0) { + if (zero_tsih) + goto new_sess_out; + else + goto old_sess_out; + } + } + ret = iscsi_target_start_negotiation(login, conn); if (ret < 0) goto new_sess_out; diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 9fc9117d0f22..89d34bd6d87f 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -269,6 +269,7 @@ int iscsi_target_check_login_request( return 0; } +EXPORT_SYMBOL(iscsi_target_check_login_request); static int iscsi_target_check_first_request( struct iscsi_conn *conn, @@ -1246,16 +1247,16 @@ int iscsi_target_start_negotiation( { int ret; - ret = iscsi_target_do_login(conn, login); - if (!ret) { - if (conn->sock) { - struct sock *sk = conn->sock->sk; + if (conn->sock) { + struct sock *sk = conn->sock->sk; - write_lock_bh(&sk->sk_callback_lock); - set_bit(LOGIN_FLAGS_READY, &conn->login_flags); - write_unlock_bh(&sk->sk_callback_lock); - } - } else if (ret < 0) { + write_lock_bh(&sk->sk_callback_lock); + set_bit(LOGIN_FLAGS_READY, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + + ret = iscsi_target_do_login(conn, login); + if (ret < 0) { cancel_delayed_work_sync(&conn->login_work); cancel_delayed_work_sync(&conn->login_cleanup_work); iscsi_target_restore_sock_callbacks(conn); diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 3a1f9a7e6bb6..0efa80bb8962 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -680,6 +680,7 @@ struct iscsi_param *iscsi_find_param_from_key( pr_err("Unable to locate key \"%s\".\n", key); return NULL; } +EXPORT_SYMBOL(iscsi_find_param_from_key); int iscsi_extract_key_value(char *textbuf, char **key, char **value) { diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 57720385a751..1f38177207e0 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -514,6 +514,7 @@ void iscsit_add_cmd_to_immediate_queue( wake_up(&conn->queues_wq); } +EXPORT_SYMBOL(iscsit_add_cmd_to_immediate_queue); struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *conn) { @@ -725,6 +726,9 @@ void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool scsi_cmd, iscsit_remove_cmd_from_immediate_queue(cmd, conn); iscsit_remove_cmd_from_response_queue(cmd, conn); } + + if (conn && conn->conn_transport->iscsit_release_cmd) + conn->conn_transport->iscsit_release_cmd(conn, cmd); } void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) @@ -773,6 +777,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) break; } } +EXPORT_SYMBOL(iscsit_free_cmd); int iscsit_check_session_usage_count(struct iscsi_session *sess) { diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 0ad5ac541a7f..5091b31b3e56 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -601,16 +601,6 @@ static int tcm_loop_get_cmd_state(struct se_cmd *se_cmd) return tl_cmd->sc_cmd_state; } -static int tcm_loop_shutdown_session(struct se_session *se_sess) -{ - return 0; -} - -static void tcm_loop_close_session(struct se_session *se_sess) -{ - return; -}; - static int tcm_loop_write_pending(struct se_cmd *se_cmd) { /* @@ -1243,8 +1233,6 @@ static const struct target_core_fabric_ops loop_ops = { .tpg_get_inst_index = tcm_loop_get_inst_index, .check_stop_free = tcm_loop_check_stop_free, .release_cmd = tcm_loop_release_cmd, - .shutdown_session = tcm_loop_shutdown_session, - .close_session = tcm_loop_close_session, .sess_get_index = tcm_loop_sess_get_index, .write_pending = tcm_loop_write_pending, .write_pending_status = tcm_loop_write_pending_status, diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index c57e7884973d..58bb6ed18185 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -1726,16 +1726,6 @@ static void sbp_release_cmd(struct se_cmd *se_cmd) sbp_free_request(req); } -static int sbp_shutdown_session(struct se_session *se_sess) -{ - return 0; -} - -static void sbp_close_session(struct se_session *se_sess) -{ - return; -} - static u32 sbp_sess_get_index(struct se_session *se_sess) { return 0; @@ -2349,8 +2339,6 @@ static const struct target_core_fabric_ops sbp_ops = { .tpg_check_prod_mode_write_protect = sbp_check_false, .tpg_get_inst_index = sbp_tpg_get_inst_index, .release_cmd = sbp_release_cmd, - .shutdown_session = sbp_shutdown_session, - .close_session = sbp_close_session, .sess_get_index = sbp_sess_get_index, .write_pending = sbp_write_pending, .write_pending_status = sbp_write_pending_status, diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 49aba4a31747..4c82bbe19003 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -932,7 +932,7 @@ static int core_alua_update_tpg_primary_metadata( tg_pt_gp->tg_pt_gp_alua_access_status); snprintf(path, ALUA_METADATA_PATH_LEN, - "/var/target/alua/tpgs_%s/%s", &wwn->unit_serial[0], + "%s/alua/tpgs_%s/%s", db_root, &wwn->unit_serial[0], config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item)); rc = core_alua_write_tpg_metadata(path, md_buf, len); @@ -1275,8 +1275,8 @@ static int core_alua_update_tpg_secondary_metadata(struct se_lun *lun) atomic_read(&lun->lun_tg_pt_secondary_offline), lun->lun_tg_pt_secondary_stat); - snprintf(path, ALUA_METADATA_PATH_LEN, "/var/target/alua/%s/%s/lun_%llu", - se_tpg->se_tpg_tfo->get_fabric_name(), wwn, + snprintf(path, ALUA_METADATA_PATH_LEN, "%s/alua/%s/%s/lun_%llu", + db_root, se_tpg->se_tpg_tfo->get_fabric_name(), wwn, lun->unpacked_lun); rc = core_alua_write_tpg_metadata(path, md_buf, len); diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index d498533f09ee..2001005bef45 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -99,6 +99,67 @@ static ssize_t target_core_item_version_show(struct config_item *item, CONFIGFS_ATTR_RO(target_core_item_, version); +char db_root[DB_ROOT_LEN] = DB_ROOT_DEFAULT; +static char db_root_stage[DB_ROOT_LEN]; + +static ssize_t target_core_item_dbroot_show(struct config_item *item, + char *page) +{ + return sprintf(page, "%s\n", db_root); +} + +static ssize_t target_core_item_dbroot_store(struct config_item *item, + const char *page, size_t count) +{ + ssize_t read_bytes; + struct file *fp; + + mutex_lock(&g_tf_lock); + if (!list_empty(&g_tf_list)) { + mutex_unlock(&g_tf_lock); + pr_err("db_root: cannot be changed: target drivers registered"); + return -EINVAL; + } + + if (count > (DB_ROOT_LEN - 1)) { + mutex_unlock(&g_tf_lock); + pr_err("db_root: count %d exceeds DB_ROOT_LEN-1: %u\n", + (int)count, DB_ROOT_LEN - 1); + return -EINVAL; + } + + read_bytes = snprintf(db_root_stage, DB_ROOT_LEN, "%s", page); + if (!read_bytes) { + mutex_unlock(&g_tf_lock); + return -EINVAL; + } + if (db_root_stage[read_bytes - 1] == '\n') + db_root_stage[read_bytes - 1] = '\0'; + + /* validate new db root before accepting it */ + fp = filp_open(db_root_stage, O_RDONLY, 0); + if (IS_ERR(fp)) { + mutex_unlock(&g_tf_lock); + pr_err("db_root: cannot open: %s\n", db_root_stage); + return -EINVAL; + } + if (!S_ISDIR(fp->f_inode->i_mode)) { + filp_close(fp, 0); + mutex_unlock(&g_tf_lock); + pr_err("db_root: not a directory: %s\n", db_root_stage); + return -EINVAL; + } + filp_close(fp, 0); + + strncpy(db_root, db_root_stage, read_bytes); + + mutex_unlock(&g_tf_lock); + + return read_bytes; +} + +CONFIGFS_ATTR(target_core_item_, dbroot); + static struct target_fabric_configfs *target_core_get_fabric( const char *name) { @@ -239,6 +300,7 @@ static struct configfs_group_operations target_core_fabric_group_ops = { */ static struct configfs_attribute *target_core_fabric_item_attrs[] = { &target_core_item_attr_version, + &target_core_item_attr_dbroot, NULL, }; @@ -323,14 +385,6 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->release_cmd()\n"); return -EINVAL; } - if (!tfo->shutdown_session) { - pr_err("Missing tfo->shutdown_session()\n"); - return -EINVAL; - } - if (!tfo->close_session) { - pr_err("Missing tfo->close_session()\n"); - return -EINVAL; - } if (!tfo->sess_get_index) { pr_err("Missing tfo->sess_get_index()\n"); return -EINVAL; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 86b4a8375628..fc91e85f54ba 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -155,4 +155,10 @@ void target_stat_setup_mappedlun_default_groups(struct se_lun_acl *); /* target_core_xcopy.c */ extern struct se_portal_group xcopy_pt_tpg; +/* target_core_configfs.c */ +#define DB_ROOT_LEN 4096 +#define DB_ROOT_DEFAULT "/var/target" + +extern char db_root[]; + #endif /* TARGET_CORE_INTERNAL_H */ diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index b1795735eafc..47463c99c318 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1985,7 +1985,7 @@ static int __core_scsi3_write_aptpl_to_file( return -EMSGSIZE; } - snprintf(path, 512, "/var/target/pr/aptpl_%s", &wwn->unit_serial[0]); + snprintf(path, 512, "%s/pr/aptpl_%s", db_root, &wwn->unit_serial[0]); file = filp_open(path, flags, 0600); if (IS_ERR(file)) { pr_err("filp_open(%s) for APTPL metadata" diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 47a833f3a145..24b36fd785f1 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -403,7 +403,6 @@ static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, bool is_read) struct se_device *se_dev = cmd->se_dev; struct rd_dev *dev = RD_DEV(se_dev); struct rd_dev_sg_table *prot_table; - bool need_to_release = false; struct scatterlist *prot_sg; u32 sectors = cmd->data_length / se_dev->dev_attrib.block_size; u32 prot_offset, prot_page; @@ -432,9 +431,6 @@ static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, bool is_read) if (!rc) sbc_dif_copy_prot(cmd, sectors, is_read, prot_sg, prot_offset); - if (need_to_release) - kfree(prot_sg); - return rc; } diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index ddf046080dc3..d99752c6cd60 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -336,44 +336,39 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( return acl; } -void core_tpg_del_initiator_node_acl(struct se_node_acl *acl) +static void target_shutdown_sessions(struct se_node_acl *acl) { - struct se_portal_group *tpg = acl->se_tpg; - LIST_HEAD(sess_list); - struct se_session *sess, *sess_tmp; + struct se_session *sess; unsigned long flags; - int rc; - - mutex_lock(&tpg->acl_node_mutex); - if (acl->dynamic_node_acl) { - acl->dynamic_node_acl = 0; - } - list_del(&acl->acl_list); - mutex_unlock(&tpg->acl_node_mutex); +restart: spin_lock_irqsave(&acl->nacl_sess_lock, flags); - acl->acl_stop = 1; - - list_for_each_entry_safe(sess, sess_tmp, &acl->acl_sess_list, - sess_acl_list) { - if (sess->sess_tearing_down != 0) + list_for_each_entry(sess, &acl->acl_sess_list, sess_acl_list) { + if (sess->sess_tearing_down) continue; - if (!target_get_session(sess)) - continue; - list_move(&sess->sess_acl_list, &sess_list); + list_del_init(&sess->sess_acl_list); + spin_unlock_irqrestore(&acl->nacl_sess_lock, flags); + + if (acl->se_tpg->se_tpg_tfo->close_session) + acl->se_tpg->se_tpg_tfo->close_session(sess); + goto restart; } spin_unlock_irqrestore(&acl->nacl_sess_lock, flags); +} - list_for_each_entry_safe(sess, sess_tmp, &sess_list, sess_acl_list) { - list_del(&sess->sess_acl_list); +void core_tpg_del_initiator_node_acl(struct se_node_acl *acl) +{ + struct se_portal_group *tpg = acl->se_tpg; + + mutex_lock(&tpg->acl_node_mutex); + if (acl->dynamic_node_acl) + acl->dynamic_node_acl = 0; + list_del(&acl->acl_list); + mutex_unlock(&tpg->acl_node_mutex); + + target_shutdown_sessions(acl); - rc = tpg->se_tpg_tfo->shutdown_session(sess); - target_put_session(sess); - if (!rc) - continue; - target_put_session(sess); - } target_put_nacl(acl); /* * Wait for last target_put_nacl() to complete in target_complete_nacl() @@ -400,11 +395,7 @@ int core_tpg_set_initiator_node_queue_depth( struct se_node_acl *acl, u32 queue_depth) { - LIST_HEAD(sess_list); struct se_portal_group *tpg = acl->se_tpg; - struct se_session *sess, *sess_tmp; - unsigned long flags; - int rc; /* * User has requested to change the queue depth for a Initiator Node. @@ -413,30 +404,10 @@ int core_tpg_set_initiator_node_queue_depth( */ target_set_nacl_queue_depth(tpg, acl, queue_depth); - spin_lock_irqsave(&acl->nacl_sess_lock, flags); - list_for_each_entry_safe(sess, sess_tmp, &acl->acl_sess_list, - sess_acl_list) { - if (sess->sess_tearing_down != 0) - continue; - if (!target_get_session(sess)) - continue; - spin_unlock_irqrestore(&acl->nacl_sess_lock, flags); - - /* - * Finally call tpg->se_tpg_tfo->close_session() to force session - * reinstatement to occur if there is an active session for the - * $FABRIC_MOD Initiator Node in question. - */ - rc = tpg->se_tpg_tfo->shutdown_session(sess); - target_put_session(sess); - if (!rc) { - spin_lock_irqsave(&acl->nacl_sess_lock, flags); - continue; - } - target_put_session(sess); - spin_lock_irqsave(&acl->nacl_sess_lock, flags); - } - spin_unlock_irqrestore(&acl->nacl_sess_lock, flags); + /* + * Shutdown all pending sessions to force session reinstatement. + */ + target_shutdown_sessions(acl); pr_debug("Successfully changed queue depth to: %d for Initiator" " Node: %s on %s Target Portal Group: %u\n", acl->queue_depth, diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 590384a2bf8b..5ab3967dda43 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -239,7 +239,6 @@ struct se_session *transport_init_session(enum target_prot_op sup_prot_ops) INIT_LIST_HEAD(&se_sess->sess_cmd_list); INIT_LIST_HEAD(&se_sess->sess_wait_list); spin_lock_init(&se_sess->sess_cmd_lock); - kref_init(&se_sess->sess_kref); se_sess->sup_prot_ops = sup_prot_ops; return se_sess; @@ -430,27 +429,6 @@ target_alloc_session(struct se_portal_group *tpg, } EXPORT_SYMBOL(target_alloc_session); -static void target_release_session(struct kref *kref) -{ - struct se_session *se_sess = container_of(kref, - struct se_session, sess_kref); - struct se_portal_group *se_tpg = se_sess->se_tpg; - - se_tpg->se_tpg_tfo->close_session(se_sess); -} - -int target_get_session(struct se_session *se_sess) -{ - return kref_get_unless_zero(&se_sess->sess_kref); -} -EXPORT_SYMBOL(target_get_session); - -void target_put_session(struct se_session *se_sess) -{ - kref_put(&se_sess->sess_kref, target_release_session); -} -EXPORT_SYMBOL(target_put_session); - ssize_t target_show_dynamic_sessions(struct se_portal_group *se_tpg, char *page) { struct se_session *se_sess; @@ -499,8 +477,8 @@ void transport_deregister_session_configfs(struct se_session *se_sess) se_nacl = se_sess->se_node_acl; if (se_nacl) { spin_lock_irqsave(&se_nacl->nacl_sess_lock, flags); - if (se_nacl->acl_stop == 0) - list_del(&se_sess->sess_acl_list); + if (!list_empty(&se_sess->sess_acl_list)) + list_del_init(&se_sess->sess_acl_list); /* * If the session list is empty, then clear the pointer. * Otherwise, set the struct se_session pointer from the tail diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index c30003bd4ff0..e28209b99b59 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -139,7 +139,6 @@ extern unsigned int ft_debug_logging; * Session ops. */ void ft_sess_put(struct ft_sess *); -int ft_sess_shutdown(struct se_session *); void ft_sess_close(struct se_session *); u32 ft_sess_get_index(struct se_session *); u32 ft_sess_get_port_name(struct se_session *, unsigned char *, u32); diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 4d375e95841b..42ee91123dca 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -442,7 +442,6 @@ static const struct target_core_fabric_ops ft_fabric_ops = { .tpg_get_inst_index = ft_tpg_get_inst_index, .check_stop_free = ft_check_stop_free, .release_cmd = ft_release_cmd, - .shutdown_session = ft_sess_shutdown, .close_session = ft_sess_close, .sess_get_index = ft_sess_get_index, .sess_get_initiator_sid = NULL, diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index d0c3e1894c61..f5186a744399 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -303,18 +303,6 @@ static void ft_sess_delete_all(struct ft_tport *tport) */ /* - * Determine whether session is allowed to be shutdown in the current context. - * Returns non-zero if the session should be shutdown. - */ -int ft_sess_shutdown(struct se_session *se_sess) -{ - struct ft_sess *sess = se_sess->fabric_sess_ptr; - - pr_debug("port_id %x\n", sess->port_id); - return 1; -} - -/* * Remove session and send PRLO. * This is called when the ACL is being deleted or queue depth is changing. */ diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index 2ace0295408e..35fe3c80cfc0 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -1290,15 +1290,6 @@ static void usbg_release_cmd(struct se_cmd *se_cmd) percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag); } -static int usbg_shutdown_session(struct se_session *se_sess) -{ - return 0; -} - -static void usbg_close_session(struct se_session *se_sess) -{ -} - static u32 usbg_sess_get_index(struct se_session *se_sess) { return 0; @@ -1735,8 +1726,6 @@ static const struct target_core_fabric_ops usbg_ops = { .tpg_check_prod_mode_write_protect = usbg_check_false, .tpg_get_inst_index = usbg_tpg_get_inst_index, .release_cmd = usbg_release_cmd, - .shutdown_session = usbg_shutdown_session, - .close_session = usbg_close_session, .sess_get_index = usbg_sess_get_index, .sess_get_initiator_sid = NULL, .write_pending = usbg_send_write_request, diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 0e6fd556c982..9d6320e8ff3e 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -333,16 +333,6 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag); } -static int vhost_scsi_shutdown_session(struct se_session *se_sess) -{ - return 0; -} - -static void vhost_scsi_close_session(struct se_session *se_sess) -{ - return; -} - static u32 vhost_scsi_sess_get_index(struct se_session *se_sess) { return 0; @@ -2114,8 +2104,6 @@ static struct target_core_fabric_ops vhost_scsi_ops = { .tpg_get_inst_index = vhost_scsi_tpg_get_inst_index, .release_cmd = vhost_scsi_release_cmd, .check_stop_free = vhost_scsi_check_stop_free, - .shutdown_session = vhost_scsi_shutdown_session, - .close_session = vhost_scsi_close_session, .sess_get_index = vhost_scsi_sess_get_index, .sess_get_initiator_sid = NULL, .write_pending = vhost_scsi_write_pending, diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index ff932624eaad..d6950e0802b7 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1399,15 +1399,6 @@ static void scsiback_release_cmd(struct se_cmd *se_cmd) percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag); } -static int scsiback_shutdown_session(struct se_session *se_sess) -{ - return 0; -} - -static void scsiback_close_session(struct se_session *se_sess) -{ -} - static u32 scsiback_sess_get_index(struct se_session *se_sess) { return 0; @@ -1841,8 +1832,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .tpg_get_inst_index = scsiback_tpg_get_inst_index, .check_stop_free = scsiback_check_stop_free, .release_cmd = scsiback_release_cmd, - .shutdown_session = scsiback_shutdown_session, - .close_session = scsiback_close_session, .sess_get_index = scsiback_sess_get_index, .sess_get_initiator_sid = NULL, .write_pending = scsiback_write_pending, |