diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-28 11:04:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-28 11:04:16 -0700 |
commit | 1cbe06c3cf542d48eb22180163e00f91760ef8cd (patch) | |
tree | ba093bf9e32790950b99bfec838a0354df5bf0dc | |
parent | ed2608faa0f701b1dbc65277a9e5c7ff7118bfd4 (diff) | |
parent | 7a226f9c32b0481b0744e2726cd7f8349b866af5 (diff) | |
download | linux-1cbe06c3cf542d48eb22180163e00f91760ef8cd.tar.bz2 |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull more rdma updates from Doug Ledford:
"This is the second group of code for the 4.7 merge window. It looks
large, but only in one sense. I'll get to that in a minute. The list
of changes here breaks down as follows:
- Dynamic counter infrastructure in the IB drivers
This is a sysfs based code to allow free form access to the
hardware counters RDMA devices might support so drivers don't need
to code this up repeatedly themselves
- SendOnlyFullMember multicast support
- IB router support
- A couple misc fixes
- The big item on the list: hfi1 driver updates, plus moving the hfi1
driver out of staging
There was a group of 15 patches in the hfi1 list that I thought I had
in the first pull request but they weren't. So that added to the
length of the hfi1 section here.
As far as these go, everything but the hfi1 is pretty straight
forward.
The hfi1 is, if you recall, the driver that Al had complaints about
how it used the write/writev interfaces in an overloaded fashion. The
write portion of their interface behaved like the write handler in the
IB stack proper and did bi-directional communications. The writev
interface, on the other hand, only accepts SDMA request structures.
The completions for those structures are sent back via an entirely
different event mechanism.
With the security patch, we put security checks on the write
interface, however, we also knew they would be going away soon. Now,
we've converted the write handler in the hfi1 driver to use ioctls
from the IB reserved magic area for its bidirectional communications.
With that change, Intel has addressed all of the items originally on
their TODO when they went into staging (as well as many items added to
the list later).
As such, I moved them out, and since they were the last item in the
staging/rdma directory, and I don't have immediate plans to use the
staging area again, I removed the staging/rdma area.
Because of the move out of staging, as well as a series of 5 patches
in the hfi1 driver that removed code people thought should be done in
a different way and was optional to begin with (a snoop debug
interface, an eeprom driver for an eeprom connected directory to their
hfi1 chip and not via an i2c bus, and a few other things like that),
the line count, especially the removal count, is high"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (56 commits)
staging/rdma: Remove the entire rdma subdirectory of staging
IB/core: Make device counter infrastructure dynamic
IB/hfi1: Fix pio map initialization
IB/hfi1: Correct 8051 link parameter settings
IB/hfi1: Update pkey table properly after link down or FM start
IB/rdamvt: Fix rdmavt s_ack_queue sizing
IB/rdmavt: Max atomic value should be a u8
IB/hfi1: Fix hard lockup due to not using save/restore spin lock
IB/hfi1: Add tracing support for send with invalidate opcode
IB/hfi1, qib: Add ieth to the packet header definitions
IB/hfi1: Move driver out of staging
IB/hfi1: Do not free hfi1 cdev parent structure early
IB/hfi1: Add trace message in user IOCTL handling
IB/hfi1: Remove write(), use ioctl() for user cmds
IB/hfi1: Add ioctl() interface for user commands
IB/hfi1: Remove unused user command
IB/hfi1: Remove snoop/diag interface
IB/hfi1: Remove EPROM functionality from data device
IB/hfi1: Remove UI char device
IB/hfi1: Remove multiple device cdev
...
-rw-r--r-- | Documentation/infiniband/sysfs.txt | 12 | ||||
-rw-r--r-- | MAINTAINERS | 14 | ||||
-rw-r--r-- | drivers/infiniband/Kconfig | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/Makefile | 12 | ||||
-rw-r--r-- | drivers/infiniband/core/addr.c | 226 | ||||
-rw-r--r-- | drivers/infiniband/core/core_priv.h | 16 | ||||
-rw-r--r-- | drivers/infiniband/core/device.c | 58 | ||||
-rw-r--r-- | drivers/infiniband/core/mad.c | 13 | ||||
-rw-r--r-- | drivers/infiniband/core/multicast.c | 23 | ||||
-rw-r--r-- | drivers/infiniband/core/sa_query.c | 211 | ||||
-rw-r--r-- | drivers/infiniband/core/sysfs.c | 366 | ||||
-rw-r--r-- | drivers/infiniband/hw/Makefile | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb3/cxio_hal.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_provider.c | 147 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/provider.c | 58 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/Kconfig (renamed from drivers/staging/rdma/hfi1/Kconfig) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/Makefile (renamed from drivers/staging/rdma/hfi1/Makefile) | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/affinity.c (renamed from drivers/staging/rdma/hfi1/affinity.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/affinity.h (renamed from drivers/staging/rdma/hfi1/affinity.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/aspm.h (renamed from drivers/staging/rdma/hfi1/aspm.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/chip.c (renamed from drivers/staging/rdma/hfi1/chip.c) | 41 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/chip.h (renamed from drivers/staging/rdma/hfi1/chip.h) | 6 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/chip_registers.h (renamed from drivers/staging/rdma/hfi1/chip_registers.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/common.h (renamed from drivers/staging/rdma/hfi1/common.h) | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/debugfs.c (renamed from drivers/staging/rdma/hfi1/debugfs.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/debugfs.h (renamed from drivers/staging/rdma/hfi1/debugfs.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/device.c (renamed from drivers/staging/rdma/hfi1/device.c) | 18 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/device.h (renamed from drivers/staging/rdma/hfi1/device.h) | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/dma.c (renamed from drivers/staging/rdma/hfi1/dma.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/driver.c (renamed from drivers/staging/rdma/hfi1/driver.c) | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/efivar.c (renamed from drivers/staging/rdma/hfi1/efivar.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/efivar.h (renamed from drivers/staging/rdma/hfi1/efivar.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/eprom.c | 102 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/eprom.h (renamed from drivers/staging/rdma/hfi1/eprom.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/file_ops.c (renamed from drivers/staging/rdma/hfi1/file_ops.c) | 549 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/firmware.c (renamed from drivers/staging/rdma/hfi1/firmware.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/hfi.h (renamed from drivers/staging/rdma/hfi1/hfi.h) | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/init.c (renamed from drivers/staging/rdma/hfi1/init.c) | 22 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/intr.c (renamed from drivers/staging/rdma/hfi1/intr.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/iowait.h (renamed from drivers/staging/rdma/hfi1/iowait.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/mad.c (renamed from drivers/staging/rdma/hfi1/mad.c) | 99 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/mad.h (renamed from drivers/staging/rdma/hfi1/mad.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/mmu_rb.c (renamed from drivers/staging/rdma/hfi1/mmu_rb.c) | 22 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/mmu_rb.h (renamed from drivers/staging/rdma/hfi1/mmu_rb.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/opa_compat.h (renamed from drivers/staging/rdma/hfi1/opa_compat.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/pcie.c (renamed from drivers/staging/rdma/hfi1/pcie.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio.c (renamed from drivers/staging/rdma/hfi1/pio.c) | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio.h (renamed from drivers/staging/rdma/hfi1/pio.h) | 8 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio_copy.c (renamed from drivers/staging/rdma/hfi1/pio_copy.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/platform.c (renamed from drivers/staging/rdma/hfi1/platform.c) | 27 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/platform.h (renamed from drivers/staging/rdma/hfi1/platform.h) | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/qp.c (renamed from drivers/staging/rdma/hfi1/qp.c) | 9 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/qp.h (renamed from drivers/staging/rdma/hfi1/qp.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/qsfp.c (renamed from drivers/staging/rdma/hfi1/qsfp.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/qsfp.h (renamed from drivers/staging/rdma/hfi1/qsfp.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/rc.c (renamed from drivers/staging/rdma/hfi1/rc.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/ruc.c (renamed from drivers/staging/rdma/hfi1/ruc.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/sdma.c (renamed from drivers/staging/rdma/hfi1/sdma.c) | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/sdma.h (renamed from drivers/staging/rdma/hfi1/sdma.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/sdma_txreq.h (renamed from drivers/staging/rdma/hfi1/sdma_txreq.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/sysfs.c (renamed from drivers/staging/rdma/hfi1/sysfs.c) | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/trace.c (renamed from drivers/staging/rdma/hfi1/trace.c) | 8 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/trace.h (renamed from drivers/staging/rdma/hfi1/trace.h) | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/twsi.c (renamed from drivers/staging/rdma/hfi1/twsi.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/twsi.h (renamed from drivers/staging/rdma/hfi1/twsi.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/uc.c (renamed from drivers/staging/rdma/hfi1/uc.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/ud.c (renamed from drivers/staging/rdma/hfi1/ud.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/user_exp_rcv.c (renamed from drivers/staging/rdma/hfi1/user_exp_rcv.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/user_exp_rcv.h (renamed from drivers/staging/rdma/hfi1/user_exp_rcv.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/user_pages.c (renamed from drivers/staging/rdma/hfi1/user_pages.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/user_sdma.c (renamed from drivers/staging/rdma/hfi1/user_sdma.c) | 18 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/user_sdma.h (renamed from drivers/staging/rdma/hfi1/user_sdma.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/verbs.c (renamed from drivers/staging/rdma/hfi1/verbs.c) | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/verbs.h (renamed from drivers/staging/rdma/hfi1/verbs.h) | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/verbs_txreq.c (renamed from drivers/staging/rdma/hfi1/verbs_txreq.c) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/verbs_txreq.h (renamed from drivers/staging/rdma/hfi1/verbs_txreq.h) | 0 | ||||
-rw-r--r-- | drivers/infiniband/hw/i40iw/i40iw_verbs.c | 145 | ||||
-rw-r--r-- | drivers/infiniband/hw/qib/qib_iba7322.c | 15 | ||||
-rw-r--r-- | drivers/infiniband/hw/qib/qib_mad.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/hw/qib/qib_verbs.h | 1 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/cq.c | 1 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/mr.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/qp.c | 30 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 4 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_ib.c | 109 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 140 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 48 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srpt/ib_srpt.c | 2 | ||||
-rw-r--r-- | drivers/staging/Kconfig | 2 | ||||
-rw-r--r-- | drivers/staging/Makefile | 1 | ||||
-rw-r--r-- | drivers/staging/rdma/Kconfig | 27 | ||||
-rw-r--r-- | drivers/staging/rdma/Makefile | 2 | ||||
-rw-r--r-- | drivers/staging/rdma/hfi1/TODO | 6 | ||||
-rw-r--r-- | drivers/staging/rdma/hfi1/diag.c | 1925 | ||||
-rw-r--r-- | drivers/staging/rdma/hfi1/eprom.c | 471 | ||||
-rw-r--r-- | include/rdma/ib_mad.h | 60 | ||||
-rw-r--r-- | include/rdma/ib_pack.h | 5 | ||||
-rw-r--r-- | include/rdma/ib_sa.h | 12 | ||||
-rw-r--r-- | include/rdma/ib_verbs.h | 126 | ||||
-rw-r--r-- | include/rdma/rdma_vt.h | 13 | ||||
-rw-r--r-- | include/rdma/rdmavt_qp.h | 5 | ||||
-rw-r--r-- | include/uapi/rdma/hfi/hfi1_user.h | 80 | ||||
-rw-r--r-- | include/uapi/rdma/rdma_netlink.h | 10 |
105 files changed, 1986 insertions, 3400 deletions
diff --git a/Documentation/infiniband/sysfs.txt b/Documentation/infiniband/sysfs.txt index 3ecf0c3a133f..45bcafe6ff8a 100644 --- a/Documentation/infiniband/sysfs.txt +++ b/Documentation/infiniband/sysfs.txt @@ -56,6 +56,18 @@ SYSFS FILES ports/1/pkeys/10 contains the value at index 10 in port 1's P_Key table. + There is an optional "hw_counters" subdirectory that may be under either + the parent device or the port subdirectories or both. If present, + there are a list of counters provided by the hardware. They may match + some of the counters in the counters directory, but they often include + many other counters. In addition to the various counters, there will + be a file named "lifespan" that configures how frequently the core + should update the counters when they are being accessed (counters are + not updated if they are not being accessed). The lifespan is in milli- + seconds and defaults to 10 unless set to something else by the driver. + Users may echo a value between 0 - 10000 to the lifespan file to set + the length of time between updates in milliseconds. + MTHCA The Mellanox HCA driver also creates the files: diff --git a/MAINTAINERS b/MAINTAINERS index f466673f86ff..216165a1384d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5308,6 +5308,13 @@ F: drivers/block/cciss* F: include/linux/cciss_ioctl.h F: include/uapi/linux/cciss_ioctl.h +HFI1 DRIVER +M: Mike Marciniszyn <mike.marciniszyn@intel.com> +M: Dennis Dalessandro <dennis.dalessandro@intel.com> +L: linux-rdma@vger.kernel.org +S: Supported +F: drivers/infiniband/hw/hfi1 + HFS FILESYSTEM L: linux-fsdevel@vger.kernel.org S: Orphan @@ -5837,7 +5844,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma.git S: Supported F: Documentation/infiniband/ F: drivers/infiniband/ -F: drivers/staging/rdma/ F: include/uapi/linux/if_infiniband.h F: include/uapi/rdma/ F: include/rdma/ @@ -10920,12 +10926,6 @@ M: Arnaud Patard <arnaud.patard@rtp-net.org> S: Odd Fixes F: drivers/staging/xgifb/ -HFI1 DRIVER -M: Mike Marciniszyn <infinipath@intel.com> -L: linux-rdma@vger.kernel.org -S: Supported -F: drivers/staging/rdma/hfi1 - STARFIRE/DURALAN NETWORK DRIVER M: Ion Badulescu <ionut@badula.org> S: Odd Fixes diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 6425c0e5d18a..2137adfbd8c3 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -85,4 +85,6 @@ source "drivers/infiniband/ulp/isert/Kconfig" source "drivers/infiniband/sw/rdmavt/Kconfig" +source "drivers/infiniband/hw/hfi1/Kconfig" + endif # INFINIBAND diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 26987d9d7e1c..edaae9f9853c 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,8 +1,7 @@ infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o -obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ - ib_cm.o iw_cm.o ib_addr.o \ +obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \ $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ @@ -10,14 +9,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ - roce_gid_mgmt.o mr_pool.o + roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ + multicast.o mad.o smi.o agent.o mad_rmpp.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o -ib_mad-y := mad.o smi.o agent.o mad_rmpp.o - -ib_sa-y := sa_query.o multicast.o - ib_cm-y := cm.o iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o @@ -28,8 +24,6 @@ rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o rdma_ucm-y := ucma.o -ib_addr-y := addr.o - ib_umad-y := user_mad.o ib_ucm-y := ucm.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 337353d86cfa..1374541a4528 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -46,10 +46,10 @@ #include <net/ip6_route.h> #include <rdma/ib_addr.h> #include <rdma/ib.h> +#include <rdma/rdma_netlink.h> +#include <net/netlink.h> -MODULE_AUTHOR("Sean Hefty"); -MODULE_DESCRIPTION("IB Address Translation"); -MODULE_LICENSE("Dual BSD/GPL"); +#include "core_priv.h" struct addr_req { struct list_head list; @@ -62,8 +62,11 @@ struct addr_req { struct rdma_dev_addr *addr, void *context); unsigned long timeout; int status; + u32 seq; }; +static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0); + static void process_req(struct work_struct *work); static DEFINE_MUTEX(lock); @@ -71,6 +74,126 @@ static LIST_HEAD(req_list); static DECLARE_DELAYED_WORK(work, process_req); static struct workqueue_struct *addr_wq; +static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { + [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, + .len = sizeof(struct rdma_nla_ls_gid)}, +}; + +static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) +{ + struct nlattr *tb[LS_NLA_TYPE_MAX] = {}; + int ret; + + if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) + return false; + + ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), + nlmsg_len(nlh), ib_nl_addr_policy); + if (ret) + return false; + + return true; +} + +static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh) +{ + const struct nlattr *head, *curr; + union ib_gid gid; + struct addr_req *req; + int len, rem; + int found = 0; + + head = (const struct nlattr *)nlmsg_data(nlh); + len = nlmsg_len(nlh); + + nla_for_each_attr(curr, head, len, rem) { + if (curr->nla_type == LS_NLA_TYPE_DGID) + memcpy(&gid, nla_data(curr), nla_len(curr)); + } + + mutex_lock(&lock); + list_for_each_entry(req, &req_list, list) { + if (nlh->nlmsg_seq != req->seq) + continue; + /* We set the DGID part, the rest was set earlier */ + rdma_addr_set_dgid(req->addr, &gid); + req->status = 0; + found = 1; + break; + } + mutex_unlock(&lock); + + if (!found) + pr_info("Couldn't find request waiting for DGID: %pI6\n", + &gid); +} + +int ib_nl_handle_ip_res_resp(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; + + if ((nlh->nlmsg_flags & NLM_F_REQUEST) || + !(NETLINK_CB(skb).sk) || + !netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; + + if (ib_nl_is_good_ip_resp(nlh)) + ib_nl_process_good_ip_rsep(nlh); + + return skb->len; +} + +static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, + const void *daddr, + u32 seq, u16 family) +{ + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + struct rdma_ls_ip_resolve_header *header; + void *data; + size_t size; + int attrtype; + int len; + + if (family == AF_INET) { + size = sizeof(struct in_addr); + attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4; + } else { + size = sizeof(struct in6_addr); + attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6; + } + + len = nla_total_size(sizeof(size)); + len += NLMSG_ALIGN(sizeof(*header)); + + skb = nlmsg_new(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS, + RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST); + if (!data) { + nlmsg_free(skb); + return -ENODATA; + } + + /* Construct the family header first */ + header = (struct rdma_ls_ip_resolve_header *) + skb_put(skb, NLMSG_ALIGN(sizeof(*header))); + header->ifindex = dev_addr->bound_dev_if; + nla_put(skb, attrtype, size, daddr); + + /* Repair the nlmsg header length */ + nlmsg_end(skb, nlh); + ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL); + + /* Make the request retry, so when we get the response from userspace + * we will have something. + */ + return -ENODATA; +} + int rdma_addr_size(struct sockaddr *addr) { switch (addr->sa_family) { @@ -199,6 +322,17 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } +static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, + const void *daddr, u32 seq, u16 family) +{ + if (ibnl_chk_listeners(RDMA_NL_GROUP_LS)) + return -EADDRNOTAVAIL; + + /* We fill in what we can, the response will fill the rest */ + rdma_copy_addr(dev_addr, dst->dev, NULL); + return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); +} + static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const void *daddr) { @@ -223,6 +357,39 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, return ret; } +static bool has_gateway(struct dst_entry *dst, sa_family_t family) +{ + struct rtable *rt; + struct rt6_info *rt6; + + if (family == AF_INET) { + rt = container_of(dst, struct rtable, dst); + return rt->rt_uses_gateway; + } + + rt6 = container_of(dst, struct rt6_info, dst); + return rt6->rt6i_flags & RTF_GATEWAY; +} + +static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, + const struct sockaddr *dst_in, u32 seq) +{ + const struct sockaddr_in *dst_in4 = + (const struct sockaddr_in *)dst_in; + const struct sockaddr_in6 *dst_in6 = + (const struct sockaddr_in6 *)dst_in; + const void *daddr = (dst_in->sa_family == AF_INET) ? + (const void *)&dst_in4->sin_addr.s_addr : + (const void *)&dst_in6->sin6_addr; + sa_family_t family = dst_in->sa_family; + + /* Gateway + ARPHRD_INFINIBAND -> IB router */ + if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND) + return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family); + else + return dst_fetch_ha(dst, dev_addr, daddr); +} + static int addr4_resolve(struct sockaddr_in *src_in, const struct sockaddr_in *dst_in, struct rdma_dev_addr *addr, @@ -246,10 +413,11 @@ static int addr4_resolve(struct sockaddr_in *src_in, src_in->sin_family = AF_INET; src_in->sin_addr.s_addr = fl4.saddr; - /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't - * routable) and we could set the network type accordingly. + /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're + * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network + * type accordingly. */ - if (rt->rt_uses_gateway) + if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND) addr->network = RDMA_NETWORK_IPV4; addr->hoplimit = ip4_dst_hoplimit(&rt->dst); @@ -291,10 +459,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, src_in->sin6_addr = fl6.saddr; } - /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't - * routable) and we could set the network type accordingly. + /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're + * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network + * type accordingly. */ - if (rt->rt6i_flags & RTF_GATEWAY) + if (rt->rt6i_flags & RTF_GATEWAY && + ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND) addr->network = RDMA_NETWORK_IPV6; addr->hoplimit = ip6_dst_hoplimit(dst); @@ -317,7 +487,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, static int addr_resolve_neigh(struct dst_entry *dst, const struct sockaddr *dst_in, - struct rdma_dev_addr *addr) + struct rdma_dev_addr *addr, + u32 seq) { if (dst->dev->flags & IFF_LOOPBACK) { int ret; @@ -331,17 +502,8 @@ static int addr_resolve_neigh(struct dst_entry *dst, } /* If the device doesn't do ARP internally */ - if (!(dst->dev->flags & IFF_NOARP)) { - const struct sockaddr_in *dst_in4 = - (const struct sockaddr_in *)dst_in; - const struct sockaddr_in6 *dst_in6 = - (const struct sockaddr_in6 *)dst_in; - - return dst_fetch_ha(dst, addr, - dst_in->sa_family == AF_INET ? - (const void *)&dst_in4->sin_addr.s_addr : - (const void *)&dst_in6->sin6_addr); - } + if (!(dst->dev->flags & IFF_NOARP)) + return fetch_ha(dst, addr, dst_in, seq); return rdma_copy_addr(addr, dst->dev, NULL); } @@ -349,7 +511,8 @@ static int addr_resolve_neigh(struct dst_entry *dst, static int addr_resolve(struct sockaddr *src_in, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, - bool resolve_neigh) + bool resolve_neigh, + u32 seq) { struct net_device *ndev; struct dst_entry *dst; @@ -366,7 +529,7 @@ static int addr_resolve(struct sockaddr *src_in, return ret; if (resolve_neigh) - ret = addr_resolve_neigh(&rt->dst, dst_in, addr); + ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq); ndev = rt->dst.dev; dev_hold(ndev); @@ -383,7 +546,7 @@ static int addr_resolve(struct sockaddr *src_in, return ret; if (resolve_neigh) - ret = addr_resolve_neigh(dst, dst_in, addr); + ret = addr_resolve_neigh(dst, dst_in, addr, seq); ndev = dst->dev; dev_hold(ndev); @@ -412,7 +575,7 @@ static void process_req(struct work_struct *work) src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; req->status = addr_resolve(src_in, dst_in, req->addr, - true); + true, req->seq); if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; else if (req->status == -ENODATA) @@ -471,8 +634,9 @@ int rdma_resolve_ip(struct rdma_addr_client *client, req->context = context; req->client = client; atomic_inc(&client->refcount); + req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); - req->status = addr_resolve(src_in, dst_in, addr, true); + req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); switch (req->status) { case 0: req->timeout = jiffies; @@ -510,7 +674,7 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, src_in->sa_family = dst_addr->sa_family; } - return addr_resolve(src_in, dst_addr, addr, false); + return addr_resolve(src_in, dst_addr, addr, false, 0); } EXPORT_SYMBOL(rdma_resolve_ip_route); @@ -634,7 +798,7 @@ static struct notifier_block nb = { .notifier_call = netevent_callback }; -static int __init addr_init(void) +int addr_init(void) { addr_wq = create_singlethread_workqueue("ib_addr"); if (!addr_wq) @@ -642,15 +806,13 @@ static int __init addr_init(void) register_netevent_notifier(&nb); rdma_addr_register_client(&self); + return 0; } -static void __exit addr_cleanup(void) +void addr_cleanup(void) { rdma_addr_unregister_client(&self); unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); } - -module_init(addr_init); -module_exit(addr_cleanup); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index eab32215756b..19d499dcab76 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -137,4 +137,20 @@ static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, return _upper == upper; } +int addr_init(void); +void addr_cleanup(void); + +int ib_mad_init(void); +void ib_mad_cleanup(void); + +int ib_sa_init(void); +void ib_sa_cleanup(void); + +int ib_nl_handle_resolve_resp(struct sk_buff *skb, + struct netlink_callback *cb); +int ib_nl_handle_set_timeout(struct sk_buff *skb, + struct netlink_callback *cb); +int ib_nl_handle_ip_res_resp(struct sk_buff *skb, + struct netlink_callback *cb); + #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 10979844026a..5516fb070344 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -955,6 +955,29 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, } EXPORT_SYMBOL(ib_get_net_dev_by_params); +static struct ibnl_client_cbs ibnl_ls_cb_table[] = { + [RDMA_NL_LS_OP_RESOLVE] = { + .dump = ib_nl_handle_resolve_resp, + .module = THIS_MODULE }, + [RDMA_NL_LS_OP_SET_TIMEOUT] = { + .dump = ib_nl_handle_set_timeout, + .module = THIS_MODULE }, + [RDMA_NL_LS_OP_IP_RESOLVE] = { + .dump = ib_nl_handle_ip_res_resp, + .module = THIS_MODULE }, +}; + +static int ib_add_ibnl_clients(void) +{ + return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table), + ibnl_ls_cb_table); +} + +static void ib_remove_ibnl_clients(void) +{ + ibnl_remove_client(RDMA_NL_LS); +} + static int __init ib_core_init(void) { int ret; @@ -983,10 +1006,41 @@ static int __init ib_core_init(void) goto err_sysfs; } + ret = addr_init(); + if (ret) { + pr_warn("Could't init IB address resolution\n"); + goto err_ibnl; + } + + ret = ib_mad_init(); + if (ret) { + pr_warn("Couldn't init IB MAD\n"); + goto err_addr; + } + + ret = ib_sa_init(); + if (ret) { + pr_warn("Couldn't init SA\n"); + goto err_mad; + } + + if (ib_add_ibnl_clients()) { + pr_warn("Couldn't register ibnl clients\n"); + goto err_sa; + } + ib_cache_setup(); return 0; +err_sa: + ib_sa_cleanup(); +err_mad: + ib_mad_cleanup(); +err_addr: + addr_cleanup(); +err_ibnl: + ibnl_cleanup(); err_sysfs: class_unregister(&ib_class); err_comp: @@ -999,6 +1053,10 @@ err: static void __exit ib_core_cleanup(void) { ib_cache_cleanup(); + ib_remove_ibnl_clients(); + ib_sa_cleanup(); + ib_mad_cleanup(); + addr_cleanup(); ibnl_cleanup(); class_unregister(&ib_class); destroy_workqueue(ib_comp_wq); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 9fa5bf33f5a3..82fb511112da 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -47,11 +47,7 @@ #include "smi.h" #include "opa_smi.h" #include "agent.h" - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("kernel IB MAD API"); -MODULE_AUTHOR("Hal Rosenstock"); -MODULE_AUTHOR("Sean Hefty"); +#include "core_priv.h" static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; @@ -3316,7 +3312,7 @@ static struct ib_client mad_client = { .remove = ib_mad_remove_device }; -static int __init ib_mad_init_module(void) +int ib_mad_init(void) { mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); @@ -3334,10 +3330,7 @@ static int __init ib_mad_init_module(void) return 0; } -static void __exit ib_mad_cleanup_module(void) +void ib_mad_cleanup(void) { ib_unregister_client(&mad_client); } - -module_init(ib_mad_init_module); -module_exit(ib_mad_cleanup_module); diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 250937cb9a1a..a83ec28a147b 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -93,6 +93,18 @@ enum { struct mcast_member; +/* +* There are 4 types of join states: +* FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember. +*/ +enum { + FULLMEMBER_JOIN, + NONMEMBER_JOIN, + SENDONLY_NONMEBER_JOIN, + SENDONLY_FULLMEMBER_JOIN, + NUM_JOIN_MEMBERSHIP_TYPES, +}; + struct mcast_group { struct ib_sa_mcmember_rec rec; struct rb_node node; @@ -102,7 +114,7 @@ struct mcast_group { struct list_head pending_list; struct list_head active_list; struct mcast_member *last_join; - int members[3]; + int members[NUM_JOIN_MEMBERSHIP_TYPES]; atomic_t refcount; enum mcast_group_state state; struct ib_sa_query *query; @@ -220,8 +232,9 @@ static void queue_join(struct mcast_member *member) } /* - * A multicast group has three types of members: full member, non member, and - * send only member. We need to keep track of the number of members of each + * A multicast group has four types of members: full member, non member, + * sendonly non member and sendonly full member. + * We need to keep track of the number of members of each * type based on their join state. Adjust the number of members the belong to * the specified join states. */ @@ -229,7 +242,7 @@ static void adjust_membership(struct mcast_group *group, u8 join_state, int inc) { int i; - for (i = 0; i < 3; i++, join_state >>= 1) + for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1) if (join_state & 0x1) group->members[i] += inc; } @@ -245,7 +258,7 @@ static u8 get_leave_state(struct mcast_group *group) u8 leave_state = 0; int i; - for (i = 0; i < 3; i++) + for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++) if (!group->members[i]) leave_state |= (0x1 << i); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 3ebd108bcc5f..e95538650dc6 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -53,10 +53,6 @@ #include "sa.h" #include "core_priv.h" -MODULE_AUTHOR("Roland Dreier"); -MODULE_DESCRIPTION("InfiniBand subnet administration query support"); -MODULE_LICENSE("Dual BSD/GPL"); - #define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 #define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 @@ -119,6 +115,12 @@ struct ib_sa_guidinfo_query { struct ib_sa_query sa_query; }; +struct ib_sa_classport_info_query { + void (*callback)(int, struct ib_class_port_info *, void *); + void *context; + struct ib_sa_query sa_query; +}; + struct ib_sa_mcmember_query { void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void *context; @@ -392,6 +394,82 @@ static const struct ib_field service_rec_table[] = { .size_bits = 2*64 }, }; +#define CLASSPORTINFO_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \ + .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \ + .field_name = "ib_class_port_info:" #field + +static const struct ib_field classport_info_rec_table[] = { + { CLASSPORTINFO_REC_FIELD(base_version), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 8 }, + { CLASSPORTINFO_REC_FIELD(class_version), + .offset_words = 0, + .offset_bits = 8, + .size_bits = 8 }, + { CLASSPORTINFO_REC_FIELD(capability_mask), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 }, + { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(redirect_gid), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 128 }, + { CLASSPORTINFO_REC_FIELD(redirect_tcslfl), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(redirect_lid), + .offset_words = 7, + .offset_bits = 0, + .size_bits = 16 }, + { CLASSPORTINFO_REC_FIELD(redirect_pkey), + .offset_words = 7, + .offset_bits = 16, + .size_bits = 16 }, + + { CLASSPORTINFO_REC_FIELD(redirect_qp), + .offset_words = 8, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(redirect_qkey), + .offset_words = 9, + .offset_bits = 0, + .size_bits = 32 }, + + { CLASSPORTINFO_REC_FIELD(trap_gid), + .offset_words = 10, + .offset_bits = 0, + .size_bits = 128 }, + { CLASSPORTINFO_REC_FIELD(trap_tcslfl), + .offset_words = 14, + .offset_bits = 0, + .size_bits = 32 }, + + { CLASSPORTINFO_REC_FIELD(trap_lid), + .offset_words = 15, + .offset_bits = 0, + .size_bits = 16 }, + { CLASSPORTINFO_REC_FIELD(trap_pkey), + .offset_words = 15, + .offset_bits = 16, + .size_bits = 16 }, + + { CLASSPORTINFO_REC_FIELD(trap_hlqp), + .offset_words = 16, + .offset_bits = 0, + .size_bits = 32 }, + { CLASSPORTINFO_REC_FIELD(trap_qkey), + .offset_words = 17, + .offset_bits = 0, + .size_bits = 32 }, +}; + #define GUIDINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ @@ -705,8 +783,8 @@ static void ib_nl_request_timeout(struct work_struct *work) spin_unlock_irqrestore(&ib_nl_request_lock, flags); } -static int ib_nl_handle_set_timeout(struct sk_buff *skb, - struct netlink_callback *cb) +int ib_nl_handle_set_timeout(struct sk_buff *skb, + struct netlink_callback *cb) { const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; int timeout, delta, abs_delta; @@ -782,8 +860,8 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) return 1; } -static int ib_nl_handle_resolve_resp(struct sk_buff *skb, - struct netlink_callback *cb) +int ib_nl_handle_resolve_resp(struct sk_buff *skb, + struct netlink_callback *cb) { const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; unsigned long flags; @@ -838,15 +916,6 @@ resp_out: return skb->len; } -static struct ibnl_client_cbs ib_sa_cb_table[] = { - [RDMA_NL_LS_OP_RESOLVE] = { - .dump = ib_nl_handle_resolve_resp, - .module = THIS_MODULE }, - [RDMA_NL_LS_OP_SET_TIMEOUT] = { - .dump = ib_nl_handle_set_timeout, - .module = THIS_MODULE }, -}; - static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -1645,6 +1714,97 @@ err1: } EXPORT_SYMBOL(ib_sa_guid_info_rec_query); +/* Support get SA ClassPortInfo */ +static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_classport_info_query *query = + container_of(sa_query, struct ib_sa_classport_info_query, sa_query); + + if (mad) { + struct ib_class_port_info rec; + + ib_unpack(classport_info_rec_table, + ARRAY_SIZE(classport_info_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else { + query->callback(status, NULL, query->context); + } +} + +static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query) +{ + kfree(container_of(sa_query, struct ib_sa_classport_info_query, + sa_query)); +} + +int ib_sa_classport_info_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_class_port_info *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_classport_info_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; + int ret; + + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + query = kzalloc(sizeof(*query), gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); + + query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL; + + query->sa_query.release = ib_sa_portclass_info_rec_release; + /* support GET only */ + mad->mad_hdr.method = IB_MGMT_METHOD_GET; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO); + mad->sa_hdr.comp_mask = 0; + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + free_mad(&query->sa_query); + +err1: + kfree(query); + return ret; +} +EXPORT_SYMBOL(ib_sa_classport_info_rec_query); + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { @@ -1794,7 +1954,7 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data) kfree(sa_dev); } -static int __init ib_sa_init(void) +int ib_sa_init(void) { int ret; @@ -1820,17 +1980,10 @@ static int __init ib_sa_init(void) goto err3; } - if (ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ib_sa_cb_table), - ib_sa_cb_table)) { - pr_err("Failed to add netlink callback\n"); - ret = -EINVAL; - goto err4; - } INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout); return 0; -err4: - destroy_workqueue(ib_nl_wq); + err3: mcast_cleanup(); err2: @@ -1839,9 +1992,8 @@ err1: return ret; } -static void __exit ib_sa_cleanup(void) +void ib_sa_cleanup(void) { - ibnl_remove_client(RDMA_NL_LS); cancel_delayed_work(&ib_nl_timed_work); flush_workqueue(ib_nl_wq); destroy_workqueue(ib_nl_wq); @@ -1849,6 +2001,3 @@ static void __exit ib_sa_cleanup(void) ib_unregister_client(&sa_client); idr_destroy(&query_idr); } - -module_init(ib_sa_init); -module_exit(ib_sa_cleanup); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 14606afbfaa8..5e573bb18660 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -56,8 +56,10 @@ struct ib_port { struct gid_attr_group *gid_attr_group; struct attribute_group gid_group; struct attribute_group pkey_group; - u8 port_num; struct attribute_group *pma_table; + struct attribute_group *hw_stats_ag; + struct rdma_hw_stats *hw_stats; + u8 port_num; }; struct port_attribute { @@ -80,6 +82,18 @@ struct port_table_attribute { __be16 attr_id; }; +struct hw_stats_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); + ssize_t (*store)(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t count); + int index; + u8 port_num; +}; + static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -733,6 +747,212 @@ static struct attribute_group *get_counter_table(struct ib_device *dev, return &pma_group; } +static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, + u8 port_num, int index) +{ + int ret; + + if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan)) + return 0; + ret = dev->get_hw_stats(dev, stats, port_num, index); + if (ret < 0) + return ret; + if (ret == stats->num_counters) + stats->timestamp = jiffies; + + return 0; +} + +static ssize_t print_hw_stat(struct rdma_hw_stats *stats, int index, char *buf) +{ + return sprintf(buf, "%llu\n", stats->value[index]); +} + +static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct ib_device *dev; + struct ib_port *port; + struct hw_stats_attribute *hsa; + struct rdma_hw_stats *stats; + int ret; + + hsa = container_of(attr, struct hw_stats_attribute, attr); + if (!hsa->port_num) { + dev = container_of((struct device *)kobj, + struct ib_device, dev); + stats = dev->hw_stats; + } else { + port = container_of(kobj, struct ib_port, kobj); + dev = port->ibdev; + stats = port->hw_stats; + } + ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index); + if (ret) + return ret; + return print_hw_stat(stats, hsa->index, buf); +} + +static ssize_t show_stats_lifespan(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct hw_stats_attribute *hsa; + int msecs; + + hsa = container_of(attr, struct hw_stats_attribute, attr); + if (!hsa->port_num) { + struct ib_device *dev = container_of((struct device *)kobj, + struct ib_device, dev); + msecs = jiffies_to_msecs(dev->hw_stats->lifespan); + } else { + struct ib_port *p = container_of(kobj, struct ib_port, kobj); + msecs = jiffies_to_msecs(p->hw_stats->lifespan); + } + return sprintf(buf, "%d\n", msecs); +} + +static ssize_t set_stats_lifespan(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct hw_stats_attribute *hsa; + int msecs; + int jiffies; + int ret; + + ret = kstrtoint(buf, 10, &msecs); + if (ret) + return ret; + if (msecs < 0 || msecs > 10000) + return -EINVAL; + jiffies = msecs_to_jiffies(msecs); + hsa = container_of(attr, struct hw_stats_attribute, attr); + if (!hsa->port_num) { + struct ib_device *dev = container_of((struct device *)kobj, + struct ib_device, dev); + dev->hw_stats->lifespan = jiffies; + } else { + struct ib_port *p = container_of(kobj, struct ib_port, kobj); + p->hw_stats->lifespan = jiffies; + } + return count; +} + +static void free_hsag(struct kobject *kobj, struct attribute_group *attr_group) +{ + struct attribute **attr; + + sysfs_remove_group(kobj, attr_group); + + for (attr = attr_group->attrs; *attr; attr++) + kfree(*attr); + kfree(attr_group); +} + +static struct attribute *alloc_hsa(int index, u8 port_num, const char *name) +{ + struct hw_stats_attribute *hsa; + + hsa = kmalloc(sizeof(*hsa), GFP_KERNEL); + if (!hsa) + return NULL; + + hsa->attr.name = (char *)name; + hsa->attr.mode = S_IRUGO; + hsa->show = show_hw_stats; + hsa->store = NULL; + hsa->index = index; + hsa->port_num = port_num; + + return &hsa->attr; +} + +static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num) +{ + struct hw_stats_attribute *hsa; + + hsa = kmalloc(sizeof(*hsa), GFP_KERNEL); + if (!hsa) + return NULL; + + hsa->attr.name = name; + hsa->attr.mode = S_IWUSR | S_IRUGO; + hsa->show = show_stats_lifespan; + hsa->store = set_stats_lifespan; + hsa->index = 0; + hsa->port_num = port_num; + + return &hsa->attr; +} + +static void setup_hw_stats(struct ib_device *device, struct ib_port *port, + u8 port_num) +{ + struct attribute_group *hsag = NULL; + struct rdma_hw_stats *stats; + int i = 0, ret; + + stats = device->alloc_hw_stats(device, port_num); + + if (!stats) + return; + + if (!stats->names || stats->num_counters <= 0) + goto err; + + hsag = kzalloc(sizeof(*hsag) + + // 1 extra for the lifespan config entry + sizeof(void *) * (stats->num_counters + 1), + GFP_KERNEL); + if (!hsag) + return; + + ret = device->get_hw_stats(device, stats, port_num, + stats->num_counters); + if (ret != stats->num_counters) + goto err; + + stats->timestamp = jiffies; + + hsag->name = "hw_counters"; + hsag->attrs = (void *)hsag + sizeof(*hsag); + + for (i = 0; i < stats->num_counters; i++) { + hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]); + if (!hsag->attrs[i]) + goto err; + } + + /* treat an error here as non-fatal */ + hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num); + + if (port) { + struct kobject *kobj = &port->kobj; + ret = sysfs_create_group(kobj, hsag); + if (ret) + goto err; + port->hw_stats_ag = hsag; + port->hw_stats = stats; + } else { + struct kobject *kobj = &device->dev.kobj; + ret = sysfs_create_group(kobj, hsag); + if (ret) + goto err; + device->hw_stats_ag = hsag; + device->hw_stats = stats; + } + + return; + +err: + kfree(stats); + for (; i >= 0; i--) + kfree(hsag->attrs[i]); + kfree(hsag); + return; +} + static int add_port(struct ib_device *device, int port_num, int (*port_callback)(struct ib_device *, u8, struct kobject *)) @@ -835,6 +1055,14 @@ static int add_port(struct ib_device *device, int port_num, goto err_remove_pkey; } + /* + * If port == 0, it means we have only one port and the parent + * device, not this port device, should be the holder of the + * hw_counters + */ + if (device->alloc_hw_stats && port_num) + setup_hw_stats(device, p, port_num); + list_add_tail(&p->kobj.entry, &device->port_list); kobject_uevent(&p->kobj, KOBJ_ADD); @@ -972,120 +1200,6 @@ static struct device_attribute *ib_class_attributes[] = { &dev_attr_node_desc }; -/* Show a given an attribute in the statistics group */ -static ssize_t show_protocol_stat(const struct device *device, - struct device_attribute *attr, char *buf, - unsigned offset) -{ - struct ib_device *dev = container_of(device, struct ib_device, dev); - union rdma_protocol_stats stats; - ssize_t ret; - - ret = dev->get_protocol_stats(dev, &stats); - if (ret) - return ret; - - return sprintf(buf, "%llu\n", - (unsigned long long) ((u64 *) &stats)[offset]); -} - -/* generate a read-only iwarp statistics attribute */ -#define IW_STATS_ENTRY(name) \ -static ssize_t show_##name(struct device *device, \ - struct device_attribute *attr, char *buf) \ -{ \ - return show_protocol_stat(device, attr, buf, \ - offsetof(struct iw_protocol_stats, name) / \ - sizeof (u64)); \ -} \ -static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) - -IW_STATS_ENTRY(ipInReceives); -IW_STATS_ENTRY(ipInHdrErrors); -IW_STATS_ENTRY(ipInTooBigErrors); -IW_STATS_ENTRY(ipInNoRoutes); -IW_STATS_ENTRY(ipInAddrErrors); -IW_STATS_ENTRY(ipInUnknownProtos); -IW_STATS_ENTRY(ipInTruncatedPkts); -IW_STATS_ENTRY(ipInDiscards); -IW_STATS_ENTRY(ipInDelivers); -IW_STATS_ENTRY(ipOutForwDatagrams); -IW_STATS_ENTRY(ipOutRequests); -IW_STATS_ENTRY(ipOutDiscards); -IW_STATS_ENTRY(ipOutNoRoutes); -IW_STATS_ENTRY(ipReasmTimeout); -IW_STATS_ENTRY(ipReasmReqds); -IW_STATS_ENTRY(ipReasmOKs); -IW_STATS_ENTRY(ipReasmFails); -IW_STATS_ENTRY(ipFragOKs); -IW_STATS_ENTRY(ipFragFails); -IW_STATS_ENTRY(ipFragCreates); -IW_STATS_ENTRY(ipInMcastPkts); -IW_STATS_ENTRY(ipOutMcastPkts); -IW_STATS_ENTRY(ipInBcastPkts); -IW_STATS_ENTRY(ipOutBcastPkts); -IW_STATS_ENTRY(tcpRtoAlgorithm); -IW_STATS_ENTRY(tcpRtoMin); -IW_STATS_ENTRY(tcpRtoMax); -IW_STATS_ENTRY(tcpMaxConn); -IW_STATS_ENTRY(tcpActiveOpens); -IW_STATS_ENTRY(tcpPassiveOpens); -IW_STATS_ENTRY(tcpAttemptFails); -IW_STATS_ENTRY(tcpEstabResets); -IW_STATS_ENTRY(tcpCurrEstab); -IW_STATS_ENTRY(tcpInSegs); -IW_STATS_ENTRY(tcpOutSegs); -IW_STATS_ENTRY(tcpRetransSegs); -IW_STATS_ENTRY(tcpInErrs); -IW_STATS_ENTRY(tcpOutRsts); - -static struct attribute *iw_proto_stats_attrs[] = { - &dev_attr_ipInReceives.attr, - &dev_attr_ipInHdrErrors.attr, - &dev_attr_ipInTooBigErrors.attr, - &dev_attr_ipInNoRoutes.attr, - &dev_attr_ipInAddrErrors.attr, - &dev_attr_ipInUnknownProtos.attr, - &dev_attr_ipInTruncatedPkts.attr, - &dev_attr_ipInDiscards.attr, - &dev_attr_ipInDelivers.attr, - &dev_attr_ipOutForwDatagrams.attr, - &dev_attr_ipOutRequests.attr, - &dev_attr_ipOutDiscards.attr, - &dev_attr_ipOutNoRoutes.attr, - &dev_attr_ipReasmTimeout.attr, - &dev_attr_ipReasmReqds.attr, - &dev_attr_ipReasmOKs.attr, - &dev_attr_ipReasmFails.attr, - &dev_attr_ipFragOKs.attr, - &dev_attr_ipFragFails.attr, - &dev_attr_ipFragCreates.attr, - &dev_attr_ipInMcastPkts.attr, - &dev_attr_ipOutMcastPkts.attr, - &dev_attr_ipInBcastPkts.attr, - &dev_attr_ipOutBcastPkts.attr, - &dev_attr_tcpRtoAlgorithm.attr, - &dev_attr_tcpRtoMin.attr, - &dev_attr_tcpRtoMax.attr, - &dev_attr_tcpMaxConn.attr, - &dev_attr_tcpActiveOpens.attr, - &dev_attr_tcpPassiveOpens.attr, - &dev_attr_tcpAttemptFails.attr, - &dev_attr_tcpEstabResets.attr, - &dev_attr_tcpCurrEstab.attr, - &dev_attr_tcpInSegs.attr, - &dev_attr_tcpOutSegs.attr, - &dev_attr_tcpRetransSegs.attr, - &dev_attr_tcpInErrs.attr, - &dev_attr_tcpOutRsts.attr, - NULL -}; - -static struct attribute_group iw_stats_group = { - .name = "proto_stats", - .attrs = iw_proto_stats_attrs, -}; - static void free_port_list_attributes(struct ib_device *device) { struct kobject *p, *t; @@ -1093,6 +1207,10 @@ static void free_port_list_attributes(struct ib_device *device) list_for_each_entry_safe(p, t, &device->port_list, entry) { struct ib_port *port = container_of(p, struct ib_port, kobj); list_del(&p->entry); + if (port->hw_stats) { + kfree(port->hw_stats); + free_hsag(&port->kobj, port->hw_stats_ag); + } sysfs_remove_group(p, port->pma_table); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); @@ -1149,11 +1267,8 @@ int ib_device_register_sysfs(struct ib_device *device, } } - if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) { - ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group); - if (ret) - goto err_put; - } + if (device->alloc_hw_stats) + setup_hw_stats(device, NULL, 0); return 0; @@ -1169,15 +1284,18 @@ err: void ib_device_unregister_sysfs(struct ib_device *device) { - /* Hold kobject until ib_dealloc_device() */ - struct kobject *kobj_dev = kobject_get(&device->dev.kobj); int i; - if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) - sysfs_remove_group(kobj_dev, &iw_stats_group); + /* Hold kobject until ib_dealloc_device() */ + kobject_get(&device->dev.kobj); free_port_list_attributes(device); + if (device->hw_stats) { + kfree(device->hw_stats); + free_hsag(&device->dev.kobj, device->hw_stats_ag); + } + for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) device_remove_file(&device->dev, ib_class_attributes[i]); diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index c7ad0a4c8b15..c0c7cf8af3f4 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ obj-$(CONFIG_INFINIBAND_NES) += nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ +obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index de1c61b417d6..ada2e5009c86 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -327,7 +327,7 @@ int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) kfree(cq->sw_queue); dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), (1UL << (cq->size_log2)) - * sizeof(struct t3_cqe), cq->queue, + * sizeof(struct t3_cqe) + 1, cq->queue, dma_unmap_addr(cq, mapping)); cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); return err; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 47cb927a0dd6..bb1a839d4d6d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1218,59 +1218,119 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, iwch_dev->rdev.rnic_info.pdev->device); } -static int iwch_get_mib(struct ib_device *ibdev, - union rdma_protocol_stats *stats) +enum counters { + IPINRECEIVES, + IPINHDRERRORS, + IPINADDRERRORS, + IPINUNKNOWNPROTOS, + IPINDISCARDS, + IPINDELIVERS, + IPOUTREQUESTS, + IPOUTDISCARDS, + IPOUTNOROUTES, + IPREASMTIMEOUT, + IPREASMREQDS, + IPREASMOKS, + IPREASMFAILS, + TCPACTIVEOPENS, + TCPPASSIVEOPENS, + TCPATTEMPTFAILS, + TCPESTABRESETS, + TCPCURRESTAB, + TCPINSEGS, + TCPOUTSEGS, + TCPRETRANSSEGS, + TCPINERRS, + TCPOUTRSTS, + TCPRTOMIN, + TCPRTOMAX, + NR_COUNTERS +}; + +static const char * const names[] = { + [IPINRECEIVES] = "ipInReceives", + [IPINHDRERRORS] = "ipInHdrErrors", + [IPINADDRERRORS] = "ipInAddrErrors", + [IPINUNKNOWNPROTOS] = "ipInUnknownProtos", + [IPINDISCARDS] = "ipInDiscards", + [IPINDELIVERS] = "ipInDelivers", + [IPOUTREQUESTS] = "ipOutRequests", + [IPOUTDISCARDS] = "ipOutDiscards", + [IPOUTNOROUTES] = "ipOutNoRoutes", + [IPREASMTIMEOUT] = "ipReasmTimeout", + [IPREASMREQDS] = "ipReasmReqds", + [IPREASMOKS] = "ipReasmOKs", + [IPREASMFAILS] = "ipReasmFails", + [TCPACTIVEOPENS] = "tcpActiveOpens", + [TCPPASSIVEOPENS] = "tcpPassiveOpens", + [TCPATTEMPTFAILS] = "tcpAttemptFails", + [TCPESTABRESETS] = "tcpEstabResets", + [TCPCURRESTAB] = "tcpCurrEstab", + [TCPINSEGS] = "tcpInSegs", + [TCPOUTSEGS] = "tcpOutSegs", + [TCPRETRANSSEGS] = "tcpRetransSegs", + [TCPINERRS] = "tcpInErrs", + [TCPOUTRSTS] = "tcpOutRsts", + [TCPRTOMIN] = "tcpRtoMin", + [TCPRTOMAX] = "tcpRtoMax", +}; + +static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev, + u8 port_num) +{ + BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); + + /* Our driver only supports device level stats */ + if (port_num != 0) + return NULL; + + return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u8 port, int index) { struct iwch_dev *dev; struct tp_mib_stats m; int ret; + if (port != 0 || !stats) + return -ENOSYS; + PDBG("%s ibdev %p\n", __func__, ibdev); dev = to_iwch_dev(ibdev); ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); if (ret) return -ENOSYS; - memset(stats, 0, sizeof *stats); - stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) + - m.ipInReceive_lo; - stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) + - m.ipInHdrErrors_lo; - stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) + - m.ipInAddrErrors_lo; - stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) + - m.ipInUnknownProtos_lo; - stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) + - m.ipInDiscards_lo; - stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) + - m.ipInDelivers_lo; - stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) + - m.ipOutRequests_lo; - stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) + - m.ipOutDiscards_lo; - stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) + - m.ipOutNoRoutes_lo; - stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout; - stats->iw.ipReasmReqds = (u64) m.ipReasmReqds; - stats->iw.ipReasmOKs = (u64) m.ipReasmOKs; - stats->iw.ipReasmFails = (u64) m.ipReasmFails; - stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens; - stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens; - stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails; - stats->iw.tcpEstabResets = (u64) m.tcpEstabResets; - stats->iw.tcpOutRsts = (u64) m.tcpOutRsts; - stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab; - stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) + - m.tcpInSegs_lo; - stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) + - m.tcpOutSegs_lo; - stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) + - m.tcpRetransSeg_lo; - stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) + - m.tcpInErrs_lo; - stats->iw.tcpRtoMin = (u64) m.tcpRtoMin; - stats->iw.tcpRtoMax = (u64) m.tcpRtoMax; - return 0; + stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo; + stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo; + stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo; + stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo; + stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo; + stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo; + stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo; + stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo; + stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo; + stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout; + stats->value[IPREASMREQDS] = m.ipReasmReqds; + stats->value[IPREASMOKS] = m.ipReasmOKs; + stats->value[IPREASMFAILS] = m.ipReasmFails; + stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens; + stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens; + stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails; + stats->value[TCPESTABRESETS] = m.tcpEstabResets; + stats->value[TCPCURRESTAB] = m.tcpOutRsts; + stats->value[TCPINSEGS] = m.tcpCurrEstab; + stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo; + stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo; + stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo, + stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo; + stats->value[TCPRTOMIN] = m.tcpRtoMin; + stats->value[TCPRTOMAX] = m.tcpRtoMax; + + return stats->num_counters; } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); @@ -1373,7 +1433,8 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.req_notify_cq = iwch_arm_cq; dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; - dev->ibdev.get_protocol_stats = iwch_get_mib; + dev->ibdev.alloc_hw_stats = iwch_alloc_stats; + dev->ibdev.get_hw_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = iwch_port_immutable; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 7574f394fdac..dd8a86b726d2 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -446,20 +446,59 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, c4iw_dev->rdev.lldi.pdev->device); } +enum counters { + IP4INSEGS, + IP4OUTSEGS, + IP4RETRANSSEGS, + IP4OUTRSTS, + IP6INSEGS, + IP6OUTSEGS, + IP6RETRANSSEGS, + IP6OUTRSTS, + NR_COUNTERS +}; + +static const char * const names[] = { + [IP4INSEGS] = "ip4InSegs", + [IP4OUTSEGS] = "ip4OutSegs", + [IP4RETRANSSEGS] = "ip4RetransSegs", + [IP4OUTRSTS] = "ip4OutRsts", + [IP6INSEGS] = "ip6InSegs", + [IP6OUTSEGS] = "ip6OutSegs", + [IP6RETRANSSEGS] = "ip6RetransSegs", + [IP6OUTRSTS] = "ip6OutRsts" +}; + +static struct rdma_hw_stats *c4iw_alloc_stats(struct ib_device *ibdev, + u8 port_num) +{ + BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); + + if (port_num != 0) + return NULL; + + return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + static int c4iw_get_mib(struct ib_device *ibdev, - union rdma_protocol_stats *stats) + struct rdma_hw_stats *stats, + u8 port, int index) { struct tp_tcp_stats v4, v6; struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev); cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6); - memset(stats, 0, sizeof *stats); - stats->iw.tcpInSegs = v4.tcp_in_segs + v6.tcp_in_segs; - stats->iw.tcpOutSegs = v4.tcp_out_segs + v6.tcp_out_segs; - stats->iw.tcpRetransSegs = v4.tcp_retrans_segs + v6.tcp_retrans_segs; - stats->iw.tcpOutRsts = v4.tcp_out_rsts + v6.tcp_out_rsts; - - return 0; + stats->value[IP4INSEGS] = v4.tcp_in_segs; + stats->value[IP4OUTSEGS] = v4.tcp_out_segs; + stats->value[IP4RETRANSSEGS] = v4.tcp_retrans_segs; + stats->value[IP4OUTRSTS] = v4.tcp_out_rsts; + stats->value[IP6INSEGS] = v6.tcp_in_segs; + stats->value[IP6OUTSEGS] = v6.tcp_out_segs; + stats->value[IP6RETRANSSEGS] = v6.tcp_retrans_segs; + stats->value[IP6OUTRSTS] = v6.tcp_out_rsts; + + return stats->num_counters; } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); @@ -562,7 +601,8 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.req_notify_cq = c4iw_arm_cq; dev->ibdev.post_send = c4iw_post_send; dev->ibdev.post_recv = c4iw_post_receive; - dev->ibdev.get_protocol_stats = c4iw_get_mib; + dev->ibdev.alloc_hw_stats = c4iw_alloc_stats; + dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.get_port_immutable = c4iw_port_immutable; dev->ibdev.drain_sq = c4iw_drain_sq; diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig index a925fb0db706..a925fb0db706 100644 --- a/drivers/staging/rdma/hfi1/Kconfig +++ b/drivers/infiniband/hw/hfi1/Kconfig diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 8dc59382ee96..9b5382c94b0c 100644 --- a/drivers/staging/rdma/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -7,7 +7,7 @@ # obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o -hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \ +hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ eprom.o file_ops.o firmware.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 6e7050ab9e16..6e7050ab9e16 100644 --- a/drivers/staging/rdma/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 20f52fe74091..20f52fe74091 100644 --- a/drivers/staging/rdma/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 0d58fe3b49b5..0d58fe3b49b5 100644 --- a/drivers/staging/rdma/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index dcae8e723f98..3b876da745a1 100644 --- a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1037,6 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *); static void dc_start(struct hfi1_devdata *); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); +static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd); /* * Error interrupt table entry. This is used as input to the interrupt @@ -6105,7 +6106,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) } /* this access is valid only when the link is up */ - if ((ppd->host_link_state & HLS_UP) == 0) { + if (ppd->host_link_state & HLS_DOWN) { dd_dev_info(dd, "%s: link state %s not up\n", __func__, link_state_name(ppd->host_link_state)); ret = -EBUSY; @@ -6961,6 +6962,8 @@ void handle_link_down(struct work_struct *work) } reset_neighbor_info(ppd); + if (ppd->mgmt_allowed) + remove_full_mgmt_pkey(ppd); /* disable the port */ clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); @@ -7069,6 +7072,12 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); } +static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd) +{ + ppd->pkeys[2] = 0; + (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); +} + /* * Convert the given link width to the OPA link width bitmask. */ @@ -7429,7 +7438,7 @@ void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths) retry: mutex_lock(&ppd->hls_lock); /* only apply if the link is up */ - if (!(ppd->host_link_state & HLS_UP)) { + if (ppd->host_link_state & HLS_DOWN) { /* still going up..wait and retry */ if (ppd->host_link_state & HLS_GOING_UP) { if (++tries < 1000) { @@ -9212,9 +9221,6 @@ void reset_qsfp(struct hfi1_pportdata *ppd) /* Reset the QSFP */ mask = (u64)QSFP_HFI0_RESET_N; - qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE); - qsfp_mask |= mask; - write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask); qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT); @@ -9252,6 +9258,12 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, dd_dev_info(dd, "%s: QSFP cable temperature too low\n", __func__); + /* + * The remaining alarms/warnings don't matter if the link is down. + */ + if (ppd->host_link_state & HLS_DOWN) + return 0; + if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) dd_dev_info(dd, "%s: QSFP supply voltage too high\n", @@ -9346,9 +9358,8 @@ void qsfp_event(struct work_struct *work) return; /* - * Turn DC back on after cables has been - * re-inserted. Up until now, the DC has been in - * reset to save power. + * Turn DC back on after cable has been re-inserted. Up until + * now, the DC has been in reset to save power. */ dc_start(dd); @@ -9480,7 +9491,15 @@ int bringup_serdes(struct hfi1_pportdata *ppd) return ret; } - /* tune the SERDES to a ballpark setting for + get_port_type(ppd); + if (ppd->port_type == PORT_TYPE_QSFP) { + set_qsfp_int_n(ppd, 0); + wait_for_qsfp_init(ppd); + set_qsfp_int_n(ppd, 1); + } + + /* + * Tune the SerDes to a ballpark setting for * optimal signal and bit error rate * Needs to be done before starting the link */ @@ -10074,7 +10093,7 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd) */ u32 driver_logical_state(struct hfi1_pportdata *ppd) { - if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP)) + if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN)) return IB_PORT_DOWN; switch (ppd->host_link_state & HLS_UP) { @@ -14578,7 +14597,7 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, (reason), (ret)) /* - * Initialize the Avago Thermal sensor. + * Initialize the thermal sensor. * * After initialization, enable polling of thermal sensor through * SBus interface. In order for this to work, the SBus Master diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 1948706fff1a..66a327978739 100644 --- a/drivers/staging/rdma/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -398,6 +398,12 @@ /* Lane ID for general configuration registers */ #define GENERAL_CONFIG 4 +/* LINK_TUNING_PARAMETERS fields */ +#define TUNING_METHOD_SHIFT 24 + +/* LINK_OPTIMIZATION_SETTINGS fields */ +#define ENABLE_EXT_DEV_CONFIG_SHIFT 24 + /* LOAD_DATA 8051 command shifts and fields */ #define LOAD_DATA_FIELD_ID_SHIFT 40 #define LOAD_DATA_FIELD_ID_MASK 0xfull diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 8744de6667c2..8744de6667c2 100644 --- a/drivers/staging/rdma/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index e9b6bb322025..fcc9c217a97a 100644 --- a/drivers/staging/rdma/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -178,7 +178,8 @@ HFI1_CAP_PKEY_CHECK | \ HFI1_CAP_NO_INTEGRITY) -#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << 16) | HFI1_USER_SWMINOR) +#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \ + HFI1_USER_SWMINOR) #ifndef HFI1_KERN_TYPE #define HFI1_KERN_TYPE 0 @@ -349,6 +350,8 @@ struct hfi1_message_header { #define HFI1_BECN_MASK 1 #define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT) +#define HFI1_PSM_IOC_BASE_SEQ 0x0 + static inline __u64 rhf_to_cpu(const __le32 *rbuf) { return __le64_to_cpu(*((__le64 *)rbuf)); diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index dbab9d9cc288..dbab9d9cc288 100644 --- a/drivers/staging/rdma/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h index b6fb6814f1b8..b6fb6814f1b8 100644 --- a/drivers/staging/rdma/hfi1/debugfs.h +++ b/drivers/infiniband/hw/hfi1/debugfs.h diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c index c05c39da83b1..bf64b5a7bfd7 100644 --- a/drivers/staging/rdma/hfi1/device.c +++ b/drivers/infiniband/hw/hfi1/device.c @@ -60,7 +60,8 @@ static dev_t hfi1_dev; int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, struct cdev *cdev, struct device **devp, - bool user_accessible) + bool user_accessible, + struct kobject *parent) { const dev_t dev = MKDEV(MAJOR(hfi1_dev), minor); struct device *device = NULL; @@ -68,6 +69,7 @@ int hfi1_cdev_init(int minor, const char *name, cdev_init(cdev, fops); cdev->owner = THIS_MODULE; + cdev->kobj.parent = parent; kobject_set_name(&cdev->kobj, name); ret = cdev_add(cdev, dev, 1); @@ -82,13 +84,13 @@ int hfi1_cdev_init(int minor, const char *name, else device = device_create(class, NULL, dev, NULL, "%s", name); - if (!IS_ERR(device)) - goto done; - ret = PTR_ERR(device); - device = NULL; - pr_err("Could not create device for minor %d, %s (err %d)\n", - minor, name, -ret); - cdev_del(cdev); + if (IS_ERR(device)) { + ret = PTR_ERR(device); + device = NULL; + pr_err("Could not create device for minor %d, %s (err %d)\n", + minor, name, -ret); + cdev_del(cdev); + } done: *devp = device; return ret; diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/infiniband/hw/hfi1/device.h index 5bb3e83cf2da..c3ec19cb0ac9 100644 --- a/drivers/staging/rdma/hfi1/device.h +++ b/drivers/infiniband/hw/hfi1/device.h @@ -50,7 +50,8 @@ int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, struct cdev *cdev, struct device **devp, - bool user_accessible); + bool user_accessible, + struct kobject *parent); void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp); const char *class_name(void); int __init dev_init(void); diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/infiniband/hw/hfi1/dma.c index 7e8dab892848..7e8dab892848 100644 --- a/drivers/staging/rdma/hfi1/dma.c +++ b/drivers/infiniband/hw/hfi1/dma.c diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 700c6fa3a633..c75b0ae688f8 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1161,7 +1161,7 @@ int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc) ppd->lmc = lmc; hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0); - dd_dev_info(dd, "IB%u:%u got a lid: 0x%x\n", dd->unit, ppd->port, lid); + dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid); return 0; } diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c index 106349fc1fb9..106349fc1fb9 100644 --- a/drivers/staging/rdma/hfi1/efivar.c +++ b/drivers/infiniband/hw/hfi1/efivar.c diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/infiniband/hw/hfi1/efivar.h index 94e9e70de568..94e9e70de568 100644 --- a/drivers/staging/rdma/hfi1/efivar.h +++ b/drivers/infiniband/hw/hfi1/efivar.h diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c new file mode 100644 index 000000000000..36b77943cbfd --- /dev/null +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -0,0 +1,102 @@ +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include <linux/delay.h> +#include "hfi.h" +#include "common.h" +#include "eprom.h" + +#define CMD_SHIFT 24 +#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT)) + +/* controller interface speeds */ +#define EP_SPEED_FULL 0x2 /* full speed */ + +/* + * How long to wait for the EPROM to become available, in ms. + * The spec 32 Mb EPROM takes around 40s to erase then write. + * Double it for safety. + */ +#define EPROM_TIMEOUT 80000 /* ms */ +/* + * Initialize the EPROM handler. + */ +int eprom_init(struct hfi1_devdata *dd) +{ + int ret = 0; + + /* only the discrete chip has an EPROM */ + if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0) + return 0; + + /* + * It is OK if both HFIs reset the EPROM as long as they don't + * do it at the same time. + */ + ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); + if (ret) { + dd_dev_err(dd, + "%s: unable to acquire EPROM resource, no EPROM support\n", + __func__); + goto done_asic; + } + + /* reset EPROM to be sure it is in a good state */ + + /* set reset */ + write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK); + /* clear reset, set speed */ + write_csr(dd, ASIC_EEP_CTL_STAT, + EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT); + + /* wake the device with command "release powerdown NoID" */ + write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID); + + dd->eprom_available = true; + release_chip_resource(dd, CR_EPROM); +done_asic: + return ret; +} diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h index d41f0b1afb15..d41f0b1afb15 100644 --- a/drivers/staging/rdma/hfi1/eprom.h +++ b/drivers/infiniband/hw/hfi1/eprom.h diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c1c5bf82addb..7a5b0e676cc7 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -72,8 +72,6 @@ */ static int hfi1_file_open(struct inode *, struct file *); static int hfi1_file_close(struct inode *, struct file *); -static ssize_t hfi1_file_write(struct file *, const char __user *, - size_t, loff_t *); static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *); static unsigned int hfi1_poll(struct file *, struct poll_table_struct *); static int hfi1_file_mmap(struct file *, struct vm_area_struct *); @@ -86,8 +84,7 @@ static int get_ctxt_info(struct file *, void __user *, __u32); static int get_base_info(struct file *, void __user *, __u32); static int setup_ctxt(struct file *); static int setup_subctxt(struct hfi1_ctxtdata *); -static int get_user_context(struct file *, struct hfi1_user_info *, - int, unsigned); +static int get_user_context(struct file *, struct hfi1_user_info *, int); static int find_shared_ctxt(struct file *, const struct hfi1_user_info *); static int allocate_ctxt(struct file *, struct hfi1_devdata *, struct hfi1_user_info *); @@ -97,13 +94,15 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long); static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16); static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int); static int vma_fault(struct vm_area_struct *, struct vm_fault *); +static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, + unsigned long arg); static const struct file_operations hfi1_file_ops = { .owner = THIS_MODULE, - .write = hfi1_file_write, .write_iter = hfi1_write_iter, .open = hfi1_file_open, .release = hfi1_file_close, + .unlocked_ioctl = hfi1_file_ioctl, .poll = hfi1_poll, .mmap = hfi1_file_mmap, .llseek = noop_llseek, @@ -169,6 +168,13 @@ static inline int is_valid_mmap(u64 token) static int hfi1_file_open(struct inode *inode, struct file *fp) { + struct hfi1_devdata *dd = container_of(inode->i_cdev, + struct hfi1_devdata, + user_cdev); + + /* Just take a ref now. Not all opens result in a context assign */ + kobject_get(&dd->kobj); + /* The real work is performed later in assign_ctxt() */ fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL); if (fp->private_data) /* no cpu affinity by default */ @@ -176,127 +182,59 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } -static ssize_t hfi1_file_write(struct file *fp, const char __user *data, - size_t count, loff_t *offset) +static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, + unsigned long arg) { - const struct hfi1_cmd __user *ucmd; struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_cmd cmd; struct hfi1_user_info uinfo; struct hfi1_tid_info tinfo; + int ret = 0; unsigned long addr; - ssize_t consumed = 0, copy = 0, ret = 0; - void *dest = NULL; - __u64 user_val = 0; - int uctxt_required = 1; - int must_be_root = 0; - - /* FIXME: This interface cannot continue out of staging */ - if (WARN_ON_ONCE(!ib_safe_file_access(fp))) - return -EACCES; - - if (count < sizeof(cmd)) { - ret = -EINVAL; - goto bail; - } - - ucmd = (const struct hfi1_cmd __user *)data; - if (copy_from_user(&cmd, ucmd, sizeof(cmd))) { - ret = -EFAULT; - goto bail; - } - - consumed = sizeof(cmd); - - switch (cmd.type) { - case HFI1_CMD_ASSIGN_CTXT: - uctxt_required = 0; /* assigned user context not required */ - copy = sizeof(uinfo); - dest = &uinfo; - break; - case HFI1_CMD_SDMA_STATUS_UPD: - case HFI1_CMD_CREDIT_UPD: - copy = 0; - break; - case HFI1_CMD_TID_UPDATE: - case HFI1_CMD_TID_FREE: - case HFI1_CMD_TID_INVAL_READ: - copy = sizeof(tinfo); - dest = &tinfo; - break; - case HFI1_CMD_USER_INFO: - case HFI1_CMD_RECV_CTRL: - case HFI1_CMD_POLL_TYPE: - case HFI1_CMD_ACK_EVENT: - case HFI1_CMD_CTXT_INFO: - case HFI1_CMD_SET_PKEY: - case HFI1_CMD_CTXT_RESET: - copy = 0; - user_val = cmd.addr; - break; - case HFI1_CMD_EP_INFO: - case HFI1_CMD_EP_ERASE_CHIP: - case HFI1_CMD_EP_ERASE_RANGE: - case HFI1_CMD_EP_READ_RANGE: - case HFI1_CMD_EP_WRITE_RANGE: - uctxt_required = 0; /* assigned user context not required */ - must_be_root = 1; /* validate user */ - copy = 0; - break; - default: - ret = -EINVAL; - goto bail; - } - - /* If the command comes with user data, copy it. */ - if (copy) { - if (copy_from_user(dest, (void __user *)cmd.addr, copy)) { - ret = -EFAULT; - goto bail; - } - consumed += copy; - } - - /* - * Make sure there is a uctxt when needed. - */ - if (uctxt_required && !uctxt) { - ret = -EINVAL; - goto bail; - } + int uval = 0; + unsigned long ul_uval = 0; + u16 uval16 = 0; + + hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd); + if (cmd != HFI1_IOCTL_ASSIGN_CTXT && + cmd != HFI1_IOCTL_GET_VERS && + !uctxt) + return -EINVAL; - /* only root can do these operations */ - if (must_be_root && !capable(CAP_SYS_ADMIN)) { - ret = -EPERM; - goto bail; - } + switch (cmd) { + case HFI1_IOCTL_ASSIGN_CTXT: + if (copy_from_user(&uinfo, + (struct hfi1_user_info __user *)arg, + sizeof(uinfo))) + return -EFAULT; - switch (cmd.type) { - case HFI1_CMD_ASSIGN_CTXT: ret = assign_ctxt(fp, &uinfo); if (ret < 0) - goto bail; - ret = setup_ctxt(fp); + return ret; + setup_ctxt(fp); if (ret) - goto bail; + return ret; ret = user_init(fp); break; - case HFI1_CMD_CTXT_INFO: - ret = get_ctxt_info(fp, (void __user *)(unsigned long) - user_val, cmd.len); - break; - case HFI1_CMD_USER_INFO: - ret = get_base_info(fp, (void __user *)(unsigned long) - user_val, cmd.len); + case HFI1_IOCTL_CTXT_INFO: + ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg, + sizeof(struct hfi1_ctxt_info)); break; - case HFI1_CMD_SDMA_STATUS_UPD: + case HFI1_IOCTL_USER_INFO: + ret = get_base_info(fp, (void __user *)(unsigned long)arg, + sizeof(struct hfi1_base_info)); break; - case HFI1_CMD_CREDIT_UPD: + case HFI1_IOCTL_CREDIT_UPD: if (uctxt && uctxt->sc) sc_return_credits(uctxt->sc); break; - case HFI1_CMD_TID_UPDATE: + + case HFI1_IOCTL_TID_UPDATE: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + ret = hfi1_user_exp_rcv_setup(fp, &tinfo); if (!ret) { /* @@ -305,57 +243,82 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, * These fields are adjacent in the structure so * we can copy them at the same time. */ - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, sizeof(tinfo.tidcnt) + sizeof(tinfo.length))) ret = -EFAULT; } break; - case HFI1_CMD_TID_INVAL_READ: - ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); + + case HFI1_IOCTL_TID_FREE: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_clear(fp, &tinfo); if (ret) break; - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, sizeof(tinfo.tidcnt))) ret = -EFAULT; break; - case HFI1_CMD_TID_FREE: - ret = hfi1_user_exp_rcv_clear(fp, &tinfo); + + case HFI1_IOCTL_TID_INVAL_READ: + if (copy_from_user(&tinfo, + (struct hfi11_tid_info __user *)arg, + sizeof(tinfo))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); if (ret) break; - addr = (unsigned long)cmd.addr + - offsetof(struct hfi1_tid_info, tidcnt); + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, sizeof(tinfo.tidcnt))) ret = -EFAULT; break; - case HFI1_CMD_RECV_CTRL: - ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val); + + case HFI1_IOCTL_RECV_CTRL: + ret = get_user(uval, (int __user *)arg); + if (ret != 0) + return -EFAULT; + ret = manage_rcvq(uctxt, fd->subctxt, uval); break; - case HFI1_CMD_POLL_TYPE: - uctxt->poll_type = (typeof(uctxt->poll_type))user_val; + + case HFI1_IOCTL_POLL_TYPE: + ret = get_user(uval, (int __user *)arg); + if (ret != 0) + return -EFAULT; + uctxt->poll_type = (typeof(uctxt->poll_type))uval; break; - case HFI1_CMD_ACK_EVENT: - ret = user_event_ack(uctxt, fd->subctxt, user_val); + + case HFI1_IOCTL_ACK_EVENT: + ret = get_user(ul_uval, (unsigned long __user *)arg); + if (ret != 0) + return -EFAULT; + ret = user_event_ack(uctxt, fd->subctxt, ul_uval); break; - case HFI1_CMD_SET_PKEY: + + case HFI1_IOCTL_SET_PKEY: + ret = get_user(uval16, (u16 __user *)arg); + if (ret != 0) + return -EFAULT; if (HFI1_CAP_IS_USET(PKEY_CHECK)) - ret = set_ctxt_pkey(uctxt, fd->subctxt, user_val); + ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16); else - ret = -EPERM; + return -EPERM; break; - case HFI1_CMD_CTXT_RESET: { + + case HFI1_IOCTL_CTXT_RESET: { struct send_context *sc; struct hfi1_devdata *dd; - if (!uctxt || !uctxt->dd || !uctxt->sc) { - ret = -EINVAL; - break; - } + if (!uctxt || !uctxt->dd || !uctxt->sc) + return -EINVAL; + /* * There is no protection here. User level has to * guarantee that no one will be writing to the send @@ -373,10 +336,9 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, wait_event_interruptible_timeout( sc->halt_wait, (sc->flags & SCF_HALTED), msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (!(sc->flags & SCF_HALTED)) { - ret = -ENOLCK; - break; - } + if (!(sc->flags & SCF_HALTED)) + return -ENOLCK; + /* * If the send context was halted due to a Freeze, * wait until the device has been "unfrozen" before @@ -387,18 +349,16 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, dd->event_queue, !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN), msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (dd->flags & HFI1_FROZEN) { - ret = -ENOLCK; - break; - } - if (dd->flags & HFI1_FORCED_FREEZE) { + if (dd->flags & HFI1_FROZEN) + return -ENOLCK; + + if (dd->flags & HFI1_FORCED_FREEZE) /* * Don't allow context reset if we are into * forced freeze */ - ret = -ENODEV; - break; - } + return -ENODEV; + sc_disable(sc); ret = sc_enable(sc); hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, @@ -410,18 +370,17 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, sc_return_credits(sc); break; } - case HFI1_CMD_EP_INFO: - case HFI1_CMD_EP_ERASE_CHIP: - case HFI1_CMD_EP_ERASE_RANGE: - case HFI1_CMD_EP_READ_RANGE: - case HFI1_CMD_EP_WRITE_RANGE: - ret = handle_eprom_command(fp, &cmd); + + case HFI1_IOCTL_GET_VERS: + uval = HFI1_USER_SWVERSION; + if (put_user(uval, (int __user *)arg)) + return -EFAULT; break; + + default: + return -EINVAL; } - if (ret >= 0) - ret = consumed; -bail: return ret; } @@ -738,7 +697,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) { struct hfi1_filedata *fdata = fp->private_data; struct hfi1_ctxtdata *uctxt = fdata->uctxt; - struct hfi1_devdata *dd; + struct hfi1_devdata *dd = container_of(inode->i_cdev, + struct hfi1_devdata, + user_cdev); unsigned long flags, *ev; fp->private_data = NULL; @@ -747,7 +708,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) goto done; hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt); - dd = uctxt->dd; mutex_lock(&hfi1_mutex); flush_wc(); @@ -813,6 +773,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) mutex_unlock(&hfi1_mutex); hfi1_free_ctxtdata(dd, uctxt); done: + kobject_put(&dd->kobj); kfree(fdata); return 0; } @@ -836,7 +797,7 @@ static u64 kvirt_to_phys(void *addr) static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) { int i_minor, ret = 0; - unsigned swmajor, swminor, alg = HFI1_ALG_ACROSS; + unsigned int swmajor, swminor; swmajor = uinfo->userversion >> 16; if (swmajor != HFI1_USER_SWMAJOR) { @@ -846,9 +807,6 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) swminor = uinfo->userversion & 0xffff; - if (uinfo->hfi1_alg < HFI1_ALG_COUNT) - alg = uinfo->hfi1_alg; - mutex_lock(&hfi1_mutex); /* First, lets check if we need to setup a shared context? */ if (uinfo->subctxt_cnt) { @@ -868,7 +826,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) */ if (!ret) { i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - ret = get_user_context(fp, uinfo, i_minor - 1, alg); + ret = get_user_context(fp, uinfo, i_minor); } done_unlock: mutex_unlock(&hfi1_mutex); @@ -876,71 +834,26 @@ done: return ret; } -/* return true if the device available for general use */ -static int usable_device(struct hfi1_devdata *dd) -{ - struct hfi1_pportdata *ppd = dd->pport; - - return driver_lstate(ppd) == IB_PORT_ACTIVE; -} - static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo, - int devno, unsigned alg) + int devno) { struct hfi1_devdata *dd = NULL; - int ret = 0, devmax, npresent, nup, dev; + int devmax, npresent, nup; devmax = hfi1_count_units(&npresent, &nup); - if (!npresent) { - ret = -ENXIO; - goto done; - } - if (!nup) { - ret = -ENETDOWN; - goto done; - } - if (devno >= 0) { - dd = hfi1_lookup(devno); - if (!dd) - ret = -ENODEV; - else if (!dd->freectxts) - ret = -EBUSY; - } else { - struct hfi1_devdata *pdd; - - if (alg == HFI1_ALG_ACROSS) { - unsigned free = 0U; - - for (dev = 0; dev < devmax; dev++) { - pdd = hfi1_lookup(dev); - if (!pdd) - continue; - if (!usable_device(pdd)) - continue; - if (pdd->freectxts && - pdd->freectxts > free) { - dd = pdd; - free = pdd->freectxts; - } - } - } else { - for (dev = 0; dev < devmax; dev++) { - pdd = hfi1_lookup(dev); - if (!pdd) - continue; - if (!usable_device(pdd)) - continue; - if (pdd->freectxts) { - dd = pdd; - break; - } - } - } - if (!dd) - ret = -EBUSY; - } -done: - return ret ? ret : allocate_ctxt(fp, dd, uinfo); + if (!npresent) + return -ENXIO; + + if (!nup) + return -ENETDOWN; + + dd = hfi1_lookup(devno); + if (!dd) + return -ENODEV; + else if (!dd->freectxts) + return -EBUSY; + + return allocate_ctxt(fp, dd, uinfo); } static int find_shared_ctxt(struct file *fp, @@ -1546,170 +1459,10 @@ done: return ret; } -static int ui_open(struct inode *inode, struct file *filp) -{ - struct hfi1_devdata *dd; - - dd = container_of(inode->i_cdev, struct hfi1_devdata, ui_cdev); - filp->private_data = dd; /* for other methods */ - return 0; -} - -static int ui_release(struct inode *inode, struct file *filp) -{ - /* nothing to do */ - return 0; -} - -static loff_t ui_lseek(struct file *filp, loff_t offset, int whence) -{ - struct hfi1_devdata *dd = filp->private_data; - - return fixed_size_llseek(filp, offset, whence, - (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE); -} - -/* NOTE: assumes unsigned long is 8 bytes */ -static ssize_t ui_read(struct file *filp, char __user *buf, size_t count, - loff_t *f_pos) -{ - struct hfi1_devdata *dd = filp->private_data; - void __iomem *base = dd->kregbase; - unsigned long total, csr_off, - barlen = (dd->kregend - dd->kregbase); - u64 data; - - /* only read 8 byte quantities */ - if ((count % 8) != 0) - return -EINVAL; - /* offset must be 8-byte aligned */ - if ((*f_pos % 8) != 0) - return -EINVAL; - /* destination buffer must be 8-byte aligned */ - if ((unsigned long)buf % 8 != 0) - return -EINVAL; - /* must be in range */ - if (*f_pos + count > (barlen + DC8051_DATA_MEM_SIZE)) - return -EINVAL; - /* only set the base if we are not starting past the BAR */ - if (*f_pos < barlen) - base += *f_pos; - csr_off = *f_pos; - for (total = 0; total < count; total += 8, csr_off += 8) { - /* accessing LCB CSRs requires more checks */ - if (is_lcb_offset(csr_off)) { - if (read_lcb_csr(dd, csr_off, (u64 *)&data)) - break; /* failed */ - } - /* - * Cannot read ASIC GPIO/QSFP* clear and force CSRs without a - * false parity error. Avoid the whole issue by not reading - * them. These registers are defined as having a read value - * of 0. - */ - else if (csr_off == ASIC_GPIO_CLEAR || - csr_off == ASIC_GPIO_FORCE || - csr_off == ASIC_QSFP1_CLEAR || - csr_off == ASIC_QSFP1_FORCE || - csr_off == ASIC_QSFP2_CLEAR || - csr_off == ASIC_QSFP2_FORCE) - data = 0; - else if (csr_off >= barlen) { - /* - * read_8051_data can read more than just 8 bytes at - * a time. However, folding this into the loop and - * handling the reads in 8 byte increments allows us - * to smoothly transition from chip memory to 8051 - * memory. - */ - if (read_8051_data(dd, - (u32)(csr_off - barlen), - sizeof(data), &data)) - break; /* failed */ - } else - data = readq(base + total); - if (put_user(data, (unsigned long __user *)(buf + total))) - break; - } - *f_pos += total; - return total; -} - -/* NOTE: assumes unsigned long is 8 bytes */ -static ssize_t ui_write(struct file *filp, const char __user *buf, - size_t count, loff_t *f_pos) -{ - struct hfi1_devdata *dd = filp->private_data; - void __iomem *base; - unsigned long total, data, csr_off; - int in_lcb; - - /* only write 8 byte quantities */ - if ((count % 8) != 0) - return -EINVAL; - /* offset must be 8-byte aligned */ - if ((*f_pos % 8) != 0) - return -EINVAL; - /* source buffer must be 8-byte aligned */ - if ((unsigned long)buf % 8 != 0) - return -EINVAL; - /* must be in range */ - if (*f_pos + count > dd->kregend - dd->kregbase) - return -EINVAL; - - base = (void __iomem *)dd->kregbase + *f_pos; - csr_off = *f_pos; - in_lcb = 0; - for (total = 0; total < count; total += 8, csr_off += 8) { - if (get_user(data, (unsigned long __user *)(buf + total))) - break; - /* accessing LCB CSRs requires a special procedure */ - if (is_lcb_offset(csr_off)) { - if (!in_lcb) { - int ret = acquire_lcb_access(dd, 1); - - if (ret) - break; - in_lcb = 1; - } - } else { - if (in_lcb) { - release_lcb_access(dd, 1); - in_lcb = 0; - } - } - writeq(data, base + total); - } - if (in_lcb) - release_lcb_access(dd, 1); - *f_pos += total; - return total; -} - -static const struct file_operations ui_file_ops = { - .owner = THIS_MODULE, - .llseek = ui_lseek, - .read = ui_read, - .write = ui_write, - .open = ui_open, - .release = ui_release, -}; - -#define UI_OFFSET 192 /* device minor offset for UI devices */ -static int create_ui = 1; - -static struct cdev wildcard_cdev; -static struct device *wildcard_device; - -static atomic_t user_count = ATOMIC_INIT(0); - static void user_remove(struct hfi1_devdata *dd) { - if (atomic_dec_return(&user_count) == 0) - hfi1_cdev_cleanup(&wildcard_cdev, &wildcard_device); hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device); - hfi1_cdev_cleanup(&dd->ui_cdev, &dd->ui_device); } static int user_add(struct hfi1_devdata *dd) @@ -1717,34 +1470,13 @@ static int user_add(struct hfi1_devdata *dd) char name[10]; int ret; - if (atomic_inc_return(&user_count) == 1) { - ret = hfi1_cdev_init(0, class_name(), &hfi1_file_ops, - &wildcard_cdev, &wildcard_device, - true); - if (ret) - goto done; - } - snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit); - ret = hfi1_cdev_init(dd->unit + 1, name, &hfi1_file_ops, + ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops, &dd->user_cdev, &dd->user_device, - true); + true, &dd->kobj); if (ret) - goto done; + user_remove(dd); - if (create_ui) { - snprintf(name, sizeof(name), - "%s_ui%d", class_name(), dd->unit); - ret = hfi1_cdev_init(dd->unit + UI_OFFSET, name, &ui_file_ops, - &dd->ui_cdev, &dd->ui_device, - false); - if (ret) - goto done; - } - - return 0; -done: - user_remove(dd); return ret; } @@ -1753,13 +1485,7 @@ done: */ int hfi1_device_create(struct hfi1_devdata *dd) { - int r, ret; - - r = user_add(dd); - ret = hfi1_diag_add(dd); - if (r && !ret) - ret = r; - return ret; + return user_add(dd); } /* @@ -1769,5 +1495,4 @@ int hfi1_device_create(struct hfi1_devdata *dd) void hfi1_device_remove(struct hfi1_devdata *dd) { user_remove(dd); - hfi1_diag_remove(dd); } diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index ed680fda611d..ed680fda611d 100644 --- a/drivers/staging/rdma/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7b78d56de7f5..4417a0fd3ef9 100644 --- a/drivers/staging/rdma/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -453,6 +453,7 @@ struct rvt_sge_state; #define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP) #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) +#define HLS_DOWN ~(HLS_UP) /* use this MTU size if none other is given */ #define HFI1_DEFAULT_ACTIVE_MTU 10240 @@ -1168,6 +1169,7 @@ struct hfi1_devdata { atomic_t aspm_disabled_cnt; struct hfi1_affinity *affinity; + struct kobject kobj; }; /* 8051 firmware version helper */ @@ -1882,9 +1884,8 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) get_unit_name((dd)->unit), ##__VA_ARGS__) #define hfi1_dev_porterr(dd, port, fmt, ...) \ - dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ - get_unit_name((dd)->unit), (dd)->unit, (port), \ - ##__VA_ARGS__) + dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \ + get_unit_name((dd)->unit), (port), ##__VA_ARGS__) /* * this is used for formatting hw error messages... diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 502b7cf4647d..5cc492e5776d 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -732,12 +732,12 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) lastfail = hfi1_create_rcvhdrq(dd, rcd); if (!lastfail) lastfail = hfi1_setup_eagerbufs(rcd); - if (lastfail) + if (lastfail) { dd_dev_err(dd, "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); + ret = lastfail; + } } - if (lastfail) - ret = lastfail; /* Allocate enough memory for user event notification. */ len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS * @@ -989,8 +989,10 @@ static void release_asic_data(struct hfi1_devdata *dd) dd->asic_data = NULL; } -void hfi1_free_devdata(struct hfi1_devdata *dd) +static void __hfi1_free_devdata(struct kobject *kobj) { + struct hfi1_devdata *dd = + container_of(kobj, struct hfi1_devdata, kobj); unsigned long flags; spin_lock_irqsave(&hfi1_devs_lock, flags); @@ -1007,6 +1009,15 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) rvt_dealloc_device(&dd->verbs_dev.rdi); } +static struct kobj_type hfi1_devdata_type = { + .release = __hfi1_free_devdata, +}; + +void hfi1_free_devdata(struct hfi1_devdata *dd) +{ + kobject_put(&dd->kobj); +} + /* * Allocate our primary per-unit data structure. Must be done via verbs * allocator, because the verbs cleanup process both does cleanup and @@ -1102,6 +1113,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) &pdev->dev, "Could not alloc cpulist info, cpu affinity might be wrong\n"); } + kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; bail: @@ -1300,7 +1312,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) spin_lock(&ppd->cc_state_lock); cc_state = get_cc_state(ppd); - rcu_assign_pointer(ppd->cc_state, NULL); + RCU_INIT_POINTER(ppd->cc_state, NULL); spin_unlock(&ppd->cc_state_lock); if (cc_state) diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 65348d16ab2f..65348d16ab2f 100644 --- a/drivers/staging/rdma/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index 2ec6ef38d389..2ec6ef38d389 100644 --- a/drivers/staging/rdma/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index ed58cf21e790..219029576ba0 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1403,6 +1403,12 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) if (key == okey) continue; /* + * Don't update pkeys[2], if an HFI port without MgmtAllowed + * by neighbor is a switch. + */ + if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1) + continue; + /* * The SM gives us the complete PKey table. We have * to ensure that we put the PKeys in the matching * slots. @@ -3363,6 +3369,50 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, return reply((struct ib_mad_hdr *)smp); } +/* + * Apply congestion control information stored in the ppd to the + * active structure. + */ +static void apply_cc_state(struct hfi1_pportdata *ppd) +{ + struct cc_state *old_cc_state, *new_cc_state; + + new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL); + if (!new_cc_state) + return; + + /* + * Hold the lock for updating *and* to prevent ppd information + * from changing during the update. + */ + spin_lock(&ppd->cc_state_lock); + + old_cc_state = get_cc_state(ppd); + if (!old_cc_state) { + /* never active, or shutting down */ + spin_unlock(&ppd->cc_state_lock); + kfree(new_cc_state); + return; + } + + *new_cc_state = *old_cc_state; + + new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1; + memcpy(new_cc_state->cct.entries, ppd->ccti_entries, + ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)); + + new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED; + new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map; + memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries, + OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry)); + + rcu_assign_pointer(ppd->cc_state, new_cc_state); + + spin_unlock(&ppd->cc_state_lock); + + call_rcu(&old_cc_state->rcu, cc_state_reclaim); +} + static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) @@ -3374,6 +3424,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct opa_congestion_setting_entry_shadow *entries; int i; + /* + * Save details from packet into the ppd. Hold the cc_state_lock so + * our information is consistent with anyone trying to apply the state. + */ + spin_lock(&ppd->cc_state_lock); ppd->cc_sl_control_map = be32_to_cpu(p->control_map); entries = ppd->congestion_entries; @@ -3384,6 +3439,10 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, p->entries[i].trigger_threshold; entries[i].ccti_min = p->entries[i].ccti_min; } + spin_unlock(&ppd->cc_state_lock); + + /* now apply the information */ + apply_cc_state(ppd); return __subn_get_opa_cong_setting(smp, am, data, ibdev, port, resp_len); @@ -3526,7 +3585,6 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; u16 ccti_limit; - struct cc_state *old_cc_state, *new_cc_state; /* sanity check n_blocks, start_block */ if (n_blocks == 0 || @@ -3546,45 +3604,20 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL); - if (!new_cc_state) - goto getit; - + /* + * Save details from packet into the ppd. Hold the cc_state_lock so + * our information is consistent with anyone trying to apply the state. + */ spin_lock(&ppd->cc_state_lock); - - old_cc_state = get_cc_state(ppd); - - if (!old_cc_state) { - spin_unlock(&ppd->cc_state_lock); - kfree(new_cc_state); - return reply((struct ib_mad_hdr *)smp); - } - - *new_cc_state = *old_cc_state; - - new_cc_state->cct.ccti_limit = ccti_limit; - - entries = ppd->ccti_entries; ppd->total_cct_entry = ccti_limit + 1; - + entries = ppd->ccti_entries; for (j = 0, i = sentry; i < eentry; j++, i++) entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry); - - memcpy(new_cc_state->cct.entries, entries, - eentry * sizeof(struct ib_cc_table_entry)); - - new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED; - new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map; - memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries, - OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry)); - - rcu_assign_pointer(ppd->cc_state, new_cc_state); - spin_unlock(&ppd->cc_state_lock); - call_rcu(&old_cc_state->rcu, cc_state_reclaim); + /* now apply the information */ + apply_cc_state(ppd); -getit: return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len); } diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 55ee08675333..55ee08675333 100644 --- a/drivers/staging/rdma/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 2b0e91d3093d..b7a80aa1ae30 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -45,6 +45,7 @@ * */ #include <linux/list.h> +#include <linux/rculist.h> #include <linux/mmu_notifier.h> #include <linux/interval_tree_generic.h> @@ -97,7 +98,6 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node) int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) { struct mmu_rb_handler *handlr; - unsigned long flags; if (!ops->invalidate) return -EINVAL; @@ -111,9 +111,9 @@ int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) INIT_HLIST_NODE(&handlr->mn.hlist); spin_lock_init(&handlr->lock); handlr->mn.ops = &mn_opts; - spin_lock_irqsave(&mmu_rb_lock, flags); - list_add_tail(&handlr->list, &mmu_rb_handlers); - spin_unlock_irqrestore(&mmu_rb_lock, flags); + spin_lock(&mmu_rb_lock); + list_add_tail_rcu(&handlr->list, &mmu_rb_handlers); + spin_unlock(&mmu_rb_lock); return mmu_notifier_register(&handlr->mn, current->mm); } @@ -130,9 +130,10 @@ void hfi1_mmu_rb_unregister(struct rb_root *root) if (current->mm) mmu_notifier_unregister(&handler->mn, current->mm); - spin_lock_irqsave(&mmu_rb_lock, flags); - list_del(&handler->list); - spin_unlock_irqrestore(&mmu_rb_lock, flags); + spin_lock(&mmu_rb_lock); + list_del_rcu(&handler->list); + spin_unlock(&mmu_rb_lock); + synchronize_rcu(); spin_lock_irqsave(&handler->lock, flags); if (!RB_EMPTY_ROOT(root)) { @@ -271,16 +272,15 @@ void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) { struct mmu_rb_handler *handler; - unsigned long flags; - spin_lock_irqsave(&mmu_rb_lock, flags); - list_for_each_entry(handler, &mmu_rb_handlers, list) { + rcu_read_lock(); + list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) { if (handler->root == root) goto unlock; } handler = NULL; unlock: - spin_unlock_irqrestore(&mmu_rb_lock, flags); + rcu_read_unlock(); return handler; } diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index 7a57b9c49d27..7a57b9c49d27 100644 --- a/drivers/staging/rdma/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h index 6ef3c1cbdcd7..6ef3c1cbdcd7 100644 --- a/drivers/staging/rdma/hfi1/opa_compat.h +++ b/drivers/infiniband/hw/hfi1/opa_compat.h diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 0bac21e6a658..0bac21e6a658 100644 --- a/drivers/staging/rdma/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index c67b9ad3fcf4..d5edb1afbb8f 100644 --- a/drivers/staging/rdma/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1835,8 +1835,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) struct pio_vl_map *oldmap, *newmap; if (!vl_scontexts) { - /* send context 0 reserved for VL15 */ - for (i = 1; i < dd->num_send_contexts; i++) + for (i = 0; i < dd->num_send_contexts; i++) if (dd->send_contexts[i].type == SC_KERNEL) num_kernel_send_contexts++; /* truncate divide */ diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 53a08edb7f64..464cbd27b975 100644 --- a/drivers/staging/rdma/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -49,10 +49,10 @@ /* send context types */ #define SC_KERNEL 0 -#define SC_ACK 1 -#define SC_USER 2 -#define SC_VL15 3 -#define SC_MAX 4 +#define SC_VL15 1 +#define SC_ACK 2 +#define SC_USER 3 /* must be the last one: it may take all left */ +#define SC_MAX 4 /* count of send context types */ /* invalid send context index */ #define INVALID_SCI 0xff diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 8c25e1b58849..8c25e1b58849 100644 --- a/drivers/staging/rdma/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 8fe8a205b5bb..03df9322f862 100644 --- a/drivers/staging/rdma/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -87,6 +87,17 @@ void free_platform_config(struct hfi1_devdata *dd) */ } +void get_port_type(struct hfi1_pportdata *ppd) +{ + int ret; + + ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, + PORT_TABLE_PORT_TYPE, &ppd->port_type, + 4); + if (ret) + ppd->port_type = PORT_TYPE_UNKNOWN; +} + int set_qsfp_tx(struct hfi1_pportdata *ppd, int on) { u8 tx_ctrl_byte = on ? 0x0 : 0xF; @@ -529,7 +540,8 @@ static void apply_tunings( /* Enable external device config if channel is limiting active */ read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, GENERAL_CONFIG, &config_data); - config_data |= limiting_active; + config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT); + config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT); ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) @@ -542,7 +554,8 @@ static void apply_tunings( /* Pass tuning method to 8051 */ read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, &config_data); - config_data |= tuning_method; + config_data &= ~(0xff << TUNING_METHOD_SHIFT); + config_data |= ((u32)tuning_method << TUNING_METHOD_SHIFT); ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) @@ -564,8 +577,8 @@ static void apply_tunings( ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, &config_data); /* Clear, then set the external device config field */ - config_data &= ~(0xFF << 24); - config_data |= (external_device_config << 24); + config_data &= ~(u32)0xFF; + config_data |= external_device_config; ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) @@ -784,12 +797,6 @@ void tune_serdes(struct hfi1_pportdata *ppd) return; } - ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, - PORT_TABLE_PORT_TYPE, &ppd->port_type, - 4); - if (ret) - ppd->port_type = PORT_TYPE_UNKNOWN; - switch (ppd->port_type) { case PORT_TYPE_DISCONNECTED: ppd->offline_disabled_reason = diff --git a/drivers/staging/rdma/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h index 19620cf546d5..e2c21613c326 100644 --- a/drivers/staging/rdma/hfi1/platform.h +++ b/drivers/infiniband/hw/hfi1/platform.h @@ -298,6 +298,7 @@ enum link_tuning_encoding { /* platform.c */ void get_platform_config(struct hfi1_devdata *dd); void free_platform_config(struct hfi1_devdata *dd); +void get_port_type(struct hfi1_pportdata *ppd); int set_qsfp_tx(struct hfi1_pportdata *ppd, int on); void tune_serdes(struct hfi1_pportdata *ppd); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 91eb42316df9..1a942ffba4cb 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -49,7 +49,6 @@ #include <linux/vmalloc.h> #include <linux/hash.h> #include <linux/module.h> -#include <linux/random.h> #include <linux/seq_file.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_qp.h> @@ -161,9 +160,6 @@ static inline int opa_mtu_enum_to_int(int mtu) * This function is what we would push to the core layer if we wanted to be a * "first class citizen". Instead we hide this here and rely on Verbs ULPs * to blindly pass the MTU enum value from the PathRecord to us. - * - * The actual flag used to determine "8k MTU" will change and is currently - * unknown. */ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) { @@ -516,6 +512,7 @@ static void iowait_wakeup(struct iowait *wait, int reason) static void iowait_sdma_drained(struct iowait *wait) { struct rvt_qp *qp = iowait_to_qp(wait); + unsigned long flags; /* * This happens when the send engine notes @@ -523,12 +520,12 @@ static void iowait_sdma_drained(struct iowait *wait) * do the flush work until that QP's * sdma work has finished. */ - spin_lock(&qp->s_lock); + spin_lock_irqsave(&qp->s_lock, flags); if (qp->s_flags & RVT_S_WAIT_DMA) { qp->s_flags &= ~RVT_S_WAIT_DMA; hfi1_schedule_send(qp); } - spin_unlock(&qp->s_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); } /** diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index e7bc8d6cf681..e7bc8d6cf681 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 2441669f0817..2441669f0817 100644 --- a/drivers/staging/rdma/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h index dadc66c442b9..dadc66c442b9 100644 --- a/drivers/staging/rdma/hfi1/qsfp.h +++ b/drivers/infiniband/hw/hfi1/qsfp.h diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 792f15eb8efe..792f15eb8efe 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index a659aec3c3c6..a659aec3c3c6 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index abb8ebc1fcac..f9befc05b349 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -134,6 +134,7 @@ static const char * const sdma_state_names[] = { [sdma_state_s99_running] = "s99_Running", }; +#ifdef CONFIG_SDMA_VERBOSITY static const char * const sdma_event_names[] = { [sdma_event_e00_go_hw_down] = "e00_GoHwDown", [sdma_event_e10_go_hw_start] = "e10_GoHwStart", @@ -150,6 +151,7 @@ static const char * const sdma_event_names[] = { [sdma_event_e85_link_down] = "e85_LinkDown", [sdma_event_e90_sw_halted] = "e90_SwHalted", }; +#endif static const struct sdma_set_state_action sdma_action_table[] = { [sdma_state_s00_hw_down] = { @@ -376,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde, sdma_txclean(sde->dd, tx); if (complete) (*complete)(tx, res); - if (iowait_sdma_dec(wait) && wait) + if (wait && iowait_sdma_dec(wait)) iowait_drain_wakeup(wait); } diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 8f50c99fe711..8f50c99fe711 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h index bf7d777d756e..bf7d777d756e 100644 --- a/drivers/staging/rdma/hfi1/sdma_txreq.h +++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 8cd6df8634ad..91fc2aed6aed 100644 --- a/drivers/staging/rdma/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -721,8 +721,8 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, } dd_dev_info(dd, - "IB%u: Congestion Control Agent enabled for port %d\n", - dd->unit, port_num); + "Congestion Control Agent enabled for port %d\n", + port_num); return 0; diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 8b62fefcf903..79b2952c0dfb 100644 --- a/drivers/staging/rdma/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -66,6 +66,7 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) #define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x" #define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" #define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" +#define IETH_PRN "ieth rkey 0x%.8x" #define ATOMICACKETH_PRN "origdata %lld" #define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld" @@ -166,6 +167,12 @@ const char *parse_everbs_hdrs( be32_to_cpu(eh->ud.deth[0]), be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK); break; + /* ieth */ + case OP(RC, SEND_LAST_WITH_INVALIDATE): + case OP(RC, SEND_ONLY_WITH_INVALIDATE): + trace_seq_printf(p, IETH_PRN, + be32_to_cpu(eh->ieth)); + break; } trace_seq_putc(p, 0); return ret; @@ -233,3 +240,4 @@ __hfi1_trace_fn(FIRMWARE); __hfi1_trace_fn(RCVCTRL); __hfi1_trace_fn(TID); __hfi1_trace_fn(MMU); +__hfi1_trace_fn(IOCTL); diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h index 963dc948c38a..28c1d0832886 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/infiniband/hw/hfi1/trace.h @@ -74,8 +74,8 @@ __print_symbolic(etype, \ TRACE_EVENT(hfi1_rcvhdr, TP_PROTO(struct hfi1_devdata *dd, - u64 eflags, u32 ctxt, + u64 eflags, u32 etype, u32 hlen, u32 tlen, @@ -392,6 +392,8 @@ __print_symbolic(opcode, \ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ ib_opcode_name(RC_COMPARE_SWAP), \ ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \ + ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \ ib_opcode_name(UC_SEND_FIRST), \ ib_opcode_name(UC_SEND_MIDDLE), \ ib_opcode_name(UC_SEND_LAST), \ @@ -1341,6 +1343,7 @@ __hfi1_trace_def(FIRMWARE); __hfi1_trace_def(RCVCTRL); __hfi1_trace_def(TID); __hfi1_trace_def(MMU); +__hfi1_trace_def(IOCTL); #define hfi1_cdbg(which, fmt, ...) \ __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/infiniband/hw/hfi1/twsi.c index e82e52a63d35..e82e52a63d35 100644 --- a/drivers/staging/rdma/hfi1/twsi.c +++ b/drivers/infiniband/hw/hfi1/twsi.c diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/infiniband/hw/hfi1/twsi.h index 5b8a5b5e7eae..5b8a5b5e7eae 100644 --- a/drivers/staging/rdma/hfi1/twsi.h +++ b/drivers/infiniband/hw/hfi1/twsi.h diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index df773d433297..df773d433297 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 1e503ad0bebb..1e503ad0bebb 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 1b640a35b3fe..1b640a35b3fe 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 9bc8d9fba87e..9bc8d9fba87e 100644 --- a/drivers/staging/rdma/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 88e10b5f55f1..88e10b5f55f1 100644 --- a/drivers/staging/rdma/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 0014c9c0e967..29f4795f866c 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -166,6 +166,8 @@ static unsigned initial_pkt_count = 8; #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ +struct sdma_mmu_node; + struct user_sdma_iovec { struct list_head list; struct iovec iov; @@ -178,6 +180,7 @@ struct user_sdma_iovec { * which we last left off. */ u64 offset; + struct sdma_mmu_node *node; }; #define SDMA_CACHE_NODE_EVICT BIT(0) @@ -507,6 +510,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, struct sdma_req_info info; struct user_sdma_request *req; u8 opcode, sc, vl; + int req_queued = 0; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { hfi1_cdbg( @@ -703,6 +707,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); atomic_inc(&pq->n_reqs); + req_queued = 1; /* Send the first N packets in the request to buy us some time */ ret = user_sdma_send_pkts(req, pcount); if (unlikely(ret < 0 && ret != -EBUSY)) { @@ -747,7 +752,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, return 0; free_req: user_sdma_free_request(req, true); - pq_update(pq); + if (req_queued) + pq_update(pq); set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); return ret; } @@ -1153,6 +1159,7 @@ retry: } iovec->pages = node->pages; iovec->npages = npages; + iovec->node = node; ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); if (ret) { @@ -1519,18 +1526,13 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) } if (req->data_iovs) { struct sdma_mmu_node *node; - struct mmu_rb_node *mnode; int i; for (i = 0; i < req->data_iovs; i++) { - mnode = hfi1_mmu_rb_search( - &req->pq->sdma_rb_root, - (unsigned long)req->iovs[i].iov.iov_base, - req->iovs[i].iov.iov_len); - if (!mnode || IS_ERR(mnode)) + node = req->iovs[i].node; + if (!node) continue; - node = container_of(mnode, struct sdma_mmu_node, rb); if (unpin) hfi1_mmu_rb_remove(&req->pq->sdma_rb_root, &node->rb); diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index b9240e351161..b9240e351161 100644 --- a/drivers/staging/rdma/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 9cdc85fa366f..849c4b9399d4 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -52,7 +52,6 @@ #include <linux/utsname.h> #include <linux/rculist.h> #include <linux/mm.h> -#include <linux/random.h> #include <linux/vmalloc.h> #include "hfi.h" @@ -336,6 +335,8 @@ const u8 hdr_len_by_opcode[256] = { [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4, [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28, [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, + [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, + [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = 12 + 8, [IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8, @@ -946,7 +947,6 @@ static int pio_wait(struct rvt_qp *qp, dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); - dev->n_piowait++; qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 3ee223983b20..488356775627 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -152,6 +152,7 @@ union ib_ehdrs { } at; __be32 imm_data; __be32 aeth; + __be32 ieth; struct ib_atomic_eth atomic_eth; } __packed; diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index bc95c4112c61..bc95c4112c61 100644 --- a/drivers/staging/rdma/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 1cf69b2fe4a5..1cf69b2fe4a5 100644 --- a/drivers/staging/rdma/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 4a740f7a0519..02a735b64208 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2361,58 +2361,130 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static const char * const i40iw_hw_stat_names[] = { + // 32bit names + [I40IW_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards", + [I40IW_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts", + [I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes", + [I40IW_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards", + [I40IW_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts", + [I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes", + [I40IW_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs", + [I40IW_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors", + [I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors", + // 64bit names + [I40IW_HW_STAT_INDEX_IP4RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4InOctets", + [I40IW_HW_STAT_INDEX_IP4RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4InPkts", + [I40IW_HW_STAT_INDEX_IP4RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4InReasmRqd", + [I40IW_HW_STAT_INDEX_IP4RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4InMcastPkts", + [I40IW_HW_STAT_INDEX_IP4TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4OutOctets", + [I40IW_HW_STAT_INDEX_IP4TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4OutPkts", + [I40IW_HW_STAT_INDEX_IP4TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4OutSegRqd", + [I40IW_HW_STAT_INDEX_IP4TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip4OutMcastPkts", + [I40IW_HW_STAT_INDEX_IP6RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6InOctets", + [I40IW_HW_STAT_INDEX_IP6RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6InPkts", + [I40IW_HW_STAT_INDEX_IP6RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6InReasmRqd", + [I40IW_HW_STAT_INDEX_IP6RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6InMcastPkts", + [I40IW_HW_STAT_INDEX_IP6TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6OutOctets", + [I40IW_HW_STAT_INDEX_IP6TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6OutPkts", + [I40IW_HW_STAT_INDEX_IP6TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6OutSegRqd", + [I40IW_HW_STAT_INDEX_IP6TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] = + "ip6OutMcastPkts", + [I40IW_HW_STAT_INDEX_TCPRXSEGS + I40IW_HW_STAT_INDEX_MAX_32] = + "tcpInSegs", + [I40IW_HW_STAT_INDEX_TCPTXSEG + I40IW_HW_STAT_INDEX_MAX_32] = + "tcpOutSegs", + [I40IW_HW_STAT_INDEX_RDMARXRDS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwInRdmaReads", + [I40IW_HW_STAT_INDEX_RDMARXSNDS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwInRdmaSends", + [I40IW_HW_STAT_INDEX_RDMARXWRS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwInRdmaWrites", + [I40IW_HW_STAT_INDEX_RDMATXRDS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwOutRdmaReads", + [I40IW_HW_STAT_INDEX_RDMATXSNDS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwOutRdmaSends", + [I40IW_HW_STAT_INDEX_RDMATXWRS + I40IW_HW_STAT_INDEX_MAX_32] = + "iwOutRdmaWrites", + [I40IW_HW_STAT_INDEX_RDMAVBND + I40IW_HW_STAT_INDEX_MAX_32] = + "iwRdmaBnd", + [I40IW_HW_STAT_INDEX_RDMAVINV + I40IW_HW_STAT_INDEX_MAX_32] = + "iwRdmaInv" +}; + /** - * i40iw_get_protocol_stats - Populates the rdma_stats structure - * @ibdev: ib dev struct - * @stats: iw protocol stats struct + * i40iw_alloc_hw_stats - Allocate a hw stats structure + * @ibdev: device pointer from stack + * @port_num: port number */ -static int i40iw_get_protocol_stats(struct ib_device *ibdev, - union rdma_protocol_stats *stats) +static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + struct i40iw_device *iwdev = to_iwdev(ibdev); + struct i40iw_sc_dev *dev = &iwdev->sc_dev; + int num_counters = I40IW_HW_STAT_INDEX_MAX_32 + + I40IW_HW_STAT_INDEX_MAX_64; + unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN; + + BUILD_BUG_ON(ARRAY_SIZE(i40iw_hw_stat_names) != + (I40IW_HW_STAT_INDEX_MAX_32 + + I40IW_HW_STAT_INDEX_MAX_64)); + + /* + * PFs get the default update lifespan, but VFs only update once + * per second + */ + if (!dev->is_pf) + lifespan = 1000; + return rdma_alloc_hw_stats_struct(i40iw_hw_stat_names, num_counters, + lifespan); +} + +/** + * i40iw_get_hw_stats - Populates the rdma_hw_stats structure + * @ibdev: device pointer from stack + * @stats: stats pointer from stack + * @port_num: port number + * @index: which hw counter the stack is requesting we update + */ +static int i40iw_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port_num, int index) { struct i40iw_device *iwdev = to_iwdev(ibdev); struct i40iw_sc_dev *dev = &iwdev->sc_dev; struct i40iw_dev_pestat *devstat = &dev->dev_pestat; struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; - struct timespec curr_time; - static struct timespec last_rd_time = {0, 0}; unsigned long flags; - curr_time = current_kernel_time(); - memset(stats, 0, sizeof(*stats)); - if (dev->is_pf) { spin_lock_irqsave(&devstat->stats_lock, flags); devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats); spin_unlock_irqrestore(&devstat->stats_lock, flags); } else { - if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1) - if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats)) - return -ENOSYS; + if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats)) + return -ENOSYS; } - stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] + - hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXPKTS]; - stats->iw.ipInTruncatedPkts = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] + - hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC]; - stats->iw.ipInDiscards = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] + - hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD]; - stats->iw.ipOutNoRoutes = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] + - hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE]; - stats->iw.ipReasmReqds = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] + - hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS]; - stats->iw.ipFragCreates = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] + - hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS]; - stats->iw.ipInMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] + - hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS]; - stats->iw.ipOutMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] + - hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXMCPKTS]; - stats->iw.tcpOutSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPTXSEG]; - stats->iw.tcpInSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPRXSEGS]; - stats->iw.tcpRetransSegs = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_TCPRTXSEG]; - - last_rd_time = curr_time; - return 0; + memcpy(&stats->value[0], &hw_stats, sizeof(*hw_stats)); + + return stats->num_counters; } /** @@ -2551,7 +2623,8 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr; iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr; iwibdev->ibdev.dereg_mr = i40iw_dereg_mr; - iwibdev->ibdev.get_protocol_stats = i40iw_get_protocol_stats; + iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats; + iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats; iwibdev->ibdev.query_device = i40iw_query_device; iwibdev->ibdev.create_ah = i40iw_create_ah; iwibdev->ibdev.destroy_ah = i40iw_destroy_ah; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 82d7c4bf5970..ce4034071f9c 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1308,21 +1308,6 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { SYM_LSB(IntMask, fldname##17IntMask)), \ .msg = #fldname "_C", .sz = sizeof(#fldname "_C") } -static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = { - INTR_AUTO_P(SDmaInt), - INTR_AUTO_P(SDmaProgressInt), - INTR_AUTO_P(SDmaIdleInt), - INTR_AUTO_P(SDmaCleanupDone), - INTR_AUTO_C(RcvUrg), - INTR_AUTO_P(ErrInt), - INTR_AUTO(ErrInt), /* non-port-specific errs */ - INTR_AUTO(AssertGPIOInt), - INTR_AUTO_P(SendDoneInt), - INTR_AUTO(SendBufAvailInt), - INTR_AUTO_C(RcvAvail), - { .mask = 0, .sz = 0 } -}; - #define TXSYMPTOM_AUTO_P(fldname) \ { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \ .msg = #fldname, .sz = sizeof(#fldname) } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 0bd18375d7df..d2ac29861af5 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1172,11 +1172,13 @@ static int pma_get_classportinfo(struct ib_pma_mad *pmp, * Set the most significant bit of CM2 to indicate support for * congestion statistics */ - p->reserved[0] = dd->psxmitwait_supported << 7; + ib_set_cpi_capmask2(p, + dd->psxmitwait_supported << + (31 - IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE)); /* * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. */ - p->resp_time_value = 18; + ib_set_cpi_resp_time(p, 18); return reply((struct ib_smp *) pmp); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 6888f03c6d61..4f878151f81f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -159,6 +159,7 @@ struct qib_other_headers { } at; __be32 imm_data; __be32 aeth; + __be32 ieth; struct ib_atomic_eth atomic_eth; } u; } __packed; diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index b1ffc8b4a6c0..6ca6fa80dd6e 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -525,6 +525,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) return PTR_ERR(task); } + set_user_nice(task, MIN_NICE); cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); kthread_bind(task, cpu); wake_up_process(task); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 0ff765bfd619..0f4d4500f45e 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -124,11 +124,13 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, int count) { int m, i = 0; + struct rvt_dev_info *dev = ib_to_rvt(pd->device); mr->mapsz = 0; m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; for (; i < m; i++) { - mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL); + mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL, + dev->dparms.node); if (!mr->map[i]) { rvt_deinit_mregion(mr); return -ENOMEM; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0f12c211c385..5fa4d4d81ee0 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -397,6 +397,7 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) rvt_put_ss(&qp->s_rdma_read_sge); @@ -431,7 +432,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) if (qp->ibqp.qp_type != IB_QPT_RC) return; - for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + for (n = 0; n < rvt_max_atomic(rdi); n++) { struct rvt_ack_entry *e = &qp->s_ack_queue[n]; if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && @@ -569,7 +570,12 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_ssn = 1; qp->s_lsn = 0; qp->s_mig_state = IB_MIG_MIGRATED; - memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + if (qp->s_ack_queue) + memset( + qp->s_ack_queue, + 0, + rvt_max_atomic(rdi) * + sizeof(*qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; qp->s_num_rd_atomic = 0; @@ -653,9 +659,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (gfp == GFP_NOIO) swq = __vmalloc( (init_attr->cap.max_send_wr + 1) * sz, - gfp, PAGE_KERNEL); + gfp | __GFP_ZERO, PAGE_KERNEL); else - swq = vmalloc_node( + swq = vzalloc_node( (init_attr->cap.max_send_wr + 1) * sz, rdi->dparms.node); if (!swq) @@ -677,6 +683,16 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, goto bail_swq; RCU_INIT_POINTER(qp->next, NULL); + if (init_attr->qp_type == IB_QPT_RC) { + qp->s_ack_queue = + kzalloc_node( + sizeof(*qp->s_ack_queue) * + rvt_max_atomic(rdi), + gfp, + rdi->dparms.node); + if (!qp->s_ack_queue) + goto bail_qp; + } /* * Driver needs to set up it's private QP structure and do any @@ -704,9 +720,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.wq = __vmalloc( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, - gfp, PAGE_KERNEL); + gfp | __GFP_ZERO, PAGE_KERNEL); else - qp->r_rq.wq = vmalloc_node( + qp->r_rq.wq = vzalloc_node( sizeof(struct rvt_rwq) + qp->r_rq.size * sz, rdi->dparms.node); @@ -857,6 +873,7 @@ bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); bail_qp: + kfree(qp->s_ack_queue); kfree(qp); bail_swq: @@ -1284,6 +1301,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp) vfree(qp->r_rq.wq); vfree(qp->s_wq); rdi->driver_f.qp_priv_free(rdi, qp); + kfree(qp->s_ack_queue); kfree(qp); return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index caec8e9c4666..bab7db6fa9ab 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -92,6 +92,8 @@ enum { IPOIB_FLAG_UMCAST = 10, IPOIB_STOP_NEIGH_GC = 11, IPOIB_NEIGH_TBL_FLUSH = 12, + IPOIB_FLAG_DEV_ADDR_SET = 13, + IPOIB_FLAG_DEV_ADDR_CTRL = 14, IPOIB_MAX_BACKOFF_SECONDS = 16, @@ -392,6 +394,7 @@ struct ipoib_dev_priv { struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; unsigned max_send_sge; + bool sm_fullmember_sendonly_support; }; struct ipoib_ah { @@ -476,6 +479,7 @@ void ipoib_reap_ah(struct work_struct *work); void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); +int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv); struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 418e5a1c8744..45c40a17d6a6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -997,6 +997,106 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv) return 0; } +/* + * returns true if the device address of the ipoib interface has changed and the + * new address is a valid one (i.e in the gid table), return false otherwise. + */ +static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) +{ + union ib_gid search_gid; + union ib_gid gid0; + union ib_gid *netdev_gid; + int err; + u16 index; + u8 port; + bool ret = false; + + netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4); + if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL)) + return false; + + netif_addr_lock(priv->dev); + + /* The subnet prefix may have changed, update it now so we won't have + * to do it later + */ + priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix; + netdev_gid->global.subnet_prefix = gid0.global.subnet_prefix; + search_gid.global.subnet_prefix = gid0.global.subnet_prefix; + + search_gid.global.interface_id = priv->local_gid.global.interface_id; + + netif_addr_unlock(priv->dev); + + err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB, + priv->dev, &port, &index); + + netif_addr_lock(priv->dev); + + if (search_gid.global.interface_id != + priv->local_gid.global.interface_id) + /* There was a change while we were looking up the gid, bail + * here and let the next work sort this out + */ + goto out; + + /* The next section of code needs some background: + * Per IB spec the port GUID can't change if the HCA is powered on. + * port GUID is the basis for GID at index 0 which is the basis for + * the default device address of a ipoib interface. + * + * so it seems the flow should be: + * if user_changed_dev_addr && gid in gid tbl + * set bit dev_addr_set + * return true + * else + * return false + * + * The issue is that there are devices that don't follow the spec, + * they change the port GUID when the HCA is powered, so in order + * not to break userspace applications, We need to check if the + * user wanted to control the device address and we assume that + * if he sets the device address back to be based on GID index 0, + * he no longer wishs to control it. + * + * If the user doesn't control the the device address, + * IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed it means + * the port GUID has changed and GID at index 0 has changed + * so we need to change priv->local_gid and priv->dev->dev_addr + * to reflect the new GID. + */ + if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { + if (!err && port == priv->port) { + set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); + if (index == 0) + clear_bit(IPOIB_FLAG_DEV_ADDR_CTRL, + &priv->flags); + else + set_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags); + ret = true; + } else { + ret = false; + } + } else { + if (!err && port == priv->port) { + ret = true; + } else { + if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) { + memcpy(&priv->local_gid, &gid0, + sizeof(priv->local_gid)); + memcpy(priv->dev->dev_addr + 4, &gid0, + sizeof(priv->local_gid)); + ret = true; + } + } + } + +out: + netif_addr_unlock(priv->dev); + + return ret; +} + static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, enum ipoib_flush_level level, int nesting) @@ -1018,6 +1118,9 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) && level != IPOIB_FLUSH_HEAVY) { + /* Make sure the dev_addr is set even if not flushing */ + if (level == IPOIB_FLUSH_LIGHT) + ipoib_dev_addr_changed_valid(priv); ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); return; } @@ -1029,7 +1132,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, update_parent_pkey(priv); else update_child_pkey(priv); - } + } else if (level == IPOIB_FLUSH_LIGHT) + ipoib_dev_addr_changed_valid(priv); ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n"); return; } @@ -1081,7 +1185,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { if (level >= IPOIB_FLUSH_NORMAL) ipoib_ib_dev_up(dev); - ipoib_mcast_restart_task(&priv->restart_task); + if (ipoib_dev_addr_changed_valid(priv)) + ipoib_mcast_restart_task(&priv->restart_task); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b940ef1c19c7..2d7c16346648 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -99,6 +99,7 @@ static struct net_device *ipoib_get_net_dev_by_params( struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr, void *client_data); +static int ipoib_set_mac(struct net_device *dev, void *addr); static struct ib_client ipoib_client = { .name = "ipoib", @@ -117,6 +118,8 @@ int ipoib_open(struct net_device *dev) set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); + priv->sm_fullmember_sendonly_support = false; + if (ipoib_ib_dev_open(dev)) { if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) return 0; @@ -629,6 +632,77 @@ void ipoib_mark_paths_invalid(struct net_device *dev) spin_unlock_irq(&priv->lock); } +struct classport_info_context { + struct ipoib_dev_priv *priv; + struct completion done; + struct ib_sa_query *sa_query; +}; + +static void classport_info_query_cb(int status, struct ib_class_port_info *rec, + void *context) +{ + struct classport_info_context *cb_ctx = context; + struct ipoib_dev_priv *priv; + + WARN_ON(!context); + + priv = cb_ctx->priv; + + if (status || !rec) { + pr_debug("device: %s failed query classport_info status: %d\n", + priv->dev->name, status); + /* keeps the default, will try next mcast_restart */ + priv->sm_fullmember_sendonly_support = false; + goto out; + } + + if (ib_get_cpi_capmask2(rec) & + IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) { + pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n", + priv->dev->name); + priv->sm_fullmember_sendonly_support = true; + } else { + pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n", + priv->dev->name); + priv->sm_fullmember_sendonly_support = false; + } + +out: + complete(&cb_ctx->done); +} + +int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv) +{ + struct classport_info_context *callback_context; + int ret; + + callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL); + if (!callback_context) + return -ENOMEM; + + callback_context->priv = priv; + init_completion(&callback_context->done); + + ret = ib_sa_classport_info_rec_query(&ipoib_sa_client, + priv->ca, priv->port, 3000, + GFP_KERNEL, + classport_info_query_cb, + callback_context, + &callback_context->sa_query); + if (ret < 0) { + pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n", + priv->dev->name, ret); + kfree(callback_context); + return ret; + } + + /* waiting for the callback to finish before returnning */ + wait_for_completion(&callback_context->done); + kfree(callback_context); + + return ret; +} + void ipoib_flush_paths(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -1649,6 +1723,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_get_vf_config = ipoib_get_vf_config, .ndo_get_vf_stats = ipoib_get_vf_stats, .ndo_set_vf_guid = ipoib_set_vf_guid, + .ndo_set_mac_address = ipoib_set_mac, }; static const struct net_device_ops ipoib_netdev_ops_vf = { @@ -1771,6 +1846,70 @@ int ipoib_add_umcast_attr(struct net_device *dev) return device_create_file(&dev->dev, &dev_attr_umcast); } +static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) +{ + struct ipoib_dev_priv *child_priv; + struct net_device *netdev = priv->dev; + + netif_addr_lock(netdev); + + memcpy(&priv->local_gid.global.interface_id, + &gid->global.interface_id, + sizeof(gid->global.interface_id)); + memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid)); + clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); + + netif_addr_unlock(netdev); + + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { + down_read(&priv->vlan_rwsem); + list_for_each_entry(child_priv, &priv->child_intfs, list) + set_base_guid(child_priv, gid); + up_read(&priv->vlan_rwsem); + } +} + +static int ipoib_check_lladdr(struct net_device *dev, + struct sockaddr_storage *ss) +{ + union ib_gid *gid = (union ib_gid *)(ss->__data + 4); + int ret = 0; + + netif_addr_lock(dev); + + /* Make sure the QPN, reserved and subnet prefix match the current + * lladdr, it also makes sure the lladdr is unicast. + */ + if (memcmp(dev->dev_addr, ss->__data, + 4 + sizeof(gid->global.subnet_prefix)) || + gid->global.interface_id == 0) + ret = -EINVAL; + + netif_addr_unlock(dev); + + return ret; +} + +static int ipoib_set_mac(struct net_device *dev, void *addr) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct sockaddr_storage *ss = addr; + int ret; + + if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) + return -EBUSY; + + ret = ipoib_check_lladdr(dev, ss); + if (ret) + return ret; + + set_base_guid(priv, (union ib_gid *)(ss->__data + 4)); + + queue_work(ipoib_workqueue, &priv->flush_light); + + return 0; +} + static ssize_t create_child(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -1894,6 +2033,7 @@ static struct net_device *ipoib_add_port(const char *format, goto device_init_failed; } else memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); + set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); result = ipoib_dev_init(priv->dev, hca, port); if (result < 0) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 25889311b1e9..82fbc9442608 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -64,6 +64,9 @@ struct ipoib_mcast_iter { unsigned int send_only; }; +/* join state that allows creating mcg with sendonly member request */ +#define SENDONLY_FULLMEMBER_JOIN 8 + /* * This should be called with the priv->lock held */ @@ -326,12 +329,23 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, carrier_on_task); struct ib_port_attr attr; + int ret; if (ib_query_port(priv->ca, priv->port, &attr) || attr.state != IB_PORT_ACTIVE) { ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); return; } + /* + * Check if can send sendonly MCG's with sendonly-fullmember join state. + * It done here after the successfully join to the broadcast group, + * because the broadcast group must always be joined first and is always + * re-joined if the SM changes substantially. + */ + ret = ipoib_check_sm_sendonly_fullmember_support(priv); + if (ret < 0) + pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n", + priv->dev->name, ret); /* * Take rtnl_lock to avoid racing with ipoib_stop() and @@ -515,22 +529,20 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) rec.hop_limit = priv->broadcast->mcmember.hop_limit; /* - * Send-only IB Multicast joins do not work at the core - * IB layer yet, so we can't use them here. However, - * we are emulating an Ethernet multicast send, which - * does not require a multicast subscription and will - * still send properly. The most appropriate thing to + * Send-only IB Multicast joins work at the core IB layer but + * require specific SM support. + * We can use such joins here only if the current SM supports that feature. + * However, if not, we emulate an Ethernet multicast send, + * which does not require a multicast subscription and will + * still send properly. The most appropriate thing to * do is to create the group if it doesn't exist as that * most closely emulates the behavior, from a user space - * application perspecitive, of Ethernet multicast - * operation. For now, we do a full join, maybe later - * when the core IB layers support send only joins we - * will use them. + * application perspective, of Ethernet multicast operation. */ -#if 0 - if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) - rec.join_state = 4; -#endif + if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && + priv->sm_fullmember_sendonly_support) + /* SM supports sendonly-fullmember, otherwise fallback to full-member */ + rec.join_state = SENDONLY_FULLMEMBER_JOIN; } spin_unlock_irq(&priv->lock); @@ -570,11 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } priv->local_lid = port_attr.lid; + netif_addr_lock(dev); - if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL)) - ipoib_warn(priv, "ib_query_gid() failed\n"); - else - memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); + if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { + netif_addr_unlock(dev); + return; + } + netif_addr_unlock(dev); spin_lock_irq(&priv->lock); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index b809c373e40e..1e7cbbaa15bd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -307,5 +307,8 @@ void ipoib_event(struct ib_event_handler *handler, queue_work(ipoib_workqueue, &priv->flush_normal); } else if (record->event == IB_EVENT_PKEY_CHANGE) { queue_work(ipoib_workqueue, &priv->flush_heavy); + } else if (record->event == IB_EVENT_GID_CHANGE && + !test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { + queue_work(ipoib_workqueue, &priv->flush_light); } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index fca1a882de27..64a35595eab8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -68,6 +68,8 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, priv->pkey = pkey; memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); + memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); + set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); priv->dev->broadcast[8] = pkey >> 8; priv->dev->broadcast[9] = pkey & 0xff; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 2843f1ae75bd..887ebadd4774 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -254,8 +254,8 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad) memset(cif, 0, sizeof(*cif)); cif->base_version = 1; cif->class_version = 1; - cif->resp_time_value = 20; + ib_set_cpi_resp_time(cif, 20); mad->mad_hdr.status = 0; } diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 5bac28a3944e..7c197d1a1231 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -66,8 +66,6 @@ source "drivers/staging/nvec/Kconfig" source "drivers/staging/media/Kconfig" -source "drivers/staging/rdma/Kconfig" - source "drivers/staging/android/Kconfig" source "drivers/staging/board/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index a954242b0f2c..a470c7276142 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_FB_XGI) += xgifb/ obj-$(CONFIG_USB_EMXX) += emxx_udc/ obj-$(CONFIG_SPEAKUP) += speakup/ obj-$(CONFIG_MFD_NVEC) += nvec/ -obj-$(CONFIG_STAGING_RDMA) += rdma/ obj-$(CONFIG_ANDROID) += android/ obj-$(CONFIG_STAGING_BOARD) += board/ obj-$(CONFIG_LTE_GDM724X) += gdm724x/ diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig deleted file mode 100644 index f1f3ecadf0fb..000000000000 --- a/drivers/staging/rdma/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -menuconfig STAGING_RDMA - tristate "RDMA staging drivers" - depends on INFINIBAND - depends on PCI || BROKEN - depends on HAS_IOMEM - depends on NET - depends on INET - default n - ---help--- - This option allows you to select a number of RDMA drivers that - fall into one of two categories: deprecated drivers being held - here before finally being removed or new drivers that still need - some work before being moved to the normal RDMA driver area. - - If you wish to work on these drivers, to help improve them, or - to report problems you have with them, please use the - linux-rdma@vger.kernel.org mailing list. - - If in doubt, say N here. - - -# Please keep entries in alphabetic order -if STAGING_RDMA - -source "drivers/staging/rdma/hfi1/Kconfig" - -endif diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile deleted file mode 100644 index 8c7fc1de48a7..000000000000 --- a/drivers/staging/rdma/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# Entries for RDMA_STAGING tree -obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO deleted file mode 100644 index 4c6f1d7d2eaf..000000000000 --- a/drivers/staging/rdma/hfi1/TODO +++ /dev/null @@ -1,6 +0,0 @@ -July, 2015 - -- Remove unneeded file entries in sysfs -- Remove software processing of IB protocol and place in library for use - by qib, ipath (if still present), hfi1, and eventually soft-roce -- Replace incorrect uAPI diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c deleted file mode 100644 index bb2409ad891a..000000000000 --- a/drivers/staging/rdma/hfi1/diag.c +++ /dev/null @@ -1,1925 +0,0 @@ -/* - * Copyright(c) 2015, 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -/* - * This file contains support for diagnostic functions. It is accessed by - * opening the hfi1_diag device, normally minor number 129. Diagnostic use - * of the chip may render the chip or board unusable until the driver - * is unloaded, or in some cases, until the system is rebooted. - * - * Accesses to the chip through this interface are not similar to going - * through the /sys/bus/pci resource mmap interface. - */ - -#include <linux/io.h> -#include <linux/pci.h> -#include <linux/poll.h> -#include <linux/vmalloc.h> -#include <linux/export.h> -#include <linux/fs.h> -#include <linux/uaccess.h> -#include <linux/module.h> -#include <rdma/ib_smi.h> -#include "hfi.h" -#include "device.h" -#include "common.h" -#include "verbs_txreq.h" -#include "trace.h" - -#undef pr_fmt -#define pr_fmt(fmt) DRIVER_NAME ": " fmt -#define snoop_dbg(fmt, ...) \ - hfi1_cdbg(SNOOP, fmt, ##__VA_ARGS__) - -/* Snoop option mask */ -#define SNOOP_DROP_SEND BIT(0) -#define SNOOP_USE_METADATA BIT(1) -#define SNOOP_SET_VL0TOVL15 BIT(2) - -static u8 snoop_flags; - -/* - * Extract packet length from LRH header. - * This is in Dwords so multiply by 4 to get size in bytes - */ -#define HFI1_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2) - -enum hfi1_filter_status { - HFI1_FILTER_HIT, - HFI1_FILTER_ERR, - HFI1_FILTER_MISS -}; - -/* snoop processing functions */ -rhf_rcv_function_ptr snoop_rhf_rcv_functions[8] = { - [RHF_RCV_TYPE_EXPECTED] = snoop_recv_handler, - [RHF_RCV_TYPE_EAGER] = snoop_recv_handler, - [RHF_RCV_TYPE_IB] = snoop_recv_handler, - [RHF_RCV_TYPE_ERROR] = snoop_recv_handler, - [RHF_RCV_TYPE_BYPASS] = snoop_recv_handler, - [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, - [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, - [RHF_RCV_TYPE_INVALID7] = process_receive_invalid -}; - -/* Snoop packet structure */ -struct snoop_packet { - struct list_head list; - u32 total_len; - u8 data[]; -}; - -/* Do not make these an enum or it will blow up the capture_md */ -#define PKT_DIR_EGRESS 0x0 -#define PKT_DIR_INGRESS 0x1 - -/* Packet capture metadata returned to the user with the packet. */ -struct capture_md { - u8 port; - u8 dir; - u8 reserved[6]; - union { - u64 pbc; - u64 rhf; - } u; -}; - -static atomic_t diagpkt_count = ATOMIC_INIT(0); -static struct cdev diagpkt_cdev; -static struct device *diagpkt_device; - -static ssize_t diagpkt_write(struct file *fp, const char __user *data, - size_t count, loff_t *off); - -static const struct file_operations diagpkt_file_ops = { - .owner = THIS_MODULE, - .write = diagpkt_write, - .llseek = noop_llseek, -}; - -/* - * This is used for communication with user space for snoop extended IOCTLs - */ -struct hfi1_link_info { - __be64 node_guid; - u8 port_mode; - u8 port_state; - u16 link_speed_active; - u16 link_width_active; - u16 vl15_init; - u8 port_number; - /* - * Add padding to make this a full IB SMP payload. Note: changing the - * size of this structure will make the IOCTLs created with _IOWR - * change. - * Be sure to run tests on all IOCTLs when making changes to this - * structure. - */ - u8 res[47]; -}; - -/* - * This starts our ioctl sequence numbers *way* off from the ones - * defined in ib_core. - */ -#define SNOOP_CAPTURE_VERSION 0x1 - -#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl-number.txt */ -#define HFI1_SNOOP_IOC_MAGIC IB_IOCTL_MAGIC -#define HFI1_SNOOP_IOC_BASE_SEQ 0x80 - -#define HFI1_SNOOP_IOCGETLINKSTATE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ) -#define HFI1_SNOOP_IOCSETLINKSTATE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 1) -#define HFI1_SNOOP_IOCCLEARQUEUE \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 2) -#define HFI1_SNOOP_IOCCLEARFILTER \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 3) -#define HFI1_SNOOP_IOCSETFILTER \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 4) -#define HFI1_SNOOP_IOCGETVERSION \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 5) -#define HFI1_SNOOP_IOCSET_OPTS \ - _IO(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6) - -/* - * These offsets +6/+7 could change, but these are already known and used - * IOCTL numbers so don't change them without a good reason. - */ -#define HFI1_SNOOP_IOCGETLINKSTATE_EXTRA \ - _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 6, \ - struct hfi1_link_info) -#define HFI1_SNOOP_IOCSETLINKSTATE_EXTRA \ - _IOWR(HFI1_SNOOP_IOC_MAGIC, HFI1_SNOOP_IOC_BASE_SEQ + 7, \ - struct hfi1_link_info) - -static int hfi1_snoop_open(struct inode *in, struct file *fp); -static ssize_t hfi1_snoop_read(struct file *fp, char __user *data, - size_t pkt_len, loff_t *off); -static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data, - size_t count, loff_t *off); -static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); -static unsigned int hfi1_snoop_poll(struct file *fp, - struct poll_table_struct *wait); -static int hfi1_snoop_release(struct inode *in, struct file *fp); - -struct hfi1_packet_filter_command { - int opcode; - int length; - void *value_ptr; -}; - -/* Can't re-use PKT_DIR_*GRESS here because 0 means no packets for this */ -#define HFI1_SNOOP_INGRESS 0x1 -#define HFI1_SNOOP_EGRESS 0x2 - -enum hfi1_packet_filter_opcodes { - FILTER_BY_LID, - FILTER_BY_DLID, - FILTER_BY_MAD_MGMT_CLASS, - FILTER_BY_QP_NUMBER, - FILTER_BY_PKT_TYPE, - FILTER_BY_SERVICE_LEVEL, - FILTER_BY_PKEY, - FILTER_BY_DIRECTION, -}; - -static const struct file_operations snoop_file_ops = { - .owner = THIS_MODULE, - .open = hfi1_snoop_open, - .read = hfi1_snoop_read, - .unlocked_ioctl = hfi1_ioctl, - .poll = hfi1_snoop_poll, - .write = hfi1_snoop_write, - .release = hfi1_snoop_release -}; - -struct hfi1_filter_array { - int (*filter)(void *, void *, void *); -}; - -static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data, - void *value); -static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value); -static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value); - -static const struct hfi1_filter_array hfi1_filters[] = { - { hfi1_filter_lid }, - { hfi1_filter_dlid }, - { hfi1_filter_mad_mgmt_class }, - { hfi1_filter_qp_number }, - { hfi1_filter_ibpacket_type }, - { hfi1_filter_ib_service_level }, - { hfi1_filter_ib_pkey }, - { hfi1_filter_direction }, -}; - -#define HFI1_MAX_FILTERS ARRAY_SIZE(hfi1_filters) -#define HFI1_DIAG_MINOR_BASE 129 - -static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name); - -int hfi1_diag_add(struct hfi1_devdata *dd) -{ - char name[16]; - int ret = 0; - - snprintf(name, sizeof(name), "%s_diagpkt%d", class_name(), - dd->unit); - /* - * Do this for each device as opposed to the normal diagpkt - * interface which is one per host - */ - ret = hfi1_snoop_add(dd, name); - if (ret) - dd_dev_err(dd, "Unable to init snoop/capture device"); - - snprintf(name, sizeof(name), "%s_diagpkt", class_name()); - if (atomic_inc_return(&diagpkt_count) == 1) { - ret = hfi1_cdev_init(HFI1_DIAGPKT_MINOR, name, - &diagpkt_file_ops, &diagpkt_cdev, - &diagpkt_device, false); - } - - return ret; -} - -/* this must be called w/ dd->snoop_in_lock held */ -static void drain_snoop_list(struct list_head *queue) -{ - struct list_head *pos, *q; - struct snoop_packet *packet; - - list_for_each_safe(pos, q, queue) { - packet = list_entry(pos, struct snoop_packet, list); - list_del(pos); - kfree(packet); - } -} - -static void hfi1_snoop_remove(struct hfi1_devdata *dd) -{ - unsigned long flags = 0; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); -} - -void hfi1_diag_remove(struct hfi1_devdata *dd) -{ - hfi1_snoop_remove(dd); - if (atomic_dec_and_test(&diagpkt_count)) - hfi1_cdev_cleanup(&diagpkt_cdev, &diagpkt_device); - hfi1_cdev_cleanup(&dd->diag_cdev, &dd->diag_device); -} - -/* - * Allocated structure shared between the credit return mechanism and - * diagpkt_send(). - */ -struct diagpkt_wait { - struct completion credits_returned; - int code; - atomic_t count; -}; - -/* - * When each side is finished with the structure, they call this. - * The last user frees the structure. - */ -static void put_diagpkt_wait(struct diagpkt_wait *wait) -{ - if (atomic_dec_and_test(&wait->count)) - kfree(wait); -} - -/* - * Callback from the credit return code. Set the complete, which - * will let diapkt_send() continue. - */ -static void diagpkt_complete(void *arg, int code) -{ - struct diagpkt_wait *wait = (struct diagpkt_wait *)arg; - - wait->code = code; - complete(&wait->credits_returned); - put_diagpkt_wait(wait); /* finished with the structure */ -} - -/** - * diagpkt_send - send a packet - * @dp: diag packet descriptor - */ -static ssize_t diagpkt_send(struct diag_pkt *dp) -{ - struct hfi1_devdata *dd; - struct send_context *sc; - struct pio_buf *pbuf; - u32 *tmpbuf = NULL; - ssize_t ret = 0; - u32 pkt_len, total_len; - pio_release_cb credit_cb = NULL; - void *credit_arg = NULL; - struct diagpkt_wait *wait = NULL; - int trycount = 0; - - dd = hfi1_lookup(dp->unit); - if (!dd || !(dd->flags & HFI1_PRESENT) || !dd->kregbase) { - ret = -ENODEV; - goto bail; - } - if (!(dd->flags & HFI1_INITTED)) { - /* no hardware, freeze, etc. */ - ret = -ENODEV; - goto bail; - } - - if (dp->version != _DIAG_PKT_VERS) { - dd_dev_err(dd, "Invalid version %u for diagpkt_write\n", - dp->version); - ret = -EINVAL; - goto bail; - } - - /* send count must be an exact number of dwords */ - if (dp->len & 3) { - ret = -EINVAL; - goto bail; - } - - /* there is only port 1 */ - if (dp->port != 1) { - ret = -EINVAL; - goto bail; - } - - /* need a valid context */ - if (dp->sw_index >= dd->num_send_contexts) { - ret = -EINVAL; - goto bail; - } - /* can only use kernel contexts */ - if (dd->send_contexts[dp->sw_index].type != SC_KERNEL && - dd->send_contexts[dp->sw_index].type != SC_VL15) { - ret = -EINVAL; - goto bail; - } - /* must be allocated */ - sc = dd->send_contexts[dp->sw_index].sc; - if (!sc) { - ret = -EINVAL; - goto bail; - } - /* must be enabled */ - if (!(sc->flags & SCF_ENABLED)) { - ret = -EINVAL; - goto bail; - } - - /* allocate a buffer and copy the data in */ - tmpbuf = vmalloc(dp->len); - if (!tmpbuf) { - ret = -ENOMEM; - goto bail; - } - - if (copy_from_user(tmpbuf, - (const void __user *)(unsigned long)dp->data, - dp->len)) { - ret = -EFAULT; - goto bail; - } - - /* - * pkt_len is how much data we have to write, includes header and data. - * total_len is length of the packet in Dwords plus the PBC should not - * include the CRC. - */ - pkt_len = dp->len >> 2; - total_len = pkt_len + 2; /* PBC + packet */ - - /* if 0, fill in a default */ - if (dp->pbc == 0) { - struct hfi1_pportdata *ppd = dd->pport; - - hfi1_cdbg(PKT, "Generating PBC"); - dp->pbc = create_pbc(ppd, 0, 0, 0, total_len); - } else { - hfi1_cdbg(PKT, "Using passed in PBC"); - } - - hfi1_cdbg(PKT, "Egress PBC content is 0x%llx", dp->pbc); - - /* - * The caller wants to wait until the packet is sent and to - * check for errors. The best we can do is wait until - * the buffer credits are returned and check if any packet - * error has occurred. If there are any late errors, this - * could miss it. If there are other senders who generate - * an error, this may find it. However, in general, it - * should catch most. - */ - if (dp->flags & F_DIAGPKT_WAIT) { - /* always force a credit return */ - dp->pbc |= PBC_CREDIT_RETURN; - /* turn on credit return interrupts */ - sc_add_credit_return_intr(sc); - wait = kmalloc(sizeof(*wait), GFP_KERNEL); - if (!wait) { - ret = -ENOMEM; - goto bail; - } - init_completion(&wait->credits_returned); - atomic_set(&wait->count, 2); - wait->code = PRC_OK; - - credit_cb = diagpkt_complete; - credit_arg = wait; - } - -retry: - pbuf = sc_buffer_alloc(sc, total_len, credit_cb, credit_arg); - if (!pbuf) { - if (trycount == 0) { - /* force a credit return and try again */ - sc_return_credits(sc); - trycount = 1; - goto retry; - } - /* - * No send buffer means no credit callback. Undo - * the wait set-up that was done above. We free wait - * because the callback will never be called. - */ - if (dp->flags & F_DIAGPKT_WAIT) { - sc_del_credit_return_intr(sc); - kfree(wait); - wait = NULL; - } - ret = -ENOSPC; - goto bail; - } - - pio_copy(dd, pbuf, dp->pbc, tmpbuf, pkt_len); - /* no flush needed as the HW knows the packet size */ - - ret = sizeof(*dp); - - if (dp->flags & F_DIAGPKT_WAIT) { - /* wait for credit return */ - ret = wait_for_completion_interruptible( - &wait->credits_returned); - /* - * If the wait returns an error, the wait was interrupted, - * e.g. with a ^C in the user program. The callback is - * still pending. This is OK as the wait structure is - * kmalloc'ed and the structure will free itself when - * all users are done with it. - * - * A context disable occurs on a send context restart, so - * include that in the list of errors below to check for. - * NOTE: PRC_FILL_ERR is at best informational and cannot - * be depended on. - */ - if (!ret && (((wait->code & PRC_STATUS_ERR) || - (wait->code & PRC_FILL_ERR) || - (wait->code & PRC_SC_DISABLE)))) - ret = -EIO; - - put_diagpkt_wait(wait); /* finished with the structure */ - sc_del_credit_return_intr(sc); - } - -bail: - vfree(tmpbuf); - return ret; -} - -static ssize_t diagpkt_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - struct hfi1_devdata *dd; - struct send_context *sc; - u8 vl; - - struct diag_pkt dp; - - if (count != sizeof(dp)) - return -EINVAL; - - if (copy_from_user(&dp, data, sizeof(dp))) - return -EFAULT; - - /* - * The Send Context is derived from the PbcVL value - * if PBC is populated - */ - if (dp.pbc) { - dd = hfi1_lookup(dp.unit); - if (!dd) - return -ENODEV; - vl = (dp.pbc >> PBC_VL_SHIFT) & PBC_VL_MASK; - sc = dd->vld[vl].sc; - if (sc) { - dp.sw_index = sc->sw_index; - hfi1_cdbg( - PKT, - "Packet sent over VL %d via Send Context %u(%u)", - vl, sc->sw_index, sc->hw_context); - } - } - - return diagpkt_send(&dp); -} - -static int hfi1_snoop_add(struct hfi1_devdata *dd, const char *name) -{ - int ret = 0; - - dd->hfi1_snoop.mode_flag = 0; - spin_lock_init(&dd->hfi1_snoop.snoop_lock); - INIT_LIST_HEAD(&dd->hfi1_snoop.queue); - init_waitqueue_head(&dd->hfi1_snoop.waitq); - - ret = hfi1_cdev_init(HFI1_SNOOP_CAPTURE_BASE + dd->unit, name, - &snoop_file_ops, - &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev, - false); - - if (ret) { - dd_dev_err(dd, "Couldn't create %s device: %d", name, ret); - hfi1_cdev_cleanup(&dd->hfi1_snoop.cdev, - &dd->hfi1_snoop.class_dev); - } - - return ret; -} - -static struct hfi1_devdata *hfi1_dd_from_sc_inode(struct inode *in) -{ - int unit = iminor(in) - HFI1_SNOOP_CAPTURE_BASE; - struct hfi1_devdata *dd; - - dd = hfi1_lookup(unit); - return dd; -} - -/* clear or restore send context integrity checks */ -static void adjust_integrity_checks(struct hfi1_devdata *dd) -{ - struct send_context *sc; - unsigned long sc_flags; - int i; - - spin_lock_irqsave(&dd->sc_lock, sc_flags); - for (i = 0; i < dd->num_send_contexts; i++) { - int enable; - - sc = dd->send_contexts[i].sc; - - if (!sc) - continue; /* not allocated */ - - enable = likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) && - dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE; - - set_pio_integrity(sc); - - if (enable) /* take HFI_CAP_* flags into account */ - hfi1_init_ctxt(sc); - } - spin_unlock_irqrestore(&dd->sc_lock, sc_flags); -} - -static int hfi1_snoop_open(struct inode *in, struct file *fp) -{ - int ret; - int mode_flag = 0; - unsigned long flags = 0; - struct hfi1_devdata *dd; - struct list_head *queue; - - mutex_lock(&hfi1_mutex); - - dd = hfi1_dd_from_sc_inode(in); - if (!dd) { - ret = -ENODEV; - goto bail; - } - - /* - * File mode determines snoop or capture. Some existing user - * applications expect the capture device to be able to be opened RDWR - * because they expect a dedicated capture device. For this reason we - * support a module param to force capture mode even if the file open - * mode matches snoop. - */ - if ((fp->f_flags & O_ACCMODE) == O_RDONLY) { - snoop_dbg("Capture Enabled"); - mode_flag = HFI1_PORT_CAPTURE_MODE; - } else if ((fp->f_flags & O_ACCMODE) == O_RDWR) { - snoop_dbg("Snoop Enabled"); - mode_flag = HFI1_PORT_SNOOP_MODE; - } else { - snoop_dbg("Invalid"); - ret = -EINVAL; - goto bail; - } - queue = &dd->hfi1_snoop.queue; - - /* - * We are not supporting snoop and capture at the same time. - */ - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (dd->hfi1_snoop.mode_flag) { - ret = -EBUSY; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - goto bail; - } - - dd->hfi1_snoop.mode_flag = mode_flag; - drain_snoop_list(queue); - - dd->hfi1_snoop.filter_callback = NULL; - dd->hfi1_snoop.filter_value = NULL; - - /* - * Send side packet integrity checks are not helpful when snooping so - * disable and re-enable when we stop snooping. - */ - if (mode_flag == HFI1_PORT_SNOOP_MODE) { - /* clear after snoop mode is on */ - adjust_integrity_checks(dd); /* clear */ - - /* - * We also do not want to be doing the DLID LMC check for - * ingressed packets. - */ - dd->hfi1_snoop.dcc_cfg = read_csr(dd, DCC_CFG_PORT_CONFIG1); - write_csr(dd, DCC_CFG_PORT_CONFIG1, - (dd->hfi1_snoop.dcc_cfg >> 32) << 32); - } - - /* - * As soon as we set these function pointers the recv and send handlers - * are active. This is a race condition so we must make sure to drain - * the queue and init filter values above. Technically we should add - * locking here but all that will happen is on recv a packet will get - * allocated and get stuck on the snoop_lock before getting added to the - * queue. Same goes for send. - */ - dd->rhf_rcv_function_map = snoop_rhf_rcv_functions; - dd->process_pio_send = snoop_send_pio_handler; - dd->process_dma_send = snoop_send_pio_handler; - dd->pio_inline_send = snoop_inline_pio_send; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - ret = 0; - -bail: - mutex_unlock(&hfi1_mutex); - - return ret; -} - -static int hfi1_snoop_release(struct inode *in, struct file *fp) -{ - unsigned long flags = 0; - struct hfi1_devdata *dd; - int mode_flag; - - dd = hfi1_dd_from_sc_inode(in); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - /* clear the snoop mode before re-adjusting send context CSRs */ - mode_flag = dd->hfi1_snoop.mode_flag; - dd->hfi1_snoop.mode_flag = 0; - - /* - * Drain the queue and clear the filters we are done with it. Don't - * forget to restore the packet integrity checks - */ - drain_snoop_list(&dd->hfi1_snoop.queue); - if (mode_flag == HFI1_PORT_SNOOP_MODE) { - /* restore after snoop mode is clear */ - adjust_integrity_checks(dd); /* restore */ - - /* - * Also should probably reset the DCC_CONFIG1 register for DLID - * checking on incoming packets again. Use the value saved when - * opening the snoop device. - */ - write_csr(dd, DCC_CFG_PORT_CONFIG1, dd->hfi1_snoop.dcc_cfg); - } - - dd->hfi1_snoop.filter_callback = NULL; - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = NULL; - - /* - * User is done snooping and capturing, return control to the normal - * handler. Re-enable SDMA handling. - */ - dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions; - dd->process_pio_send = hfi1_verbs_send_pio; - dd->process_dma_send = hfi1_verbs_send_dma; - dd->pio_inline_send = pio_copy; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - - snoop_dbg("snoop/capture device released"); - - return 0; -} - -static unsigned int hfi1_snoop_poll(struct file *fp, - struct poll_table_struct *wait) -{ - int ret = 0; - unsigned long flags = 0; - - struct hfi1_devdata *dd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - poll_wait(fp, &dd->hfi1_snoop.waitq, wait); - if (!list_empty(&dd->hfi1_snoop.queue)) - ret |= POLLIN | POLLRDNORM; - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - return ret; -} - -static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data, - size_t count, loff_t *off) -{ - struct diag_pkt dpkt; - struct hfi1_devdata *dd; - size_t ret; - u8 byte_two, sl, sc5, sc4, vl, byte_one; - struct send_context *sc; - u32 len; - u64 pbc; - struct hfi1_ibport *ibp; - struct hfi1_pportdata *ppd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - ppd = dd->pport; - snoop_dbg("received %lu bytes from user", count); - - memset(&dpkt, 0, sizeof(struct diag_pkt)); - dpkt.version = _DIAG_PKT_VERS; - dpkt.unit = dd->unit; - dpkt.port = 1; - - if (likely(!(snoop_flags & SNOOP_USE_METADATA))) { - /* - * We need to generate the PBC and not let diagpkt_send do it, - * to do this we need the VL and the length in dwords. - * The VL can be determined by using the SL and looking up the - * SC. Then the SC can be converted into VL. The exception to - * this is those packets which are from an SMI queue pair. - * Since we can't detect anything about the QP here we have to - * rely on the SC. If its 0xF then we assume its SMI and - * do not look at the SL. - */ - if (copy_from_user(&byte_one, data, 1)) - return -EINVAL; - - if (copy_from_user(&byte_two, data + 1, 1)) - return -EINVAL; - - sc4 = (byte_one >> 4) & 0xf; - if (sc4 == 0xF) { - snoop_dbg("Detected VL15 packet ignoring SL in packet"); - vl = sc4; - } else { - sl = (byte_two >> 4) & 0xf; - ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1); - sc5 = ibp->sl_to_sc[sl]; - vl = sc_to_vlt(dd, sc5); - if (vl != sc4) { - snoop_dbg("VL %d does not match SC %d of packet", - vl, sc4); - return -EINVAL; - } - } - - sc = dd->vld[vl].sc; /* Look up the context based on VL */ - if (sc) { - dpkt.sw_index = sc->sw_index; - snoop_dbg("Sending on context %u(%u)", sc->sw_index, - sc->hw_context); - } else { - snoop_dbg("Could not find context for vl %d", vl); - return -EINVAL; - } - - len = (count >> 2) + 2; /* Add in PBC */ - pbc = create_pbc(ppd, 0, 0, vl, len); - } else { - if (copy_from_user(&pbc, data, sizeof(pbc))) - return -EINVAL; - vl = (pbc >> PBC_VL_SHIFT) & PBC_VL_MASK; - sc = dd->vld[vl].sc; /* Look up the context based on VL */ - if (sc) { - dpkt.sw_index = sc->sw_index; - } else { - snoop_dbg("Could not find context for vl %d", vl); - return -EINVAL; - } - data += sizeof(pbc); - count -= sizeof(pbc); - } - dpkt.len = count; - dpkt.data = (unsigned long)data; - - snoop_dbg("PBC: vl=0x%llx Length=0x%llx", - (pbc >> 12) & 0xf, - (pbc & 0xfff)); - - dpkt.pbc = pbc; - ret = diagpkt_send(&dpkt); - /* - * diagpkt_send only returns number of bytes in the diagpkt so patch - * that up here before returning. - */ - if (ret == sizeof(dpkt)) - return count; - - return ret; -} - -static ssize_t hfi1_snoop_read(struct file *fp, char __user *data, - size_t pkt_len, loff_t *off) -{ - ssize_t ret = 0; - unsigned long flags = 0; - struct snoop_packet *packet = NULL; - struct hfi1_devdata *dd; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - - while (list_empty(&dd->hfi1_snoop.queue)) { - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - - if (fp->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (wait_event_interruptible( - dd->hfi1_snoop.waitq, - !list_empty(&dd->hfi1_snoop.queue))) - return -EINTR; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - } - - if (!list_empty(&dd->hfi1_snoop.queue)) { - packet = list_entry(dd->hfi1_snoop.queue.next, - struct snoop_packet, list); - list_del(&packet->list); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - if (pkt_len >= packet->total_len) { - if (copy_to_user(data, packet->data, - packet->total_len)) - ret = -EFAULT; - else - ret = packet->total_len; - } else { - ret = -EINVAL; - } - - kfree(packet); - } else { - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - } - - return ret; -} - -/** - * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others - * @ppd : ptr to hfi1 port data - * @value : options from user space - * - * Assumes the rest of the CM credit registers are zero from a - * previous global or credit reset. - * Leave shared count at zero for both global and all vls. - * In snoop mode ideally we don't use shared credits - * Reserve 8.5k for VL15 - * If total credits less than 8.5kbytes return error. - * Divide the rest of the credits across VL0 to VL7 and if - * each of these levels has less than 34 credits (at least 2048 + 128 bytes) - * return with an error. - * The credit registers will be reset to zero on link negotiation or link up - * so this function should be activated from user space only if the port has - * gone past link negotiation and link up. - * - * Return -- 0 if successful else error condition - * - */ -static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd, - int value) -{ -#define OPA_MIN_PER_VL_CREDITS 34 /* 2048 + 128 bytes */ - struct buffer_control t; - int i; - struct hfi1_devdata *dd = ppd->dd; - u16 total_credits = (value >> 16) & 0xffff; - u16 vl15_credits = dd->vl15_init / 2; - u16 per_vl_credits; - __be16 be_per_vl_credits; - - if (!(ppd->host_link_state & HLS_UP)) - goto err_exit; - if (total_credits < vl15_credits) - goto err_exit; - - per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL; - - if (per_vl_credits < OPA_MIN_PER_VL_CREDITS) - goto err_exit; - - memset(&t, 0, sizeof(t)); - be_per_vl_credits = cpu_to_be16(per_vl_credits); - - for (i = 0; i < TXE_NUM_DATA_VL; i++) - t.vl[i].dedicated = be_per_vl_credits; - - t.vl[15].dedicated = cpu_to_be16(vl15_credits); - return set_buffer_control(ppd, &t); - -err_exit: - snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d", - ppd->host_link_state, total_credits, vl15_credits); - - return -EINVAL; -} - -static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) -{ - struct hfi1_devdata *dd; - void *filter_value = NULL; - long ret = 0; - int value = 0; - u8 phys_state = 0; - u8 link_state = 0; - u16 dev_state = 0; - unsigned long flags = 0; - unsigned long *argp = NULL; - struct hfi1_packet_filter_command filter_cmd = {0}; - int mode_flag = 0; - struct hfi1_pportdata *ppd = NULL; - unsigned int index; - struct hfi1_link_info link_info; - int read_cmd, write_cmd, read_ok, write_ok; - - dd = hfi1_dd_from_sc_inode(fp->f_inode); - if (!dd) - return -ENODEV; - - mode_flag = dd->hfi1_snoop.mode_flag; - read_cmd = _IOC_DIR(cmd) & _IOC_READ; - write_cmd = _IOC_DIR(cmd) & _IOC_WRITE; - write_ok = access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd)); - read_ok = access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd)); - - if ((read_cmd && !write_ok) || (write_cmd && !read_ok)) - return -EFAULT; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if ((mode_flag & HFI1_PORT_CAPTURE_MODE) && - (cmd != HFI1_SNOOP_IOCCLEARQUEUE) && - (cmd != HFI1_SNOOP_IOCCLEARFILTER) && - (cmd != HFI1_SNOOP_IOCSETFILTER)) - /* Capture devices are allowed only 3 operations - * 1.Clear capture queue - * 2.Clear capture filter - * 3.Set capture filter - * Other are invalid. - */ - return -EINVAL; - - switch (cmd) { - case HFI1_SNOOP_IOCSETLINKSTATE_EXTRA: - memset(&link_info, 0, sizeof(link_info)); - - if (copy_from_user(&link_info, - (struct hfi1_link_info __user *)arg, - sizeof(link_info))) - return -EFAULT; - - value = link_info.port_state; - index = link_info.port_number; - if (index > dd->num_pports - 1) - return -EINVAL; - - ppd = &dd->pport[index]; - if (!ppd) - return -EINVAL; - - /* What we want to transition to */ - phys_state = (value >> 4) & 0xF; - link_state = value & 0xF; - snoop_dbg("Setting link state 0x%x", value); - - switch (link_state) { - case IB_PORT_NOP: - if (phys_state == 0) - break; - /* fall through */ - case IB_PORT_DOWN: - switch (phys_state) { - case 0: - dev_state = HLS_DN_DOWNDEF; - break; - case 2: - dev_state = HLS_DN_POLL; - break; - case 3: - dev_state = HLS_DN_DISABLE; - break; - default: - return -EINVAL; - } - ret = set_link_state(ppd, dev_state); - break; - case IB_PORT_ARMED: - ret = set_link_state(ppd, HLS_UP_ARMED); - if (!ret) - send_idle_sma(dd, SMA_IDLE_ARM); - break; - case IB_PORT_ACTIVE: - ret = set_link_state(ppd, HLS_UP_ACTIVE); - if (!ret) - send_idle_sma(dd, SMA_IDLE_ACTIVE); - break; - default: - return -EINVAL; - } - - if (ret) - break; - /* fall through */ - case HFI1_SNOOP_IOCGETLINKSTATE: - case HFI1_SNOOP_IOCGETLINKSTATE_EXTRA: - if (cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) { - memset(&link_info, 0, sizeof(link_info)); - if (copy_from_user(&link_info, - (struct hfi1_link_info __user *)arg, - sizeof(link_info))) - return -EFAULT; - index = link_info.port_number; - } else { - ret = __get_user(index, (int __user *)arg); - if (ret != 0) - break; - } - - if (index > dd->num_pports - 1) - return -EINVAL; - - ppd = &dd->pport[index]; - if (!ppd) - return -EINVAL; - - value = hfi1_ibphys_portstate(ppd); - value <<= 4; - value |= driver_lstate(ppd); - - snoop_dbg("Link port | Link State: %d", value); - - if ((cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) || - (cmd == HFI1_SNOOP_IOCSETLINKSTATE_EXTRA)) { - link_info.port_state = value; - link_info.node_guid = cpu_to_be64(ppd->guid); - link_info.link_speed_active = - ppd->link_speed_active; - link_info.link_width_active = - ppd->link_width_active; - if (copy_to_user((struct hfi1_link_info __user *)arg, - &link_info, sizeof(link_info))) - return -EFAULT; - } else { - ret = __put_user(value, (int __user *)arg); - } - break; - - case HFI1_SNOOP_IOCCLEARQUEUE: - snoop_dbg("Clearing snoop queue"); - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - - case HFI1_SNOOP_IOCCLEARFILTER: - snoop_dbg("Clearing filter"); - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (dd->hfi1_snoop.filter_callback) { - /* Drain packets first */ - drain_snoop_list(&dd->hfi1_snoop.queue); - dd->hfi1_snoop.filter_callback = NULL; - } - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = NULL; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - - case HFI1_SNOOP_IOCSETFILTER: - snoop_dbg("Setting filter"); - /* just copy command structure */ - argp = (unsigned long *)arg; - if (copy_from_user(&filter_cmd, (void __user *)argp, - sizeof(filter_cmd))) - return -EFAULT; - - if (filter_cmd.opcode >= HFI1_MAX_FILTERS) { - pr_alert("Invalid opcode in request\n"); - return -EINVAL; - } - - snoop_dbg("Opcode %d Len %d Ptr %p", - filter_cmd.opcode, filter_cmd.length, - filter_cmd.value_ptr); - - filter_value = kcalloc(filter_cmd.length, sizeof(u8), - GFP_KERNEL); - if (!filter_value) - return -ENOMEM; - - /* copy remaining data from userspace */ - if (copy_from_user((u8 *)filter_value, - (void __user *)filter_cmd.value_ptr, - filter_cmd.length)) { - kfree(filter_value); - return -EFAULT; - } - /* Drain packets first */ - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - drain_snoop_list(&dd->hfi1_snoop.queue); - dd->hfi1_snoop.filter_callback = - hfi1_filters[filter_cmd.opcode].filter; - /* just in case we see back to back sets */ - kfree(dd->hfi1_snoop.filter_value); - dd->hfi1_snoop.filter_value = filter_value; - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - break; - case HFI1_SNOOP_IOCGETVERSION: - value = SNOOP_CAPTURE_VERSION; - snoop_dbg("Getting version: %d", value); - ret = __put_user(value, (int __user *)arg); - break; - case HFI1_SNOOP_IOCSET_OPTS: - snoop_flags = 0; - ret = __get_user(value, (int __user *)arg); - if (ret != 0) - break; - - snoop_dbg("Setting snoop option %d", value); - if (value & SNOOP_DROP_SEND) - snoop_flags |= SNOOP_DROP_SEND; - if (value & SNOOP_USE_METADATA) - snoop_flags |= SNOOP_USE_METADATA; - if (value & (SNOOP_SET_VL0TOVL15)) { - ppd = &dd->pport[0]; /* first port will do */ - ret = hfi1_assign_snoop_link_credits(ppd, value); - } - break; - default: - return -ENOTTY; - } - - return ret; -} - -static void snoop_list_add_tail(struct snoop_packet *packet, - struct hfi1_devdata *dd) -{ - unsigned long flags = 0; - - spin_lock_irqsave(&dd->hfi1_snoop.snoop_lock, flags); - if (likely((dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) || - (dd->hfi1_snoop.mode_flag & HFI1_PORT_CAPTURE_MODE))) { - list_add_tail(&packet->list, &dd->hfi1_snoop.queue); - snoop_dbg("Added packet to list"); - } - - /* - * Technically we can could have closed the snoop device while waiting - * on the above lock and it is gone now. The snoop mode_flag will - * prevent us from adding the packet to the queue though. - */ - - spin_unlock_irqrestore(&dd->hfi1_snoop.snoop_lock, flags); - wake_up_interruptible(&dd->hfi1_snoop.waitq); -} - -static inline int hfi1_filter_check(void *val, const char *msg) -{ - if (!val) { - snoop_dbg("Error invalid %s value for filter", msg); - return HFI1_FILTER_ERR; - } - return 0; -} - -static int hfi1_filter_lid(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - hdr = (struct hfi1_ib_header *)ibhdr; - - if (*((u16 *)value) == be16_to_cpu(hdr->lrh[3])) /* matches slid */ - return HFI1_FILTER_HIT; /* matched */ - - return HFI1_FILTER_MISS; /* Not matched */ -} - -static int hfi1_filter_dlid(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - if (*((u16 *)value) == be16_to_cpu(hdr->lrh[1])) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -/* Not valid for outgoing packets, send handler passes null for data*/ -static int hfi1_filter_mad_mgmt_class(void *ibhdr, void *packet_data, - void *value) -{ - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - struct ib_smp *smp = NULL; - u32 qpn = 0; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(packet_data, "packet_data"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - /* Check for GRH */ - if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; /* LRH + BTH + DETH */ - else - ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */ - - qpn = be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF; - if (qpn <= 1) { - smp = (struct ib_smp *)packet_data; - if (*((u8 *)value) == smp->mgmt_class) - return HFI1_FILTER_HIT; - else - return HFI1_FILTER_MISS; - } - return HFI1_FILTER_ERR; -} - -static int hfi1_filter_qp_number(void *ibhdr, void *packet_data, void *value) -{ - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - /* Check for GRH */ - if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; /* LRH + BTH + DETH */ - else - ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */ - if (*((u32 *)value) == (be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ibpacket_type(void *ibhdr, void *packet_data, - void *value) -{ - u32 lnh = 0; - u8 opcode = 0; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - lnh = (be16_to_cpu(hdr->lrh[0]) & 3); - - if (lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else - return HFI1_FILTER_ERR; - - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - - if (*((u8 *)value) == ((opcode >> 5) & 0x7)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data, - void *value) -{ - struct hfi1_ib_header *hdr; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - if ((*((u8 *)value)) == ((be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value) -{ - u32 lnh = 0; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr = NULL; - int ret; - - ret = hfi1_filter_check(ibhdr, "header"); - if (ret) - return ret; - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - hdr = (struct hfi1_ib_header *)ibhdr; - - lnh = (be16_to_cpu(hdr->lrh[0]) & 3); - if (lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else - return HFI1_FILTER_ERR; - - /* P_key is 16-bit entity, however top most bit indicates - * type of membership. 0 for limited and 1 for Full. - * Limited members cannot accept information from other - * Limited members, but communication is allowed between - * every other combination of membership. - * Hence we'll omit comparing top-most bit while filtering - */ - - if ((*(u16 *)value & 0x7FFF) == - ((be32_to_cpu(ohdr->bth[0])) & 0x7FFF)) - return HFI1_FILTER_HIT; - - return HFI1_FILTER_MISS; -} - -/* - * If packet_data is NULL then this is coming from one of the send functions. - * Thus we know if its an ingressed or egressed packet. - */ -static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value) -{ - u8 user_dir = *(u8 *)value; - int ret; - - ret = hfi1_filter_check(value, "user"); - if (ret) - return ret; - - if (packet_data) { - /* Incoming packet */ - if (user_dir & HFI1_SNOOP_INGRESS) - return HFI1_FILTER_HIT; - } else { - /* Outgoing packet */ - if (user_dir & HFI1_SNOOP_EGRESS) - return HFI1_FILTER_HIT; - } - - return HFI1_FILTER_MISS; -} - -/* - * Allocate a snoop packet. The structure that is stored in the ring buffer, not - * to be confused with an hfi packet type. - */ -static struct snoop_packet *allocate_snoop_packet(u32 hdr_len, - u32 data_len, - u32 md_len) -{ - struct snoop_packet *packet; - - packet = kzalloc(sizeof(*packet) + hdr_len + data_len - + md_len, - GFP_ATOMIC | __GFP_NOWARN); - if (likely(packet)) - INIT_LIST_HEAD(&packet->list); - - return packet; -} - -/* - * Instead of having snoop and capture code intermixed with the recv functions, - * both the interrupt handler and hfi1_ib_rcv() we are going to hijack the call - * and land in here for snoop/capture but if not enabled the call will go - * through as before. This gives us a single point to constrain all of the snoop - * snoop recv logic. There is nothing special that needs to happen for bypass - * packets. This routine should not try to look into the packet. It just copied - * it. There is no guarantee for filters when it comes to bypass packets as - * there is no specific support. Bottom line is this routine does now even know - * what a bypass packet is. - */ -int snoop_recv_handler(struct hfi1_packet *packet) -{ - struct hfi1_pportdata *ppd = packet->rcd->ppd; - struct hfi1_ib_header *hdr = packet->hdr; - int header_size = packet->hlen; - void *data = packet->ebuf; - u32 tlen = packet->tlen; - struct snoop_packet *s_packet = NULL; - int ret; - int snoop_mode = 0; - u32 md_len = 0; - struct capture_md md; - - snoop_dbg("PACKET IN: hdr size %d tlen %d data %p", header_size, tlen, - data); - - trace_snoop_capture(ppd->dd, header_size, hdr, tlen - header_size, - data); - - if (!ppd->dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set"); - ret = HFI1_FILTER_HIT; - } else { - ret = ppd->dd->hfi1_snoop.filter_callback(hdr, data, - ppd->dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - break; - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - break; - case HFI1_FILTER_HIT: - - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - s_packet = allocate_snoop_packet(header_size, - tlen - header_size, - md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n"); - break; - } - - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_INGRESS; - md.u.rhf = packet->rhf; - memcpy(s_packet->data, &md, md_len); - } - - /* We should always have a header */ - if (hdr) { - memcpy(s_packet->data + md_len, hdr, header_size); - } else { - dd_dev_err(ppd->dd, "Unable to copy header to snoop/capture packet\n"); - kfree(s_packet); - break; - } - - /* - * Packets with no data are possible. If there is no data needed - * to take care of the last 4 bytes which are normally included - * with data buffers and are included in tlen. Since we kzalloc - * the buffer we do not need to set any values but if we decide - * not to use kzalloc we should zero them. - */ - if (data) - memcpy(s_packet->data + header_size + md_len, data, - tlen - header_size); - - s_packet->total_len = tlen + md_len; - snoop_list_add_tail(s_packet, ppd->dd); - - /* - * If we are snooping the packet not capturing then throw away - * after adding to the list. - */ - snoop_dbg("Capturing packet"); - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) { - snoop_dbg("Throwing packet away"); - /* - * If we are dropping the packet we still may need to - * handle the case where error flags are set, this is - * normally done by the type specific handler but that - * won't be called in this case. - */ - if (unlikely(rhf_err_flags(packet->rhf))) - handle_eflags(packet); - - /* throw the packet on the floor */ - return RHF_RCV_CONTINUE; - } - break; - default: - break; - } - - /* - * We do not care what type of packet came in here - just pass it off - * to the normal handler. - */ - return ppd->dd->normal_rhf_rcv_functions[rhf_rcv_type(packet->rhf)] - (packet); -} - -/* - * Handle snooping and capturing packets when sdma is being used. - */ -int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc) -{ - pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n"); - snoop_dbg("Unsupported Operation"); - return hfi1_verbs_send_dma(qp, ps, 0); -} - -/* - * Handle snooping and capturing packets when pio is being used. Does not handle - * bypass packets. The only way to send a bypass packet currently is to use the - * diagpkt interface. When that interface is enable snoop/capture is not. - */ -int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc) -{ - u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; - u32 dwords = (len + 3) >> 2; - u32 plen = hdrwords + dwords + 2; /* includes pbc */ - struct hfi1_pportdata *ppd = ps->ppd; - struct snoop_packet *s_packet = NULL; - u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; - u32 length = 0; - struct rvt_sge_state temp_ss; - void *data = NULL; - void *data_start = NULL; - int ret; - int snoop_mode = 0; - int md_len = 0; - struct capture_md md; - u32 vl; - u32 hdr_len = hdrwords << 2; - u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr); - - md.u.pbc = 0; - - snoop_dbg("PACKET OUT: hdrword %u len %u plen %u dwords %u tlen %u", - hdrwords, len, plen, dwords, tlen); - if (ppd->dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - /* not using ss->total_len as arg 2 b/c that does not count CRC */ - s_packet = allocate_snoop_packet(hdr_len, tlen - hdr_len, md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(ppd->dd, "Unable to allocate snoop/capture packet\n"); - goto out; - } - - s_packet->total_len = tlen + md_len; - - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_EGRESS; - if (likely(pbc == 0)) { - vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12; - md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen); - } else { - md.u.pbc = 0; - } - memcpy(s_packet->data, &md, md_len); - } else { - md.u.pbc = pbc; - } - - /* Copy header */ - if (likely(hdr)) { - memcpy(s_packet->data + md_len, hdr, hdr_len); - } else { - dd_dev_err(ppd->dd, - "Unable to copy header to snoop/capture packet\n"); - kfree(s_packet); - goto out; - } - - if (ss) { - data = s_packet->data + hdr_len + md_len; - data_start = data; - - /* - * Copy SGE State - * The update_sge() function below will not modify the - * individual SGEs in the array. It will make a copy each time - * and operate on that. So we only need to copy this instance - * and it won't impact PIO. - */ - temp_ss = *ss; - length = len; - - snoop_dbg("Need to copy %d bytes", length); - while (length) { - void *addr = temp_ss.sge.vaddr; - u32 slen = temp_ss.sge.length; - - if (slen > length) { - slen = length; - snoop_dbg("slen %d > len %d", slen, length); - } - snoop_dbg("copy %d to %p", slen, addr); - memcpy(data, addr, slen); - update_sge(&temp_ss, slen); - length -= slen; - data += slen; - snoop_dbg("data is now %p bytes left %d", data, length); - } - snoop_dbg("Completed SGE copy"); - } - - /* - * Why do the filter check down here? Because the event tracing has its - * own filtering and we need to have the walked the SGE list. - */ - if (!ppd->dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set\n"); - ret = HFI1_FILTER_HIT; - } else { - ret = ppd->dd->hfi1_snoop.filter_callback( - &ps->s_txreq->phdr.hdr, - NULL, - ppd->dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - /* fall through */ - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - kfree(s_packet); - break; - case HFI1_FILTER_HIT: - snoop_dbg("Capturing packet"); - snoop_list_add_tail(s_packet, ppd->dd); - - if (unlikely((snoop_flags & SNOOP_DROP_SEND) && - (ppd->dd->hfi1_snoop.mode_flag & - HFI1_PORT_SNOOP_MODE))) { - unsigned long flags; - - snoop_dbg("Dropping packet"); - if (qp->s_wqe) { - spin_lock_irqsave(&qp->s_lock, flags); - hfi1_send_complete( - qp, - qp->s_wqe, - IB_WC_SUCCESS); - spin_unlock_irqrestore(&qp->s_lock, flags); - } else if (qp->ibqp.qp_type == IB_QPT_RC) { - spin_lock_irqsave(&qp->s_lock, flags); - hfi1_rc_send_complete(qp, - &ps->s_txreq->phdr.hdr); - spin_unlock_irqrestore(&qp->s_lock, flags); - } - - /* - * If snoop is dropping the packet we need to put the - * txreq back because no one else will. - */ - hfi1_put_txreq(ps->s_txreq); - return 0; - } - break; - default: - kfree(s_packet); - break; - } -out: - return hfi1_verbs_send_pio(qp, ps, md.u.pbc); -} - -/* - * Callers of this must pass a hfi1_ib_header type for the from ptr. Currently - * this can be used anywhere, but the intention is for inline ACKs for RC and - * CCA packets. We don't restrict this usage though. - */ -void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf, - u64 pbc, const void *from, size_t count) -{ - int snoop_mode = 0; - int md_len = 0; - struct capture_md md; - struct snoop_packet *s_packet = NULL; - - /* - * count is in dwords so we need to convert to bytes. - * We also need to account for CRC which would be tacked on by hardware. - */ - int packet_len = (count << 2) + 4; - int ret; - - snoop_dbg("ACK OUT: len %d", packet_len); - - if (!dd->hfi1_snoop.filter_callback) { - snoop_dbg("filter not set"); - ret = HFI1_FILTER_HIT; - } else { - ret = dd->hfi1_snoop.filter_callback( - (struct hfi1_ib_header *)from, - NULL, - dd->hfi1_snoop.filter_value); - } - - switch (ret) { - case HFI1_FILTER_ERR: - snoop_dbg("Error in filter call"); - /* fall through */ - case HFI1_FILTER_MISS: - snoop_dbg("Filter Miss"); - break; - case HFI1_FILTER_HIT: - snoop_dbg("Capturing packet"); - if (dd->hfi1_snoop.mode_flag & HFI1_PORT_SNOOP_MODE) - snoop_mode = 1; - if ((snoop_mode == 0) || - unlikely(snoop_flags & SNOOP_USE_METADATA)) - md_len = sizeof(struct capture_md); - - s_packet = allocate_snoop_packet(packet_len, 0, md_len); - - if (unlikely(!s_packet)) { - dd_dev_warn_ratelimited(dd, "Unable to allocate snoop/capture packet\n"); - goto inline_pio_out; - } - - s_packet->total_len = packet_len + md_len; - - /* Fill in the metadata for the packet */ - if (md_len > 0) { - memset(&md, 0, sizeof(struct capture_md)); - md.port = 1; - md.dir = PKT_DIR_EGRESS; - md.u.pbc = pbc; - memcpy(s_packet->data, &md, md_len); - } - - /* Add the packet data which is a single buffer */ - memcpy(s_packet->data + md_len, from, packet_len); - - snoop_list_add_tail(s_packet, dd); - - if (unlikely((snoop_flags & SNOOP_DROP_SEND) && snoop_mode)) { - snoop_dbg("Dropping packet"); - return; - } - break; - default: - break; - } - -inline_pio_out: - pio_copy(dd, pbuf, pbc, from, count); -} diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c deleted file mode 100644 index bd8771570f81..000000000000 --- a/drivers/staging/rdma/hfi1/eprom.c +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Copyright(c) 2015, 2016 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ -#include <linux/delay.h> -#include "hfi.h" -#include "common.h" -#include "eprom.h" - -/* - * The EPROM is logically divided into three partitions: - * partition 0: the first 128K, visible from PCI ROM BAR - * partition 1: 4K config file (sector size) - * partition 2: the rest - */ -#define P0_SIZE (128 * 1024) -#define P1_SIZE (4 * 1024) -#define P1_START P0_SIZE -#define P2_START (P0_SIZE + P1_SIZE) - -/* erase sizes supported by the controller */ -#define SIZE_4KB (4 * 1024) -#define MASK_4KB (SIZE_4KB - 1) - -#define SIZE_32KB (32 * 1024) -#define MASK_32KB (SIZE_32KB - 1) - -#define SIZE_64KB (64 * 1024) -#define MASK_64KB (SIZE_64KB - 1) - -/* controller page size, in bytes */ -#define EP_PAGE_SIZE 256 -#define EEP_PAGE_MASK (EP_PAGE_SIZE - 1) - -/* controller commands */ -#define CMD_SHIFT 24 -#define CMD_NOP (0) -#define CMD_PAGE_PROGRAM(addr) ((0x02 << CMD_SHIFT) | addr) -#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr) -#define CMD_READ_SR1 ((0x05 << CMD_SHIFT)) -#define CMD_WRITE_ENABLE ((0x06 << CMD_SHIFT)) -#define CMD_SECTOR_ERASE_4KB(addr) ((0x20 << CMD_SHIFT) | addr) -#define CMD_SECTOR_ERASE_32KB(addr) ((0x52 << CMD_SHIFT) | addr) -#define CMD_CHIP_ERASE ((0x60 << CMD_SHIFT)) -#define CMD_READ_MANUF_DEV_ID ((0x90 << CMD_SHIFT)) -#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT)) -#define CMD_SECTOR_ERASE_64KB(addr) ((0xd8 << CMD_SHIFT) | addr) - -/* controller interface speeds */ -#define EP_SPEED_FULL 0x2 /* full speed */ - -/* controller status register 1 bits */ -#define SR1_BUSY 0x1ull /* the BUSY bit in SR1 */ - -/* sleep length while waiting for controller */ -#define WAIT_SLEEP_US 100 /* must be larger than 5 (see usage) */ -#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US)) - -/* GPIO pins */ -#define EPROM_WP_N BIT_ULL(14) /* EPROM write line */ - -/* - * How long to wait for the EPROM to become available, in ms. - * The spec 32 Mb EPROM takes around 40s to erase then write. - * Double it for safety. - */ -#define EPROM_TIMEOUT 80000 /* ms */ - -/* - * Turn on external enable line that allows writing on the flash. - */ -static void write_enable(struct hfi1_devdata *dd) -{ - /* raise signal */ - write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N); - /* raise enable */ - write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N); -} - -/* - * Turn off external enable line that allows writing on the flash. - */ -static void write_disable(struct hfi1_devdata *dd) -{ - /* lower signal */ - write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N); - /* lower enable */ - write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N); -} - -/* - * Wait for the device to become not busy. Must be called after all - * write or erase operations. - */ -static int wait_for_not_busy(struct hfi1_devdata *dd) -{ - unsigned long count = 0; - u64 reg; - int ret = 0; - - /* starts page mode */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_SR1); - while (1) { - udelay(WAIT_SLEEP_US); - usleep_range(WAIT_SLEEP_US - 5, WAIT_SLEEP_US + 5); - count++; - reg = read_csr(dd, ASIC_EEP_DATA); - if ((reg & SR1_BUSY) == 0) - break; - /* 200s is the largest time for a 128Mb device */ - if (count > COUNT_DELAY_SEC(200)) { - dd_dev_err(dd, "waited too long for SPI FLASH busy to clear - failing\n"); - ret = -ETIMEDOUT; - break; /* break, not goto - must stop page mode */ - } - } - - /* stop page mode with a NOP */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); - - return ret; -} - -/* - * Read the device ID from the SPI controller. - */ -static u32 read_device_id(struct hfi1_devdata *dd) -{ - /* read the Manufacture Device ID */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_MANUF_DEV_ID); - return (u32)read_csr(dd, ASIC_EEP_DATA); -} - -/* - * Erase the whole flash. - */ -static int erase_chip(struct hfi1_devdata *dd) -{ - int ret; - - write_enable(dd); - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_CHIP_ERASE); - ret = wait_for_not_busy(dd); - - write_disable(dd); - - return ret; -} - -/* - * Erase a range. - */ -static int erase_range(struct hfi1_devdata *dd, u32 start, u32 len) -{ - u32 end = start + len; - int ret = 0; - - if (end < start) - return -EINVAL; - - /* check the end points for the minimum erase */ - if ((start & MASK_4KB) || (end & MASK_4KB)) { - dd_dev_err(dd, - "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n", - __func__, start, end); - return -EINVAL; - } - - write_enable(dd); - - while (start < end) { - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - /* check in order of largest to smallest */ - if (((start & MASK_64KB) == 0) && (start + SIZE_64KB <= end)) { - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_64KB(start)); - start += SIZE_64KB; - } else if (((start & MASK_32KB) == 0) && - (start + SIZE_32KB <= end)) { - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_32KB(start)); - start += SIZE_32KB; - } else { /* 4KB will work */ - write_csr(dd, ASIC_EEP_ADDR_CMD, - CMD_SECTOR_ERASE_4KB(start)); - start += SIZE_4KB; - } - ret = wait_for_not_busy(dd); - if (ret) - goto done; - } - -done: - write_disable(dd); - - return ret; -} - -/* - * Read a 256 byte (64 dword) EPROM page. - * All callers have verified the offset is at a page boundary. - */ -static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result) -{ - int i; - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset)); - for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++) - result[i] = (u32)read_csr(dd, ASIC_EEP_DATA); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */ -} - -/* - * Read length bytes starting at offset. Copy to user address addr. - */ -static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) -{ - u32 offset; - u32 buffer[EP_PAGE_SIZE / sizeof(u32)]; - int ret = 0; - - /* reject anything not on an EPROM page boundary */ - if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK)) - return -EINVAL; - - for (offset = 0; offset < len; offset += EP_PAGE_SIZE) { - read_page(dd, start + offset, buffer); - if (copy_to_user((void __user *)(addr + offset), - buffer, EP_PAGE_SIZE)) { - ret = -EFAULT; - goto done; - } - } - -done: - return ret; -} - -/* - * Write a 256 byte (64 dword) EPROM page. - * All callers have verified the offset is at a page boundary. - */ -static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data) -{ - int i; - - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE); - write_csr(dd, ASIC_EEP_DATA, data[0]); - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset)); - for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++) - write_csr(dd, ASIC_EEP_DATA, data[i]); - /* will close the open page */ - return wait_for_not_busy(dd); -} - -/* - * Write length bytes starting at offset. Read from user address addr. - */ -static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr) -{ - u32 offset; - u32 buffer[EP_PAGE_SIZE / sizeof(u32)]; - int ret = 0; - - /* reject anything not on an EPROM page boundary */ - if ((start & EEP_PAGE_MASK) || (len & EEP_PAGE_MASK)) - return -EINVAL; - - write_enable(dd); - - for (offset = 0; offset < len; offset += EP_PAGE_SIZE) { - if (copy_from_user(buffer, (void __user *)(addr + offset), - EP_PAGE_SIZE)) { - ret = -EFAULT; - goto done; - } - ret = write_page(dd, start + offset, buffer); - if (ret) - goto done; - } - -done: - write_disable(dd); - return ret; -} - -/* convert an range composite to a length, in bytes */ -static inline u32 extract_rlen(u32 composite) -{ - return (composite & 0xffff) * EP_PAGE_SIZE; -} - -/* convert an range composite to a start, in bytes */ -static inline u32 extract_rstart(u32 composite) -{ - return (composite >> 16) * EP_PAGE_SIZE; -} - -/* - * Perform the given operation on the EPROM. Called from user space. The - * user credentials have already been checked. - * - * Return 0 on success, -ERRNO on error - */ -int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd) -{ - struct hfi1_devdata *dd; - u32 dev_id; - u32 rlen; /* range length */ - u32 rstart; /* range start */ - int i_minor; - int ret = 0; - - /* - * Map the device file to device data using the relative minor. - * The device file minor number is the unit number + 1. 0 is - * the generic device file - reject it. - */ - i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - if (i_minor <= 0) - return -EINVAL; - dd = hfi1_lookup(i_minor - 1); - if (!dd) { - pr_err("%s: cannot find unit %d!\n", __func__, i_minor); - return -EINVAL; - } - - /* some devices do not have an EPROM */ - if (!dd->eprom_available) - return -EOPNOTSUPP; - - ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); - if (ret) { - dd_dev_err(dd, "%s: unable to acquire EPROM resource\n", - __func__); - goto done_asic; - } - - dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n", - __func__, cmd->type, cmd->len, cmd->addr); - - switch (cmd->type) { - case HFI1_CMD_EP_INFO: - if (cmd->len != sizeof(u32)) { - ret = -ERANGE; - break; - } - dev_id = read_device_id(dd); - /* addr points to a u32 user buffer */ - if (copy_to_user((void __user *)cmd->addr, &dev_id, - sizeof(u32))) - ret = -EFAULT; - break; - - case HFI1_CMD_EP_ERASE_CHIP: - ret = erase_chip(dd); - break; - - case HFI1_CMD_EP_ERASE_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = erase_range(dd, rstart, rlen); - break; - - case HFI1_CMD_EP_READ_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = read_length(dd, rstart, rlen, cmd->addr); - break; - - case HFI1_CMD_EP_WRITE_RANGE: - rlen = extract_rlen(cmd->len); - rstart = extract_rstart(cmd->len); - ret = write_length(dd, rstart, rlen, cmd->addr); - break; - - default: - dd_dev_err(dd, "%s: unexpected command %d\n", - __func__, cmd->type); - ret = -EINVAL; - break; - } - - release_chip_resource(dd, CR_EPROM); -done_asic: - return ret; -} - -/* - * Initialize the EPROM handler. - */ -int eprom_init(struct hfi1_devdata *dd) -{ - int ret = 0; - - /* only the discrete chip has an EPROM */ - if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0) - return 0; - - /* - * It is OK if both HFIs reset the EPROM as long as they don't - * do it at the same time. - */ - ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); - if (ret) { - dd_dev_err(dd, - "%s: unable to acquire EPROM resource, no EPROM support\n", - __func__); - goto done_asic; - } - - /* reset EPROM to be sure it is in a good state */ - - /* set reset */ - write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK); - /* clear reset, set speed */ - write_csr(dd, ASIC_EEP_CTL_STAT, - EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT); - - /* wake the device with command "release powerdown NoID" */ - write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID); - - dd->eprom_available = true; - release_chip_resource(dd, CR_EPROM); -done_asic: - return ret; -} diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 37dd534cbeab..c8a773ffe23b 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -239,12 +239,15 @@ struct ib_vendor_mad { #define IB_MGMT_CLASSPORTINFO_ATTR_ID cpu_to_be16(0x0001) +#define IB_CLASS_PORT_INFO_RESP_TIME_MASK 0x1F +#define IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE 5 + struct ib_class_port_info { u8 base_version; u8 class_version; __be16 capability_mask; - u8 reserved[3]; - u8 resp_time_value; + /* 27 bits for cap_mask2, 5 bits for resp_time */ + __be32 cap_mask2_resp_time; u8 redirect_gid[16]; __be32 redirect_tcslfl; __be16 redirect_lid; @@ -259,6 +262,59 @@ struct ib_class_port_info { __be32 trap_qkey; }; +/** + * ib_get_cpi_resp_time - Returns the resp_time value from + * cap_mask2_resp_time in ib_class_port_info. + * @cpi: A struct ib_class_port_info mad. + */ +static inline u8 ib_get_cpi_resp_time(struct ib_class_port_info *cpi) +{ + return (u8)(be32_to_cpu(cpi->cap_mask2_resp_time) & + IB_CLASS_PORT_INFO_RESP_TIME_MASK); +} + +/** + * ib_set_cpi_resptime - Sets the response time in an + * ib_class_port_info mad. + * @cpi: A struct ib_class_port_info. + * @rtime: The response time to set. + */ +static inline void ib_set_cpi_resp_time(struct ib_class_port_info *cpi, + u8 rtime) +{ + cpi->cap_mask2_resp_time = + (cpi->cap_mask2_resp_time & + cpu_to_be32(~IB_CLASS_PORT_INFO_RESP_TIME_MASK)) | + cpu_to_be32(rtime & IB_CLASS_PORT_INFO_RESP_TIME_MASK); +} + +/** + * ib_get_cpi_capmask2 - Returns the capmask2 value from + * cap_mask2_resp_time in ib_class_port_info. + * @cpi: A struct ib_class_port_info mad. + */ +static inline u32 ib_get_cpi_capmask2(struct ib_class_port_info *cpi) +{ + return (be32_to_cpu(cpi->cap_mask2_resp_time) >> + IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE); +} + +/** + * ib_set_cpi_capmask2 - Sets the capmask2 in an + * ib_class_port_info mad. + * @cpi: A struct ib_class_port_info. + * @capmask2: The capmask2 to set. + */ +static inline void ib_set_cpi_capmask2(struct ib_class_port_info *cpi, + u32 capmask2) +{ + cpi->cap_mask2_resp_time = + (cpi->cap_mask2_resp_time & + cpu_to_be32(IB_CLASS_PORT_INFO_RESP_TIME_MASK)) | + cpu_to_be32(capmask2 << + IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE); +} + struct ib_mad_notice_attr { u8 generic_type; u8 prod_type_msb; diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index 0f3daae44bf9..b13419ce99ff 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -103,6 +103,9 @@ enum { IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, IB_OPCODE_COMPARE_SWAP = 0x13, IB_OPCODE_FETCH_ADD = 0x14, + /* opcode 0x15 is reserved */ + IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16, + IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17, /* real constants follow -- see comment about above IB_OPCODE() macro for more details */ @@ -129,6 +132,8 @@ enum { IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), IB_OPCODE(RC, COMPARE_SWAP), IB_OPCODE(RC, FETCH_ADD), + IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE), + IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE), /* UC */ IB_OPCODE(UC, SEND_FIRST), diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index cdc1c81aa275..384041669489 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -94,6 +94,8 @@ enum ib_sa_selector { IB_SA_BEST = 3 }; +#define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12) + /* * Structures for SA records are named "struct ib_sa_xxx_rec." No * attempt is made to pack structures to match the physical layout of @@ -439,4 +441,14 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, void *context, struct ib_sa_query **sa_query); +/* Support get SA ClassPortInfo */ +int ib_sa_classport_info_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_class_port_info *resp, + void *context), + void *context, + struct ib_sa_query **sa_query); + #endif /* IB_SA_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index fc0320c004a3..432bed510369 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -403,56 +403,55 @@ enum ib_port_speed { IB_SPEED_EDR = 32 }; -struct ib_protocol_stats { - /* TBD... */ -}; - -struct iw_protocol_stats { - u64 ipInReceives; - u64 ipInHdrErrors; - u64 ipInTooBigErrors; - u64 ipInNoRoutes; - u64 ipInAddrErrors; - u64 ipInUnknownProtos; - u64 ipInTruncatedPkts; - u64 ipInDiscards; - u64 ipInDelivers; - u64 ipOutForwDatagrams; - u64 ipOutRequests; - u64 ipOutDiscards; - u64 ipOutNoRoutes; - u64 ipReasmTimeout; - u64 ipReasmReqds; - u64 ipReasmOKs; - u64 ipReasmFails; - u64 ipFragOKs; - u64 ipFragFails; - u64 ipFragCreates; - u64 ipInMcastPkts; - u64 ipOutMcastPkts; - u64 ipInBcastPkts; - u64 ipOutBcastPkts; - - u64 tcpRtoAlgorithm; - u64 tcpRtoMin; - u64 tcpRtoMax; - u64 tcpMaxConn; - u64 tcpActiveOpens; - u64 tcpPassiveOpens; - u64 tcpAttemptFails; - u64 tcpEstabResets; - u64 tcpCurrEstab; - u64 tcpInSegs; - u64 tcpOutSegs; - u64 tcpRetransSegs; - u64 tcpInErrs; - u64 tcpOutRsts; -}; - -union rdma_protocol_stats { - struct ib_protocol_stats ib; - struct iw_protocol_stats iw; -}; +/** + * struct rdma_hw_stats + * @timestamp - Used by the core code to track when the last update was + * @lifespan - Used by the core code to determine how old the counters + * should be before being updated again. Stored in jiffies, defaults + * to 10 milliseconds, drivers can override the default be specifying + * their own value during their allocation routine. + * @name - Array of pointers to static names used for the counters in + * directory. + * @num_counters - How many hardware counters there are. If name is + * shorter than this number, a kernel oops will result. Driver authors + * are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters) + * in their code to prevent this. + * @value - Array of u64 counters that are accessed by the sysfs code and + * filled in by the drivers get_stats routine + */ +struct rdma_hw_stats { + unsigned long timestamp; + unsigned long lifespan; + const char * const *names; + int num_counters; + u64 value[]; +}; + +#define RDMA_HW_STATS_DEFAULT_LIFESPAN 10 +/** + * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct + * for drivers. + * @names - Array of static const char * + * @num_counters - How many elements in array + * @lifespan - How many milliseconds between updates + */ +static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( + const char * const *names, int num_counters, + unsigned long lifespan) +{ + struct rdma_hw_stats *stats; + + stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64), + GFP_KERNEL); + if (!stats) + return NULL; + stats->names = names; + stats->num_counters = num_counters; + stats->lifespan = msecs_to_jiffies(lifespan); + + return stats; +} + /* Define bits for the various functionality this port needs to be supported by * the core. @@ -1707,8 +1706,29 @@ struct ib_device { struct iw_cm_verbs *iwcm; - int (*get_protocol_stats)(struct ib_device *device, - union rdma_protocol_stats *stats); + /** + * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the + * driver initialized data. The struct is kfree()'ed by the sysfs + * core when the device is removed. A lifespan of -1 in the return + * struct tells the core to set a default lifespan. + */ + struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device, + u8 port_num); + /** + * get_hw_stats - Fill in the counter value(s) in the stats struct. + * @index - The index in the value array we wish to have updated, or + * num_counters if we want all stats updated + * Return codes - + * < 0 - Error, no counters updated + * index - Updated the single counter pointed to by index + * num_counters - Updated all counters (will reset the timestamp + * and prevent further calls for lifespan milliseconds) + * Drivers are allowed to update all counters in leiu of just the + * one given in index at their option + */ + int (*get_hw_stats)(struct ib_device *device, + struct rdma_hw_stats *stats, + u8 port, int index); int (*query_device)(struct ib_device *device, struct ib_device_attr *device_attr, struct ib_udata *udata); @@ -1926,6 +1946,8 @@ struct ib_device { u8 node_type; u8 phys_port_cnt; struct ib_device_attr attrs; + struct attribute_group *hw_stats_ag; + struct rdma_hw_stats *hw_stats; /** * The following mandatory functions are used only at device diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index d57ceee90d26..16274e2133cd 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -149,15 +149,15 @@ struct rvt_driver_params { int qpn_res_end; int nports; int npkeys; - u8 qos_shift; char cq_name[RVT_CQN_MAX]; int node; - int max_rdma_atomic; int psn_mask; int psn_shift; int psn_modify_mask; u32 core_cap_flags; u32 max_mad_size; + u8 qos_shift; + u8 max_rdma_atomic; }; /* Protection domain */ @@ -426,6 +426,15 @@ static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) } /* + * Return the max atomic suitable for determining + * the size of the ack ring buffer in a QP. + */ +static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi) +{ + return rdi->dparms.max_rdma_atomic + 1; +} + +/* * Return the indexed PKEY from the port PKEY table. */ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 0e1ff2abfe92..6d23b879416a 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -211,8 +211,6 @@ struct rvt_mmap_info { unsigned size; }; -#define RVT_MAX_RDMA_ATOMIC 16 - /* * This structure holds the information that the send tasklet needs * to send a RDMA read response or atomic operation. @@ -282,8 +280,7 @@ struct rvt_qp { atomic_t refcount ____cacheline_aligned_in_smp; wait_queue_head_t wait; - struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] - ____cacheline_aligned_in_smp; + struct rvt_ack_entry *s_ack_queue; struct rvt_sge_state s_rdma_read_sge; spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index a533cecab14f..98bebf8bef55 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -66,7 +66,7 @@ * The major version changes when data structures change in an incompatible * way. The driver must be the same for initialization to succeed. */ -#define HFI1_USER_SWMAJOR 5 +#define HFI1_USER_SWMAJOR 6 /* * Minor version differences are always compatible @@ -75,7 +75,12 @@ * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define HFI1_USER_SWMINOR 0 +#define HFI1_USER_SWMINOR 1 + +/* + * We will encode the major/minor inside a single 32bit version number. + */ +#define HFI1_SWMAJOR_SHIFT 16 /* * Set of HW and driver capability/feature bits. @@ -107,19 +112,6 @@ #define HFI1_RCVHDR_ENTSIZE_16 (1UL << 1) #define HFI1_RCVDHR_ENTSIZE_32 (1UL << 2) -/* - * If the unit is specified via open, HFI choice is fixed. If port is - * specified, it's also fixed. Otherwise we try to spread contexts - * across ports and HFIs, using different algorithms. WITHIN is - * the old default, prior to this mechanism. - */ -#define HFI1_ALG_ACROSS 0 /* round robin contexts across HFIs, then - * ports; this is the default */ -#define HFI1_ALG_WITHIN 1 /* use all contexts on an HFI (round robin - * active ports within), then next HFI */ -#define HFI1_ALG_COUNT 2 /* number of algorithm choices */ - - /* User commands. */ #define HFI1_CMD_ASSIGN_CTXT 1 /* allocate HFI and context */ #define HFI1_CMD_CTXT_INFO 2 /* find out what resources we got */ @@ -127,7 +119,6 @@ #define HFI1_CMD_TID_UPDATE 4 /* update expected TID entries */ #define HFI1_CMD_TID_FREE 5 /* free expected TID entries */ #define HFI1_CMD_CREDIT_UPD 6 /* force an update of PIO credit */ -#define HFI1_CMD_SDMA_STATUS_UPD 7 /* force update of SDMA status ring */ #define HFI1_CMD_RECV_CTRL 8 /* control receipt of packets */ #define HFI1_CMD_POLL_TYPE 9 /* set the kind of polling we want */ @@ -135,13 +126,46 @@ #define HFI1_CMD_SET_PKEY 11 /* set context's pkey */ #define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */ #define HFI1_CMD_TID_INVAL_READ 13 /* read TID cache invalidations */ -/* separate EPROM commands from normal PSM commands */ -#define HFI1_CMD_EP_INFO 64 /* read EPROM device ID */ -#define HFI1_CMD_EP_ERASE_CHIP 65 /* erase whole EPROM */ -/* range 66-74 no longer used */ -#define HFI1_CMD_EP_ERASE_RANGE 75 /* erase EPROM range */ -#define HFI1_CMD_EP_READ_RANGE 76 /* read EPROM range */ -#define HFI1_CMD_EP_WRITE_RANGE 77 /* write EPROM range */ +#define HFI1_CMD_GET_VERS 14 /* get the version of the user cdev */ + +/* + * User IOCTLs can not go above 128 if they do then see common.h and change the + * base for the snoop ioctl + */ +#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl/ioctl-number.txt */ + +/* + * Make the ioctls occupy the last 0xf0-0xff portion of the IB range + */ +#define __NUM(cmd) (HFI1_CMD_##cmd + 0xe0) + +struct hfi1_cmd; +#define HFI1_IOCTL_ASSIGN_CTXT \ + _IOWR(IB_IOCTL_MAGIC, __NUM(ASSIGN_CTXT), struct hfi1_user_info) +#define HFI1_IOCTL_CTXT_INFO \ + _IOW(IB_IOCTL_MAGIC, __NUM(CTXT_INFO), struct hfi1_ctxt_info) +#define HFI1_IOCTL_USER_INFO \ + _IOW(IB_IOCTL_MAGIC, __NUM(USER_INFO), struct hfi1_base_info) +#define HFI1_IOCTL_TID_UPDATE \ + _IOWR(IB_IOCTL_MAGIC, __NUM(TID_UPDATE), struct hfi1_tid_info) +#define HFI1_IOCTL_TID_FREE \ + _IOWR(IB_IOCTL_MAGIC, __NUM(TID_FREE), struct hfi1_tid_info) +#define HFI1_IOCTL_CREDIT_UPD \ + _IO(IB_IOCTL_MAGIC, __NUM(CREDIT_UPD)) +#define HFI1_IOCTL_RECV_CTRL \ + _IOW(IB_IOCTL_MAGIC, __NUM(RECV_CTRL), int) +#define HFI1_IOCTL_POLL_TYPE \ + _IOW(IB_IOCTL_MAGIC, __NUM(POLL_TYPE), int) +#define HFI1_IOCTL_ACK_EVENT \ + _IOW(IB_IOCTL_MAGIC, __NUM(ACK_EVENT), unsigned long) +#define HFI1_IOCTL_SET_PKEY \ + _IOW(IB_IOCTL_MAGIC, __NUM(SET_PKEY), __u16) +#define HFI1_IOCTL_CTXT_RESET \ + _IO(IB_IOCTL_MAGIC, __NUM(CTXT_RESET)) +#define HFI1_IOCTL_TID_INVAL_READ \ + _IOWR(IB_IOCTL_MAGIC, __NUM(TID_INVAL_READ), struct hfi1_tid_info) +#define HFI1_IOCTL_GET_VERS \ + _IOR(IB_IOCTL_MAGIC, __NUM(GET_VERS), int) #define _HFI1_EVENT_FROZEN_BIT 0 #define _HFI1_EVENT_LINKDOWN_BIT 1 @@ -199,9 +223,7 @@ struct hfi1_user_info { * Should be set to HFI1_USER_SWVERSION. */ __u32 userversion; - __u16 pad; - /* HFI selection algorithm, if unit has not selected */ - __u16 hfi1_alg; + __u32 pad; /* * If two or more processes wish to share a context, each process * must set the subcontext_cnt and subcontext_id to the same @@ -243,12 +265,6 @@ struct hfi1_tid_info { __u32 length; }; -struct hfi1_cmd { - __u32 type; /* command type */ - __u32 len; /* length of struct pointed to by add */ - __u64 addr; /* pointer to user structure */ -}; - enum hfi1_sdma_comp_state { FREE = 0, QUEUED, diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 6e373d151cad..02fe8390c18f 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -135,10 +135,12 @@ enum { * Local service operations: * RESOLVE - The client requests the local service to resolve a path. * SET_TIMEOUT - The local service requests the client to set the timeout. + * IP_RESOLVE - The client requests the local service to resolve an IP to GID. */ enum { RDMA_NL_LS_OP_RESOLVE = 0, RDMA_NL_LS_OP_SET_TIMEOUT, + RDMA_NL_LS_OP_IP_RESOLVE, RDMA_NL_LS_NUM_OPS }; @@ -176,6 +178,10 @@ struct rdma_ls_resolve_header { __u8 path_use; }; +struct rdma_ls_ip_resolve_header { + __u32 ifindex; +}; + /* Local service attribute type */ #define RDMA_NLA_F_MANDATORY (1 << 13) #define RDMA_NLA_TYPE_MASK (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER | \ @@ -193,6 +199,8 @@ struct rdma_ls_resolve_header { * TCLASS u8 * PKEY u16 cpu * QOS_CLASS u16 cpu + * IPV4 u32 BE + * IPV6 u8[16] BE */ enum { LS_NLA_TYPE_UNSPEC = 0, @@ -204,6 +212,8 @@ enum { LS_NLA_TYPE_TCLASS, LS_NLA_TYPE_PKEY, LS_NLA_TYPE_QOS_CLASS, + LS_NLA_TYPE_IPV4, + LS_NLA_TYPE_IPV6, LS_NLA_TYPE_MAX }; |