author      Linus Torvalds <torvalds@linux-foundation.org>  2019-07-15 20:38:15 -0700
committer   Linus Torvalds <torvalds@linux-foundation.org>  2019-07-15 20:38:15 -0700
commit      2a3c389a0fde49b241430df806a34276568cfb29 (patch)
tree        9cf35829317e8cc2aaffc4341fb824dad63fce02 /drivers
parent      8de262531f5fbb7458463224a7587429800c24bf (diff)
parent      0b043644c0ca601cb19943a81aa1f1455dbe9461 (diff)
download    linux-2a3c389a0fde49b241430df806a34276568cfb29.tar.bz2
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"A smaller cycle this time. Notably we see another new driver, 'Soft
iWarp', and the deletion of an ancient unused driver for nes.
- Revise and simplify the signature offload RDMA MR APIs
- More progress on hoisting object allocation boiler plate code out
of the drivers
- Driver bug fixes and revisions for hns, hfi1, efa, cxgb4, qib,
i40iw
- Tree wide cleanups: struct_size, put_user_page, xarray, rst doc
conversion
- Removal of obsolete ib_ucm chardev and nes driver
- netlink based discovery of chardevs and autoloading of the modules
providing them
- Move more of the rdmavt/hfi1 uapi to include/uapi/rdma
- New driver 'siw' for software based iWarp running on top of netdev,
much like rxe's software RoCE.
- mlx5 feature to report events in their raw devx format to userspace
- Expose per-object counters through rdma tool
- Adaptive interrupt moderation for RDMA (DIM), sharing the DIM core
from netdev"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (194 commits)
RMDA/siw: Require a 64 bit arch
RDMA/siw: Mark expected switch fall-throughs
RDMA/core: Fix -Wunused-const-variable warnings
rdma/siw: Remove set but not used variable 's'
rdma/siw: Add missing dependencies on LIBCRC32C and DMA_VIRT_OPS
RDMA/siw: Add missing rtnl_lock around access to ifa
rdma/siw: Use proper enumerated type in map_cqe_status
RDMA/siw: Remove unnecessary kthread create/destroy printouts
IB/rdmavt: Fix variable shadowing issue in rvt_create_cq
RDMA/core: Fix race when resolving IP address
RDMA/core: Make rdma_counter.h compile stand alone
IB/core: Work on the caller socket net namespace in nldev_newlink()
RDMA/rxe: Fill in wc byte_len with IB_WC_RECV_RDMA_WITH_IMM
RDMA/mlx5: Set RDMA DIM to be enabled by default
RDMA/nldev: Added configuration of RDMA dynamic interrupt moderation to netlink
RDMA/core: Provide RDMA DIM support for ULPs
linux/dim: Implement RDMA adaptive moderation (DIM)
IB/mlx5: Report correctly tag matching rendezvous capability
docs: infiniband: add it to the driver-api bookset
IB/mlx5: Implement VHCA tunnel mechanism in DEVX
...
Diffstat (limited to 'drivers')
185 files changed, 17580 insertions, 24118 deletions
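The new drivers/infiniband/core/counters.c below implements the per-port counter modes that userspace drives through the RDMA_NLDEV_CMD_STAT_* netlink commands (handled by nldev_stat_set_doit() and friends further down in nldev.c). A minimal sketch of the in-kernel semantics, using only helpers added in this file and assuming a valid ib_device whose driver implements the counter ops; example_enable_auto_counters() is an illustration, not code from the series:

#include <rdma/ib_verbs.h>
#include <rdma/rdma_counter.h>

/*
 * Illustration only: turn on per-QP-type auto counters on one port and
 * read the resulting mode back, which is what the nldev handlers do on
 * behalf of the userspace rdma tool.
 */
static int example_enable_auto_counters(struct ib_device *dev, u8 port)
{
	enum rdma_nl_counter_mode mode;
	enum rdma_nl_counter_mask mask;
	int ret;

	/* New QPs on this port get bound to a shared counter per QP type. */
	ret = rdma_counter_set_auto_mode(dev, port, true,
					 RDMA_COUNTER_MASK_QP_TYPE);
	if (ret)
		return ret;

	return rdma_counter_get_mode(dev, port, &mode, &mask);
}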
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 8ba41cbf1869..85e103b147cc 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -7,6 +7,7 @@ menuconfig INFINIBAND depends on m || IPV6 != m depends on !ALPHA select IRQ_POLL + select DIMLIB ---help--- Core support for InfiniBand (IB). Make sure to also select any protocols you wish to use as well as drivers for your @@ -36,17 +37,6 @@ config INFINIBAND_USER_ACCESS libibverbs, libibcm and a hardware driver library from rdma-core <https://github.com/linux-rdma/rdma-core>. -config INFINIBAND_USER_ACCESS_UCM - tristate "Userspace CM (UCM, DEPRECATED)" - depends on BROKEN || COMPILE_TEST - depends on INFINIBAND_USER_ACCESS - help - The UCM module has known security flaws, which no one is - interested to fix. The user-space part of this code was - dropped from the upstream a long time ago. - - This option is DEPRECATED and planned to be removed. - config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)" depends on INFINIBAND_USER_ACCESS @@ -98,7 +88,6 @@ source "drivers/infiniband/hw/efa/Kconfig" source "drivers/infiniband/hw/i40iw/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" source "drivers/infiniband/hw/mlx5/Kconfig" -source "drivers/infiniband/hw/nes/Kconfig" source "drivers/infiniband/hw/ocrdma/Kconfig" source "drivers/infiniband/hw/vmw_pvrdma/Kconfig" source "drivers/infiniband/hw/usnic/Kconfig" @@ -108,6 +97,7 @@ source "drivers/infiniband/hw/hfi1/Kconfig" source "drivers/infiniband/hw/qedr/Kconfig" source "drivers/infiniband/sw/rdmavt/Kconfig" source "drivers/infiniband/sw/rxe/Kconfig" +source "drivers/infiniband/sw/siw/Kconfig" endif source "drivers/infiniband/ulp/ipoib/Kconfig" diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 313f2349b518..09881bd5f12d 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -6,13 +6,12 @@ obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \ $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y) -obj-$(CONFIG_INFINIBAND_USER_ACCESS_UCM) += ib_ucm.o $(user_access-y) ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ - nldev.o restrack.o + nldev.o restrack.o counters.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o @@ -29,8 +28,6 @@ rdma_ucm-y := ucma.o ib_umad-y := user_mad.o -ib_ucm-y := ucm.o - ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ uverbs_std_types_cq.o \ diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 2f7d14159841..9b76a8fcdd24 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -337,7 +337,7 @@ static int dst_fetch_ha(const struct dst_entry *dst, neigh_event_send(n, NULL); ret = -ENODATA; } else { - memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN); + neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev); } neigh_release(n); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index ff40a450b5d2..888d89ce81df 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -60,6 +60,7 @@ extern bool ib_devices_shared_netns; int 
ib_device_register_sysfs(struct ib_device *device); void ib_device_unregister_sysfs(struct ib_device *device); int ib_device_rename(struct ib_device *ibdev, const char *name); +int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim); typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); @@ -88,6 +89,15 @@ typedef int (*nldev_callback)(struct ib_device *device, int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, struct netlink_callback *cb); +struct ib_client_nl_info { + struct sk_buff *nl_msg; + struct device *cdev; + unsigned int port; + u64 abi; +}; +int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name, + struct ib_client_nl_info *res); + enum ib_cache_gid_default_mode { IB_CACHE_GID_DEFAULT_MODE_SET, IB_CACHE_GID_DEFAULT_MODE_DELETE diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c new file mode 100644 index 000000000000..01faef7bc061 --- /dev/null +++ b/drivers/infiniband/core/counters.c @@ -0,0 +1,634 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019 Mellanox Technologies. All rights reserved. + */ +#include <rdma/ib_verbs.h> +#include <rdma/rdma_counter.h> + +#include "core_priv.h" +#include "restrack.h" + +#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE) + +static int __counter_set_mode(struct rdma_counter_mode *curr, + enum rdma_nl_counter_mode new_mode, + enum rdma_nl_counter_mask new_mask) +{ + if ((new_mode == RDMA_COUNTER_MODE_AUTO) && + ((new_mask & (~ALL_AUTO_MODE_MASKS)) || + (curr->mode != RDMA_COUNTER_MODE_NONE))) + return -EINVAL; + + curr->mode = new_mode; + curr->mask = new_mask; + return 0; +} + +/** + * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode + * + * When @on is true, the @mask must be set; When @on is false, it goes + * into manual mode if there's any counter, so that the user is able to + * manually access them. 
+ */ +int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, + bool on, enum rdma_nl_counter_mask mask) +{ + struct rdma_port_counter *port_counter; + int ret; + + port_counter = &dev->port_data[port].port_counter; + mutex_lock(&port_counter->lock); + if (on) { + ret = __counter_set_mode(&port_counter->mode, + RDMA_COUNTER_MODE_AUTO, mask); + } else { + if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) { + ret = -EINVAL; + goto out; + } + + if (port_counter->num_counters) + ret = __counter_set_mode(&port_counter->mode, + RDMA_COUNTER_MODE_MANUAL, 0); + else + ret = __counter_set_mode(&port_counter->mode, + RDMA_COUNTER_MODE_NONE, 0); + } + +out: + mutex_unlock(&port_counter->lock); + return ret; +} + +static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, + enum rdma_nl_counter_mode mode) +{ + struct rdma_port_counter *port_counter; + struct rdma_counter *counter; + int ret; + + if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats) + return NULL; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return NULL; + + counter->device = dev; + counter->port = port; + counter->res.type = RDMA_RESTRACK_COUNTER; + counter->stats = dev->ops.counter_alloc_stats(counter); + if (!counter->stats) + goto err_stats; + + port_counter = &dev->port_data[port].port_counter; + mutex_lock(&port_counter->lock); + if (mode == RDMA_COUNTER_MODE_MANUAL) { + ret = __counter_set_mode(&port_counter->mode, + RDMA_COUNTER_MODE_MANUAL, 0); + if (ret) + goto err_mode; + } + + port_counter->num_counters++; + mutex_unlock(&port_counter->lock); + + counter->mode.mode = mode; + kref_init(&counter->kref); + mutex_init(&counter->lock); + + return counter; + +err_mode: + mutex_unlock(&port_counter->lock); + kfree(counter->stats); +err_stats: + kfree(counter); + return NULL; +} + +static void rdma_counter_free(struct rdma_counter *counter) +{ + struct rdma_port_counter *port_counter; + + port_counter = &counter->device->port_data[counter->port].port_counter; + mutex_lock(&port_counter->lock); + port_counter->num_counters--; + if (!port_counter->num_counters && + (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL)) + __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_NONE, + 0); + + mutex_unlock(&port_counter->lock); + + rdma_restrack_del(&counter->res); + kfree(counter->stats); + kfree(counter); +} + +static void auto_mode_init_counter(struct rdma_counter *counter, + const struct ib_qp *qp, + enum rdma_nl_counter_mask new_mask) +{ + struct auto_mode_param *param = &counter->mode.param; + + counter->mode.mode = RDMA_COUNTER_MODE_AUTO; + counter->mode.mask = new_mask; + + if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) + param->qp_type = qp->qp_type; +} + +static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, + enum rdma_nl_counter_mask auto_mask) +{ + struct auto_mode_param *param = &counter->mode.param; + bool match = true; + + if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) + return false; + + /* Ensure that counter belong to right PID */ + if (!rdma_is_kernel_res(&counter->res) && + !rdma_is_kernel_res(&qp->res) && + (task_pid_vnr(counter->res.task) != current->pid)) + return false; + + if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) + match &= (param->qp_type == qp->qp_type); + + return match; +} + +static int __rdma_counter_bind_qp(struct rdma_counter *counter, + struct ib_qp *qp) +{ + int ret; + + if (qp->counter) + return -EINVAL; + + if (!qp->device->ops.counter_bind_qp) + return -EOPNOTSUPP; + + 
mutex_lock(&counter->lock); + ret = qp->device->ops.counter_bind_qp(counter, qp); + mutex_unlock(&counter->lock); + + return ret; +} + +static int __rdma_counter_unbind_qp(struct ib_qp *qp) +{ + struct rdma_counter *counter = qp->counter; + int ret; + + if (!qp->device->ops.counter_unbind_qp) + return -EOPNOTSUPP; + + mutex_lock(&counter->lock); + ret = qp->device->ops.counter_unbind_qp(qp); + mutex_unlock(&counter->lock); + + return ret; +} + +static void counter_history_stat_update(const struct rdma_counter *counter) +{ + struct ib_device *dev = counter->device; + struct rdma_port_counter *port_counter; + int i; + + port_counter = &dev->port_data[counter->port].port_counter; + if (!port_counter->hstats) + return; + + for (i = 0; i < counter->stats->num_counters; i++) + port_counter->hstats->value[i] += counter->stats->value[i]; +} + +/** + * rdma_get_counter_auto_mode - Find the counter that @qp should be bound + * with in auto mode + * + * Return: The counter (with ref-count increased) if found + */ +static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, + u8 port) +{ + struct rdma_port_counter *port_counter; + struct rdma_counter *counter = NULL; + struct ib_device *dev = qp->device; + struct rdma_restrack_entry *res; + struct rdma_restrack_root *rt; + unsigned long id = 0; + + port_counter = &dev->port_data[port].port_counter; + rt = &dev->res[RDMA_RESTRACK_COUNTER]; + xa_lock(&rt->xa); + xa_for_each(&rt->xa, id, res) { + if (!rdma_is_visible_in_pid_ns(res)) + continue; + + counter = container_of(res, struct rdma_counter, res); + if ((counter->device != qp->device) || (counter->port != port)) + goto next; + + if (auto_mode_match(qp, counter, port_counter->mode.mask)) + break; +next: + counter = NULL; + } + + if (counter && !kref_get_unless_zero(&counter->kref)) + counter = NULL; + + xa_unlock(&rt->xa); + return counter; +} + +static void rdma_counter_res_add(struct rdma_counter *counter, + struct ib_qp *qp) +{ + if (rdma_is_kernel_res(&qp->res)) { + rdma_restrack_set_task(&counter->res, qp->res.kern_name); + rdma_restrack_kadd(&counter->res); + } else { + rdma_restrack_attach_task(&counter->res, qp->res.task); + rdma_restrack_uadd(&counter->res); + } +} + +static void counter_release(struct kref *kref) +{ + struct rdma_counter *counter; + + counter = container_of(kref, struct rdma_counter, kref); + counter_history_stat_update(counter); + counter->device->ops.counter_dealloc(counter); + rdma_counter_free(counter); +} + +/** + * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on + * the auto-mode rule + */ +int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) +{ + struct rdma_port_counter *port_counter; + struct ib_device *dev = qp->device; + struct rdma_counter *counter; + int ret; + + if (!rdma_is_port_valid(dev, port)) + return -EINVAL; + + port_counter = &dev->port_data[port].port_counter; + if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) + return 0; + + counter = rdma_get_counter_auto_mode(qp, port); + if (counter) { + ret = __rdma_counter_bind_qp(counter, qp); + if (ret) { + kref_put(&counter->kref, counter_release); + return ret; + } + } else { + counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO); + if (!counter) + return -ENOMEM; + + auto_mode_init_counter(counter, qp, port_counter->mode.mask); + + ret = __rdma_counter_bind_qp(counter, qp); + if (ret) { + rdma_counter_free(counter); + return ret; + } + + rdma_counter_res_add(counter, qp); + } + + return 0; +} + +/** + * rdma_counter_unbind_qp - Unbind a qp from a 
counter + * @force: + * true - Decrease the counter ref-count anyway (e.g., qp destroy) + */ +int rdma_counter_unbind_qp(struct ib_qp *qp, bool force) +{ + struct rdma_counter *counter = qp->counter; + int ret; + + if (!counter) + return -EINVAL; + + ret = __rdma_counter_unbind_qp(qp); + if (ret && !force) + return ret; + + kref_put(&counter->kref, counter_release); + return 0; +} + +int rdma_counter_query_stats(struct rdma_counter *counter) +{ + struct ib_device *dev = counter->device; + int ret; + + if (!dev->ops.counter_update_stats) + return -EINVAL; + + mutex_lock(&counter->lock); + ret = dev->ops.counter_update_stats(counter); + mutex_unlock(&counter->lock); + + return ret; +} + +static u64 get_running_counters_hwstat_sum(struct ib_device *dev, + u8 port, u32 index) +{ + struct rdma_restrack_entry *res; + struct rdma_restrack_root *rt; + struct rdma_counter *counter; + unsigned long id = 0; + u64 sum = 0; + + rt = &dev->res[RDMA_RESTRACK_COUNTER]; + xa_lock(&rt->xa); + xa_for_each(&rt->xa, id, res) { + if (!rdma_restrack_get(res)) + continue; + + xa_unlock(&rt->xa); + + counter = container_of(res, struct rdma_counter, res); + if ((counter->device != dev) || (counter->port != port) || + rdma_counter_query_stats(counter)) + goto next; + + sum += counter->stats->value[index]; + +next: + xa_lock(&rt->xa); + rdma_restrack_put(res); + } + + xa_unlock(&rt->xa); + return sum; +} + +/** + * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a + * specific port, including the running ones and history data + */ +u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index) +{ + struct rdma_port_counter *port_counter; + u64 sum; + + port_counter = &dev->port_data[port].port_counter; + sum = get_running_counters_hwstat_sum(dev, port, index); + sum += port_counter->hstats->value[index]; + + return sum; +} + +static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) +{ + struct rdma_restrack_entry *res = NULL; + struct ib_qp *qp = NULL; + + res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num); + if (IS_ERR(res)) + return NULL; + + if (!rdma_is_visible_in_pid_ns(res)) + goto err; + + qp = container_of(res, struct ib_qp, res); + if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) + goto err; + + return qp; + +err: + rdma_restrack_put(&qp->res); + return NULL; +} + +static int rdma_counter_bind_qp_manual(struct rdma_counter *counter, + struct ib_qp *qp) +{ + if ((counter->device != qp->device) || (counter->port != qp->port)) + return -EINVAL; + + return __rdma_counter_bind_qp(counter, qp); +} + +static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, + u32 counter_id) +{ + struct rdma_restrack_entry *res; + struct rdma_counter *counter; + + res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id); + if (IS_ERR(res)) + return NULL; + + if (!rdma_is_visible_in_pid_ns(res)) { + rdma_restrack_put(res); + return NULL; + } + + counter = container_of(res, struct rdma_counter, res); + kref_get(&counter->kref); + rdma_restrack_put(res); + + return counter; +} + +/** + * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id + */ +int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, + u32 qp_num, u32 counter_id) +{ + struct rdma_counter *counter; + struct ib_qp *qp; + int ret; + + qp = rdma_counter_get_qp(dev, qp_num); + if (!qp) + return -ENOENT; + + counter = rdma_get_counter_by_id(dev, counter_id); + if (!counter) { + ret = -ENOENT; + goto err; + } + + if (counter->res.task != qp->res.task) { + 
ret = -EINVAL; + goto err_task; + } + + ret = rdma_counter_bind_qp_manual(counter, qp); + if (ret) + goto err_task; + + rdma_restrack_put(&qp->res); + return 0; + +err_task: + kref_put(&counter->kref, counter_release); +err: + rdma_restrack_put(&qp->res); + return ret; +} + +/** + * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it + * The id of new counter is returned in @counter_id + */ +int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, + u32 qp_num, u32 *counter_id) +{ + struct rdma_counter *counter; + struct ib_qp *qp; + int ret; + + if (!rdma_is_port_valid(dev, port)) + return -EINVAL; + + qp = rdma_counter_get_qp(dev, qp_num); + if (!qp) + return -ENOENT; + + if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { + ret = -EINVAL; + goto err; + } + + counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL); + if (!counter) { + ret = -ENOMEM; + goto err; + } + + ret = rdma_counter_bind_qp_manual(counter, qp); + if (ret) + goto err_bind; + + if (counter_id) + *counter_id = counter->id; + + rdma_counter_res_add(counter, qp); + + rdma_restrack_put(&qp->res); + return ret; + +err_bind: + rdma_counter_free(counter); +err: + rdma_restrack_put(&qp->res); + return ret; +} + +/** + * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter + */ +int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, + u32 qp_num, u32 counter_id) +{ + struct rdma_port_counter *port_counter; + struct ib_qp *qp; + int ret; + + if (!rdma_is_port_valid(dev, port)) + return -EINVAL; + + qp = rdma_counter_get_qp(dev, qp_num); + if (!qp) + return -ENOENT; + + if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) { + ret = -EINVAL; + goto out; + } + + port_counter = &dev->port_data[port].port_counter; + if (!qp->counter || qp->counter->id != counter_id || + port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) { + ret = -EINVAL; + goto out; + } + + ret = rdma_counter_unbind_qp(qp, false); + +out: + rdma_restrack_put(&qp->res); + return ret; +} + +int rdma_counter_get_mode(struct ib_device *dev, u8 port, + enum rdma_nl_counter_mode *mode, + enum rdma_nl_counter_mask *mask) +{ + struct rdma_port_counter *port_counter; + + port_counter = &dev->port_data[port].port_counter; + *mode = port_counter->mode.mode; + *mask = port_counter->mode.mask; + + return 0; +} + +void rdma_counter_init(struct ib_device *dev) +{ + struct rdma_port_counter *port_counter; + u32 port; + + if (!dev->ops.alloc_hw_stats || !dev->port_data) + return; + + rdma_for_each_port(dev, port) { + port_counter = &dev->port_data[port].port_counter; + port_counter->mode.mode = RDMA_COUNTER_MODE_NONE; + mutex_init(&port_counter->lock); + + port_counter->hstats = dev->ops.alloc_hw_stats(dev, port); + if (!port_counter->hstats) + goto fail; + } + + return; + +fail: + rdma_for_each_port(dev, port) { + port_counter = &dev->port_data[port].port_counter; + kfree(port_counter->hstats); + port_counter->hstats = NULL; + } + + return; +} + +void rdma_counter_release(struct ib_device *dev) +{ + struct rdma_port_counter *port_counter; + u32 port; + + if (!dev->ops.alloc_hw_stats) + return; + + rdma_for_each_port(dev, port) { + port_counter = &dev->port_data[port].port_counter; + kfree(port_counter->hstats); + } +} diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index a24c900fbdf6..7c599878ccf7 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -18,6 +18,53 @@ #define IB_POLL_FLAGS \ (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) +static const struct 
dim_cq_moder +rdma_dim_prof[RDMA_DIM_PARAMS_NUM_PROFILES] = { + {1, 0, 1, 0}, + {1, 0, 4, 0}, + {2, 0, 4, 0}, + {2, 0, 8, 0}, + {4, 0, 8, 0}, + {16, 0, 8, 0}, + {16, 0, 16, 0}, + {32, 0, 16, 0}, + {32, 0, 32, 0}, +}; + +static void ib_cq_rdma_dim_work(struct work_struct *w) +{ + struct dim *dim = container_of(w, struct dim, work); + struct ib_cq *cq = dim->priv; + + u16 usec = rdma_dim_prof[dim->profile_ix].usec; + u16 comps = rdma_dim_prof[dim->profile_ix].comps; + + dim->state = DIM_START_MEASURE; + + cq->device->ops.modify_cq(cq, comps, usec); +} + +static void rdma_dim_init(struct ib_cq *cq) +{ + struct dim *dim; + + if (!cq->device->ops.modify_cq || !cq->device->use_cq_dim || + cq->poll_ctx == IB_POLL_DIRECT) + return; + + dim = kzalloc(sizeof(struct dim), GFP_KERNEL); + if (!dim) + return; + + dim->state = DIM_START_MEASURE; + dim->tune_state = DIM_GOING_RIGHT; + dim->profile_ix = RDMA_DIM_START_PROFILE; + dim->priv = cq; + cq->dim = dim; + + INIT_WORK(&dim->work, ib_cq_rdma_dim_work); +} + static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs, int batch) { @@ -78,6 +125,7 @@ static void ib_cq_completion_direct(struct ib_cq *cq, void *private) static int ib_poll_handler(struct irq_poll *iop, int budget) { struct ib_cq *cq = container_of(iop, struct ib_cq, iop); + struct dim *dim = cq->dim; int completed; completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH); @@ -87,6 +135,9 @@ static int ib_poll_handler(struct irq_poll *iop, int budget) irq_poll_sched(&cq->iop); } + if (dim) + rdma_dim(dim, completed); + return completed; } @@ -105,6 +156,8 @@ static void ib_cq_poll_work(struct work_struct *work) if (completed >= IB_POLL_BUDGET_WORKQUEUE || ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) queue_work(cq->comp_wq, &cq->work); + else if (cq->dim) + rdma_dim(cq->dim, completed); } static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) @@ -113,7 +166,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) } /** - * __ib_alloc_cq - allocate a completion queue + * __ib_alloc_cq_user - allocate a completion queue * @dev: device to allocate the CQ for * @private: driver private data, accessible from cq->cq_context * @nr_cqe: number of CQEs to allocate @@ -139,25 +192,30 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, struct ib_cq *cq; int ret = -ENOMEM; - cq = dev->ops.create_cq(dev, &cq_attr, NULL); - if (IS_ERR(cq)) - return cq; + cq = rdma_zalloc_drv_obj(dev, ib_cq); + if (!cq) + return ERR_PTR(ret); cq->device = dev; - cq->uobject = NULL; - cq->event_handler = NULL; cq->cq_context = private; cq->poll_ctx = poll_ctx; atomic_set(&cq->usecnt, 0); cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL); if (!cq->wc) - goto out_destroy_cq; + goto out_free_cq; cq->res.type = RDMA_RESTRACK_CQ; rdma_restrack_set_task(&cq->res, caller); + + ret = dev->ops.create_cq(cq, &cq_attr, NULL); + if (ret) + goto out_free_wc; + rdma_restrack_kadd(&cq->res); + rdma_dim_init(cq); + switch (cq->poll_ctx) { case IB_POLL_DIRECT: cq->comp_handler = ib_cq_completion_direct; @@ -178,29 +236,29 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, break; default: ret = -EINVAL; - goto out_free_wc; + goto out_destroy_cq; } return cq; -out_free_wc: - kfree(cq->wc); - rdma_restrack_del(&cq->res); out_destroy_cq: + rdma_restrack_del(&cq->res); cq->device->ops.destroy_cq(cq, udata); +out_free_wc: + kfree(cq->wc); +out_free_cq: + kfree(cq); return ERR_PTR(ret); } EXPORT_SYMBOL(__ib_alloc_cq_user); /** - * 
ib_free_cq - free a completion queue + * ib_free_cq_user - free a completion queue * @cq: completion queue to free. * @udata: User data or NULL for kernel object */ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) { - int ret; - if (WARN_ON_ONCE(atomic_read(&cq->usecnt))) return; @@ -218,9 +276,12 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) WARN_ON_ONCE(1); } - kfree(cq->wc); rdma_restrack_del(&cq->res); - ret = cq->device->ops.destroy_cq(cq, udata); - WARN_ON_ONCE(ret); + cq->device->ops.destroy_cq(cq, udata); + if (cq->dim) + cancel_work_sync(&cq->dim->work); + kfree(cq->dim); + kfree(cq->wc); + kfree(cq); } EXPORT_SYMBOL(ib_free_cq_user); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3352a107b4a3..9773145dee09 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -46,6 +46,7 @@ #include <rdma/rdma_netlink.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> +#include <rdma/rdma_counter.h> #include "core_priv.h" #include "restrack.h" @@ -270,7 +271,7 @@ struct ib_port_data_rcu { struct ib_port_data pdata[]; }; -static int ib_device_check_mandatory(struct ib_device *device) +static void ib_device_check_mandatory(struct ib_device *device) { #define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x } static const struct { @@ -305,8 +306,6 @@ static int ib_device_check_mandatory(struct ib_device *device) break; } } - - return 0; } /* @@ -375,7 +374,7 @@ struct ib_device *ib_device_get_by_name(const char *name, down_read(&devices_rwsem); device = __ib_device_get_by_name(name); if (device && driver_id != RDMA_DRIVER_UNKNOWN && - device->driver_id != driver_id) + device->ops.driver_id != driver_id) device = NULL; if (device) { @@ -449,6 +448,15 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) return 0; } +int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim) +{ + if (use_dim > 1) + return -EINVAL; + ibdev->use_cq_dim = use_dim; + + return 0; +} + static int alloc_name(struct ib_device *ibdev, const char *name) { struct ib_device *device; @@ -494,10 +502,12 @@ static void ib_device_release(struct device *device) if (dev->port_data) { ib_cache_release_one(dev); ib_security_release_port_pkey_list(dev); + rdma_counter_release(dev); kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu, pdata[0]), rcu_head); } + xa_destroy(&dev->compat_devs); xa_destroy(&dev->client_data); kfree_rcu(dev, rcu_head); @@ -1193,10 +1203,7 @@ static int setup_device(struct ib_device *device) int ret; setup_dma_device(device); - - ret = ib_device_check_mandatory(device); - if (ret) - return ret; + ib_device_check_mandatory(device); ret = setup_port_data(device); if (ret) { @@ -1321,6 +1328,8 @@ int ib_register_device(struct ib_device *device, const char *name) ib_device_register_rdmacg(device); + rdma_counter_init(device); + /* * Ensure that ADD uevent is not fired because it * is too early amd device is not initialized yet. 
@@ -1479,7 +1488,7 @@ void ib_unregister_driver(enum rdma_driver_id driver_id) down_read(&devices_rwsem); xa_for_each (&devices, index, ib_dev) { - if (ib_dev->driver_id != driver_id) + if (ib_dev->ops.driver_id != driver_id) continue; get_device(&ib_dev->dev); @@ -1749,6 +1758,104 @@ void ib_unregister_client(struct ib_client *client) } EXPORT_SYMBOL(ib_unregister_client); +static int __ib_get_global_client_nl_info(const char *client_name, + struct ib_client_nl_info *res) +{ + struct ib_client *client; + unsigned long index; + int ret = -ENOENT; + + down_read(&clients_rwsem); + xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { + if (strcmp(client->name, client_name) != 0) + continue; + if (!client->get_global_nl_info) { + ret = -EOPNOTSUPP; + break; + } + ret = client->get_global_nl_info(res); + if (WARN_ON(ret == -ENOENT)) + ret = -EINVAL; + if (!ret && res->cdev) + get_device(res->cdev); + break; + } + up_read(&clients_rwsem); + return ret; +} + +static int __ib_get_client_nl_info(struct ib_device *ibdev, + const char *client_name, + struct ib_client_nl_info *res) +{ + unsigned long index; + void *client_data; + int ret = -ENOENT; + + down_read(&ibdev->client_data_rwsem); + xan_for_each_marked (&ibdev->client_data, index, client_data, + CLIENT_DATA_REGISTERED) { + struct ib_client *client = xa_load(&clients, index); + + if (!client || strcmp(client->name, client_name) != 0) + continue; + if (!client->get_nl_info) { + ret = -EOPNOTSUPP; + break; + } + ret = client->get_nl_info(ibdev, client_data, res); + if (WARN_ON(ret == -ENOENT)) + ret = -EINVAL; + + /* + * The cdev is guaranteed valid as long as we are inside the + * client_data_rwsem as remove_one can't be called. Keep it + * valid for the caller. + */ + if (!ret && res->cdev) + get_device(res->cdev); + break; + } + up_read(&ibdev->client_data_rwsem); + + return ret; +} + +/** + * ib_get_client_nl_info - Fetch the nl_info from a client + * @device - IB device + * @client_name - Name of the client + * @res - Result of the query + */ +int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name, + struct ib_client_nl_info *res) +{ + int ret; + + if (ibdev) + ret = __ib_get_client_nl_info(ibdev, client_name, res); + else + ret = __ib_get_global_client_nl_info(client_name, res); +#ifdef CONFIG_MODULES + if (ret == -ENOENT) { + request_module("rdma-client-%s", client_name); + if (ibdev) + ret = __ib_get_client_nl_info(ibdev, client_name, res); + else + ret = __ib_get_global_client_nl_info(client_name, res); + } +#endif + if (ret) { + if (ret == -ENOENT) + return -EOPNOTSUPP; + return ret; + } + + if (WARN_ON(!res->cdev)) + return -EINVAL; + return 0; +} + /** * ib_set_client_data - Set IB client context * @device:Device to set context for @@ -2039,7 +2146,7 @@ struct ib_device *ib_device_get_by_netdev(struct net_device *ndev, (uintptr_t)ndev) { if (rcu_access_pointer(cur->netdev) == ndev && (driver_id == RDMA_DRIVER_UNKNOWN || - cur->ib_dev->driver_id == driver_id) && + cur->ib_dev->ops.driver_id == driver_id) && ib_device_try_get(cur->ib_dev)) { res = cur->ib_dev; break; @@ -2344,12 +2451,28 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) #define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name) + if (ops->driver_id != RDMA_DRIVER_UNKNOWN) { + WARN_ON(dev_ops->driver_id != RDMA_DRIVER_UNKNOWN && + dev_ops->driver_id != ops->driver_id); + dev_ops->driver_id = ops->driver_id; + } + if (ops->owner) { + WARN_ON(dev_ops->owner && dev_ops->owner != ops->owner); + 
dev_ops->owner = ops->owner; + } + if (ops->uverbs_abi_ver) + dev_ops->uverbs_abi_ver = ops->uverbs_abi_ver; + + dev_ops->uverbs_no_driver_id_binding |= + ops->uverbs_no_driver_id_binding; + SET_DEVICE_OP(dev_ops, add_gid); SET_DEVICE_OP(dev_ops, advise_mr); SET_DEVICE_OP(dev_ops, alloc_dm); SET_DEVICE_OP(dev_ops, alloc_fmr); SET_DEVICE_OP(dev_ops, alloc_hw_stats); SET_DEVICE_OP(dev_ops, alloc_mr); + SET_DEVICE_OP(dev_ops, alloc_mr_integrity); SET_DEVICE_OP(dev_ops, alloc_mw); SET_DEVICE_OP(dev_ops, alloc_pd); SET_DEVICE_OP(dev_ops, alloc_rdma_netdev); @@ -2357,6 +2480,11 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, alloc_xrcd); SET_DEVICE_OP(dev_ops, attach_mcast); SET_DEVICE_OP(dev_ops, check_mr_status); + SET_DEVICE_OP(dev_ops, counter_alloc_stats); + SET_DEVICE_OP(dev_ops, counter_bind_qp); + SET_DEVICE_OP(dev_ops, counter_dealloc); + SET_DEVICE_OP(dev_ops, counter_unbind_qp); + SET_DEVICE_OP(dev_ops, counter_update_stats); SET_DEVICE_OP(dev_ops, create_ah); SET_DEVICE_OP(dev_ops, create_counters); SET_DEVICE_OP(dev_ops, create_cq); @@ -2409,6 +2537,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, iw_reject); SET_DEVICE_OP(dev_ops, iw_rem_ref); SET_DEVICE_OP(dev_ops, map_mr_sg); + SET_DEVICE_OP(dev_ops, map_mr_sg_pi); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); SET_DEVICE_OP(dev_ops, modify_ah); @@ -2445,6 +2574,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, unmap_fmr); SET_OBJ_SIZE(dev_ops, ib_ah); + SET_OBJ_SIZE(dev_ops, ib_cq); SET_OBJ_SIZE(dev_ops, ib_pd); SET_OBJ_SIZE(dev_ops, ib_srq); SET_OBJ_SIZE(dev_ops, ib_ucontext); diff --git a/drivers/infiniband/core/mr_pool.c b/drivers/infiniband/core/mr_pool.c index d117f21ce9fd..c0e2df128b34 100644 --- a/drivers/infiniband/core/mr_pool.c +++ b/drivers/infiniband/core/mr_pool.c @@ -34,14 +34,18 @@ void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr) EXPORT_SYMBOL(ib_mr_pool_put); int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr, - enum ib_mr_type type, u32 max_num_sg) + enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg) { struct ib_mr *mr; unsigned long flags; int ret, i; for (i = 0; i < nr; i++) { - mr = ib_alloc_mr(qp->pd, type, max_num_sg); + if (type == IB_MR_TYPE_INTEGRITY) + mr = ib_alloc_mr_integrity(qp->pd, max_num_sg, + max_num_meta_sg); + else + mr = ib_alloc_mr(qp->pd, type, max_num_sg); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto out; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 69188cbbd99b..783e465e7c41 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -42,84 +42,105 @@ #include "cma_priv.h" #include "restrack.h" +/* + * Sort array elements by the netlink attribute name + */ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { - [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, - .len = IB_DEVICE_NAME_MAX - 1}, - [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING, - .len = IB_FW_VERSION_NAME_MAX - 1}, - [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, - [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, - [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, - [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, - 
[RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING, - .len = 16 }, - [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 }, - [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING, - .len = TASK_COMM_LEN }, + [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE }, + [RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, + .len = IB_DEVICE_NAME_MAX }, + [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 }, + [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, + [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, + .len = IFNAMSIZ }, + [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING, + .len = IFNAMSIZ }, + [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = { - .len = sizeof(struct __kernel_sockaddr_storage) }, - [RDMA_NLDEV_ATTR_RES_DST_ADDR] = { - .len = sizeof(struct __kernel_sockaddr_storage) }, [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 }, - [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 
}, - [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_DST_ADDR] = { + .len = sizeof(struct __kernel_sockaddr_storage) }, [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING, - .len = IFNAMSIZ }, - [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED }, - [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING, - .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, - [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 }, - [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, - [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, - [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, - .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, - [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, - [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING, - .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, + [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = { + .len = sizeof(struct __kernel_sockaddr_storage) }, + [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, + [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 }, + 
[RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 }, [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, + [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -232,6 +253,8 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type)) return -EMSGSIZE; + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim)) + return -EMSGSIZE; /* * Link type is determined on first port and mlx4 device @@ -532,6 +555,9 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx)) goto err; + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL))) + goto err; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id)) goto err; if (!rdma_is_kernel_res(res) && @@ -623,6 +649,152 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, err: return -EMSGSIZE; } +static int fill_stat_counter_mode(struct sk_buff *msg, + struct rdma_counter *counter) +{ + struct rdma_counter_mode *m = &counter->mode; + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode)) + return -EMSGSIZE; + + if (m->mode == RDMA_COUNTER_MODE_AUTO) + if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) && + nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type)) + return -EMSGSIZE; + + return 0; +} + +static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn) +{ + struct nlattr *entry_attr; + + entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY); + if (!entry_attr) + return -EMSGSIZE; + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) + goto err; + + nla_nest_end(msg, entry_attr); + return 0; + +err: + nla_nest_cancel(msg, entry_attr); + return -EMSGSIZE; +} + +static int fill_stat_counter_qps(struct sk_buff *msg, + struct rdma_counter *counter) +{ + struct rdma_restrack_entry *res; + struct rdma_restrack_root *rt; + struct nlattr *table_attr; + struct ib_qp *qp = NULL; + unsigned long id = 0; + int ret = 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); + + rt = &counter->device->res[RDMA_RESTRACK_QP]; + xa_lock(&rt->xa); + xa_for_each(&rt->xa, id, res) { + if (!rdma_is_visible_in_pid_ns(res)) + continue; + + qp = container_of(res, struct ib_qp, res); + if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) + continue; + + if (!qp->counter || (qp->counter->id != counter->id)) + continue; + + ret = fill_stat_counter_qp_entry(msg, qp->qp_num); + if (ret) + goto err; + } + + xa_unlock(&rt->xa); + nla_nest_end(msg, table_attr); + return 0; + +err: + xa_unlock(&rt->xa); + 
nla_nest_cancel(msg, table_attr); + return ret; +} + +static int fill_stat_hwcounter_entry(struct sk_buff *msg, + const char *name, u64 value) +{ + struct nlattr *entry_attr; + + entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); + if (!entry_attr) + return -EMSGSIZE; + + if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, + name)) + goto err; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE, + value, RDMA_NLDEV_ATTR_PAD)) + goto err; + + nla_nest_end(msg, entry_attr); + return 0; + +err: + nla_nest_cancel(msg, entry_attr); + return -EMSGSIZE; +} + +static int fill_stat_counter_hwcounters(struct sk_buff *msg, + struct rdma_counter *counter) +{ + struct rdma_hw_stats *st = counter->stats; + struct nlattr *table_attr; + int i; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + if (!table_attr) + return -EMSGSIZE; + + for (i = 0; i < st->num_counters; i++) + if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i])) + goto err; + + nla_nest_end(msg, table_attr); + return 0; + +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, + uint32_t port) +{ + struct rdma_counter *counter = + container_of(res, struct rdma_counter, res); + + if (port && port != counter->port) + return 0; + + /* Dump it even query failed */ + rdma_counter_query_stats(counter); + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) || + fill_res_name_pid(msg, &counter->res) || + fill_stat_counter_mode(msg, counter) || + fill_stat_counter_qps(msg, counter) || + fill_stat_counter_hwcounters(msg, counter)) + return -EMSGSIZE; + + return 0; +} + static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -704,6 +876,14 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto put_done; } + if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) { + u8 use_dim; + + use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]); + err = ib_device_set_dim(device, use_dim); + goto done; + } + done: ib_device_put(device); put_done: @@ -990,19 +1170,15 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY, .id = RDMA_NLDEV_ATTR_RES_PDN, }, + [RDMA_RESTRACK_COUNTER] = { + .fill_res_func = fill_res_counter_entry, + .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET, + .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, + .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, + .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID, + }, }; -static bool is_visible_in_pid_ns(struct rdma_restrack_entry *res) -{ - /* - * 1. Kern resources should be visible in init name space only - * 2. Present only resources visible in the current namespace - */ - if (rdma_is_kernel_res(res)) - return task_active_pid_ns(current) == &init_pid_ns; - return task_active_pid_ns(current) == task_active_pid_ns(res->task); -} - static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, enum rdma_restrack_type res_type) @@ -1047,7 +1223,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err; } - if (!is_visible_in_pid_ns(res)) { + if (!rdma_is_visible_in_pid_ns(res)) { ret = -ENOENT; goto err_get; } @@ -1159,7 +1335,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, * objects. 
*/ xa_for_each(&rt->xa, id, res) { - if (!is_visible_in_pid_ns(res)) + if (!rdma_is_visible_in_pid_ns(res)) continue; if (idx < start || !rdma_restrack_get(res)) @@ -1237,6 +1413,7 @@ RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID); RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ); RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); +RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER); static LIST_HEAD(link_ops); static DECLARE_RWSEM(link_ops_rwsem); @@ -1299,7 +1476,7 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], sizeof(ndev_name)); - ndev = dev_get_by_name(&init_net, ndev_name); + ndev = dev_get_by_name(sock_net(skb->sk), ndev_name); if (!ndev) return -ENODEV; @@ -1347,6 +1524,90 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE]; + struct ib_client_nl_info data = {}; + struct ib_device *ibdev = NULL; + struct sk_buff *msg; + u32 index; + int err; + + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, + extack); + if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE]) + return -EINVAL; + + nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE], + sizeof(client_name)); + + if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + ibdev = ib_device_get_by_index(sock_net(skb->sk), index); + if (!ibdev) + return -EINVAL; + + if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { + data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(ibdev, data.port)) { + err = -EINVAL; + goto out_put; + } + } else { + data.port = -1; + } + } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { + return -EINVAL; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out_put; + } + nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_GET_CHARDEV), + 0, 0); + + data.nl_msg = msg; + err = ib_get_client_nl_info(ibdev, client_name, &data); + if (err) + goto out_nlmsg; + + err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV, + huge_encode_dev(data.cdev->devt), + RDMA_NLDEV_ATTR_PAD); + if (err) + goto out_data; + err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi, + RDMA_NLDEV_ATTR_PAD); + if (err) + goto out_data; + if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME, + dev_name(data.cdev))) { + err = -EMSGSIZE; + goto out_data; + } + + nlmsg_end(msg, nlh); + put_device(data.cdev); + if (ibdev) + ib_device_put(ibdev); + return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +out_data: + put_device(data.cdev); +out_nlmsg: + nlmsg_free(msg); +out_put: + if (ibdev) + ib_device_put(ibdev); + return err; +} + static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -1399,11 +1660,375 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } +static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + u32 index, port, mode, mask = 0, qpn, cntn = 0; + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + struct ib_device *device; + struct sk_buff *msg; + int ret; + + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + /* Currently only counter for QP is supported */ + if (ret || 
!tb[RDMA_NLDEV_ATTR_STAT_RES] || + !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || + !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE]) + return -EINVAL; + + if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) + return -EINVAL; + + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = ib_device_get_by_index(sock_net(skb->sk), index); + if (!device) + return -EINVAL; + + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) { + ret = -EINVAL; + goto err; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto err; + } + nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_STAT_SET), + 0, 0); + + mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); + if (mode == RDMA_COUNTER_MODE_AUTO) { + if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) + mask = nla_get_u32( + tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); + + ret = rdma_counter_set_auto_mode(device, port, + mask ? true : false, mask); + if (ret) + goto err_msg; + } else { + qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); + if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { + cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); + ret = rdma_counter_bind_qpn(device, port, qpn, cntn); + } else { + ret = rdma_counter_bind_qpn_alloc(device, port, + qpn, &cntn); + } + if (ret) + goto err_msg; + + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { + ret = -EMSGSIZE; + goto err_fill; + } + } + + nlmsg_end(msg, nlh); + ib_device_put(device); + return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +err_fill: + rdma_counter_unbind_qpn(device, port, qpn, cntn); +err_msg: + nlmsg_free(msg); +err: + ib_device_put(device); + return ret; +} + +static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + struct ib_device *device; + struct sk_buff *msg; + u32 index, port, qpn, cntn; + int ret; + + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] || + !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || + !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] || + !tb[RDMA_NLDEV_ATTR_RES_LQPN]) + return -EINVAL; + + if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) + return -EINVAL; + + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = ib_device_get_by_index(sock_net(skb->sk), index); + if (!device) + return -EINVAL; + + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) { + ret = -EINVAL; + goto err; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto err; + } + nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_STAT_SET), + 0, 0); + + cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); + qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); + ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); + if (ret) + goto err_unbind; + + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { + ret = -EMSGSIZE; + goto err_fill; + } + + nlmsg_end(msg, nlh); + 
ib_device_put(device); + return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +err_fill: + rdma_counter_bind_qpn(device, port, qpn, cntn); +err_unbind: + nlmsg_free(msg); +err: + ib_device_put(device); + return ret; +} + +static int stat_get_doit_default_counter(struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct nlattr *tb[]) +{ + struct rdma_hw_stats *stats; + struct nlattr *table_attr; + struct ib_device *device; + int ret, num_cnts, i; + struct sk_buff *msg; + u32 index, port; + u64 v; + + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) + return -EINVAL; + + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = ib_device_get_by_index(sock_net(skb->sk), index); + if (!device) + return -EINVAL; + + if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) { + ret = -EINVAL; + goto err; + } + + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) { + ret = -EINVAL; + goto err; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto err; + } + + nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_STAT_GET), + 0, 0); + + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { + ret = -EMSGSIZE; + goto err_msg; + } + + stats = device->port_data ? device->port_data[port].hw_stats : NULL; + if (stats == NULL) { + ret = -EINVAL; + goto err_msg; + } + mutex_lock(&stats->lock); + + num_cnts = device->ops.get_hw_stats(device, stats, port, 0); + if (num_cnts < 0) { + ret = -EINVAL; + goto err_stats; + } + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + if (!table_attr) { + ret = -EMSGSIZE; + goto err_stats; + } + for (i = 0; i < num_cnts; i++) { + v = stats->value[i] + + rdma_counter_get_hwstat_value(device, port, i); + if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) { + ret = -EMSGSIZE; + goto err_table; + } + } + nla_nest_end(msg, table_attr); + + mutex_unlock(&stats->lock); + nlmsg_end(msg, nlh); + ib_device_put(device); + return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +err_table: + nla_nest_cancel(msg, table_attr); +err_stats: + mutex_unlock(&stats->lock); +err_msg: + nlmsg_free(msg); +err: + ib_device_put(device); + return ret; +} + +static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, struct nlattr *tb[]) + +{ + static enum rdma_nl_counter_mode mode; + static enum rdma_nl_counter_mask mask; + struct ib_device *device; + struct sk_buff *msg; + u32 index, port; + int ret; + + if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) + return nldev_res_get_counter_doit(skb, nlh, extack); + + if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] || + !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) + return -EINVAL; + + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = ib_device_get_by_index(sock_net(skb->sk), index); + if (!device) + return -EINVAL; + + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) { + ret = -EINVAL; + goto err; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto err; + } + + nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_STAT_GET), + 0, 0); + + ret = rdma_counter_get_mode(device, port, &mode, &mask); + if (ret) + goto err_msg; + + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, 
RDMA_NLDEV_ATTR_PORT_INDEX, port) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) + goto err_msg; + + if ((mode == RDMA_COUNTER_MODE_AUTO) && + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) + goto err_msg; + + nlmsg_end(msg, nlh); + ib_device_put(device); + return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +err_msg: + nlmsg_free(msg); +err: + ib_device_put(device); + return ret; +} + +static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + int ret; + + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (ret) + return -EINVAL; + + if (!tb[RDMA_NLDEV_ATTR_STAT_RES]) + return stat_get_doit_default_counter(skb, nlh, extack, tb); + + switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) { + case RDMA_NLDEV_ATTR_RES_QP: + ret = stat_get_doit_qp(skb, nlh, extack, tb); + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int nldev_stat_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + int ret; + + ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NULL); + if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES]) + return -EINVAL; + + switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) { + case RDMA_NLDEV_ATTR_RES_QP: + ret = nldev_res_get_counter_dumpit(skb, cb); + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, .dump = nldev_get_dumpit, }, + [RDMA_NLDEV_CMD_GET_CHARDEV] = { + .doit = nldev_get_chardev, + }, [RDMA_NLDEV_CMD_SET] = { .doit = nldev_set_doit, .flags = RDMA_NL_ADMIN_PERM, @@ -1449,6 +2074,17 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { }, [RDMA_NLDEV_CMD_SYS_SET] = { .doit = nldev_set_sys_set_doit, + }, + [RDMA_NLDEV_CMD_STAT_SET] = { + .doit = nldev_stat_set_doit, + .flags = RDMA_NL_ADMIN_PERM, + }, + [RDMA_NLDEV_CMD_STAT_GET] = { + .doit = nldev_stat_get_doit, + .dump = nldev_stat_get_dumpit, + }, + [RDMA_NLDEV_CMD_STAT_DEL] = { + .doit = nldev_stat_del_doit, .flags = RDMA_NL_ADMIN_PERM, }, }; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 3b5ff2f7b5f8..bddff426ee0f 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -6,6 +6,7 @@ #include <rdma/rdma_cm.h> #include <rdma/ib_verbs.h> #include <rdma/restrack.h> +#include <rdma/rdma_counter.h> #include <linux/mutex.h> #include <linux/sched/task.h> #include <linux/pid_namespace.h> @@ -45,6 +46,7 @@ static const char *type2str(enum rdma_restrack_type type) [RDMA_RESTRACK_CM_ID] = "CM_ID", [RDMA_RESTRACK_MR] = "MR", [RDMA_RESTRACK_CTX] = "CTX", + [RDMA_RESTRACK_COUNTER] = "COUNTER", }; return names[type]; @@ -169,6 +171,8 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) return container_of(res, struct ib_mr, res)->device; case RDMA_RESTRACK_CTX: return container_of(res, struct ib_ucontext, res)->device; + case RDMA_RESTRACK_COUNTER: + return container_of(res, struct rdma_counter, res)->device; default: WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); return NULL; @@ -190,6 +194,20 @@ void rdma_restrack_set_task(struct rdma_restrack_entry *res, } EXPORT_SYMBOL(rdma_restrack_set_task); +/** + * rdma_restrack_attach_task() - attach the task onto this resource + * @res: resource entry + * @task: the task 
to attach, the current task will be used if it is NULL. + */ +void rdma_restrack_attach_task(struct rdma_restrack_entry *res, + struct task_struct *task) +{ + if (res->task) + put_task_struct(res->task); + get_task_struct(task); + res->task = task; +} + static void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); @@ -203,15 +221,22 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res) kref_init(&res->kref); init_completion(&res->comp); - if (res->type != RDMA_RESTRACK_QP) - ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b, - &rt->next_id, GFP_KERNEL); - else { + if (res->type == RDMA_RESTRACK_QP) { /* Special case to ensure that LQPN points to right QP */ struct ib_qp *qp = container_of(res, struct ib_qp, res); ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL); res->id = ret ? 0 : qp->qp_num; + } else if (res->type == RDMA_RESTRACK_COUNTER) { + /* Special case to ensure that cntn points to right counter */ + struct rdma_counter *counter; + + counter = container_of(res, struct rdma_counter, res); + ret = xa_insert(&rt->xa, counter->id, res, GFP_KERNEL); + res->id = ret ? 0 : counter->id; + } else { + ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b, + &rt->next_id, GFP_KERNEL); } if (!ret) @@ -237,7 +262,8 @@ EXPORT_SYMBOL(rdma_restrack_kadd); */ void rdma_restrack_uadd(struct rdma_restrack_entry *res) { - if (res->type != RDMA_RESTRACK_CM_ID) + if ((res->type != RDMA_RESTRACK_CM_ID) && + (res->type != RDMA_RESTRACK_COUNTER)) res->task = NULL; if (!res->task) @@ -323,3 +349,16 @@ out: } } EXPORT_SYMBOL(rdma_restrack_del); + +bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res) +{ + /* + * 1. Kern resources should be visible in init + * namespace only + * 2. Present only resources visible in the current + * namespace + */ + if (rdma_is_kernel_res(res)) + return task_active_pid_ns(current) == &init_pid_ns; + return task_active_pid_ns(current) == task_active_pid_ns(res->task); +} diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h index 09a1fbdf578e..7bd177cc0a61 100644 --- a/drivers/infiniband/core/restrack.h +++ b/drivers/infiniband/core/restrack.h @@ -25,4 +25,7 @@ struct rdma_restrack_root { int rdma_restrack_init(struct ib_device *dev); void rdma_restrack_clean(struct ib_device *dev); +void rdma_restrack_attach_task(struct rdma_restrack_entry *res, + struct task_struct *task); +bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res); #endif /* _RDMA_CORE_RESTRACK_H_ */ diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 32ca8429eaae..dce06108c8c3 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -51,10 +51,34 @@ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, return false; } -static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev) +static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev, + bool pi_support) { + u32 max_pages; + + if (pi_support) + max_pages = dev->attrs.max_pi_fast_reg_page_list_len; + else + max_pages = dev->attrs.max_fast_reg_page_list_len; + /* arbitrary limit to avoid allocating gigantic resources */ - return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256); + return min_t(u32, max_pages, 256); +} + +static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg) +{ + int count = 0; + + if (reg->mr->need_inval) { + reg->inv_wr.opcode = IB_WR_LOCAL_INV; + reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey; + reg->inv_wr.next = 
®->reg_wr.wr; + count++; + } else { + reg->inv_wr.next = NULL; + } + + return count; } /* Caller must have zero-initialized *reg. */ @@ -62,7 +86,8 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, struct rdma_rw_reg_ctx *reg, struct scatterlist *sg, u32 sg_cnt, u32 offset) { - u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, + qp->integrity_en); u32 nents = min(sg_cnt, pages_per_mr); int count = 0, ret; @@ -70,14 +95,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, if (!reg->mr) return -EAGAIN; - if (reg->mr->need_inval) { - reg->inv_wr.opcode = IB_WR_LOCAL_INV; - reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey; - reg->inv_wr.next = ®->reg_wr.wr; - count++; - } else { - reg->inv_wr.next = NULL; - } + count += rdma_rw_inv_key(reg); ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); if (ret < 0 || ret < nents) { @@ -102,7 +120,8 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { struct rdma_rw_reg_ctx *prev = NULL; - u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, + qp->integrity_en); int i, j, ret = 0, count = 0; ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr; @@ -343,13 +362,14 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { struct ib_device *dev = qp->pd->device; - u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, + qp->integrity_en); struct ib_rdma_wr *rdma_wr; - struct ib_send_wr *prev_wr = NULL; int count = 0, ret; if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) { - pr_err("SG count too large\n"); + pr_err("SG count too large: sg_cnt=%d, prot_sg_cnt=%d, pages_per_mr=%d\n", + sg_cnt, prot_sg_cnt, pages_per_mr); return -EINVAL; } @@ -358,75 +378,58 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return -ENOMEM; sg_cnt = ret; - ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir); - if (!ret) { - ret = -ENOMEM; - goto out_unmap_sg; + if (prot_sg_cnt) { + ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir); + if (!ret) { + ret = -ENOMEM; + goto out_unmap_sg; + } + prot_sg_cnt = ret; } - prot_sg_cnt = ret; ctx->type = RDMA_RW_SIG_MR; ctx->nr_ops = 1; - ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL); - if (!ctx->sig) { + ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL); + if (!ctx->reg) { ret = -ENOMEM; goto out_unmap_prot_sg; } - ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0); - if (ret < 0) - goto out_free_ctx; - count += ret; - prev_wr = &ctx->sig->data.reg_wr.wr; - - ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot, - prot_sg, prot_sg_cnt, 0); - if (ret < 0) - goto out_destroy_data_mr; - count += ret; - - if (ctx->sig->prot.inv_wr.next) - prev_wr->next = &ctx->sig->prot.inv_wr; - else - prev_wr->next = &ctx->sig->prot.reg_wr.wr; - prev_wr = &ctx->sig->prot.reg_wr.wr; - - ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs); - if (!ctx->sig->sig_mr) { + ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs); + if (!ctx->reg->mr) { ret = -EAGAIN; - goto out_destroy_prot_mr; + goto out_free_ctx; } - if (ctx->sig->sig_mr->need_inval) { - memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr)); + count += rdma_rw_inv_key(ctx->reg); - ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV; 
- ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey; + memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs)); - prev_wr->next = &ctx->sig->sig_inv_wr; - prev_wr = &ctx->sig->sig_inv_wr; + ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg, + prot_sg_cnt, NULL, SZ_4K); + if (unlikely(ret)) { + pr_err("failed to map PI sg (%d)\n", sg_cnt + prot_sg_cnt); + goto out_destroy_sig_mr; } - ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR; - ctx->sig->sig_wr.wr.wr_cqe = NULL; - ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge; - ctx->sig->sig_wr.wr.num_sge = 1; - ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE; - ctx->sig->sig_wr.sig_attrs = sig_attrs; - ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr; - if (prot_sg_cnt) - ctx->sig->sig_wr.prot = &ctx->sig->prot.sge; - prev_wr->next = &ctx->sig->sig_wr.wr; - prev_wr = &ctx->sig->sig_wr.wr; + ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY; + ctx->reg->reg_wr.wr.wr_cqe = NULL; + ctx->reg->reg_wr.wr.num_sge = 0; + ctx->reg->reg_wr.wr.send_flags = 0; + ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE; + if (rdma_protocol_iwarp(qp->device, port_num)) + ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE; + ctx->reg->reg_wr.mr = ctx->reg->mr; + ctx->reg->reg_wr.key = ctx->reg->mr->lkey; count++; - ctx->sig->sig_sge.addr = 0; - ctx->sig->sig_sge.length = ctx->sig->data.sge.length; - if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE) - ctx->sig->sig_sge.length += ctx->sig->prot.sge.length; + ctx->reg->sge.addr = ctx->reg->mr->iova; + ctx->reg->sge.length = ctx->reg->mr->length; + if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE) + ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length; - rdma_wr = &ctx->sig->data.wr; - rdma_wr->wr.sg_list = &ctx->sig->sig_sge; + rdma_wr = &ctx->reg->wr; + rdma_wr->wr.sg_list = &ctx->reg->sge; rdma_wr->wr.num_sge = 1; rdma_wr->remote_addr = remote_addr; rdma_wr->rkey = rkey; @@ -434,21 +437,18 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; else rdma_wr->wr.opcode = IB_WR_RDMA_READ; - prev_wr->next = &rdma_wr->wr; - prev_wr = &rdma_wr->wr; + ctx->reg->reg_wr.wr.next = &rdma_wr->wr; count++; return count; -out_destroy_prot_mr: - if (prot_sg_cnt) - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr); -out_destroy_data_mr: - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr); +out_destroy_sig_mr: + ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr); out_free_ctx: - kfree(ctx->sig); + kfree(ctx->reg); out_unmap_prot_sg: - ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); + if (prot_sg_cnt) + ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); out_unmap_sg: ib_dma_unmap_sg(dev, sg, sg_cnt, dir); return ret; @@ -491,22 +491,8 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, switch (ctx->type) { case RDMA_RW_SIG_MR: - rdma_rw_update_lkey(&ctx->sig->data, true); - if (ctx->sig->prot.mr) - rdma_rw_update_lkey(&ctx->sig->prot, true); - - ctx->sig->sig_mr->need_inval = true; - ib_update_fast_reg_key(ctx->sig->sig_mr, - ib_inc_rkey(ctx->sig->sig_mr->lkey)); - ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey; - - if (ctx->sig->data.inv_wr.next) - first_wr = &ctx->sig->data.inv_wr; - else - first_wr = &ctx->sig->data.reg_wr.wr; - last_wr = &ctx->sig->data.wr.wr; - break; case RDMA_RW_MR: + /* fallthrough */ for (i = 0; i < ctx->nr_ops; i++) { rdma_rw_update_lkey(&ctx->reg[i], ctx->reg[i].wr.wr.opcode != @@ -605,7 +591,7 @@ EXPORT_SYMBOL(rdma_rw_ctx_destroy); /** * rdma_rw_ctx_destroy_signature - 
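For orientation, the rewritten rdma_rw_ctx_signature_init() above now builds a single IB_WR_REG_MR_INTEGRITY registration over the data and protection SGLs (via ib_map_mr_sg_pi()) instead of the old data/prot/sig MR triple. A hypothetical ULP-side caller could look roughly like the sketch below; the function name is invented, and it assumes a QP created with IB_QP_CREATE_INTEGRITY_EN and sig_attrs filled in by the caller:

#include <rdma/rw.h>

static int ulp_rdma_write_with_pi(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
				  u8 port_num, struct scatterlist *sg,
				  u32 sg_cnt, struct scatterlist *prot_sg,
				  u32 prot_sg_cnt, struct ib_sig_attrs *sig_attrs,
				  u64 remote_addr, u32 rkey, struct ib_cqe *cqe)
{
	int ret;

	/* ctx is caller-owned (e.g. embedded in the request) and must stay
	 * alive until the completion handler tears it down again with
	 * rdma_rw_ctx_destroy_signature(). */
	ret = rdma_rw_ctx_signature_init(ctx, qp, port_num, sg, sg_cnt,
					 prot_sg, prot_sg_cnt, sig_attrs,
					 remote_addr, rkey, DMA_TO_DEVICE);
	if (ret < 0)
		return ret;

	/* Chains LOCAL_INV (if needed), REG_MR_INTEGRITY and the RDMA_WRITE. */
	ret = rdma_rw_ctx_post(ctx, qp, port_num, cqe, NULL);
	if (ret)
		rdma_rw_ctx_destroy_signature(ctx, qp, port_num, sg, sg_cnt,
					      prot_sg, prot_sg_cnt,
					      DMA_TO_DEVICE);
	return ret;
}

Because the whole chain is now a regular registration plus an RDMA work request, the RDMA_RW_SIG_MR case in rdma_rw_ctx_wrs() can simply fall through to the generic RDMA_RW_MR handling, as the hunk above shows.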
release all resources allocated by - * rdma_rw_ctx_init_signature + * rdma_rw_ctx_signature_init * @ctx: context to release * @qp: queue pair to operate on * @port_num: port num to which the connection is bound @@ -623,16 +609,12 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR)) return; - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr); - ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr); + kfree(ctx->reg); - if (ctx->sig->prot.mr) { - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr); + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + if (prot_sg_cnt) ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); - } - - ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr); - kfree(ctx->sig); } EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); @@ -653,7 +635,7 @@ unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, unsigned int mr_pages; if (rdma_rw_can_use_mr(device, port_num)) - mr_pages = rdma_rw_fr_page_list_len(device); + mr_pages = rdma_rw_fr_page_list_len(device, false); else mr_pages = device->attrs.max_sge_rd; return DIV_ROUND_UP(maxpages, mr_pages); @@ -679,9 +661,8 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr) * we'll need two additional MRs for the registrations and the * invalidation. */ - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) - factor += 6; /* (inv + reg) * (data + prot + sig) */ - else if (rdma_rw_can_use_mr(dev, attr->port_num)) + if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN || + rdma_rw_can_use_mr(dev, attr->port_num)) factor += 2; /* inv + reg */ attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs; @@ -697,20 +678,22 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr) int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr) { struct ib_device *dev = qp->pd->device; - u32 nr_mrs = 0, nr_sig_mrs = 0; + u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0; int ret = 0; - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) { + if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) { nr_sig_mrs = attr->cap.max_rdma_ctxs; - nr_mrs = attr->cap.max_rdma_ctxs * 2; + nr_mrs = attr->cap.max_rdma_ctxs; + max_num_sg = rdma_rw_fr_page_list_len(dev, true); } else if (rdma_rw_can_use_mr(dev, attr->port_num)) { nr_mrs = attr->cap.max_rdma_ctxs; + max_num_sg = rdma_rw_fr_page_list_len(dev, false); } if (nr_mrs) { ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs, IB_MR_TYPE_MEM_REG, - rdma_rw_fr_page_list_len(dev)); + max_num_sg, 0); if (ret) { pr_err("%s: failed to allocated %d MRs\n", __func__, nr_mrs); @@ -720,10 +703,10 @@ int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr) if (nr_sig_mrs) { ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs, - IB_MR_TYPE_SIGNATURE, 2); + IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg); if (ret) { pr_err("%s: failed to allocated %d SIG MRs\n", - __func__, nr_mrs); + __func__, nr_sig_mrs); goto out_free_rdma_mrs; } } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index c78d0c9646ae..b477295a96c2 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -43,6 +43,7 @@ #include <rdma/ib_mad.h> #include <rdma/ib_pma.h> #include <rdma/ib_cache.h> +#include <rdma/rdma_counter.h> struct ib_port; @@ -800,9 +801,12 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, return 0; } -static ssize_t print_hw_stat(struct rdma_hw_stats 
*stats, int index, char *buf) +static ssize_t print_hw_stat(struct ib_device *dev, int port_num, + struct rdma_hw_stats *stats, int index, char *buf) { - return sprintf(buf, "%llu\n", stats->value[index]); + u64 v = rdma_counter_get_hwstat_value(dev, port_num, index); + + return sprintf(buf, "%llu\n", stats->value[index] + v); } static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, @@ -828,7 +832,7 @@ static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index); if (ret) goto unlock; - ret = print_hw_stat(stats, hsa->index, buf); + ret = print_hw_stat(dev, hsa->port_num, stats, hsa->index, buf); unlock: mutex_unlock(&stats->lock); @@ -999,6 +1003,8 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, goto err; port->hw_stats_ag = hsag; port->hw_stats = stats; + if (device->port_data) + device->port_data[port_num].hw_stats = stats; } else { struct kobject *kobj = &device->dev.kobj; ret = sysfs_create_group(kobj, hsag); @@ -1289,6 +1295,8 @@ const struct attribute_group ib_dev_attr_group = { void ib_free_port_attrs(struct ib_core_device *coredev) { + struct ib_device *device = rdma_device_to_ibdev(&coredev->dev); + bool is_full_dev = &device->coredev == coredev; struct kobject *p, *t; list_for_each_entry_safe(p, t, &coredev->port_list, entry) { @@ -1298,6 +1306,8 @@ void ib_free_port_attrs(struct ib_core_device *coredev) if (port->hw_stats_ag) free_hsag(&port->kobj, port->hw_stats_ag); kfree(port->hw_stats); + if (device->port_data && is_full_dev) + device->port_data[port->port_num].hw_stats = NULL; if (port->pma_table) sysfs_remove_group(p, port->pma_table); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c deleted file mode 100644 index 8e7da2d41fd8..000000000000 --- a/drivers/infiniband/core/ucm.c +++ /dev/null @@ -1,1350 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/completion.h> -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/module.h> -#include <linux/device.h> -#include <linux/err.h> -#include <linux/poll.h> -#include <linux/sched.h> -#include <linux/file.h> -#include <linux/mount.h> -#include <linux/cdev.h> -#include <linux/xarray.h> -#include <linux/mutex.h> -#include <linux/slab.h> - -#include <linux/nospec.h> - -#include <linux/uaccess.h> - -#include <rdma/ib.h> -#include <rdma/ib_cm.h> -#include <rdma/ib_user_cm.h> -#include <rdma/ib_marshall.h> - -#include "core_priv.h" - -MODULE_AUTHOR("Libor Michalek"); -MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); -MODULE_LICENSE("Dual BSD/GPL"); - -struct ib_ucm_device { - int devnum; - struct cdev cdev; - struct device dev; - struct ib_device *ib_dev; -}; - -struct ib_ucm_file { - struct mutex file_mutex; - struct file *filp; - struct ib_ucm_device *device; - - struct list_head ctxs; - struct list_head events; - wait_queue_head_t poll_wait; -}; - -struct ib_ucm_context { - int id; - struct completion comp; - atomic_t ref; - int events_reported; - - struct ib_ucm_file *file; - struct ib_cm_id *cm_id; - __u64 uid; - - struct list_head events; /* list of pending events. */ - struct list_head file_list; /* member in file ctx list */ -}; - -struct ib_ucm_event { - struct ib_ucm_context *ctx; - struct list_head file_list; /* member in file event list */ - struct list_head ctx_list; /* member in ctx event list */ - - struct ib_cm_id *cm_id; - struct ib_ucm_event_resp resp; - void *data; - void *info; - int data_len; - int info_len; -}; - -enum { - IB_UCM_MAJOR = 231, - IB_UCM_BASE_MINOR = 224, - IB_UCM_MAX_DEVICES = RDMA_MAX_PORTS, - IB_UCM_NUM_FIXED_MINOR = 32, - IB_UCM_NUM_DYNAMIC_MINOR = IB_UCM_MAX_DEVICES - IB_UCM_NUM_FIXED_MINOR, -}; - -#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) -static dev_t dynamic_ucm_dev; - -static void ib_ucm_add_one(struct ib_device *device); -static void ib_ucm_remove_one(struct ib_device *device, void *client_data); - -static struct ib_client ucm_client = { - .name = "ucm", - .add = ib_ucm_add_one, - .remove = ib_ucm_remove_one -}; - -static DEFINE_XARRAY_ALLOC(ctx_id_table); -static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); - -static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) -{ - struct ib_ucm_context *ctx; - - xa_lock(&ctx_id_table); - ctx = xa_load(&ctx_id_table, id); - if (!ctx) - ctx = ERR_PTR(-ENOENT); - else if (ctx->file != file) - ctx = ERR_PTR(-EINVAL); - else - atomic_inc(&ctx->ref); - xa_unlock(&ctx_id_table); - - return ctx; -} - -static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) -{ - if (atomic_dec_and_test(&ctx->ref)) - complete(&ctx->comp); -} - -static inline int ib_ucm_new_cm_id(int event) -{ - return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED; -} - -static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) -{ - struct ib_ucm_event *uevent; - - mutex_lock(&ctx->file->file_mutex); - list_del(&ctx->file_list); - while (!list_empty(&ctx->events)) { - - uevent = list_entry(ctx->events.next, - struct ib_ucm_event, ctx_list); - list_del(&uevent->file_list); - list_del(&uevent->ctx_list); - mutex_unlock(&ctx->file->file_mutex); - - /* clear incoming connections. 
*/ - if (ib_ucm_new_cm_id(uevent->resp.event)) - ib_destroy_cm_id(uevent->cm_id); - - kfree(uevent); - mutex_lock(&ctx->file->file_mutex); - } - mutex_unlock(&ctx->file->file_mutex); -} - -static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) -{ - struct ib_ucm_context *ctx; - - ctx = kzalloc(sizeof *ctx, GFP_KERNEL); - if (!ctx) - return NULL; - - atomic_set(&ctx->ref, 1); - init_completion(&ctx->comp); - ctx->file = file; - INIT_LIST_HEAD(&ctx->events); - - if (xa_alloc(&ctx_id_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL)) - goto error; - - list_add_tail(&ctx->file_list, &file->ctxs); - return ctx; - -error: - kfree(ctx); - return NULL; -} - -static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, - const struct ib_cm_req_event_param *kreq) -{ - ureq->remote_ca_guid = kreq->remote_ca_guid; - ureq->remote_qkey = kreq->remote_qkey; - ureq->remote_qpn = kreq->remote_qpn; - ureq->qp_type = kreq->qp_type; - ureq->starting_psn = kreq->starting_psn; - ureq->responder_resources = kreq->responder_resources; - ureq->initiator_depth = kreq->initiator_depth; - ureq->local_cm_response_timeout = kreq->local_cm_response_timeout; - ureq->flow_control = kreq->flow_control; - ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout; - ureq->retry_count = kreq->retry_count; - ureq->rnr_retry_count = kreq->rnr_retry_count; - ureq->srq = kreq->srq; - ureq->port = kreq->port; - - ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path); - if (kreq->alternate_path) - ib_copy_path_rec_to_user(&ureq->alternate_path, - kreq->alternate_path); -} - -static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, - const struct ib_cm_rep_event_param *krep) -{ - urep->remote_ca_guid = krep->remote_ca_guid; - urep->remote_qkey = krep->remote_qkey; - urep->remote_qpn = krep->remote_qpn; - urep->starting_psn = krep->starting_psn; - urep->responder_resources = krep->responder_resources; - urep->initiator_depth = krep->initiator_depth; - urep->target_ack_delay = krep->target_ack_delay; - urep->failover_accepted = krep->failover_accepted; - urep->flow_control = krep->flow_control; - urep->rnr_retry_count = krep->rnr_retry_count; - urep->srq = krep->srq; -} - -static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, - const struct ib_cm_sidr_rep_event_param *krep) -{ - urep->status = krep->status; - urep->qkey = krep->qkey; - urep->qpn = krep->qpn; -}; - -static int ib_ucm_event_process(const struct ib_cm_event *evt, - struct ib_ucm_event *uvt) -{ - void *info = NULL; - - switch (evt->event) { - case IB_CM_REQ_RECEIVED: - ib_ucm_event_req_get(&uvt->resp.u.req_resp, - &evt->param.req_rcvd); - uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; - uvt->resp.present = IB_UCM_PRES_PRIMARY; - uvt->resp.present |= (evt->param.req_rcvd.alternate_path ? 
- IB_UCM_PRES_ALTERNATE : 0); - break; - case IB_CM_REP_RECEIVED: - ib_ucm_event_rep_get(&uvt->resp.u.rep_resp, - &evt->param.rep_rcvd); - uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE; - break; - case IB_CM_RTU_RECEIVED: - uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE; - uvt->resp.u.send_status = evt->param.send_status; - break; - case IB_CM_DREQ_RECEIVED: - uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE; - uvt->resp.u.send_status = evt->param.send_status; - break; - case IB_CM_DREP_RECEIVED: - uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE; - uvt->resp.u.send_status = evt->param.send_status; - break; - case IB_CM_MRA_RECEIVED: - uvt->resp.u.mra_resp.timeout = - evt->param.mra_rcvd.service_timeout; - uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE; - break; - case IB_CM_REJ_RECEIVED: - uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason; - uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; - uvt->info_len = evt->param.rej_rcvd.ari_length; - info = evt->param.rej_rcvd.ari; - break; - case IB_CM_LAP_RECEIVED: - ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path, - evt->param.lap_rcvd.alternate_path); - uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; - uvt->resp.present = IB_UCM_PRES_ALTERNATE; - break; - case IB_CM_APR_RECEIVED: - uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status; - uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE; - uvt->info_len = evt->param.apr_rcvd.info_len; - info = evt->param.apr_rcvd.apr_info; - break; - case IB_CM_SIDR_REQ_RECEIVED: - uvt->resp.u.sidr_req_resp.pkey = - evt->param.sidr_req_rcvd.pkey; - uvt->resp.u.sidr_req_resp.port = - evt->param.sidr_req_rcvd.port; - uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; - break; - case IB_CM_SIDR_REP_RECEIVED: - ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp, - &evt->param.sidr_rep_rcvd); - uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; - uvt->info_len = evt->param.sidr_rep_rcvd.info_len; - info = evt->param.sidr_rep_rcvd.info; - break; - default: - uvt->resp.u.send_status = evt->param.send_status; - break; - } - - if (uvt->data_len) { - uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL); - if (!uvt->data) - goto err1; - - uvt->resp.present |= IB_UCM_PRES_DATA; - } - - if (uvt->info_len) { - uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL); - if (!uvt->info) - goto err2; - - uvt->resp.present |= IB_UCM_PRES_INFO; - } - return 0; - -err2: - kfree(uvt->data); -err1: - return -ENOMEM; -} - -static int ib_ucm_event_handler(struct ib_cm_id *cm_id, - const struct ib_cm_event *event) -{ - struct ib_ucm_event *uevent; - struct ib_ucm_context *ctx; - int result = 0; - - ctx = cm_id->context; - - uevent = kzalloc(sizeof *uevent, GFP_KERNEL); - if (!uevent) - goto err1; - - uevent->ctx = ctx; - uevent->cm_id = cm_id; - uevent->resp.uid = ctx->uid; - uevent->resp.id = ctx->id; - uevent->resp.event = event->event; - - result = ib_ucm_event_process(event, uevent); - if (result) - goto err2; - - mutex_lock(&ctx->file->file_mutex); - list_add_tail(&uevent->file_list, &ctx->file->events); - list_add_tail(&uevent->ctx_list, &ctx->events); - wake_up_interruptible(&ctx->file->poll_wait); - mutex_unlock(&ctx->file->file_mutex); - return 0; - -err2: - kfree(uevent); -err1: - /* Destroy new cm_id's */ - return ib_ucm_new_cm_id(event->event); -} - -static ssize_t ib_ucm_event(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_context *ctx; - struct ib_ucm_event_get cmd; - struct ib_ucm_event *uevent; - int result = 0; - - if (out_len < sizeof(struct ib_ucm_event_resp)) - 
return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - mutex_lock(&file->file_mutex); - while (list_empty(&file->events)) { - mutex_unlock(&file->file_mutex); - - if (file->filp->f_flags & O_NONBLOCK) - return -EAGAIN; - - if (wait_event_interruptible(file->poll_wait, - !list_empty(&file->events))) - return -ERESTARTSYS; - - mutex_lock(&file->file_mutex); - } - - uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); - - if (ib_ucm_new_cm_id(uevent->resp.event)) { - ctx = ib_ucm_ctx_alloc(file); - if (!ctx) { - result = -ENOMEM; - goto done; - } - - ctx->cm_id = uevent->cm_id; - ctx->cm_id->context = ctx; - uevent->resp.id = ctx->id; - } - - if (copy_to_user(u64_to_user_ptr(cmd.response), - &uevent->resp, sizeof(uevent->resp))) { - result = -EFAULT; - goto done; - } - - if (uevent->data) { - if (cmd.data_len < uevent->data_len) { - result = -ENOMEM; - goto done; - } - if (copy_to_user(u64_to_user_ptr(cmd.data), - uevent->data, uevent->data_len)) { - result = -EFAULT; - goto done; - } - } - - if (uevent->info) { - if (cmd.info_len < uevent->info_len) { - result = -ENOMEM; - goto done; - } - if (copy_to_user(u64_to_user_ptr(cmd.info), - uevent->info, uevent->info_len)) { - result = -EFAULT; - goto done; - } - } - - list_del(&uevent->file_list); - list_del(&uevent->ctx_list); - uevent->ctx->events_reported++; - - kfree(uevent->data); - kfree(uevent->info); - kfree(uevent); -done: - mutex_unlock(&file->file_mutex); - return result; -} - -static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_create_id cmd; - struct ib_ucm_create_id_resp resp; - struct ib_ucm_context *ctx; - int result; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - mutex_lock(&file->file_mutex); - ctx = ib_ucm_ctx_alloc(file); - mutex_unlock(&file->file_mutex); - if (!ctx) - return -ENOMEM; - - ctx->uid = cmd.uid; - ctx->cm_id = ib_create_cm_id(file->device->ib_dev, - ib_ucm_event_handler, ctx); - if (IS_ERR(ctx->cm_id)) { - result = PTR_ERR(ctx->cm_id); - goto err1; - } - - resp.id = ctx->id; - if (copy_to_user(u64_to_user_ptr(cmd.response), - &resp, sizeof(resp))) { - result = -EFAULT; - goto err2; - } - return 0; - -err2: - ib_destroy_cm_id(ctx->cm_id); -err1: - xa_erase(&ctx_id_table, ctx->id); - kfree(ctx); - return result; -} - -static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_destroy_id cmd; - struct ib_ucm_destroy_id_resp resp; - struct ib_ucm_context *ctx; - int result = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - xa_lock(&ctx_id_table); - ctx = xa_load(&ctx_id_table, cmd.id); - if (!ctx) - ctx = ERR_PTR(-ENOENT); - else if (ctx->file != file) - ctx = ERR_PTR(-EINVAL); - else - __xa_erase(&ctx_id_table, ctx->id); - xa_unlock(&ctx_id_table); - - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ib_ucm_ctx_put(ctx); - wait_for_completion(&ctx->comp); - - /* No new events will be generated after destroying the cm_id. */ - ib_destroy_cm_id(ctx->cm_id); - /* Cleanup events not yet reported to the user. 
*/ - ib_ucm_cleanup_events(ctx); - - resp.events_reported = ctx->events_reported; - if (copy_to_user(u64_to_user_ptr(cmd.response), - &resp, sizeof(resp))) - result = -EFAULT; - - kfree(ctx); - return result; -} - -static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_attr_id_resp resp; - struct ib_ucm_attr_id cmd; - struct ib_ucm_context *ctx; - int result = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - resp.service_id = ctx->cm_id->service_id; - resp.service_mask = ctx->cm_id->service_mask; - resp.local_id = ctx->cm_id->local_id; - resp.remote_id = ctx->cm_id->remote_id; - - if (copy_to_user(u64_to_user_ptr(cmd.response), - &resp, sizeof(resp))) - result = -EFAULT; - - ib_ucm_ctx_put(ctx); - return result; -} - -static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_uverbs_qp_attr resp; - struct ib_ucm_init_qp_attr cmd; - struct ib_ucm_context *ctx; - struct ib_qp_attr qp_attr; - int result = 0; - - if (out_len < sizeof(resp)) - return -ENOSPC; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - resp.qp_attr_mask = 0; - memset(&qp_attr, 0, sizeof qp_attr); - qp_attr.qp_state = cmd.qp_state; - result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); - if (result) - goto out; - - ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); - - if (copy_to_user(u64_to_user_ptr(cmd.response), - &resp, sizeof(resp))) - result = -EFAULT; - -out: - ib_ucm_ctx_put(ctx); - return result; -} - -static int ucm_validate_listen(__be64 service_id, __be64 service_mask) -{ - service_id &= service_mask; - - if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) || - ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID)) - return -EINVAL; - - return 0; -} - -static ssize_t ib_ucm_listen(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_listen cmd; - struct ib_ucm_context *ctx; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - result = ucm_validate_listen(cmd.service_id, cmd.service_mask); - if (result) - goto out; - - result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); -out: - ib_ucm_ctx_put(ctx); - return result; -} - -static ssize_t ib_ucm_notify(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_notify cmd; - struct ib_ucm_context *ctx; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event); - ib_ucm_ctx_put(ctx); - return result; -} - -static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) -{ - void *data; - - *dest = NULL; - - if (!len) - return 0; - - data = memdup_user(u64_to_user_ptr(src), len); - if (IS_ERR(data)) - return PTR_ERR(data); - - *dest = data; - return 0; -} - -static int ib_ucm_path_get(struct sa_path_rec **path, u64 src) -{ - struct ib_user_path_rec upath; - struct sa_path_rec *sa_path; - - *path = NULL; - - if (!src) - return 
0; - - sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL); - if (!sa_path) - return -ENOMEM; - - if (copy_from_user(&upath, u64_to_user_ptr(src), - sizeof(upath))) { - - kfree(sa_path); - return -EFAULT; - } - - ib_copy_path_rec_from_user(sa_path, &upath); - *path = sa_path; - return 0; -} - -static ssize_t ib_ucm_send_req(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_req_param param; - struct ib_ucm_context *ctx; - struct ib_ucm_req cmd; - int result; - - param.private_data = NULL; - param.primary_path = NULL; - param.alternate_path = NULL; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); - if (result) - goto done; - - result = ib_ucm_path_get(¶m.primary_path, cmd.primary_path); - if (result) - goto done; - - result = ib_ucm_path_get(¶m.alternate_path, cmd.alternate_path); - if (result) - goto done; - - param.private_data_len = cmd.len; - param.service_id = cmd.sid; - param.qp_num = cmd.qpn; - param.qp_type = cmd.qp_type; - param.starting_psn = cmd.psn; - param.peer_to_peer = cmd.peer_to_peer; - param.responder_resources = cmd.responder_resources; - param.initiator_depth = cmd.initiator_depth; - param.remote_cm_response_timeout = cmd.remote_cm_response_timeout; - param.flow_control = cmd.flow_control; - param.local_cm_response_timeout = cmd.local_cm_response_timeout; - param.retry_count = cmd.retry_count; - param.rnr_retry_count = cmd.rnr_retry_count; - param.max_cm_retries = cmd.max_cm_retries; - param.srq = cmd.srq; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_req(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(param.private_data); - kfree(param.primary_path); - kfree(param.alternate_path); - return result; -} - -static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_rep_param param; - struct ib_ucm_context *ctx; - struct ib_ucm_rep cmd; - int result; - - param.private_data = NULL; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); - if (result) - return result; - - param.qp_num = cmd.qpn; - param.starting_psn = cmd.psn; - param.private_data_len = cmd.len; - param.responder_resources = cmd.responder_resources; - param.initiator_depth = cmd.initiator_depth; - param.failover_accepted = cmd.failover_accepted; - param.flow_control = cmd.flow_control; - param.rnr_retry_count = cmd.rnr_retry_count; - param.srq = cmd.srq; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - ctx->uid = cmd.uid; - result = ib_send_cm_rep(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - - kfree(param.private_data); - return result; -} - -static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file, - const char __user *inbuf, int in_len, - int (*func)(struct ib_cm_id *cm_id, - const void *private_data, - u8 private_data_len)) -{ - struct ib_ucm_private_data cmd; - struct ib_ucm_context *ctx; - const void *private_data = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len); - if (result) - return result; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = func(ctx->cm_id, private_data, cmd.len); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - - kfree(private_data); - 
return result; -} - -static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu); -} - -static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq); -} - -static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep); -} - -static ssize_t ib_ucm_send_info(struct ib_ucm_file *file, - const char __user *inbuf, int in_len, - int (*func)(struct ib_cm_id *cm_id, - int status, - const void *info, - u8 info_len, - const void *data, - u8 data_len)) -{ - struct ib_ucm_context *ctx; - struct ib_ucm_info cmd; - const void *data = NULL; - const void *info = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len); - if (result) - goto done; - - result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len); - if (result) - goto done; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = func(ctx->cm_id, cmd.status, info, cmd.info_len, - data, cmd.data_len); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(data); - kfree(info); - return result; -} - -static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej); -} - -static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr); -} - -static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_context *ctx; - struct ib_ucm_mra cmd; - const void *data = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); - if (result) - return result; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - - kfree(data); - return result; -} - -static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_ucm_context *ctx; - struct sa_path_rec *path = NULL; - struct ib_ucm_lap cmd; - const void *data = NULL; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); - if (result) - goto done; - - result = ib_ucm_path_get(&path, cmd.path); - if (result) - goto done; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(data); - kfree(path); - return result; -} - -static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_sidr_req_param param = {}; - struct ib_ucm_context *ctx; - struct ib_ucm_sidr_req cmd; - int result; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); - if (result) - goto 
done; - - result = ib_ucm_path_get(¶m.path, cmd.path); - if (result) - goto done; - - param.private_data_len = cmd.len; - param.service_id = cmd.sid; - param.timeout_ms = cmd.timeout; - param.max_cm_retries = cmd.max_cm_retries; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_sidr_req(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(param.private_data); - kfree(param.path); - return result; -} - -static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) -{ - struct ib_cm_sidr_rep_param param; - struct ib_ucm_sidr_rep cmd; - struct ib_ucm_context *ctx; - int result; - - param.info = NULL; - - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; - - result = ib_ucm_alloc_data(¶m.private_data, - cmd.data, cmd.data_len); - if (result) - goto done; - - result = ib_ucm_alloc_data(¶m.info, cmd.info, cmd.info_len); - if (result) - goto done; - - param.qp_num = cmd.qpn; - param.qkey = cmd.qkey; - param.status = cmd.status; - param.info_length = cmd.info_len; - param.private_data_len = cmd.data_len; - - ctx = ib_ucm_ctx_get(file, cmd.id); - if (!IS_ERR(ctx)) { - result = ib_send_cm_sidr_rep(ctx->cm_id, ¶m); - ib_ucm_ctx_put(ctx); - } else - result = PTR_ERR(ctx); - -done: - kfree(param.private_data); - kfree(param.info); - return result; -} - -static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, - const char __user *inbuf, - int in_len, int out_len) = { - [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id, - [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, - [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, - [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, - [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify, - [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, - [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, - [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, - [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq, - [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep, - [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej, - [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra, - [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap, - [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr, - [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req, - [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep, - [IB_USER_CM_CMD_EVENT] = ib_ucm_event, - [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr, -}; - -static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, - size_t len, loff_t *pos) -{ - struct ib_ucm_file *file = filp->private_data; - struct ib_ucm_cmd_hdr hdr; - ssize_t result; - - if (!ib_safe_file_access(filp)) { - pr_err_once("ucm_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", - task_tgid_vnr(current), current->comm); - return -EACCES; - } - - if (len < sizeof(hdr)) - return -EINVAL; - - if (copy_from_user(&hdr, buf, sizeof(hdr))) - return -EFAULT; - - if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) - return -EINVAL; - hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table)); - - if (hdr.in + sizeof(hdr) > len) - return -EINVAL; - - result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr), - hdr.in, hdr.out); - if (!result) - result = len; - - return result; -} - -static __poll_t ib_ucm_poll(struct file *filp, - struct poll_table_struct *wait) -{ - struct ib_ucm_file *file = filp->private_data; - __poll_t mask = 0; - - poll_wait(filp, &file->poll_wait, wait); - - if (!list_empty(&file->events)) - mask = EPOLLIN | EPOLLRDNORM; - - return mask; -} - -/* - * 
ib_ucm_open() does not need the BKL: - * - * - no global state is referred to; - * - there is no ioctl method to race against; - * - no further module initialization is required for open to work - * after the device is registered. - */ -static int ib_ucm_open(struct inode *inode, struct file *filp) -{ - struct ib_ucm_file *file; - - file = kmalloc(sizeof(*file), GFP_KERNEL); - if (!file) - return -ENOMEM; - - INIT_LIST_HEAD(&file->events); - INIT_LIST_HEAD(&file->ctxs); - init_waitqueue_head(&file->poll_wait); - - mutex_init(&file->file_mutex); - - filp->private_data = file; - file->filp = filp; - file->device = container_of(inode->i_cdev, struct ib_ucm_device, cdev); - - return stream_open(inode, filp); -} - -static int ib_ucm_close(struct inode *inode, struct file *filp) -{ - struct ib_ucm_file *file = filp->private_data; - struct ib_ucm_context *ctx; - - mutex_lock(&file->file_mutex); - while (!list_empty(&file->ctxs)) { - ctx = list_entry(file->ctxs.next, - struct ib_ucm_context, file_list); - mutex_unlock(&file->file_mutex); - - xa_erase(&ctx_id_table, ctx->id); - ib_destroy_cm_id(ctx->cm_id); - ib_ucm_cleanup_events(ctx); - kfree(ctx); - - mutex_lock(&file->file_mutex); - } - mutex_unlock(&file->file_mutex); - kfree(file); - return 0; -} - -static void ib_ucm_release_dev(struct device *dev) -{ - struct ib_ucm_device *ucm_dev; - - ucm_dev = container_of(dev, struct ib_ucm_device, dev); - kfree(ucm_dev); -} - -static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev) -{ - clear_bit(ucm_dev->devnum, dev_map); -} - -static const struct file_operations ucm_fops = { - .owner = THIS_MODULE, - .open = ib_ucm_open, - .release = ib_ucm_close, - .write = ib_ucm_write, - .poll = ib_ucm_poll, - .llseek = no_llseek, -}; - -static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct ib_ucm_device *ucm_dev; - - ucm_dev = container_of(dev, struct ib_ucm_device, dev); - return sprintf(buf, "%s\n", ucm_dev->ib_dev->name); -} -static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); - -static void ib_ucm_add_one(struct ib_device *device) -{ - int devnum; - dev_t base; - struct ib_ucm_device *ucm_dev; - - if (!device->ops.alloc_ucontext || !rdma_cap_ib_cm(device, 1)) - return; - - ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); - if (!ucm_dev) - return; - - device_initialize(&ucm_dev->dev); - ucm_dev->ib_dev = device; - ucm_dev->dev.release = ib_ucm_release_dev; - - devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); - if (devnum >= IB_UCM_MAX_DEVICES) - goto err; - ucm_dev->devnum = devnum; - set_bit(devnum, dev_map); - if (devnum >= IB_UCM_NUM_FIXED_MINOR) - base = dynamic_ucm_dev + devnum - IB_UCM_NUM_FIXED_MINOR; - else - base = IB_UCM_BASE_DEV + devnum; - - cdev_init(&ucm_dev->cdev, &ucm_fops); - ucm_dev->cdev.owner = THIS_MODULE; - kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); - - ucm_dev->dev.class = &cm_class; - ucm_dev->dev.parent = device->dev.parent; - ucm_dev->dev.devt = base; - - dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum); - if (cdev_device_add(&ucm_dev->cdev, &ucm_dev->dev)) - goto err_devnum; - - if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev)) - goto err_dev; - - ib_set_client_data(device, &ucm_client, ucm_dev); - return; - -err_dev: - cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev); -err_devnum: - ib_ucm_free_dev(ucm_dev); -err: - put_device(&ucm_dev->dev); - return; -} - -static void ib_ucm_remove_one(struct ib_device *device, void *client_data) -{ - struct ib_ucm_device *ucm_dev = client_data; - - if 
(!ucm_dev) - return; - - cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev); - ib_ucm_free_dev(ucm_dev); - put_device(&ucm_dev->dev); -} - -static CLASS_ATTR_STRING(abi_version, S_IRUGO, - __stringify(IB_USER_CM_ABI_VERSION)); - -static int __init ib_ucm_init(void) -{ - int ret; - - ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR, - "infiniband_cm"); - if (ret) { - pr_err("ucm: couldn't register device number\n"); - goto error1; - } - - ret = alloc_chrdev_region(&dynamic_ucm_dev, 0, IB_UCM_NUM_DYNAMIC_MINOR, - "infiniband_cm"); - if (ret) { - pr_err("ucm: couldn't register dynamic device number\n"); - goto err_alloc; - } - - ret = class_create_file(&cm_class, &class_attr_abi_version.attr); - if (ret) { - pr_err("ucm: couldn't create abi_version attribute\n"); - goto error2; - } - - ret = ib_register_client(&ucm_client); - if (ret) { - pr_err("ucm: couldn't register client\n"); - goto error3; - } - return 0; - -error3: - class_remove_file(&cm_class, &class_attr_abi_version.attr); -error2: - unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR); -err_alloc: - unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR); -error1: - return ret; -} - -static void __exit ib_ucm_cleanup(void) -{ - ib_unregister_client(&ucm_client); - class_remove_file(&cm_class, &class_attr_abi_version.attr); - unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR); - unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR); - WARN_ON(!xa_empty(&ctx_id_table)); -} - -module_init(ib_ucm_init); -module_exit(ib_ucm_cleanup); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 140a338a135f..0274e9b704be 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -52,6 +52,8 @@ #include <rdma/rdma_cm_ib.h> #include <rdma/ib_addr.h> #include <rdma/ib.h> +#include <rdma/rdma_netlink.h> +#include "core_priv.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); @@ -81,7 +83,7 @@ struct ucma_file { }; struct ucma_context { - int id; + u32 id; struct completion comp; atomic_t ref; int events_reported; @@ -94,7 +96,7 @@ struct ucma_context { struct list_head list; struct list_head mc_list; /* mark that device is in process of destroying the internal HW - * resources, protected by the global mut + * resources, protected by the ctx_table lock */ int closing; /* sync between removal event and id destroy, protected by file mut */ @@ -104,7 +106,7 @@ struct ucma_context { struct ucma_multicast { struct ucma_context *ctx; - int id; + u32 id; int events_reported; u64 uid; @@ -122,9 +124,8 @@ struct ucma_event { struct work_struct close_work; }; -static DEFINE_MUTEX(mut); -static DEFINE_IDR(ctx_idr); -static DEFINE_IDR(multicast_idr); +static DEFINE_XARRAY_ALLOC(ctx_table); +static DEFINE_XARRAY_ALLOC(multicast_table); static const struct file_operations ucma_fops; @@ -133,7 +134,7 @@ static inline struct ucma_context *_ucma_find_context(int id, { struct ucma_context *ctx; - ctx = idr_find(&ctx_idr, id); + ctx = xa_load(&ctx_table, id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file || !ctx->cm_id) @@ -145,7 +146,7 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) { struct ucma_context *ctx; - mutex_lock(&mut); + xa_lock(&ctx_table); ctx = _ucma_find_context(id, file); if (!IS_ERR(ctx)) { if (ctx->closing) @@ -153,7 +154,7 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) else atomic_inc(&ctx->ref); } - 
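The ucma.c conversion in this hunk replaces the driver's global "mut" mutex plus two IDRs with allocating XArrays (ctx_table and multicast_table); ID allocation and lookup then rely on the XArray's own locking. A minimal sketch of the pattern, with illustrative names only:

#include <linux/xarray.h>

/* Ids come from an allocating XArray, which carries its own internal lock,
 * so no separate global mutex is needed around allocation or lookup. */
static DEFINE_XARRAY_ALLOC(example_table);

static int example_store(struct ucma_context *ctx)
{
	return xa_alloc(&example_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL);
}

static struct ucma_context *example_lookup(u32 id)
{
	return xa_load(&example_table, id);	/* safe without the lock (RCU) */
}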
mutex_unlock(&mut); + xa_unlock(&ctx_table); return ctx; } @@ -216,10 +217,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) INIT_LIST_HEAD(&ctx->mc_list); ctx->file = file; - mutex_lock(&mut); - ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL); - mutex_unlock(&mut); - if (ctx->id < 0) + if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL)) goto error; list_add_tail(&ctx->list, &file->ctx_list); @@ -238,13 +236,10 @@ static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) if (!mc) return NULL; - mutex_lock(&mut); - mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL); - mutex_unlock(&mut); - if (mc->id < 0) + mc->ctx = ctx; + if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL)) goto error; - mc->ctx = ctx; list_add_tail(&mc->list, &ctx->mc_list); return mc; @@ -319,9 +314,9 @@ static void ucma_removal_event_handler(struct rdma_cm_id *cm_id) * handled separately below. */ if (ctx->cm_id == cm_id) { - mutex_lock(&mut); + xa_lock(&ctx_table); ctx->closing = 1; - mutex_unlock(&mut); + xa_unlock(&ctx_table); queue_work(ctx->file->close_wq, &ctx->close_work); return; } @@ -523,9 +518,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, err2: rdma_destroy_id(cm_id); err1: - mutex_lock(&mut); - idr_remove(&ctx_idr, ctx->id); - mutex_unlock(&mut); + xa_erase(&ctx_table, ctx->id); mutex_lock(&file->mut); list_del(&ctx->list); mutex_unlock(&file->mut); @@ -537,13 +530,13 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx) { struct ucma_multicast *mc, *tmp; - mutex_lock(&mut); + mutex_lock(&ctx->file->mut); list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { list_del(&mc->list); - idr_remove(&multicast_idr, mc->id); + xa_erase(&multicast_table, mc->id); kfree(mc); } - mutex_unlock(&mut); + mutex_unlock(&ctx->file->mut); } static void ucma_cleanup_mc_events(struct ucma_multicast *mc) @@ -614,11 +607,11 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - mutex_lock(&mut); + xa_lock(&ctx_table); ctx = _ucma_find_context(cmd.id, file); if (!IS_ERR(ctx)) - idr_remove(&ctx_idr, ctx->id); - mutex_unlock(&mut); + __xa_erase(&ctx_table, ctx->id); + xa_unlock(&ctx_table); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -630,14 +623,14 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, flush_workqueue(ctx->file->close_wq); /* At this point it's guaranteed that there is no inflight * closing task */ - mutex_lock(&mut); + xa_lock(&ctx_table); if (!ctx->closing) { - mutex_unlock(&mut); + xa_unlock(&ctx_table); ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); rdma_destroy_id(ctx->cm_id); } else { - mutex_unlock(&mut); + xa_unlock(&ctx_table); } resp.events_reported = ucma_free_ctx(ctx); @@ -951,8 +944,7 @@ static ssize_t ucma_query_path(struct ucma_context *ctx, } } - if (copy_to_user(response, resp, - sizeof(*resp) + (i * sizeof(struct ib_path_rec_data)))) + if (copy_to_user(response, resp, struct_size(resp, path_data, i))) ret = -EFAULT; kfree(resp); @@ -1432,9 +1424,7 @@ static ssize_t ucma_process_join(struct ucma_file *file, goto err3; } - mutex_lock(&mut); - idr_replace(&multicast_idr, mc, mc->id); - mutex_unlock(&mut); + xa_store(&multicast_table, mc->id, mc, 0); mutex_unlock(&file->mut); ucma_put_ctx(ctx); @@ -1444,9 +1434,7 @@ err3: rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); ucma_cleanup_mc_events(mc); err2: - mutex_lock(&mut); - 
idr_remove(&multicast_idr, mc->id); - mutex_unlock(&mut); + xa_erase(&multicast_table, mc->id); list_del(&mc->list); kfree(mc); err1: @@ -1508,8 +1496,8 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - mutex_lock(&mut); - mc = idr_find(&multicast_idr, cmd.id); + xa_lock(&multicast_table); + mc = xa_load(&multicast_table, cmd.id); if (!mc) mc = ERR_PTR(-ENOENT); else if (mc->ctx->file != file) @@ -1517,8 +1505,8 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, else if (!atomic_inc_not_zero(&mc->ctx->ref)) mc = ERR_PTR(-ENXIO); else - idr_remove(&multicast_idr, mc->id); - mutex_unlock(&mut); + __xa_erase(&multicast_table, mc->id); + xa_unlock(&multicast_table); if (IS_ERR(mc)) { ret = PTR_ERR(mc); @@ -1615,14 +1603,14 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, * events being added before existing events. */ ucma_lock_files(cur_file, new_file); - mutex_lock(&mut); + xa_lock(&ctx_table); list_move_tail(&ctx->list, &new_file->ctx_list); ucma_move_events(ctx, new_file); ctx->file = new_file; resp.events_reported = ctx->events_reported; - mutex_unlock(&mut); + xa_unlock(&ctx_table); ucma_unlock_files(cur_file, new_file); response: @@ -1757,18 +1745,15 @@ static int ucma_close(struct inode *inode, struct file *filp) ctx->destroying = 1; mutex_unlock(&file->mut); - mutex_lock(&mut); - idr_remove(&ctx_idr, ctx->id); - mutex_unlock(&mut); - + xa_erase(&ctx_table, ctx->id); flush_workqueue(file->close_wq); /* At that step once ctx was marked as destroying and workqueue * was flushed we are safe from any inflights handlers that * might put other closing task. */ - mutex_lock(&mut); + xa_lock(&ctx_table); if (!ctx->closing) { - mutex_unlock(&mut); + xa_unlock(&ctx_table); ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); /* rdma_destroy_id ensures that no event handlers are @@ -1776,7 +1761,7 @@ static int ucma_close(struct inode *inode, struct file *filp) */ rdma_destroy_id(ctx->cm_id); } else { - mutex_unlock(&mut); + xa_unlock(&ctx_table); } ucma_free_ctx(ctx); @@ -1805,6 +1790,19 @@ static struct miscdevice ucma_misc = { .fops = &ucma_fops, }; +static int ucma_get_global_nl_info(struct ib_client_nl_info *res) +{ + res->abi = RDMA_USER_CM_ABI_VERSION; + res->cdev = ucma_misc.this_device; + return 0; +} + +static struct ib_client rdma_cma_client = { + .name = "rdma_cm", + .get_global_nl_info = ucma_get_global_nl_info, +}; +MODULE_ALIAS_RDMA_CLIENT("rdma_cm"); + static ssize_t show_abi_version(struct device *dev, struct device_attribute *attr, char *buf) @@ -1833,7 +1831,14 @@ static int __init ucma_init(void) ret = -ENOMEM; goto err2; } + + ret = ib_register_client(&rdma_cma_client); + if (ret) + goto err3; + return 0; +err3: + unregister_net_sysctl_table(ucma_ctl_table_hdr); err2: device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); err1: @@ -1843,11 +1848,10 @@ err1: static void __exit ucma_cleanup(void) { + ib_unregister_client(&rdma_cma_client); unregister_net_sysctl_table(ucma_ctl_table_hdr); device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); - idr_destroy(&ctx_idr); - idr_destroy(&multicast_idr); } module_init(ucma_init); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index e7ea819fcb11..08da840ed7ee 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -54,9 +54,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d 
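
The ucma.c hunks above replace the single global `mut` mutex plus two IDRs with self-locking XArrays, so id allocation, lookup and removal no longer serialize on one lock. A minimal sketch of the xa_alloc()/xa_load()/xa_erase() pattern being adopted, using invented example_* names rather than anything from the patch:

#include <linux/types.h>
#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(example_table);

struct example_ctx {
	u32 id;
};

static int example_ctx_register(struct example_ctx *ctx)
{
	/* Picks a free 32-bit id and stores ctx under it; the xarray takes
	 * its own spinlock internally, so no external mutex is required.
	 */
	return xa_alloc(&example_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL);
}

static struct example_ctx *example_ctx_lookup(u32 id)
{
	return xa_load(&example_table, id);
}

static void example_ctx_unregister(struct example_ctx *ctx)
{
	xa_erase(&example_table, ctx->id);
}

Where lookup and removal must be atomic, as in ucma_destroy_id() and ucma_leave_multicast() above, the patch holds xa_lock() across both steps and uses the __xa_erase() variant inside the locked region.
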
for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) { page = sg_page_iter_page(&sg_iter); - if (!PageDirty(page) && umem->writable && dirty) - set_page_dirty_lock(page); - put_page(page); + if (umem->writable && dirty) + put_user_pages_dirty_lock(&page, 1); + else + put_user_page(page); } sg_free_table(&umem->sg_head); @@ -244,7 +245,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, umem->context = context; umem->length = size; umem->address = addr; - umem->page_shift = PAGE_SHIFT; umem->writable = ib_access_writable(access); umem->owning_mm = mm = current->mm; mmgrab(mm); @@ -361,6 +361,9 @@ static void __ib_umem_release_tail(struct ib_umem *umem) */ void ib_umem_release(struct ib_umem *umem) { + if (!umem) + return; + if (umem->is_odp) { ib_umem_odp_release(to_ib_umem_odp(umem)); __ib_umem_release_tail(umem); @@ -385,7 +388,7 @@ int ib_umem_page_count(struct ib_umem *umem) n = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) - n += sg_dma_len(sg) >> umem->page_shift; + n += sg_dma_len(sg) >> PAGE_SHIFT; return n; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index f962b5bbfa40..2a75c6f8d827 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -59,7 +59,7 @@ static u64 node_start(struct umem_odp_node *n) struct ib_umem_odp *umem_odp = container_of(n, struct ib_umem_odp, interval_tree); - return ib_umem_start(&umem_odp->umem); + return ib_umem_start(umem_odp); } /* Note that the representation of the intervals in the interval tree @@ -72,7 +72,7 @@ static u64 node_last(struct umem_odp_node *n) struct ib_umem_odp *umem_odp = container_of(n, struct ib_umem_odp, interval_tree); - return ib_umem_end(&umem_odp->umem) - 1; + return ib_umem_end(umem_odp) - 1; } INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, @@ -107,8 +107,6 @@ static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp) static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp, u64 start, u64 end, void *cookie) { - struct ib_umem *umem = &umem_odp->umem; - /* * Increase the number of notifiers running, to * prevent any further fault handling on this MR. @@ -119,8 +117,8 @@ static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp, * all pending page faults. 
*/ smp_wmb(); complete_all(&umem_odp->notifier_completion); - umem->context->invalidate_range(umem_odp, ib_umem_start(umem), - ib_umem_end(umem)); + umem_odp->umem.context->invalidate_range( + umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); return 0; } @@ -151,6 +149,7 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, { struct ib_ucontext_per_mm *per_mm = container_of(mn, struct ib_ucontext_per_mm, mn); + int rc; if (mmu_notifier_range_blockable(range)) down_read(&per_mm->umem_rwsem); @@ -167,11 +166,14 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, return 0; } - return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, - range->end, - invalidate_range_start_trampoline, - mmu_notifier_range_blockable(range), - NULL); + rc = rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, + range->end, + invalidate_range_start_trampoline, + mmu_notifier_range_blockable(range), + NULL); + if (rc) + up_read(&per_mm->umem_rwsem); + return rc; } static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start, @@ -205,10 +207,9 @@ static const struct mmu_notifier_ops ib_umem_notifiers = { static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp) { struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - struct ib_umem *umem = &umem_odp->umem; down_write(&per_mm->umem_rwsem); - if (likely(ib_umem_start(umem) != ib_umem_end(umem))) + if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) rbt_ib_umem_insert(&umem_odp->interval_tree, &per_mm->umem_tree); up_write(&per_mm->umem_rwsem); @@ -217,10 +218,9 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp) static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp) { struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - struct ib_umem *umem = &umem_odp->umem; down_write(&per_mm->umem_rwsem); - if (likely(ib_umem_start(umem) != ib_umem_end(umem))) + if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) rbt_ib_umem_remove(&umem_odp->interval_tree, &per_mm->umem_tree); complete_all(&umem_odp->notifier_completion); @@ -351,7 +351,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root, umem->context = ctx; umem->length = size; umem->address = addr; - umem->page_shift = PAGE_SHIFT; + odp_data->page_shift = PAGE_SHIFT; umem->writable = root->umem.writable; umem->is_odp = 1; odp_data->per_mm = per_mm; @@ -405,18 +405,19 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) struct mm_struct *mm = umem->owning_mm; int ret_val; + umem_odp->page_shift = PAGE_SHIFT; if (access & IB_ACCESS_HUGETLB) { struct vm_area_struct *vma; struct hstate *h; down_read(&mm->mmap_sem); - vma = find_vma(mm, ib_umem_start(umem)); + vma = find_vma(mm, ib_umem_start(umem_odp)); if (!vma || !is_vm_hugetlb_page(vma)) { up_read(&mm->mmap_sem); return -EINVAL; } h = hstate_vma(vma); - umem->page_shift = huge_page_shift(h); + umem_odp->page_shift = huge_page_shift(h); up_read(&mm->mmap_sem); } @@ -424,16 +425,16 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) init_completion(&umem_odp->notifier_completion); - if (ib_umem_num_pages(umem)) { + if (ib_umem_odp_num_pages(umem_odp)) { umem_odp->page_list = vzalloc(array_size(sizeof(*umem_odp->page_list), - ib_umem_num_pages(umem))); + ib_umem_odp_num_pages(umem_odp))); if (!umem_odp->page_list) return -ENOMEM; umem_odp->dma_list = vzalloc(array_size(sizeof(*umem_odp->dma_list), - ib_umem_num_pages(umem))); + ib_umem_odp_num_pages(umem_odp))); if (!umem_odp->dma_list) { 
ret_val = -ENOMEM; goto out_page_list; @@ -456,16 +457,14 @@ out_page_list: void ib_umem_odp_release(struct ib_umem_odp *umem_odp) { - struct ib_umem *umem = &umem_odp->umem; - /* * Ensure that no more pages are mapped in the umem. * * It is the driver's responsibility to ensure, before calling us, * that the hardware will not attempt to access the MR any more. */ - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem), - ib_umem_end(umem)); + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); remove_umem_from_per_mm(umem_odp); put_per_mm(umem_odp); @@ -487,7 +486,7 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) * The function returns -EFAULT if the DMA mapping operation fails. It returns * -EAGAIN if a concurrent invalidation prevents us from updating the page. * - * The page is released via put_page even if the operation failed. For + * The page is released via put_user_page even if the operation failed. For * on-demand pinning, the page is released whenever it isn't stored in the * umem. */ @@ -498,8 +497,8 @@ static int ib_umem_odp_map_dma_single_page( u64 access_mask, unsigned long current_seq) { - struct ib_umem *umem = &umem_odp->umem; - struct ib_device *dev = umem->context->device; + struct ib_ucontext *context = umem_odp->umem.context; + struct ib_device *dev = context->device; dma_addr_t dma_addr; int remove_existing_mapping = 0; int ret = 0; @@ -514,10 +513,9 @@ static int ib_umem_odp_map_dma_single_page( goto out; } if (!(umem_odp->dma_list[page_index])) { - dma_addr = ib_dma_map_page(dev, - page, - 0, BIT(umem->page_shift), - DMA_BIDIRECTIONAL); + dma_addr = + ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift), + DMA_BIDIRECTIONAL); if (ib_dma_mapping_error(dev, dma_addr)) { ret = -EFAULT; goto out; @@ -536,15 +534,16 @@ static int ib_umem_odp_map_dma_single_page( } out: - put_page(page); + put_user_page(page); if (remove_existing_mapping) { ib_umem_notifier_start_account(umem_odp); - umem->context->invalidate_range( + context->invalidate_range( umem_odp, - ib_umem_start(umem) + (page_index << umem->page_shift), - ib_umem_start(umem) + - ((page_index + 1) << umem->page_shift)); + ib_umem_start(umem_odp) + + (page_index << umem_odp->page_shift), + ib_umem_start(umem_odp) + + ((page_index + 1) << umem_odp->page_shift)); ib_umem_notifier_end_account(umem_odp); ret = -EAGAIN; } @@ -581,27 +580,26 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, u64 bcnt, u64 access_mask, unsigned long current_seq) { - struct ib_umem *umem = &umem_odp->umem; struct task_struct *owning_process = NULL; struct mm_struct *owning_mm = umem_odp->umem.owning_mm; struct page **local_page_list = NULL; u64 page_mask, off; - int j, k, ret = 0, start_idx, npages = 0, page_shift; - unsigned int flags = 0; + int j, k, ret = 0, start_idx, npages = 0; + unsigned int flags = 0, page_shift; phys_addr_t p = 0; if (access_mask == 0) return -EINVAL; - if (user_virt < ib_umem_start(umem) || - user_virt + bcnt > ib_umem_end(umem)) + if (user_virt < ib_umem_start(umem_odp) || + user_virt + bcnt > ib_umem_end(umem_odp)) return -EFAULT; local_page_list = (struct page **)__get_free_page(GFP_KERNEL); if (!local_page_list) return -ENOMEM; - page_shift = umem->page_shift; + page_shift = umem_odp->page_shift; page_mask = ~(BIT(page_shift) - 1); off = user_virt & (~page_mask); user_virt = user_virt & page_mask; @@ -621,7 +619,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, if (access_mask & 
ODP_WRITE_ALLOWED_BIT) flags |= FOLL_WRITE; - start_idx = (user_virt - ib_umem_start(umem)) >> page_shift; + start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift; k = start_idx; while (bcnt > 0) { @@ -659,7 +657,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, ret = -EFAULT; break; } - put_page(local_page_list[j]); + put_user_page(local_page_list[j]); continue; } @@ -686,8 +684,8 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, * ib_umem_odp_map_dma_single_page(). */ if (npages - (j + 1) > 0) - release_pages(&local_page_list[j+1], - npages - (j + 1)); + put_user_pages(&local_page_list[j+1], + npages - (j + 1)); break; } } @@ -711,21 +709,20 @@ EXPORT_SYMBOL(ib_umem_odp_map_dma_pages); void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, u64 bound) { - struct ib_umem *umem = &umem_odp->umem; int idx; u64 addr; - struct ib_device *dev = umem->context->device; + struct ib_device *dev = umem_odp->umem.context->device; - virt = max_t(u64, virt, ib_umem_start(umem)); - bound = min_t(u64, bound, ib_umem_end(umem)); + virt = max_t(u64, virt, ib_umem_start(umem_odp)); + bound = min_t(u64, bound, ib_umem_end(umem_odp)); /* Note that during the run of this function, the * notifiers_count of the MR is > 0, preventing any racing * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ mutex_lock(&umem_odp->umem_mutex); - for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) { - idx = (addr - ib_umem_start(umem)) >> umem->page_shift; + for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) { + idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; if (umem_odp->page_list[idx]) { struct page *page = umem_odp->page_list[idx]; dma_addr_t dma = umem_odp->dma_list[idx]; @@ -733,7 +730,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, WARN_ON(!dma_addr); - ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE, + ib_dma_unmap_page(dev, dma_addr, + BIT(umem_odp->page_shift), DMA_BIDIRECTIONAL); if (dma & ODP_WRITE_ALLOWED_BIT) { struct page *head_page = compound_head(page); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 671f07ba1fad..9f8a48016b41 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -54,6 +54,7 @@ #include <rdma/ib_mad.h> #include <rdma/ib_user_mad.h> +#include <rdma/rdma_netlink.h> #include "core_priv.h" @@ -744,7 +745,7 @@ found: "process %s did not enable P_Key index support.\n", current->comm); dev_warn(&file->port->dev, - " Documentation/infiniband/user_mad.txt has info on the new ABI.\n"); + " Documentation/infiniband/user_mad.rst has info on the new ABI.\n"); } } @@ -1124,11 +1125,48 @@ static const struct file_operations umad_sm_fops = { .llseek = no_llseek, }; +static int ib_umad_get_nl_info(struct ib_device *ibdev, void *client_data, + struct ib_client_nl_info *res) +{ + struct ib_umad_device *umad_dev = client_data; + + if (!rdma_is_port_valid(ibdev, res->port)) + return -EINVAL; + + res->abi = IB_USER_MAD_ABI_VERSION; + res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].dev; + + return 0; +} + static struct ib_client umad_client = { .name = "umad", .add = ib_umad_add_one, - .remove = ib_umad_remove_one + .remove = ib_umad_remove_one, + .get_nl_info = ib_umad_get_nl_info, }; +MODULE_ALIAS_RDMA_CLIENT("umad"); + +static int ib_issm_get_nl_info(struct ib_device *ibdev, void 
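
The umem.c and umem_odp.c hunks above convert every release of a page pinned through get_user_pages() from put_page()/release_pages() to the put_user_page() family, keeping the pinned-page accounting balanced, and they also move page_shift from struct ib_umem into struct ib_umem_odp, its only remaining user. A minimal sketch of the new release pattern, with an invented helper name and assuming the pages were pinned by a GUP variant earlier:

#include <linux/mm.h>

static void example_release_pinned(struct page **pages, unsigned long npages,
				   bool writable)
{
	if (writable)
		/* marks each page dirty under the page lock, then drops the pin */
		put_user_pages_dirty_lock(pages, npages);
	else
		put_user_pages(pages, npages);
}

In this kernel the dirty-marking variant takes just the page array and count, so the old set_page_dirty_lock() plus put_page() pair becomes a single call, matching what __ib_umem_release() now does above.
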
*client_data, + struct ib_client_nl_info *res) +{ + struct ib_umad_device *umad_dev = + ib_get_client_data(ibdev, &umad_client); + + if (!rdma_is_port_valid(ibdev, res->port)) + return -EINVAL; + + res->abi = IB_USER_MAD_ABI_VERSION; + res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].sm_dev; + + return 0; +} + +static struct ib_client issm_client = { + .name = "issm", + .get_nl_info = ib_issm_get_nl_info, +}; +MODULE_ALIAS_RDMA_CLIENT("issm"); static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1387,13 +1425,17 @@ static int __init ib_umad_init(void) } ret = ib_register_client(&umad_client); - if (ret) { - pr_err("couldn't register ib_umad client\n"); + if (ret) goto out_class; - } + + ret = ib_register_client(&issm_client); + if (ret) + goto out_client; return 0; +out_client: + ib_unregister_client(&umad_client); out_class: class_unregister(&umad_class); @@ -1411,6 +1453,7 @@ out: static void __exit ib_umad_cleanup(void) { + ib_unregister_client(&issm_client); ib_unregister_client(&umad_client); class_unregister(&umad_class); unregister_chrdev_region(base_umad_dev, diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 63fe14c7c68f..7ddd0e5bc6b3 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -756,7 +756,9 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_USER; mr->dm = NULL; + mr->sig_attrs = NULL; mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->res.type = RDMA_RESTRACK_MR; @@ -1021,12 +1023,11 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, attr.comp_vector = cmd->comp_vector; attr.flags = cmd->flags; - cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata); - if (IS_ERR(cq)) { - ret = PTR_ERR(cq); + cq = rdma_zalloc_drv_obj(ib_dev, ib_cq); + if (!cq) { + ret = -ENOMEM; goto err_file; } - cq->device = ib_dev; cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; @@ -1034,6 +1035,10 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; atomic_set(&cq->usecnt, 0); + ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); + if (ret) + goto err_free; + obj->uobject.object = cq; memset(&resp, 0, sizeof resp); resp.base.cq_handle = obj->uobject.id; @@ -1054,7 +1059,9 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, err_cb: ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); - + cq = NULL; +err_free: + kfree(cq); err_file: if (ev_file) ib_uverbs_release_ucq(attrs->ufile, ev_file, obj); @@ -2541,7 +2548,7 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs) struct ib_uqp_object *obj; struct ib_qp *qp; struct ib_uverbs_mcast_entry *mcast; - int ret = -EINVAL; + int ret; bool found = false; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); @@ -3715,9 +3722,6 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) * trailing driver_data flex array. In this case the size of the base struct * cannot be changed. 
*/ -#define offsetof_after(_struct, _member) \ - (offsetof(_struct, _member) + sizeof(((_struct *)NULL)->_member)) - #define UAPI_DEF_WRITE_IO(req, resp) \ .write.has_resp = 1 + \ BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \ @@ -3748,11 +3752,11 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) */ #define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \ .write.has_resp = 1, \ - .write.req_size = offsetof_after(req, req_last_member), \ - .write.resp_size = offsetof_after(resp, resp_last_member) + .write.req_size = offsetofend(req, req_last_member), \ + .write.resp_size = offsetofend(resp, resp_last_member) #define UAPI_DEF_WRITE_I_EX(req, req_last_member) \ - .write.req_size = offsetof_after(req, req_last_member) + .write.req_size = offsetofend(req, req_last_member) const struct uapi_definition uverbs_def_write_intf[] = { DECLARE_UVERBS_OBJECT( diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 84a5e9a6d483..11c13c1381cf 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -51,6 +51,7 @@ #include <rdma/ib.h> #include <rdma/uverbs_std_types.h> +#include <rdma/rdma_netlink.h> #include "uverbs.h" #include "core_priv.h" @@ -198,7 +199,7 @@ void ib_uverbs_release_file(struct kref *ref) ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); if (ib_dev && !ib_dev->ops.disassociate_ucontext) - module_put(ib_dev->owner); + module_put(ib_dev->ops.owner); srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); if (atomic_dec_and_test(&file->device->refcount)) @@ -1065,7 +1066,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) module_dependent = !(ib_dev->ops.disassociate_ucontext); if (module_dependent) { - if (!try_module_get(ib_dev->owner)) { + if (!try_module_get(ib_dev->ops.owner)) { ret = -ENODEV; goto err; } @@ -1100,7 +1101,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) return stream_open(inode, filp); err_module: - module_put(ib_dev->owner); + module_put(ib_dev->ops.owner); err: mutex_unlock(&dev->lists_mutex); @@ -1148,12 +1149,41 @@ static const struct file_operations uverbs_mmap_fops = { .compat_ioctl = ib_uverbs_ioctl, }; +static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data, + struct ib_client_nl_info *res) +{ + struct ib_uverbs_device *uverbs_dev = client_data; + int ret; + + if (res->port != -1) + return -EINVAL; + + res->abi = ibdev->ops.uverbs_abi_ver; + res->cdev = &uverbs_dev->dev; + + /* + * To support DRIVER_ID binding in userspace some of the driver need + * upgrading to expose their PCI dependent revision information + * through get_context instead of relying on modalias matching. When + * the drivers are fixed they can drop this flag. 
+ */ + if (!ibdev->ops.uverbs_no_driver_id_binding) { + ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID, + ibdev->ops.driver_id); + if (ret) + return ret; + } + return 0; +} + static struct ib_client uverbs_client = { .name = "uverbs", .no_kverbs_req = true, .add = ib_uverbs_add_one, - .remove = ib_uverbs_remove_one + .remove = ib_uverbs_remove_one, + .get_nl_info = ib_uverbs_get_nl_info, }; +MODULE_ALIAS_RDMA_CLIENT("uverbs"); static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, char *buf) @@ -1186,7 +1216,7 @@ static ssize_t abi_version_show(struct device *device, srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) - ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver); + ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 07ea4e3c4566..e39fe6a8aac4 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -111,9 +111,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); - cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata); - if (IS_ERR(cq)) { - ret = PTR_ERR(cq); + cq = rdma_zalloc_drv_obj(ib_dev, ib_cq); + if (!cq) { + ret = -ENOMEM; goto err_event_file; } @@ -122,10 +122,15 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; - obj->uobject.object = cq; - obj->uobject.user_handle = user_handle; atomic_set(&cq->usecnt, 0); cq->res.type = RDMA_RESTRACK_CQ; + + ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); + if (ret) + goto err_free; + + obj->uobject.object = cq; + obj->uobject.user_handle = user_handle; rdma_restrack_uadd(&cq->res); ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, @@ -136,7 +141,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( return 0; err_cq: ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); - + cq = NULL; +err_free: + kfree(cq); err_event_file: if (ev_file) uverbs_uobject_put(ev_file_uobj); diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 997f7a3a558a..c1286a52dc84 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -128,6 +128,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_DM; mr->dm = dm; mr->uobject = uobj; atomic_inc(&pd->usecnt); diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 7a987acf0c0b..00c547887132 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -22,6 +22,8 @@ static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size) return ERR_PTR(-EOVERFLOW); elm = kzalloc(alloc_size, GFP_KERNEL); + if (!elm) + return ERR_PTR(-ENOMEM); rc = radix_tree_insert(&uapi->radix, key, elm); if (rc) { kfree(elm); @@ -645,7 +647,7 @@ struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev) return ERR_PTR(-ENOMEM); INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL); - uapi->driver_id = ibdev->driver_id; + uapi->driver_id = ibdev->ops.driver_id; rc = 
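
Together with the rdma_cm, umad and issm hunks earlier, the uverbs change above gives every user-facing client a get_nl_info() (or get_global_nl_info()) callback plus a MODULE_ALIAS_RDMA_CLIENT() alias; this is what lets the new nldev query report each client's char device and ABI and lets userspace autoload the providing module on demand. A condensed sketch of what a client supplies, using an invented "example" client; like the in-core clients it relies on core_priv.h for the struct ib_client_nl_info definition, as the ucma.c hunk above does:

#include <linux/module.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"			/* struct ib_client_nl_info */

#define EXAMPLE_ABI_VERSION 1		/* hypothetical ABI number */

static struct device *example_cdev;	/* hypothetical char device, set at add time */

static int example_get_nl_info(struct ib_device *ibdev, void *client_data,
			       struct ib_client_nl_info *res)
{
	if (!rdma_is_port_valid(ibdev, res->port))
		return -EINVAL;

	res->abi = EXAMPLE_ABI_VERSION;
	res->cdev = example_cdev;
	return 0;
}

static struct ib_client example_client = {
	.name = "example",
	.get_nl_info = example_get_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("example");

Device-independent clients such as rdma_cm implement get_global_nl_info() instead, since they are not bound to a particular ib_device or port.
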
uapi_merge_def(uapi, ibdev, uverbs_core_api, false); if (rc) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e666a1f7608d..92349bf37589 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -209,7 +209,7 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) EXPORT_SYMBOL(ib_rate_to_mbps); __attribute_const__ enum rdma_transport_type -rdma_node_get_transport(enum rdma_node_type node_type) +rdma_node_get_transport(unsigned int node_type) { if (node_type == RDMA_NODE_USNIC) @@ -299,6 +299,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_DMA; mr->uobject = NULL; mr->need_inval = false; @@ -316,7 +317,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, EXPORT_SYMBOL(__ib_alloc_pd); /** - * ib_dealloc_pd - Deallocates a protection domain. + * ib_dealloc_pd_user - Deallocates a protection domain. * @pd: The protection domain to deallocate. * @udata: Valid user data or NULL for kernel object * @@ -1157,6 +1158,10 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, qp_init_attr->cap.max_recv_sge)) return ERR_PTR(-EINVAL); + if ((qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) && + !(device->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER)) + return ERR_PTR(-EINVAL); + /* * If the callers is using the RDMA API calculate the resources * needed for the RDMA READ/WRITE operations. @@ -1232,6 +1237,8 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, qp->max_write_sge = qp_init_attr->cap.max_send_sge; qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge, device->attrs.max_sge_rd); + if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) + qp->integrity_en = true; return qp; @@ -1683,6 +1690,14 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, } } + /* + * Bind this qp to a counter automatically based on the rdma counter + * rules. 
This only set in RST2INIT with port specified + */ + if (!qp->counter && (attr_mask & IB_QP_PORT) && + ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT)) + rdma_counter_bind_qp_auto(qp, attr->port_num); + ret = ib_security_modify_qp(qp, attr, attr_mask, udata); if (ret) goto out; @@ -1878,6 +1893,7 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata) if (!qp->uobject) rdma_rw_cleanup_mrs(qp); + rdma_counter_unbind_qp(qp, true); rdma_restrack_del(&qp->res); ret = qp->device->ops.destroy_qp(qp, udata); if (!ret) { @@ -1916,21 +1932,28 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, const char *caller) { struct ib_cq *cq; + int ret; + + cq = rdma_zalloc_drv_obj(device, ib_cq); + if (!cq) + return ERR_PTR(-ENOMEM); - cq = device->ops.create_cq(device, cq_attr, NULL); - - if (!IS_ERR(cq)) { - cq->device = device; - cq->uobject = NULL; - cq->comp_handler = comp_handler; - cq->event_handler = event_handler; - cq->cq_context = cq_context; - atomic_set(&cq->usecnt, 0); - cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_set_task(&cq->res, caller); - rdma_restrack_kadd(&cq->res); + cq->device = device; + cq->uobject = NULL; + cq->comp_handler = comp_handler; + cq->event_handler = event_handler; + cq->cq_context = cq_context; + atomic_set(&cq->usecnt, 0); + cq->res.type = RDMA_RESTRACK_CQ; + rdma_restrack_set_task(&cq->res, caller); + + ret = device->ops.create_cq(cq, cq_attr, NULL); + if (ret) { + kfree(cq); + return ERR_PTR(ret); } + rdma_restrack_kadd(&cq->res); return cq; } EXPORT_SYMBOL(__ib_create_cq); @@ -1949,7 +1972,9 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) return -EBUSY; rdma_restrack_del(&cq->res); - return cq->device->ops.destroy_cq(cq, udata); + cq->device->ops.destroy_cq(cq, udata); + kfree(cq); + return 0; } EXPORT_SYMBOL(ib_destroy_cq_user); @@ -1966,6 +1991,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) { struct ib_pd *pd = mr->pd; struct ib_dm *dm = mr->dm; + struct ib_sig_attrs *sig_attrs = mr->sig_attrs; int ret; rdma_restrack_del(&mr->res); @@ -1974,6 +2000,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) atomic_dec(&pd->usecnt); if (dm) atomic_dec(&dm->usecnt); + kfree(sig_attrs); } return ret; @@ -1981,7 +2008,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) EXPORT_SYMBOL(ib_dereg_mr_user); /** - * ib_alloc_mr() - Allocates a memory region + * ib_alloc_mr_user() - Allocates a memory region * @pd: protection domain associated with the region * @mr_type: memory region type * @max_num_sg: maximum sg entries available for registration. 
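
The _ib_modify_qp() hunk above auto-binds newly created QPs to rdma statistic counters during the RESET-to-INIT transition, and ib_create_qp_user() now refuses IB_QP_CREATE_INTEGRITY_EN unless the device advertises IB_DEVICE_INTEGRITY_HANDOVER, latching the result into qp->integrity_en. From a consumer's point of view the capability check looks roughly like this sketch (the example_ helper is invented):

#include <rdma/ib_verbs.h>

/* Create a QP that will post PI (integrity) work requests when the HCA can
 * do the signature handover, and a plain QP otherwise.
 */
static struct ib_qp *example_create_qp(struct ib_pd *pd,
				       struct ib_qp_init_attr *attr)
{
	if (pd->device->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER)
		attr->create_flags |= IB_QP_CREATE_INTEGRITY_EN;

	return ib_create_qp(pd, attr);
}

Checking the capability up front avoids the -EINVAL that ib_create_qp_user() now returns for the unsupported combination.
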
@@ -2001,6 +2028,9 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, if (!pd->device->ops.alloc_mr) return ERR_PTR(-EOPNOTSUPP); + if (WARN_ON_ONCE(mr_type == IB_MR_TYPE_INTEGRITY)) + return ERR_PTR(-EINVAL); + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); if (!IS_ERR(mr)) { mr->device = pd->device; @@ -2011,12 +2041,66 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, mr->need_inval = false; mr->res.type = RDMA_RESTRACK_MR; rdma_restrack_kadd(&mr->res); + mr->type = mr_type; + mr->sig_attrs = NULL; } return mr; } EXPORT_SYMBOL(ib_alloc_mr_user); +/** + * ib_alloc_mr_integrity() - Allocates an integrity memory region + * @pd: protection domain associated with the region + * @max_num_data_sg: maximum data sg entries available for registration + * @max_num_meta_sg: maximum metadata sg entries available for + * registration + * + * Notes: + * Memory registration page/sg lists must not exceed max_num_sg, + * also the integrity page/sg lists must not exceed max_num_meta_sg. + * + */ +struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_data_sg, + u32 max_num_meta_sg) +{ + struct ib_mr *mr; + struct ib_sig_attrs *sig_attrs; + + if (!pd->device->ops.alloc_mr_integrity || + !pd->device->ops.map_mr_sg_pi) + return ERR_PTR(-EOPNOTSUPP); + + if (!max_num_meta_sg) + return ERR_PTR(-EINVAL); + + sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL); + if (!sig_attrs) + return ERR_PTR(-ENOMEM); + + mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg, + max_num_meta_sg); + if (IS_ERR(mr)) { + kfree(sig_attrs); + return mr; + } + + mr->device = pd->device; + mr->pd = pd; + mr->dm = NULL; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + mr->need_inval = false; + mr->res.type = RDMA_RESTRACK_MR; + rdma_restrack_kadd(&mr->res); + mr->type = IB_MR_TYPE_INTEGRITY; + mr->sig_attrs = sig_attrs; + + return mr; +} +EXPORT_SYMBOL(ib_alloc_mr_integrity); + /* "Fast" memory regions */ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, @@ -2226,19 +2310,17 @@ EXPORT_SYMBOL(ib_create_wq); */ int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { - int err; struct ib_cq *cq = wq->cq; struct ib_pd *pd = wq->pd; if (atomic_read(&wq->usecnt)) return -EBUSY; - err = wq->device->ops.destroy_wq(wq, udata); - if (!err) { - atomic_dec(&pd->usecnt); - atomic_dec(&cq->usecnt); - } - return err; + wq->device->ops.destroy_wq(wq, udata); + atomic_dec(&pd->usecnt); + atomic_dec(&cq->usecnt); + + return 0; } EXPORT_SYMBOL(ib_destroy_wq); @@ -2376,6 +2458,43 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, EXPORT_SYMBOL(ib_set_vf_guid); /** + * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection + * information) and set an appropriate memory region for registration. + * @mr: memory region + * @data_sg: dma mapped scatterlist for data + * @data_sg_nents: number of entries in data_sg + * @data_sg_offset: offset in bytes into data_sg + * @meta_sg: dma mapped scatterlist for metadata + * @meta_sg_nents: number of entries in meta_sg + * @meta_sg_offset: offset in bytes into meta_sg + * @page_size: page vector desired page size + * + * Constraints: + * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY. + * + * Return: 0 on success. + * + * After this completes successfully, the memory region + * is ready for registration. 
+ */ +int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset, unsigned int page_size) +{ + if (unlikely(!mr->device->ops.map_mr_sg_pi || + WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY))) + return -EOPNOTSUPP; + + mr->page_size = page_size; + + return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, + meta_sg_nents, meta_sg_offset); +} +EXPORT_SYMBOL(ib_map_mr_sg_pi); + +/** * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list * and set it the memory region. * @mr: memory region diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 77094be1b262..433fca59febd 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -7,7 +7,6 @@ obj-$(CONFIG_INFINIBAND_EFA) += efa/ obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ -obj-$(CONFIG_INFINIBAND_NES) += nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma/ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 2c3685faa57a..a91653aabf38 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -805,10 +805,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) rdev->sqp_ah = NULL; } - if (!IS_ERR_OR_NULL(qp->rumem)) - ib_umem_release(qp->rumem); - if (!IS_ERR_OR_NULL(qp->sumem)) - ib_umem_release(qp->sumem); + ib_umem_release(qp->rumem); + ib_umem_release(qp->sumem); mutex_lock(&rdev->qp_lock); list_del(&qp->list); @@ -1201,12 +1199,8 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, qp_destroy: bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); free_umem: - if (udata) { - if (qp->rumem) - ib_umem_release(qp->rumem); - if (qp->sumem) - ib_umem_release(qp->sumem); - } + ib_umem_release(qp->rumem); + ib_umem_release(qp->sumem); fail: kfree(qp); return ERR_PTR(rc); @@ -1302,8 +1296,7 @@ void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) if (qplib_srq->cq) nq = qplib_srq->cq->nq; bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); - if (srq->umem) - ib_umem_release(srq->umem); + ib_umem_release(srq->umem); atomic_dec(&rdev->srq_count); if (nq) nq->budget--; @@ -1412,8 +1405,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, return 0; fail: - if (srq->umem) - ib_umem_release(srq->umem); + ib_umem_release(srq->umem); exit: return rc; } @@ -2517,9 +2509,8 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, } /* Completion Queues */ -int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { - int rc; struct bnxt_re_cq *cq; struct bnxt_qplib_nq *nq; struct bnxt_re_dev *rdev; @@ -2528,29 +2519,20 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) rdev = cq->rdev; nq = cq->qplib_cq.nq; - rc = bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); - if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to destroy HW CQ"); - return rc; - } - if (!IS_ERR_OR_NULL(cq->umem)) - ib_umem_release(cq->umem); + bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); + ib_umem_release(cq->umem); atomic_dec(&rdev->cq_count); nq->budget--; kfree(cq->cql); - kfree(cq); - - return 0; } -struct ib_cq 
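
ib_alloc_mr_integrity() and ib_map_mr_sg_pi() above are the core of the reworked signature/protection-information offload API: one MR now covers both the data and the metadata scatterlists. A rough consumer-side sketch, assuming data_sg and meta_sg are already DMA mapped and that the checksum/DIF profile in mr->sig_attrs is configured separately (the example_ helper is invented):

#include <linux/err.h>
#include <linux/mm.h>
#include <rdma/ib_verbs.h>

static struct ib_mr *example_reg_pi_mr(struct ib_pd *pd,
					struct scatterlist *data_sg, int data_nents,
					struct scatterlist *meta_sg, int meta_nents)
{
	struct ib_mr *mr;
	int ret;

	mr = ib_alloc_mr_integrity(pd, data_nents, meta_nents);
	if (IS_ERR(mr))
		return mr;

	/* mr->sig_attrs would be filled with the checksum/DIF profile here */

	ret = ib_map_mr_sg_pi(mr, data_sg, data_nents, NULL,
			      meta_sg, meta_nents, NULL, PAGE_SIZE);
	if (ret < 0) {
		ib_dereg_mr(mr);
		return ERR_PTR(ret);
	}

	return mr;
}

The core wrapper simply forwards whatever the driver's map_mr_sg_pi op returns, so the sketch treats any negative value as failure.
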
*bnxt_re_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { - struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibcq->device, ibdev); struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; - struct bnxt_re_cq *cq = NULL; + struct bnxt_re_cq *cq = container_of(ibcq, struct bnxt_re_cq, ib_cq); int rc, entries; int cqe = attr->cqe; struct bnxt_qplib_nq *nq = NULL; @@ -2559,11 +2541,8 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, /* Validate CQ fields */ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) { dev_err(rdev_to_dev(rdev), "Failed to create CQ -max exceeded"); - return ERR_PTR(-EINVAL); + return -EINVAL; } - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); cq->rdev = rdev; cq->qplib_cq.cq_handle = (u64)(unsigned long)(&cq->qplib_cq); @@ -2641,15 +2620,13 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, } } - return &cq->ib_cq; + return 0; c2fail: - if (udata) - ib_umem_release(cq->umem); + ib_umem_release(cq->umem); fail: kfree(cq->cql); - kfree(cq); - return ERR_PTR(rc); + return rc; } static u8 __req_to_ib_wc_status(u8 qstatus) @@ -3353,8 +3330,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) mr->npages = 0; mr->pages = NULL; } - if (!IS_ERR_OR_NULL(mr->ib_umem)) - ib_umem_release(mr->ib_umem); + ib_umem_release(mr->ib_umem); kfree(mr); atomic_dec(&rdev->mr_count); @@ -3630,10 +3606,10 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) u32 chip_met_rev_num = 0; int rc; - dev_dbg(rdev_to_dev(rdev), "ABI version requested %d", - ibdev->uverbs_abi_ver); + dev_dbg(rdev_to_dev(rdev), "ABI version requested %u", + ibdev->ops.uverbs_abi_ver); - if (ibdev->uverbs_abi_ver != BNXT_RE_ABI_VERSION) { + if (ibdev->ops.uverbs_abi_ver != BNXT_RE_ABI_VERSION) { dev_dbg(rdev_to_dev(rdev), " is different from the device %d ", BNXT_RE_ABI_VERSION); return -EPERM; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 09a33049e42f..31662b1ee35a 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -94,11 +94,11 @@ struct bnxt_re_qp { }; struct bnxt_re_cq { + struct ib_cq ib_cq; struct bnxt_re_dev *rdev; spinlock_t cq_lock; /* protect cq */ u16 cq_count; u16 cq_period; - struct ib_cq ib_cq; struct bnxt_qplib_cq qplib_cq; struct bnxt_qplib_cqe *cql; #define MAX_CQL_PER_POLL 1024 @@ -190,10 +190,9 @@ int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr, const struct ib_send_wr **bad_send_wr); int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); -struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); -int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +void bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 
814f959c7db9..029babe713f3 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -596,6 +596,10 @@ static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev) } static const struct ib_device_ops bnxt_re_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_BNXT_RE, + .uverbs_abi_ver = BNXT_RE_ABI_VERSION, + .add_gid = bnxt_re_add_gid, .alloc_hw_stats = bnxt_re_ib_alloc_hw_stats, .alloc_mr = bnxt_re_alloc_mr, @@ -637,6 +641,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .reg_user_mr = bnxt_re_reg_user_mr, .req_notify_cq = bnxt_re_req_notify_cq, INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah), + INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq), INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd), INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq), INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx), @@ -648,7 +653,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) int ret; /* ib device init */ - ibdev->owner = THIS_MODULE; ibdev->node_type = RDMA_NODE_IB_CA; strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA", strlen(BNXT_RE_DESC) + 5); @@ -661,7 +665,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; /* User space */ - ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION; ibdev->uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -691,7 +694,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); - ibdev->driver_id = RDMA_DRIVER_BNXT_RE; ib_set_device_ops(ibdev, &bnxt_re_dev_ops); ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); if (ret) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 8ac72ac7cbac..95b22a651673 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -174,7 +174,6 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) return -ENOMEM; } dma_unmap_addr_set(cq, mapping, cq->dma_addr); - memset(cq->queue, 0, size); setup.id = cq->cqid; setup.base_addr = (u64) (cq->dma_addr); setup.size = 1UL << cq->size_log2; @@ -187,20 +186,6 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); } -#ifdef notyet -int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) -{ - struct rdma_cq_setup setup; - setup.id = cq->cqid; - setup.base_addr = (u64) (cq->dma_addr); - setup.size = 1UL << cq->size_log2; - setup.credits = setup.size; - setup.credit_thres = setup.size; /* TBD: overflow recovery */ - setup.ovfl_mode = 1; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} -#endif - static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) { struct cxio_qpid_list *entry; @@ -219,7 +204,7 @@ static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) if (!qpid) goto out; for (i = qpid+1; i & rdev_p->qpmask; i++) { - entry = kmalloc(sizeof *entry, GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) break; entry->qpid = i; @@ -237,7 +222,7 @@ static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid, { struct cxio_qpid_list *entry; - entry = kmalloc(sizeof *entry, GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return; pr_debug("%s qpid 0x%x\n", __func__, qpid); @@ -317,17 +302,15 @@ err1: return -ENOMEM; } -int cxio_destroy_cq(struct cxio_rdev *rdev_p, 
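
The bnxt_re main.c hunk shows the other side of the tree-wide churn in this pull: .owner, .driver_id and .uverbs_abi_ver move from struct ib_device into struct ib_device_ops, and the CQ joins the objects whose memory the core allocates via INIT_RDMA_OBJ_SIZE(). A skeletal driver fragment illustrating the resulting shape; every "example" name is a placeholder, not a real driver:

#include <linux/module.h>
#include <rdma/ib_verbs.h>

struct example_cq {
	struct ib_cq ibcq;	/* embedded core object */
	/* driver-private CQ state would follow */
};

static int example_create_cq(struct ib_cq *ibcq,
			     const struct ib_cq_init_attr *attr,
			     struct ib_udata *udata)
{
	struct example_cq *cq = container_of(ibcq, struct example_cq, ibcq);

	/* The core has already zero-allocated *cq and set ibcq->device;
	 * the driver only fills in its own state and returns 0 or -errno.
	 */
	return 0;
}

static void example_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	/* tear down HW state; the core frees the memory afterwards */
}

static const struct ib_device_ops example_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_UNKNOWN,	/* a real driver uses its own enum value */
	.uverbs_abi_ver = 1,			/* hypothetical ABI number */

	.create_cq = example_create_cq,
	.destroy_cq = example_destroy_cq,

	INIT_RDMA_OBJ_SIZE(ib_cq, example_cq, ibcq),
};

The same conversion appears below for cxgb3, whose iwch_create_cq() now fills in a core-allocated iwch_cq instead of kzalloc'ing and returning its own.
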
struct t3_cq *cq) +void cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) { - int err; - err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid); + cxio_hal_clear_cq_ctx(rdev_p, cq->cqid); kfree(cq->sw_queue); dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), (1UL << (cq->size_log2)) * sizeof(struct t3_cqe) + 1, cq->queue, dma_unmap_addr(cq, mapping)); cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); - return err; } int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq, @@ -538,8 +521,6 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping, rdev_p->ctrl_qp.dma_addr); rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; - memset(rdev_p->ctrl_qp.workq, 0, - (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr)); mutex_init(&rdev_p->ctrl_qp.lock); init_waitqueue_head(&rdev_p->ctrl_qp.waitq); @@ -565,9 +546,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) wqe->sge_cmd = cpu_to_be64(sge_cmd); wqe->ctx1 = cpu_to_be64(ctx1); wqe->ctx0 = cpu_to_be64(ctx0); - pr_debug("CtrlQP dma_addr 0x%llx workq %p size %d\n", - (unsigned long long)rdev_p->ctrl_qp.dma_addr, - rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2); + pr_debug("CtrlQP dma_addr %pad workq %p size %d\n", + &rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq, + 1 << T3_CTRL_QP_SIZE_LOG2); skb->priority = CPL_PRIORITY_CONTROL; return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); err: diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index c64e50b5a548..40c029ffa425 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -158,8 +158,7 @@ void cxio_rdev_close(struct cxio_rdev *rdev); int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, enum t3_cq_opcode op, u32 credit); int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel); -int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); -int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq); +void cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 1c90c86fc8b8..0bca72cb4d9a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -170,7 +170,7 @@ static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) { struct cpl_tid_release *req; - skb = get_skb(skb, sizeof *req, GFP_KERNEL); + skb = get_skb(skb, sizeof(*req), GFP_KERNEL); if (!skb) return; req = skb_put(skb, sizeof(*req)); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 3a481dfb1607..e775c1a1a450 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -88,7 +88,7 @@ static int iwch_alloc_ucontext(struct ib_ucontext *ucontext, return 0; } -static int iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +static void iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct iwch_cq *chp; @@ -100,17 +100,16 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) wait_event(chp->wait, !atomic_read(&chp->refcnt)); cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); - kfree(chp); - return 0; } -static struct 
ib_cq *iwch_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +static int iwch_create_cq(struct ib_cq *ibcq, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; - struct iwch_dev *rhp; - struct iwch_cq *chp; + struct iwch_dev *rhp = to_iwch_dev(ibcq->device); + struct iwch_cq *chp = to_iwch_cq(ibcq); struct iwch_create_cq_resp uresp; struct iwch_create_cq_req ureq; static int warned; @@ -118,19 +117,13 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries); if (attr->flags) - return ERR_PTR(-EINVAL); - - rhp = to_iwch_dev(ibdev); - chp = kzalloc(sizeof(*chp), GFP_KERNEL); - if (!chp) - return ERR_PTR(-ENOMEM); + return -EINVAL; if (udata) { if (!t3a_device(rhp)) { - if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) { - kfree(chp); - return ERR_PTR(-EFAULT); - } + if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) + return -EFAULT; + chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr; } } @@ -151,10 +144,9 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, entries = roundup_pow_of_two(entries); chp->cq.size_log2 = ilog2(entries); - if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata)) { - kfree(chp); - return ERR_PTR(-ENOMEM); - } + if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata)) + return -ENOMEM; + chp->rhp = rhp; chp->ibcq.cqe = 1 << chp->cq.size_log2; spin_lock_init(&chp->lock); @@ -163,8 +155,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, init_waitqueue_head(&chp->wait); if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) { cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); - kfree(chp); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } if (udata) { @@ -172,10 +163,10 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, struct iwch_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct iwch_ucontext, ibucontext); - mm = kmalloc(sizeof *mm, GFP_KERNEL); + mm = kmalloc(sizeof(*mm), GFP_KERNEL); if (!mm) { iwch_destroy_cq(&chp->ibcq, udata); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } uresp.cqid = chp->cq.cqid; uresp.size_log2 = chp->cq.size_log2; @@ -185,7 +176,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, spin_unlock(&ucontext->mmap_lock); mm->key = uresp.key; mm->addr = virt_to_phys(chp->cq.queue); - if (udata->outlen < sizeof uresp) { + if (udata->outlen < sizeof(uresp)) { if (!warned++) pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n"); mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * @@ -196,86 +187,19 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, sizeof(struct t3_cqe)); uresp.memsize = mm->len; uresp.reserved = 0; - resplen = sizeof uresp; + resplen = sizeof(uresp); } if (ib_copy_to_udata(udata, &uresp, resplen)) { kfree(mm); iwch_destroy_cq(&chp->ibcq, udata); - return ERR_PTR(-EFAULT); + return -EFAULT; } insert_mmap(ucontext, mm); } - pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n", + pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr %pad\n", chp->cq.cqid, chp, (1 << chp->cq.size_log2), - (unsigned long long)chp->cq.dma_addr); - return &chp->ibcq; -} - -static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) -{ -#ifdef notyet - struct iwch_cq *chp = to_iwch_cq(cq); - struct t3_cq oldcq, newcq; - int ret; - - pr_debug("%s ib_cq %p cqe %d\n", __func__, cq, cqe); - - /* We don't downsize... 
*/ - if (cqe <= cq->cqe) - return 0; - - /* create new t3_cq with new size */ - cqe = roundup_pow_of_two(cqe+1); - newcq.size_log2 = ilog2(cqe); - - /* Dont allow resize to less than the current wce count */ - if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) { - return -ENOMEM; - } - - /* Quiesce all QPs using this CQ */ - ret = iwch_quiesce_qps(chp); - if (ret) { - return ret; - } - - ret = cxio_create_cq(&chp->rhp->rdev, &newcq); - if (ret) { - return ret; - } - - /* copy CQEs */ - memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) * - sizeof(struct t3_cqe)); - - /* old iwch_qp gets new t3_cq but keeps old cqid */ - oldcq = chp->cq; - chp->cq = newcq; - chp->cq.cqid = oldcq.cqid; - - /* resize new t3_cq to update the HW context */ - ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq); - if (ret) { - chp->cq = oldcq; - return ret; - } - chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1; - - /* destroy old t3_cq */ - oldcq.cqid = newcq.cqid; - ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq); - if (ret) { - pr_err("%s - cxio_destroy_cq failed %d\n", __func__, ret); - } - - /* add user hooks here */ - - /* resume qps */ - ret = iwch_resume_qps(chp); - return ret; -#else - return -ENOSYS; -#endif + &chp->cq.dma_addr); + return 0; } static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) @@ -422,8 +346,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) xa_erase_irq(&rhp->mrs, mmid); if (mhp->kva) kfree((void *) (unsigned long) mhp->kva); - if (mhp->umem) - ib_umem_release(mhp->umem); + ib_umem_release(mhp->umem); pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp); kfree(mhp); return 0; @@ -553,7 +476,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) { pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter)); - if (i == PAGE_SIZE / sizeof *pages) { + if (i == PAGE_SIZE / sizeof(*pages)) { err = iwch_write_pbl(mhp, pages, i, n); if (err) goto pbl_done; @@ -587,7 +510,7 @@ pbl_done: pr_debug("%s user resp pbl_addr 0x%x\n", __func__, uresp.pbl_addr); - if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { + if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { iwch_dereg_mr(&mhp->ibmr, udata); err = -EFAULT; goto err; @@ -880,13 +803,13 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, struct iwch_mm_entry *mm1, *mm2; - mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); + mm1 = kmalloc(sizeof(*mm1), GFP_KERNEL); if (!mm1) { iwch_destroy_qp(&qhp->ibqp, udata); return ERR_PTR(-ENOMEM); } - mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); + mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL); if (!mm2) { kfree(mm1); iwch_destroy_qp(&qhp->ibqp, udata); @@ -903,7 +826,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, uresp.db_key = ucontext->key; ucontext->key += PAGE_SIZE; spin_unlock(&ucontext->mmap_lock); - if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { + if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { kfree(mm1); kfree(mm2); iwch_destroy_qp(&qhp->ibqp, udata); @@ -911,7 +834,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, } mm1->key = uresp.key; mm1->addr = virt_to_phys(qhp->wq.queue); - mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr)); + mm1->len = PAGE_ALIGN(wqsize * sizeof(union t3_wr)); insert_mmap(ucontext, mm1); mm2->key = uresp.db_key; mm2->addr = qhp->wq.udb & PAGE_MASK; @@ -919,10 +842,11 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, insert_mmap(ucontext, mm2); } qhp->ibqp.qp_num = qhp->wq.qpid; - pr_debug("%s 
sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n", - __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, - qhp->wq.qpid, qhp, (unsigned long long)qhp->wq.dma_addr, - 1 << qhp->wq.size_log2, qhp->wq.rq_addr); + pr_debug( + "%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr %pad size %d rq_addr 0x%x\n", + __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, + qhp->wq.qpid, qhp, &qhp->wq.dma_addr, 1 << qhp->wq.size_log2, + qhp->wq.rq_addr); return &qhp->ibqp; } @@ -932,7 +856,7 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct iwch_dev *rhp; struct iwch_qp *qhp; enum iwch_qp_attr_mask mask = 0; - struct iwch_qp_attributes attrs; + struct iwch_qp_attributes attrs = {}; pr_debug("%s ib_qp %p\n", __func__, ibqp); @@ -944,7 +868,6 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (!attr_mask) return 0; - memset(&attrs, 0, sizeof attrs); qhp = to_iwch_qp(ibqp); rhp = qhp->rhp; @@ -1040,7 +963,6 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro return -EINVAL; dev = to_iwch_dev(ibdev); - memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); props->hw_ver = dev->rdev.t3cdev_p->type; props->fw_ver = fw_vers_string_to_u64(dev); @@ -1304,6 +1226,11 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) } static const struct ib_device_ops iwch_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_CXGB3, + .uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION, + .uverbs_no_driver_id_binding = 1, + .alloc_hw_stats = iwch_alloc_stats, .alloc_mr = iwch_alloc_mr, .alloc_mw = iwch_alloc_mw, @@ -1341,8 +1268,8 @@ static const struct ib_device_ops iwch_dev_ops = { .query_port = iwch_query_port, .reg_user_mr = iwch_reg_user_mr, .req_notify_cq = iwch_arm_cq, - .resize_cq = iwch_resize_cq, INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_cq, iwch_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext), }; @@ -1351,7 +1278,6 @@ int iwch_register_device(struct iwch_dev *dev) pr_debug("%s iwch_dev %p\n", __func__, dev); memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); - dev->ibdev.owner = THIS_MODULE; dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_MGT_EXTENSIONS; @@ -1383,12 +1309,10 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; - dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name, sizeof(dev->ibdev.iw_ifname)); - dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); return ib_register_device(&dev->ibdev, "cxgb3_%d"); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 09fcfc9e052d..e87fc0408470 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -953,7 +953,7 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, mpalen = sizeof(*mpa) + ep->plen; if (mpa_rev_to_use == 2) mpalen += sizeof(struct mpa_v2_conn_params); - wrlen = roundup(mpalen + sizeof *req, 16); + wrlen = roundup(mpalen + sizeof(*req), 16); skb = get_skb(skb, wrlen, 
GFP_KERNEL); if (!skb) { connect_reply_upcall(ep, -ENOMEM); @@ -997,8 +997,9 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, } if (mpa_rev_to_use == 2) { - mpa->private_data_size = htons(ntohs(mpa->private_data_size) + - sizeof (struct mpa_v2_conn_params)); + mpa->private_data_size = + htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); pr_debug("initiator ird %u ord %u\n", ep->ird, ep->ord); mpa_v2_params.ird = htons((u16)ep->ird); @@ -1057,7 +1058,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) mpalen = sizeof(*mpa) + plen; if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) mpalen += sizeof(struct mpa_v2_conn_params); - wrlen = roundup(mpalen + sizeof *req, 16); + wrlen = roundup(mpalen + sizeof(*req), 16); skb = get_skb(NULL, wrlen, GFP_KERNEL); if (!skb) { @@ -1088,8 +1089,9 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; - mpa->private_data_size = htons(ntohs(mpa->private_data_size) + - sizeof (struct mpa_v2_conn_params)); + mpa->private_data_size = + htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons(((u16)ep->ird) | (peer2peer ? MPA_V2_PEER2PEER_MODEL : 0)); @@ -1136,7 +1138,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) mpalen = sizeof(*mpa) + plen; if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) mpalen += sizeof(struct mpa_v2_conn_params); - wrlen = roundup(mpalen + sizeof *req, 16); + wrlen = roundup(mpalen + sizeof(*req), 16); skb = get_skb(NULL, wrlen, GFP_KERNEL); if (!skb) { @@ -1171,8 +1173,9 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; - mpa->private_data_size = htons(ntohs(mpa->private_data_size) + - sizeof (struct mpa_v2_conn_params)); + mpa->private_data_size = + htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); if (peer2peer && (ep->mpa_attr.p2p_type != diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 52ce586621c6..b1bb61c65f4f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -34,16 +34,15 @@ #include "iw_cxgb4.h" -static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, - struct c4iw_dev_ucontext *uctx, struct sk_buff *skb, - struct c4iw_wr_wait *wr_waitp) +static void destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, + struct c4iw_dev_ucontext *uctx, struct sk_buff *skb, + struct c4iw_wr_wait *wr_waitp) { struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; - int ret; - wr_len = sizeof *res_wr + sizeof *res; + wr_len = sizeof(*res_wr) + sizeof(*res); set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); res_wr = __skb_put_zero(skb, wr_len); @@ -59,14 +58,13 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, res->u.cq.iqid = cpu_to_be32(cq->cqid); c4iw_init_wr_wait(wr_waitp); - ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); + c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); kfree(cq->sw_queue); dma_free_coherent(&(rdev->lldi.pdev->dev), cq->memsize, cq->queue, dma_unmap_addr(cq, mapping)); c4iw_put_cqid(rdev, cq->cqid, uctx); - return ret; } static int create_cq(struct c4iw_rdev 
*rdev, struct t4_cq *cq, @@ -104,7 +102,6 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, goto err3; } dma_unmap_addr_set(cq, mapping, cq->dma_addr); - memset(cq->queue, 0, cq->memsize); if (user && ucontext->is_32b_cqe) { cq->qp_errp = &((struct t4_status_page *) @@ -117,7 +114,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, } /* build fw_ri_res_wr */ - wr_len = sizeof *res_wr + sizeof *res; + wr_len = sizeof(*res_wr) + sizeof(*res); skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) { @@ -970,7 +967,7 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return !err || err == -ENODATA ? npolled : err; } -int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct c4iw_cq *chp; struct c4iw_ucontext *ucontext; @@ -988,18 +985,16 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx, chp->destroy_skb, chp->wr_waitp); c4iw_put_wr_wait(chp->wr_waitp); - kfree(chp); - return 0; } -struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; int vector = attr->comp_vector; - struct c4iw_dev *rhp; - struct c4iw_cq *chp; + struct c4iw_dev *rhp = to_c4iw_dev(ibcq->device); + struct c4iw_cq *chp = to_c4iw_cq(ibcq); struct c4iw_create_cq ucmd; struct c4iw_create_cq_resp uresp; int ret, wr_len; @@ -1010,22 +1005,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, pr_debug("ib_dev %p entries %d\n", ibdev, entries); if (attr->flags) - return ERR_PTR(-EINVAL); - - rhp = to_c4iw_dev(ibdev); + return -EINVAL; if (vector >= rhp->rdev.lldi.nciq) - return ERR_PTR(-EINVAL); + return -EINVAL; if (udata) { if (udata->inlen < sizeof(ucmd)) ucontext->is_32b_cqe = 1; } - chp = kzalloc(sizeof(*chp), GFP_KERNEL); - if (!chp) - return ERR_PTR(-ENOMEM); - chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!chp->wr_waitp) { ret = -ENOMEM; @@ -1095,10 +1084,10 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (ucontext) { ret = -ENOMEM; - mm = kmalloc(sizeof *mm, GFP_KERNEL); + mm = kmalloc(sizeof(*mm), GFP_KERNEL); if (!mm) goto err_remove_handle; - mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); + mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL); if (!mm2) goto err_free_mm; @@ -1135,10 +1124,11 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, mm2->len = PAGE_SIZE; insert_mmap(ucontext, mm2); } - pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n", - chp->cq.cqid, chp, chp->cq.size, - chp->cq.memsize, (unsigned long long)chp->cq.dma_addr); - return &chp->ibcq; + + pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr %pad\n", + chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize, + &chp->cq.dma_addr); + return 0; err_free_mm2: kfree(mm2); err_free_mm: @@ -1154,8 +1144,7 @@ err_free_skb: err_free_wr_wait: c4iw_put_wr_wait(chp->wr_waitp); err_free_chp: - kfree(chp); - return ERR_PTR(ret); + return ret; } int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 4c0d925c5ff5..a8b9548bd1a2 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -327,7 +327,7 @@ static int qp_open(struct inode *inode, struct file *file) unsigned long 
index; int count = 1; - qpd = kmalloc(sizeof *qpd, GFP_KERNEL); + qpd = kmalloc(sizeof(*qpd), GFP_KERNEL); if (!qpd) return -ENOMEM; @@ -421,7 +421,7 @@ static int stag_open(struct inode *inode, struct file *file) int ret = 0; int count = 1; - stagd = kmalloc(sizeof *stagd, GFP_KERNEL); + stagd = kmalloc(sizeof(*stagd), GFP_KERNEL); if (!stagd) { ret = -ENOMEM; goto out; @@ -1075,7 +1075,7 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop) pr_info("Chelsio T4/T5 RDMA Driver - version %s\n", DRV_VERSION); - ctx = kzalloc(sizeof *ctx, GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) { ctx = ERR_PTR(-ENOMEM); goto out; @@ -1243,10 +1243,9 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) case CXGB4_STATE_START_RECOVERY: pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev)); if (ctx->dev) { - struct ib_event event; + struct ib_event event = {}; ctx->dev->rdev.flags |= T4_FATAL_ERROR; - memset(&event, 0, sizeof event); event.event = IB_EVENT_DEVICE_FATAL; event.device = &ctx->dev->ibdev; ib_dispatch_event(&event); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 916ef982172e..7d06b0f8d49a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -490,13 +490,13 @@ struct c4iw_qp { struct t4_wq wq; spinlock_t lock; struct mutex mutex; - struct kref kref; wait_queue_head_t wait; int sq_sig_all; struct c4iw_srq *srq; - struct work_struct free_work; struct c4iw_ucontext *ucontext; struct c4iw_wr_wait *wr_waitp; + struct completion qp_rel_comp; + refcount_t qp_refcnt; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) @@ -992,10 +992,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, struct ib_udata *udata); struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); -int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); -struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); +void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask srq_attr_mask, diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 811c0c8c5b16..aa772ee0706f 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -130,8 +130,9 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, copy_len = len > C4IW_MAX_INLINE_SIZE ? 
C4IW_MAX_INLINE_SIZE : len; - wr_len = roundup(sizeof *req + sizeof *sc + - roundup(copy_len, T4_ULPTX_MIN_IO), 16); + wr_len = roundup(sizeof(*req) + sizeof(*sc) + + roundup(copy_len, T4_ULPTX_MIN_IO), + 16); if (!skb) { skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); @@ -807,8 +808,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) mhp->attr.pbl_size << 3); if (mhp->kva) kfree((void *) (unsigned long) mhp->kva); - if (mhp->umem) - ib_umem_release(mhp->umem); + ib_umem_release(mhp->umem); pr_debug("mmid 0x%x ptr %p\n", mmid, mhp); c4iw_put_wr_wait(mhp->wr_waitp); kfree(mhp); diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 74b795642fca..5e59c5708729 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -271,7 +271,6 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro return -EINVAL; dev = to_c4iw_dev(ibdev); - memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type); props->fw_ver = dev->rdev.lldi.fw_vers; @@ -490,6 +489,10 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) } static const struct ib_device_ops c4iw_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_CXGB4, + .uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION, + .alloc_hw_stats = c4iw_alloc_stats, .alloc_mr = c4iw_alloc_mr, .alloc_mw = c4iw_alloc_mw, @@ -534,6 +537,7 @@ static const struct ib_device_ops c4iw_dev_ops = { .reg_user_mr = c4iw_reg_user_mr, .req_notify_cq = c4iw_arm_cq, INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext), }; @@ -561,7 +565,6 @@ void c4iw_register_device(struct work_struct *work) pr_debug("c4iw_dev %p\n", dev); memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); - dev->ibdev.owner = THIS_MODULE; dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; if (fastreg_support) dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; @@ -594,13 +597,11 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev; - dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; memcpy(dev->ibdev.iw_ifname, dev->rdev.lldi.ports[0]->name, sizeof(dev->ibdev.iw_ifname)); rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group); - dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops); ret = set_netdevs(&dev->ibdev, &dev->rdev); if (ret) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index e92b9544357a..eb9368be28c1 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -274,7 +274,6 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, (unsigned long long)virt_to_phys(wq->sq.queue), wq->rq.queue, (unsigned long long)virt_to_phys(wq->rq.queue)); - memset(wq->rq.queue, 0, wq->rq.memsize); dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); } @@ -303,7 +302,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, wq->rq.msn = 1; /* build fw_ri_res_wr */ - wr_len = sizeof *res_wr + 2 * sizeof *res; + wr_len = sizeof(*res_wr) + 2 * 
sizeof(*res); if (need_rq) wr_len += sizeof(*res); skb = alloc_skb(wr_len, GFP_KERNEL); @@ -439,7 +438,7 @@ static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, rem -= len; } } - len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp); + len = roundup(plen + sizeof(*immdp), 16) - (plen + sizeof(*immdp)); if (len) memset(dstp, 0, len); immdp->op = FW_RI_DATA_IMMD; @@ -528,7 +527,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, T4_MAX_SEND_INLINE, &plen); if (ret) return ret; - size = sizeof wqe->send + sizeof(struct fw_ri_immd) + + size = sizeof(wqe->send) + sizeof(struct fw_ri_immd) + plen; } else { ret = build_isgl((__be64 *)sq->queue, @@ -537,7 +536,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, wr->sg_list, wr->num_sge, &plen); if (ret) return ret; - size = sizeof wqe->send + sizeof(struct fw_ri_isgl) + + size = sizeof(wqe->send) + sizeof(struct fw_ri_isgl) + wr->num_sge * sizeof(struct fw_ri_sge); } } else { @@ -545,7 +544,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, wqe->send.u.immd_src[0].r1 = 0; wqe->send.u.immd_src[0].r2 = 0; wqe->send.u.immd_src[0].immdlen = 0; - size = sizeof wqe->send + sizeof(struct fw_ri_immd); + size = sizeof(wqe->send) + sizeof(struct fw_ri_immd); plen = 0; } *len16 = DIV_ROUND_UP(size, 16); @@ -579,7 +578,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, T4_MAX_WRITE_INLINE, &plen); if (ret) return ret; - size = sizeof wqe->write + sizeof(struct fw_ri_immd) + + size = sizeof(wqe->write) + sizeof(struct fw_ri_immd) + plen; } else { ret = build_isgl((__be64 *)sq->queue, @@ -588,7 +587,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, wr->sg_list, wr->num_sge, &plen); if (ret) return ret; - size = sizeof wqe->write + sizeof(struct fw_ri_isgl) + + size = sizeof(wqe->write) + sizeof(struct fw_ri_isgl) + wr->num_sge * sizeof(struct fw_ri_sge); } } else { @@ -596,7 +595,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, wqe->write.u.immd_src[0].r1 = 0; wqe->write.u.immd_src[0].r2 = 0; wqe->write.u.immd_src[0].immdlen = 0; - size = sizeof wqe->write + sizeof(struct fw_ri_immd); + size = sizeof(wqe->write) + sizeof(struct fw_ri_immd); plen = 0; } *len16 = DIV_ROUND_UP(size, 16); @@ -683,7 +682,7 @@ static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr, } wqe->read.r2 = 0; wqe->read.r5 = 0; - *len16 = DIV_ROUND_UP(sizeof wqe->read, 16); + *len16 = DIV_ROUND_UP(sizeof(wqe->read), 16); return 0; } @@ -766,8 +765,8 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); if (ret) return ret; - *len16 = DIV_ROUND_UP(sizeof wqe->recv + - wr->num_sge * sizeof(struct fw_ri_sge), 16); + *len16 = DIV_ROUND_UP( + sizeof(wqe->recv) + wr->num_sge * sizeof(struct fw_ri_sge), 16); return 0; } @@ -886,47 +885,21 @@ static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr, { wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; - *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16); + *len16 = DIV_ROUND_UP(sizeof(wqe->inv), 16); return 0; } -static void free_qp_work(struct work_struct *work) -{ - struct c4iw_ucontext *ucontext; - struct c4iw_qp *qhp; - struct c4iw_dev *rhp; - - qhp = container_of(work, struct c4iw_qp, free_work); - ucontext = qhp->ucontext; - rhp = qhp->rhp; - - pr_debug("qhp %p ucontext %p\n", qhp, ucontext); - destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx, !qhp->srq); - - c4iw_put_wr_wait(qhp->wr_waitp); - kfree(qhp); -} - -static void queue_qp_free(struct kref *kref) -{ - struct c4iw_qp *qhp; - - qhp = container_of(kref, struct c4iw_qp, kref); - pr_debug("qhp %p\n", qhp); - queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work); -} - void c4iw_qp_add_ref(struct ib_qp *qp) { pr_debug("ib_qp %p\n", qp); - kref_get(&to_c4iw_qp(qp)->kref); + refcount_inc(&to_c4iw_qp(qp)->qp_refcnt); } void c4iw_qp_rem_ref(struct ib_qp *qp) { pr_debug("ib_qp %p\n", qp); - kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free); + if (refcount_dec_and_test(&to_c4iw_qp(qp)->qp_refcnt)) + complete(&to_c4iw_qp(qp)->qp_rel_comp); } static void add_to_fc_list(struct list_head *head, struct list_head *entry) @@ -1606,7 +1579,7 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); wqe->u.terminate.type = FW_RI_TYPE_TERMINATE; - wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term); + wqe->u.terminate.immdlen = cpu_to_be32(sizeof(*term)); term = (struct terminate_message *)wqe->u.terminate.termmsg; if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) { term->layer_etype = qhp->attr.layer_etype; @@ -1751,16 +1724,15 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init) { pr_debug("p2p_type = %d\n", p2p_type); - memset(&init->u, 0, sizeof init->u); + memset(&init->u, 0, sizeof(init->u)); switch (p2p_type) { case FW_RI_INIT_P2PTYPE_RDMA_WRITE: init->u.write.opcode = FW_RI_RDMA_WRITE_WR; init->u.write.stag_sink = cpu_to_be32(1); init->u.write.to_sink = cpu_to_be64(1); init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD; - init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write + - sizeof(struct fw_ri_immd), - 16); + init->u.write.len16 = DIV_ROUND_UP( + sizeof(init->u.write) + sizeof(struct fw_ri_immd), 16); break; case FW_RI_INIT_P2PTYPE_READ_REQ: init->u.write.opcode = FW_RI_RDMA_READ_WR; @@ -1768,7 +1740,7 @@ static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init) init->u.read.to_src_lo = cpu_to_be32(1); init->u.read.stag_sink = cpu_to_be32(1); init->u.read.to_sink_lo = cpu_to_be32(1); - init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16); + init->u.read.len16 = DIV_ROUND_UP(sizeof(init->u.read), 16); break; } } @@ -1782,7 +1754,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) pr_debug("qhp %p qid 0x%x tid %u ird %u ord %u\n", qhp, qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord); - skb = alloc_skb(sizeof *wqe, GFP_KERNEL); + skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); if (!skb) { ret = -ENOMEM; goto out; @@ -2099,10 +2071,12 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_qp *qhp; + struct c4iw_ucontext *ucontext; struct c4iw_qp_attributes attrs; qhp = to_c4iw_qp(ib_qp); rhp = qhp->rhp; + ucontext = qhp->ucontext; attrs.next_state = C4IW_QP_STATE_ERROR; if (qhp->attr.state == C4IW_QP_STATE_TERMINATE) @@ -2120,7 +2094,17 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) c4iw_qp_rem_ref(ib_qp); + wait_for_completion(&qhp->qp_rel_comp); + pr_debug("ib_qp %p qpid 0x%0x\n", ib_qp, qhp->wq.sq.qid); + pr_debug("qhp %p ucontext %p\n", qhp, ucontext); + + destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx, !qhp->srq); + + c4iw_put_wr_wait(qhp->wr_waitp); + + kfree(qhp); return 0; } @@ -2230,8 +2214,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, spin_lock_init(&qhp->lock); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); - kref_init(&qhp->kref); - INIT_WORK(&qhp->free_work, free_qp_work); + init_completion(&qhp->qp_rel_comp); + refcount_set(&qhp->qp_refcnt, 1); ret = xa_insert_irq(&rhp->qps, qhp->wq.sq.qid, qhp, GFP_KERNEL); if (ret) @@ -2302,7 +2286,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ucontext->key += PAGE_SIZE; } spin_unlock(&ucontext->mmap_lock); - ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (ret) goto err_free_ma_sync_key; sq_key_mm->key = uresp.sq_key; @@ -2386,7 +2370,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct c4iw_dev *rhp; struct c4iw_qp *qhp; enum c4iw_qp_attr_mask mask = 0; - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {}; pr_debug("ib_qp %p\n", ibqp); @@ -2398,7 +2382,6 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (!attr_mask) return 0; - memset(&attrs, 0, sizeof attrs); qhp = to_c4iw_qp(ibqp); rhp = qhp->rhp; @@ -2482,8 +2465,8 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { struct c4iw_qp *qhp = to_c4iw_qp(ibqp); - memset(attr, 0, sizeof *attr); - memset(init_attr, 0, sizeof *init_attr); + memset(attr, 0, sizeof(*attr)); + memset(init_attr, 0, sizeof(*init_attr)); attr->qp_state = to_ib_qp_state(qhp->attr.state); init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 57ed26b3cc21..5c95c789f302 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -126,7 +126,7 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) rdev->stats.qid.cur += rdev->qpmask + 1; mutex_unlock(&rdev->stats.lock); for (i = qid+1; i & rdev->qpmask; i++) { - entry = kmalloc(sizeof *entry, GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) goto out; entry->qid = i; @@ -137,13 +137,13 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) * now put the same ids on the qp list since they all * map to the same db/gts page. 
*/ - entry = kmalloc(sizeof *entry, GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) goto out; entry->qid = qid; list_add_tail(&entry->entry, &uctx->qpids); for (i = qid+1; i & rdev->qpmask; i++) { - entry = kmalloc(sizeof *entry, GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) goto out; entry->qid = i; @@ -165,7 +165,7 @@ void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid, { struct c4iw_qid_list *entry; - entry = kmalloc(sizeof *entry, GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return; pr_debug("qid 0x%x\n", qid); @@ -200,7 +200,7 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) rdev->stats.qid.cur += rdev->qpmask + 1; mutex_unlock(&rdev->stats.lock); for (i = qid+1; i & rdev->qpmask; i++) { - entry = kmalloc(sizeof *entry, GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) goto out; entry->qid = i; @@ -211,13 +211,13 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) * now put the same ids on the cq list since they all * map to the same db/gts page. */ - entry = kmalloc(sizeof *entry, GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) goto out; entry->qid = qid; list_add_tail(&entry->entry, &uctx->cqids); for (i = qid; i & rdev->qpmask; i++) { - entry = kmalloc(sizeof *entry, GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) goto out; entry->qid = i; @@ -239,7 +239,7 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, { struct c4iw_qid_list *entry; - entry = kmalloc(sizeof *entry, GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return; pr_debug("qid 0x%x\n", qid); diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 9e3cc3239c13..119f8efec564 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -7,10 +7,8 @@ #define _EFA_H_ #include <linux/bitops.h> -#include <linux/idr.h> #include <linux/interrupt.h> #include <linux/pci.h> -#include <linux/sched.h> #include <rdma/efa-abi.h> #include <rdma/ib_verbs.h> @@ -136,10 +134,9 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); struct ib_qp *efa_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); -int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); -struct ib_cq *efa_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); +void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index a5c788741a04..2cb42484b0f8 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -39,8 +39,6 @@ enum efa_cmd_status { EFA_CMD_SUBMITTED, EFA_CMD_COMPLETED, - /* Abort - canceled by the driver */ - EFA_CMD_ABORTED, }; struct efa_comp_ctx { @@ -280,36 +278,34 @@ static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq, static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq, struct efa_comp_ctx *comp_ctx) { - u16 comp_id = comp_ctx->user_cqe->acq_common_descriptor.command & - EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + u16 cmd_id = comp_ctx->user_cqe->acq_common_descriptor.command & + 
EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + u16 ctx_id = cmd_id & (aq->depth - 1); - ibdev_dbg(aq->efa_dev, "Putting completion command_id %d\n", comp_id); + ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id); comp_ctx->occupied = 0; - efa_com_dealloc_ctx_id(aq, comp_id); + efa_com_dealloc_ctx_id(aq, ctx_id); } static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq, - u16 command_id, bool capture) + u16 cmd_id, bool capture) { - if (command_id >= aq->depth) { - ibdev_err(aq->efa_dev, - "command id is larger than the queue size. cmd_id: %u queue size %d\n", - command_id, aq->depth); - return NULL; - } + u16 ctx_id = cmd_id & (aq->depth - 1); - if (aq->comp_ctx[command_id].occupied && capture) { - ibdev_err(aq->efa_dev, "Completion context is occupied\n"); + if (aq->comp_ctx[ctx_id].occupied && capture) { + ibdev_err(aq->efa_dev, + "Completion context for command_id %#x is occupied\n", + cmd_id); return NULL; } if (capture) { - aq->comp_ctx[command_id].occupied = 1; - ibdev_dbg(aq->efa_dev, "Taking completion ctxt command_id %d\n", - command_id); + aq->comp_ctx[ctx_id].occupied = 1; + ibdev_dbg(aq->efa_dev, + "Take completion ctxt for command_id %#x\n", cmd_id); } - return &aq->comp_ctx[command_id]; + return &aq->comp_ctx[ctx_id]; } static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, @@ -320,6 +316,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu { struct efa_comp_ctx *comp_ctx; u16 queue_size_mask; + u16 cmd_id; u16 ctx_id; u16 pi; @@ -328,13 +325,16 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu ctx_id = efa_com_alloc_ctx_id(aq); + /* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */ + cmd_id = ctx_id & queue_size_mask; + cmd_id |= aq->sq.pc & ~queue_size_mask; + cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK; + + cmd->aq_common_descriptor.command_id = cmd_id; cmd->aq_common_descriptor.flags |= aq->sq.phase & EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK; - cmd->aq_common_descriptor.command_id |= ctx_id & - EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK; - - comp_ctx = efa_com_get_comp_ctx(aq, ctx_id, true); + comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true); if (!comp_ctx) { efa_com_dealloc_ctx_id(aq, ctx_id); return ERR_PTR(-EINVAL); @@ -532,16 +532,6 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c msleep(aq->poll_interval); } - if (comp_ctx->status == EFA_CMD_ABORTED) { - ibdev_err(aq->efa_dev, "Command was aborted\n"); - atomic64_inc(&aq->stats.aborted_cmd); - err = -ENODEV; - goto out; - } - - WARN_ONCE(comp_ctx->status != EFA_CMD_COMPLETED, - "Invalid completion status %d\n", comp_ctx->status); - err = efa_com_comp_status_to_errno(comp_ctx->comp_status); out: efa_com_put_comp_ctx(aq, comp_ctx); @@ -666,66 +656,6 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, } /** - * efa_com_abort_admin_commands - Abort all the outstanding admin commands. - * @edev: EFA communication layer struct - * - * This method aborts all the outstanding admin commands. - * The caller should then call efa_com_wait_for_abort_completion to make sure - * all the commands were completed. 
- */ -static void efa_com_abort_admin_commands(struct efa_com_dev *edev) -{ - struct efa_com_admin_queue *aq = &edev->aq; - struct efa_comp_ctx *comp_ctx; - unsigned long flags; - u16 i; - - spin_lock(&aq->sq.lock); - spin_lock_irqsave(&aq->cq.lock, flags); - for (i = 0; i < aq->depth; i++) { - comp_ctx = efa_com_get_comp_ctx(aq, i, false); - if (!comp_ctx) - break; - - comp_ctx->status = EFA_CMD_ABORTED; - - complete(&comp_ctx->wait_event); - } - spin_unlock_irqrestore(&aq->cq.lock, flags); - spin_unlock(&aq->sq.lock); -} - -/** - * efa_com_wait_for_abort_completion - Wait for admin commands abort. - * @edev: EFA communication layer struct - * - * This method wait until all the outstanding admin commands will be completed. - */ -static void efa_com_wait_for_abort_completion(struct efa_com_dev *edev) -{ - struct efa_com_admin_queue *aq = &edev->aq; - int i; - - /* all mine */ - for (i = 0; i < aq->depth; i++) - down(&aq->avail_cmds); - - /* let it go */ - for (i = 0; i < aq->depth; i++) - up(&aq->avail_cmds); -} - -static void efa_com_admin_flush(struct efa_com_dev *edev) -{ - struct efa_com_admin_queue *aq = &edev->aq; - - clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); - - efa_com_abort_admin_commands(edev); - efa_com_wait_for_abort_completion(edev); -} - -/** * efa_com_admin_destroy - Destroy the admin and the async events queues. * @edev: EFA communication layer struct */ @@ -737,7 +667,7 @@ void efa_com_admin_destroy(struct efa_com_dev *edev) struct efa_com_admin_sq *sq = &aq->sq; u16 size; - efa_com_admin_flush(edev); + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); devm_kfree(edev->dmadev, aq->comp_ctx_pool); devm_kfree(edev->dmadev, aq->comp_ctx); diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h index 84d96724a74b..c67dd8109d1c 100644 --- a/drivers/infiniband/hw/efa/efa_com.h +++ b/drivers/infiniband/hw/efa/efa_com.h @@ -45,7 +45,6 @@ struct efa_com_admin_sq { /* Don't use anything other than atomic64 */ struct efa_com_stats_admin { - atomic64_t aborted_cmd; atomic64_t submitted_cmd; atomic64_t completed_cmd; atomic64_t no_completion; diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index c0016648804c..62345d8abf3c 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -3,7 +3,6 @@ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ -#include "efa.h" #include "efa_com.h" #include "efa_com_cmd.h" @@ -57,7 +56,7 @@ int efa_com_create_qp(struct efa_com_dev *edev, res->send_sub_cq_idx = cmd_completion.send_sub_cq_idx; res->recv_sub_cq_idx = cmd_completion.recv_sub_cq_idx; - return err; + return 0; } int efa_com_modify_qp(struct efa_com_dev *edev, @@ -181,7 +180,7 @@ int efa_com_create_cq(struct efa_com_dev *edev, result->cq_idx = cmd_completion.cq_idx; result->actual_depth = params->cq_depth; - return err; + return 0; } int efa_com_destroy_cq(struct efa_com_dev *edev, @@ -307,7 +306,8 @@ int efa_com_create_ah(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&cmd_completion, sizeof(cmd_completion)); if (err) { - ibdev_err(edev->efa_dev, "Failed to create ah [%d]\n", err); + ibdev_err(edev->efa_dev, "Failed to create ah for %pI6 [%d]\n", + ah_cmd.dest_addr, err); return err; } diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index db974caf1eb1..dd1c6d49466f 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -100,7 +100,7 @@ static int efa_request_mgmnt_irq(struct efa_dev *dev) nr_cpumask_bits, &irq->affinity_hint_mask, irq->vector); irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); - return err; + return 0; } static void efa_setup_mgmnt_irq(struct efa_dev *dev) @@ -197,6 +197,10 @@ static void efa_stats_init(struct efa_dev *dev) } static const struct ib_device_ops efa_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_EFA, + .uverbs_abi_ver = EFA_UVERBS_ABI_VERSION, + .alloc_pd = efa_alloc_pd, .alloc_ucontext = efa_alloc_ucontext, .create_ah = efa_create_ah, @@ -220,6 +224,7 @@ static const struct ib_device_ops efa_dev_ops = { .reg_user_mr = efa_reg_mr, INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext), }; @@ -259,12 +264,10 @@ static int efa_ib_device_add(struct efa_dev *dev) if (err) goto err_release_doorbell_bar; - dev->ibdev.owner = THIS_MODULE; dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED; dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &pdev->dev; - dev->ibdev.uverbs_abi_ver = EFA_UVERBS_ABI_VERSION; dev->ibdev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | @@ -287,7 +290,6 @@ static int efa_ib_device_add(struct efa_dev *dev) dev->ibdev.uverbs_ex_cmd_mask = (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); - dev->ibdev.driver_id = RDMA_DRIVER_EFA; ib_set_device_ops(&dev->ibdev, &efa_dev_ops); err = ib_register_device(&dev->ibdev, "efa_%d"); diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index fb6115244d4c..df77bc312a25 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -447,12 +447,6 @@ void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) struct efa_dev *dev = to_edev(ibpd->device); struct efa_pd *pd = to_epd(ibpd); - if (udata->inlen && - !ib_is_udata_cleared(udata, 0, udata->inlen)) { - ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); - return; - } - ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn); efa_pd_dealloc(dev, pd->pdn); } @@ -470,12 +464,6 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) struct efa_qp *qp = to_eqp(ibqp); int err; - if (udata->inlen && - !ib_is_udata_cleared(udata, 0, udata->inlen)) { - ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); - return -EINVAL; - } - 
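/*
 * A minimal sketch (not taken from this patch) of the converted .create_cq
 * contract that this series applies across cxgb3, cxgb4 and efa: ib_core now
 * allocates the driver CQ container itself, sized via
 * INIT_RDMA_OBJ_SIZE(ib_cq, <driver>_cq, ibcq), so the driver callback gets a
 * pre-allocated struct ib_cq and returns an errno instead of an ERR_PTR.
 * struct foo_cq, foo_create_cq and foo_dev_ops below are illustrative
 * placeholders, not code from this merge.
 */
#include <rdma/ib_verbs.h>

struct foo_cq {
	struct ib_cq ibcq;	/* embedded so ib_core can size and place it */
};

static int foo_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
			 struct ib_udata *udata)
{
	/* ib_core already allocated the containing foo_cq for us */
	struct foo_cq *cq = container_of(ibcq, struct foo_cq, ibcq);

	if (attr->flags)
		return -EINVAL;		/* plain errno in the new contract */

	/* ... program the hardware queue, copy any uresp to udata ... */
	cq->ibcq.cqe = attr->cqe;
	return 0;
}

static const struct ib_device_ops foo_dev_ops = {
	.create_cq = foo_create_cq,
	/* tells ib_core how big foo_cq is and where ibcq sits inside it */
	INIT_RDMA_OBJ_SIZE(ib_cq, foo_cq, ibcq),
};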
ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num); err = efa_destroy_qp_handle(dev, qp->qp_handle); if (err) @@ -870,31 +858,18 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) return efa_com_destroy_cq(&dev->edev, &params); } -int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibcq->device); struct efa_cq *cq = to_ecq(ibcq); - int err; - - if (udata->inlen && - !ib_is_udata_cleared(udata, 0, udata->inlen)) { - ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); - return -EINVAL; - } ibdev_dbg(&dev->ibdev, "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); - err = efa_destroy_cq_idx(dev, cq->cq_idx); - if (err) - return err; - + efa_destroy_cq_idx(dev, cq->cq_idx); dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, DMA_FROM_DEVICE); - - kfree(cq); - return 0; } static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, @@ -910,17 +885,20 @@ static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, return 0; } -static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *ibucontext, - struct ib_udata *udata) +int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct efa_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct efa_ucontext, ibucontext); struct efa_ibv_create_cq_resp resp = {}; struct efa_com_create_cq_params params; struct efa_com_create_cq_result result; + struct ib_device *ibdev = ibcq->device; struct efa_dev *dev = to_edev(ibdev); struct efa_ibv_create_cq cmd = {}; + struct efa_cq *cq = to_ecq(ibcq); bool cq_entry_inserted = false; - struct efa_cq *cq; + int entries = attr->cqe; int err; ibdev_dbg(ibdev, "create_cq entries %d\n", entries); @@ -978,19 +956,13 @@ static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries, goto err_out; } - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) { - err = -ENOMEM; - goto err_out; - } - - cq->ucontext = to_eucontext(ibucontext); + cq->ucontext = ucontext; cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs); cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size, DMA_FROM_DEVICE); if (!cq->cpu_addr) { err = -ENOMEM; - goto err_free_cq; + goto err_out; } params.uarn = cq->ucontext->uarn; @@ -1009,8 +981,8 @@ static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries, err = cq_mmap_entries_setup(dev, cq, &resp); if (err) { - ibdev_dbg(ibdev, - "Could not setup cq[%u] mmap entries\n", cq->cq_idx); + ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n", + cq->cq_idx); goto err_destroy_cq; } @@ -1026,11 +998,10 @@ static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries, } } - ibdev_dbg(ibdev, - "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n", + ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. 
dma[%pad] virt[0x%p]\n", cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr); - return &cq->ibcq; + return 0; err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); @@ -1039,23 +1010,9 @@ err_free_mapped: DMA_FROM_DEVICE); if (!cq_entry_inserted) free_pages_exact(cq->cpu_addr, cq->size); -err_free_cq: - kfree(cq); err_out: atomic64_inc(&dev->stats.sw_stats.create_cq_err); - return ERR_PTR(err); -} - -struct ib_cq *efa_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) -{ - struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata, - struct efa_ucontext, - ibucontext); - - return do_create_cq(ibdev, attr->cqe, attr->comp_vector, - &ucontext->ibucontext, udata); + return err; } static int umem_to_page_list(struct efa_dev *dev, @@ -1065,21 +1022,15 @@ static int umem_to_page_list(struct efa_dev *dev, u8 hp_shift) { u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT); - struct sg_dma_page_iter sg_iter; - unsigned int page_idx = 0; + struct ib_block_iter biter; unsigned int hp_idx = 0; ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n", hp_cnt, pages_in_hp); - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - if (page_idx % pages_in_hp == 0) { - page_list[hp_idx] = sg_page_iter_dma_address(&sg_iter); - hp_idx++; - } - - page_idx++; - } + rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, + BIT(hp_shift)) + page_list[hp_idx++] = rdma_block_iter_dma_address(&biter); return 0; } @@ -1114,14 +1065,14 @@ err: */ static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl) { - unsigned int entry, payloads_in_sg, chunk_list_size, chunk_idx, payload_idx; struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list; int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages; struct scatterlist *pages_sgl = pbl->phys.indirect.sgl; + unsigned int chunk_list_size, chunk_idx, payload_idx; int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt; struct efa_com_ctrl_buff_info *ctrl_buf; u64 *cur_chunk_buf, *prev_chunk_buf; - struct scatterlist *sg; + struct ib_block_iter biter; dma_addr_t dma_addr; int i; @@ -1155,18 +1106,15 @@ static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl) chunk_idx = 0; payload_idx = 0; cur_chunk_buf = chunk_list->chunks[0].buf; - for_each_sg(pages_sgl, sg, sg_dma_cnt, entry) { - payloads_in_sg = sg_dma_len(sg) >> EFA_CHUNK_PAYLOAD_SHIFT; - for (i = 0; i < payloads_in_sg; i++) { - cur_chunk_buf[payload_idx++] = - (sg_dma_address(sg) & ~(EFA_CHUNK_PAYLOAD_SIZE - 1)) + - (EFA_CHUNK_PAYLOAD_SIZE * i); - - if (payload_idx == EFA_PTRS_PER_CHUNK) { - chunk_idx++; - cur_chunk_buf = chunk_list->chunks[chunk_idx].buf; - payload_idx = 0; - } + rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt, + EFA_CHUNK_PAYLOAD_SIZE) { + cur_chunk_buf[payload_idx++] = + rdma_block_iter_dma_address(&biter); + + if (payload_idx == EFA_PTRS_PER_CHUNK) { + chunk_idx++; + cur_chunk_buf = chunk_list->chunks[chunk_idx].buf; + payload_idx = 0; } } @@ -1314,30 +1262,30 @@ static int pbl_create(struct efa_dev *dev, int err; pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE; - pbl->pbl_buf = kzalloc(pbl->pbl_buf_size_in_bytes, - GFP_KERNEL | __GFP_NOWARN); - if (pbl->pbl_buf) { - pbl->physically_continuous = 1; + pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL); + if (!pbl->pbl_buf) + return -ENOMEM; + + if (is_vmalloc_addr(pbl->pbl_buf)) { + pbl->physically_continuous = 0; err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt, hp_shift); if (err) - goto 
err_continuous; - err = pbl_continuous_initialize(dev, pbl); + goto err_free; + + err = pbl_indirect_initialize(dev, pbl); if (err) - goto err_continuous; + goto err_free; } else { - pbl->physically_continuous = 0; - pbl->pbl_buf = vzalloc(pbl->pbl_buf_size_in_bytes); - if (!pbl->pbl_buf) - return -ENOMEM; - + pbl->physically_continuous = 1; err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt, hp_shift); if (err) - goto err_indirect; - err = pbl_indirect_initialize(dev, pbl); + goto err_free; + + err = pbl_continuous_initialize(dev, pbl); if (err) - goto err_indirect; + goto err_free; } ibdev_dbg(&dev->ibdev, @@ -1346,24 +1294,20 @@ static int pbl_create(struct efa_dev *dev, return 0; -err_continuous: - kfree(pbl->pbl_buf); - return err; -err_indirect: - vfree(pbl->pbl_buf); +err_free: + kvfree(pbl->pbl_buf); return err; } static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl) { - if (pbl->physically_continuous) { + if (pbl->physically_continuous) dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr, pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE); - kfree(pbl->pbl_buf); - } else { + else pbl_indirect_terminate(dev, pbl); - vfree(pbl->pbl_buf); - } + + kvfree(pbl->pbl_buf); } static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr, @@ -1417,56 +1361,6 @@ static int efa_create_pbl(struct efa_dev *dev, return 0; } -static void efa_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, u8 *shift, u32 *ncont) -{ - struct scatterlist *sg; - u64 base = ~0, p = 0; - unsigned long tmp; - unsigned long m; - u64 len, pfn; - int i = 0; - int entry; - - addr = addr >> PAGE_SHIFT; - tmp = (unsigned long)addr; - m = find_first_bit(&tmp, BITS_PER_LONG); - if (max_page_shift) - m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); - - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = DIV_ROUND_UP(sg_dma_len(sg), PAGE_SIZE); - pfn = sg_dma_address(sg) >> PAGE_SHIFT; - if (base + p != pfn) { - /* - * If either the offset or the new - * base are unaligned update m - */ - tmp = (unsigned long)(pfn | p); - if (!IS_ALIGNED(tmp, 1 << m)) - m = find_first_bit(&tmp, BITS_PER_LONG); - - base = pfn; - p = 0; - } - - p += len; - i += len; - } - - if (i) { - m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); - *ncont = DIV_ROUND_UP(i, (1 << m)); - } else { - m = 0; - *ncont = 0; - } - - *shift = PAGE_SHIFT + m; - *count = i; -} - struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -1474,11 +1368,10 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, struct efa_dev *dev = to_edev(ibpd->device); struct efa_com_reg_mr_params params = {}; struct efa_com_reg_mr_result result = {}; - unsigned long max_page_shift; struct pbl_context pbl; + unsigned int pg_sz; struct efa_mr *mr; int inline_size; - int npages; int err; if (udata->inlen && @@ -1515,13 +1408,24 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, params.iova = virt_addr; params.mr_length_in_bytes = length; params.permissions = access_flags & 0x1; - max_page_shift = fls64(dev->dev_attr.page_size_cap); - efa_cont_pages(mr->umem, start, max_page_shift, &npages, - ¶ms.page_shift, ¶ms.page_num); + pg_sz = ib_umem_find_best_pgsz(mr->umem, + dev->dev_attr.page_size_cap, + virt_addr); + if (!pg_sz) { + err = -EOPNOTSUPP; + ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n", + dev->dev_attr.page_size_cap); + goto err_unmap; + 
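	/*
	 * Worked example for the page-size computation that follows (numbers
	 * are illustrative, not from this patch): if ib_umem_find_best_pgsz()
	 * returns pg_sz = 2 MiB (0x200000), then params.page_shift =
	 * __ffs(pg_sz) = 21, and for a 5 MiB region whose start lies 1 MiB
	 * into a huge page, params.page_num = DIV_ROUND_UP(5 MiB + 1 MiB,
	 * 2 MiB) = 3; the offset of start within the first huge page is
	 * counted toward the length so the tail of the mapping is not lost.
	 */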
} + + params.page_shift = __ffs(pg_sz); + params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)), + pg_sz); + ibdev_dbg(&dev->ibdev, - "start %#llx length %#llx npages %d params.page_shift %u params.page_num %u\n", - start, length, npages, params.page_shift, params.page_num); + "start %#llx length %#llx params.page_shift %u params.page_num %u\n", + start, length, params.page_shift, params.page_num); inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array); if (params.page_num <= inline_size) { @@ -1567,12 +1471,6 @@ int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) struct efa_mr *mr = to_emr(ibmr); int err; - if (udata->inlen && - !ib_is_udata_cleared(udata, 0, udata->inlen)) { - ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); - return -EINVAL; - } - ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey); if (mr->umem) { @@ -1580,8 +1478,8 @@ err = efa_com_dereg_mr(&dev->edev, &params); if (err) return err; - ib_umem_release(mr->umem); } + ib_umem_release(mr->umem); kfree(mr); @@ -1707,13 +1605,15 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, err = -EINVAL; } - if (err) + if (err) { ibdev_dbg( &dev->ibdev, "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n", entry->address, length, entry->mmap_flag, err); + return err; + } - return err; + return 0; } int efa_mmap(struct ib_ucontext *ibucontext, diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 4044a8c8dbf4..0405d26d0833 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := \ affinity.o \ + aspm.o \ chip.o \ device.o \ driver.o \ diff --git a/drivers/infiniband/hw/hfi1/aspm.c b/drivers/infiniband/hw/hfi1/aspm.c new file mode 100644 index 000000000000..a3c53be4072c --- /dev/null +++ b/drivers/infiniband/hw/hfi1/aspm.c @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2019 Intel Corporation. + * + */ + +#include "aspm.h" + +/* Time after which the timer interrupt will re-enable ASPM */ +#define ASPM_TIMER_MS 1000 +/* Time for which interrupts are ignored after a timer has been scheduled */ +#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2) +/* Two interrupts within this time trigger ASPM disable */ +#define ASPM_TRIGGER_MS 1 +#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull) +#define ASPM_L1_SUPPORTED(reg) \ + ((((reg) & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2) + +uint aspm_mode = ASPM_MODE_DISABLED; +module_param_named(aspm, aspm_mode, uint, 0444); +MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); + +static bool aspm_hw_l1_supported(struct hfi1_devdata *dd) +{ + struct pci_dev *parent = dd->pcidev->bus->self; + u32 up, dn; + + /* + * If the driver does not have access to the upstream component, + * it cannot support ASPM L1 at all. 
+ */ + if (!parent) + return false; + + pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn); + dn = ASPM_L1_SUPPORTED(dn); + + pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up); + up = ASPM_L1_SUPPORTED(up); + + /* ASPM works on A-step but is reported as not supported */ + return (!!dn || is_ax(dd)) && !!up; +} + +/* Set L1 entrance latency for slower entry to L1 */ +static void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd) +{ + u32 l1_ent_lat = 0x4u; + u32 reg32; + + pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32); + reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK; + reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT; + pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32); +} + +static void aspm_hw_enable_l1(struct hfi1_devdata *dd) +{ + struct pci_dev *parent = dd->pcidev->bus->self; + + /* + * If the driver does not have access to the upstream component, + * it cannot support ASPM L1 at all. + */ + if (!parent) + return; + + /* Enable ASPM L1 first in upstream component and then downstream */ + pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, + PCI_EXP_LNKCTL_ASPM_L1); + pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, + PCI_EXP_LNKCTL_ASPM_L1); +} + +void aspm_hw_disable_l1(struct hfi1_devdata *dd) +{ + struct pci_dev *parent = dd->pcidev->bus->self; + + /* Disable ASPM L1 first in downstream component and then upstream */ + pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, 0x0); + if (parent) + pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, 0x0); +} + +static void aspm_enable(struct hfi1_devdata *dd) +{ + if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED || + !dd->aspm_supported) + return; + + aspm_hw_enable_l1(dd); + dd->aspm_enabled = true; +} + +static void aspm_disable(struct hfi1_devdata *dd) +{ + if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED) + return; + + aspm_hw_disable_l1(dd); + dd->aspm_enabled = false; +} + +static void aspm_disable_inc(struct hfi1_devdata *dd) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->aspm_lock, flags); + aspm_disable(dd); + atomic_inc(&dd->aspm_disabled_cnt); + spin_unlock_irqrestore(&dd->aspm_lock, flags); +} + +static void aspm_enable_dec(struct hfi1_devdata *dd) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->aspm_lock, flags); + if (atomic_dec_and_test(&dd->aspm_disabled_cnt)) + aspm_enable(dd); + spin_unlock_irqrestore(&dd->aspm_lock, flags); +} + +/* ASPM processing for each receive context interrupt */ +void __aspm_ctx_disable(struct hfi1_ctxtdata *rcd) +{ + bool restart_timer; + bool close_interrupts; + unsigned long flags; + ktime_t now, prev; + + spin_lock_irqsave(&rcd->aspm_lock, flags); + /* PSM contexts are open */ + if (!rcd->aspm_intr_enable) + goto unlock; + + prev = rcd->aspm_ts_last_intr; + now = ktime_get(); + rcd->aspm_ts_last_intr = now; + + /* An interrupt pair close together in time */ + close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS; + + /* Don't push out our timer till this much time has elapsed */ + restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) > + ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC; + restart_timer = restart_timer && close_interrupts; + + /* Disable ASPM and schedule timer */ + if (rcd->aspm_enabled && close_interrupts) { + aspm_disable_inc(rcd->dd); + rcd->aspm_enabled = false; + restart_timer = true; + } + + if (restart_timer) { + mod_timer(&rcd->aspm_timer, 
+ jiffies + msecs_to_jiffies(ASPM_TIMER_MS)); + rcd->aspm_ts_timer_sched = now; + } +unlock: + spin_unlock_irqrestore(&rcd->aspm_lock, flags); +} + +/* Timer function for re-enabling ASPM in the absence of interrupt activity */ +static void aspm_ctx_timer_function(struct timer_list *t) +{ + struct hfi1_ctxtdata *rcd = from_timer(rcd, t, aspm_timer); + unsigned long flags; + + spin_lock_irqsave(&rcd->aspm_lock, flags); + aspm_enable_dec(rcd->dd); + rcd->aspm_enabled = true; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); +} + +/* + * Disable interrupt processing for verbs contexts when PSM or VNIC contexts + * are open. + */ +void aspm_disable_all(struct hfi1_devdata *dd) +{ + struct hfi1_ctxtdata *rcd; + unsigned long flags; + u16 i; + + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) { + del_timer_sync(&rcd->aspm_timer); + spin_lock_irqsave(&rcd->aspm_lock, flags); + rcd->aspm_intr_enable = false; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); + hfi1_rcd_put(rcd); + } + } + + aspm_disable(dd); + atomic_set(&dd->aspm_disabled_cnt, 0); +} + +/* Re-enable interrupt processing for verbs contexts */ +void aspm_enable_all(struct hfi1_devdata *dd) +{ + struct hfi1_ctxtdata *rcd; + unsigned long flags; + u16 i; + + aspm_enable(dd); + + if (aspm_mode != ASPM_MODE_DYNAMIC) + return; + + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) { + spin_lock_irqsave(&rcd->aspm_lock, flags); + rcd->aspm_intr_enable = true; + rcd->aspm_enabled = true; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); + hfi1_rcd_put(rcd); + } + } +} + +static void aspm_ctx_init(struct hfi1_ctxtdata *rcd) +{ + spin_lock_init(&rcd->aspm_lock); + timer_setup(&rcd->aspm_timer, aspm_ctx_timer_function, 0); + rcd->aspm_intr_supported = rcd->dd->aspm_supported && + aspm_mode == ASPM_MODE_DYNAMIC && + rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt; +} + +void aspm_init(struct hfi1_devdata *dd) +{ + struct hfi1_ctxtdata *rcd; + u16 i; + + spin_lock_init(&dd->aspm_lock); + dd->aspm_supported = aspm_hw_l1_supported(dd); + + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + aspm_ctx_init(rcd); + hfi1_rcd_put(rcd); + } + + /* Start with ASPM disabled */ + aspm_hw_set_l1_ent_latency(dd); + dd->aspm_enabled = false; + aspm_hw_disable_l1(dd); + + /* Now turn on ASPM if configured */ + aspm_enable_all(dd); +} + +void aspm_exit(struct hfi1_devdata *dd) +{ + aspm_disable_all(dd); + + /* Turn on ASPM on exit to conserve power */ + aspm_enable(dd); +} + diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index e8133870ee87..75d5d18da3da 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -57,266 +57,20 @@ enum aspm_mode { ASPM_MODE_DYNAMIC = 2, /* ASPM enabled/disabled dynamically */ }; -/* Time after which the timer interrupt will re-enable ASPM */ -#define ASPM_TIMER_MS 1000 -/* Time for which interrupts are ignored after a timer has been scheduled */ -#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2) -/* Two interrupts within this time trigger ASPM disable */ -#define ASPM_TRIGGER_MS 1 -#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull) -#define ASPM_L1_SUPPORTED(reg) \ - (((reg & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2) +void aspm_init(struct hfi1_devdata *dd); +void aspm_exit(struct hfi1_devdata *dd); +void aspm_hw_disable_l1(struct hfi1_devdata *dd); +void __aspm_ctx_disable(struct hfi1_ctxtdata *rcd); +void 
aspm_disable_all(struct hfi1_devdata *dd); +void aspm_enable_all(struct hfi1_devdata *dd); -static inline bool aspm_hw_l1_supported(struct hfi1_devdata *dd) -{ - struct pci_dev *parent = dd->pcidev->bus->self; - u32 up, dn; - - /* - * If the driver does not have access to the upstream component, - * it cannot support ASPM L1 at all. - */ - if (!parent) - return false; - - pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn); - dn = ASPM_L1_SUPPORTED(dn); - - pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up); - up = ASPM_L1_SUPPORTED(up); - - /* ASPM works on A-step but is reported as not supported */ - return (!!dn || is_ax(dd)) && !!up; -} - -/* Set L1 entrance latency for slower entry to L1 */ -static inline void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd) -{ - u32 l1_ent_lat = 0x4u; - u32 reg32; - - pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32); - reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK; - reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT; - pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32); -} - -static inline void aspm_hw_enable_l1(struct hfi1_devdata *dd) -{ - struct pci_dev *parent = dd->pcidev->bus->self; - - /* - * If the driver does not have access to the upstream component, - * it cannot support ASPM L1 at all. - */ - if (!parent) - return; - - /* Enable ASPM L1 first in upstream component and then downstream */ - pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPMC, - PCI_EXP_LNKCTL_ASPM_L1); - pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPMC, - PCI_EXP_LNKCTL_ASPM_L1); -} - -static inline void aspm_hw_disable_l1(struct hfi1_devdata *dd) -{ - struct pci_dev *parent = dd->pcidev->bus->self; - - /* Disable ASPM L1 first in downstream component and then upstream */ - pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPMC, 0x0); - if (parent) - pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPMC, 0x0); -} - -static inline void aspm_enable(struct hfi1_devdata *dd) -{ - if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED || - !dd->aspm_supported) - return; - - aspm_hw_enable_l1(dd); - dd->aspm_enabled = true; -} - -static inline void aspm_disable(struct hfi1_devdata *dd) -{ - if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED) - return; - - aspm_hw_disable_l1(dd); - dd->aspm_enabled = false; -} - -static inline void aspm_disable_inc(struct hfi1_devdata *dd) -{ - unsigned long flags; - - spin_lock_irqsave(&dd->aspm_lock, flags); - aspm_disable(dd); - atomic_inc(&dd->aspm_disabled_cnt); - spin_unlock_irqrestore(&dd->aspm_lock, flags); -} - -static inline void aspm_enable_dec(struct hfi1_devdata *dd) -{ - unsigned long flags; - - spin_lock_irqsave(&dd->aspm_lock, flags); - if (atomic_dec_and_test(&dd->aspm_disabled_cnt)) - aspm_enable(dd); - spin_unlock_irqrestore(&dd->aspm_lock, flags); -} - -/* ASPM processing for each receive context interrupt */ static inline void aspm_ctx_disable(struct hfi1_ctxtdata *rcd) { - bool restart_timer; - bool close_interrupts; - unsigned long flags; - ktime_t now, prev; - /* Quickest exit for minimum impact */ - if (!rcd->aspm_intr_supported) - return; - - spin_lock_irqsave(&rcd->aspm_lock, flags); - /* PSM contexts are open */ - if (!rcd->aspm_intr_enable) - goto unlock; - - prev = rcd->aspm_ts_last_intr; - now = ktime_get(); - rcd->aspm_ts_last_intr = now; - - /* An interrupt pair close together in time */ - close_interrupts = ktime_to_ns(ktime_sub(now, 
prev)) < ASPM_TRIGGER_NS; - - /* Don't push out our timer till this much time has elapsed */ - restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) > - ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC; - restart_timer = restart_timer && close_interrupts; - - /* Disable ASPM and schedule timer */ - if (rcd->aspm_enabled && close_interrupts) { - aspm_disable_inc(rcd->dd); - rcd->aspm_enabled = false; - restart_timer = true; - } - - if (restart_timer) { - mod_timer(&rcd->aspm_timer, - jiffies + msecs_to_jiffies(ASPM_TIMER_MS)); - rcd->aspm_ts_timer_sched = now; - } -unlock: - spin_unlock_irqrestore(&rcd->aspm_lock, flags); -} - -/* Timer function for re-enabling ASPM in the absence of interrupt activity */ -static inline void aspm_ctx_timer_function(struct timer_list *t) -{ - struct hfi1_ctxtdata *rcd = from_timer(rcd, t, aspm_timer); - unsigned long flags; - - spin_lock_irqsave(&rcd->aspm_lock, flags); - aspm_enable_dec(rcd->dd); - rcd->aspm_enabled = true; - spin_unlock_irqrestore(&rcd->aspm_lock, flags); -} - -/* - * Disable interrupt processing for verbs contexts when PSM or VNIC contexts - * are open. - */ -static inline void aspm_disable_all(struct hfi1_devdata *dd) -{ - struct hfi1_ctxtdata *rcd; - unsigned long flags; - u16 i; - - for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = hfi1_rcd_get_by_index(dd, i); - if (rcd) { - del_timer_sync(&rcd->aspm_timer); - spin_lock_irqsave(&rcd->aspm_lock, flags); - rcd->aspm_intr_enable = false; - spin_unlock_irqrestore(&rcd->aspm_lock, flags); - hfi1_rcd_put(rcd); - } - } - - aspm_disable(dd); - atomic_set(&dd->aspm_disabled_cnt, 0); -} - -/* Re-enable interrupt processing for verbs contexts */ -static inline void aspm_enable_all(struct hfi1_devdata *dd) -{ - struct hfi1_ctxtdata *rcd; - unsigned long flags; - u16 i; - - aspm_enable(dd); - - if (aspm_mode != ASPM_MODE_DYNAMIC) + if (likely(!rcd->aspm_intr_supported)) return; - for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = hfi1_rcd_get_by_index(dd, i); - if (rcd) { - spin_lock_irqsave(&rcd->aspm_lock, flags); - rcd->aspm_intr_enable = true; - rcd->aspm_enabled = true; - spin_unlock_irqrestore(&rcd->aspm_lock, flags); - hfi1_rcd_put(rcd); - } - } -} - -static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) -{ - spin_lock_init(&rcd->aspm_lock); - timer_setup(&rcd->aspm_timer, aspm_ctx_timer_function, 0); - rcd->aspm_intr_supported = rcd->dd->aspm_supported && - aspm_mode == ASPM_MODE_DYNAMIC && - rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt; -} - -static inline void aspm_init(struct hfi1_devdata *dd) -{ - struct hfi1_ctxtdata *rcd; - u16 i; - - spin_lock_init(&dd->aspm_lock); - dd->aspm_supported = aspm_hw_l1_supported(dd); - - for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = hfi1_rcd_get_by_index(dd, i); - if (rcd) - aspm_ctx_init(rcd); - hfi1_rcd_put(rcd); - } - - /* Start with ASPM disabled */ - aspm_hw_set_l1_ent_latency(dd); - dd->aspm_enabled = false; - aspm_hw_disable_l1(dd); - - /* Now turn on ASPM if configured */ - aspm_enable_all(dd); -} - -static inline void aspm_exit(struct hfi1_devdata *dd) -{ - aspm_disable_all(dd); - - /* Turn on ASPM on exit to conserve power */ - aspm_enable(dd); + __aspm_ctx_disable(rcd); } #endif /* _ASPM_H */ diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 15efb4a380b2..d268bf9c42ee 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -987,9 +987,6 @@ static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target) struct 
hfi1_pportdata *ppd; int ret; - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - ppd = private2ppd(fp); ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); @@ -1155,6 +1152,7 @@ static int exprom_wp_debugfs_release(struct inode *in, struct file *fp) { \ .name = nm, \ .ops = { \ + .owner = THIS_MODULE, \ .read = readroutine, \ .write = writeroutine, \ .llseek = generic_file_llseek, \ @@ -1165,6 +1163,7 @@ static int exprom_wp_debugfs_release(struct inode *in, struct file *fp) { \ .name = nm, \ .ops = { \ + .owner = THIS_MODULE, \ .read = readf, \ .write = writef, \ .llseek = generic_file_llseek, \ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 4228393e6c4c..184dba3c2828 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -2744,8 +2744,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, u16 link_width; u16 link_speed; - response_data_size = sizeof(struct opa_port_status_rsp) + - num_vls * sizeof(struct _vls_pctrs); + response_data_size = struct_size(rsp, vls, num_vls); if (response_data_size > sizeof(pmp->data)) { pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE; return reply((struct ib_mad_hdr *)pmp); @@ -3014,8 +3013,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, } /* Sanity check */ - response_data_size = sizeof(struct opa_port_data_counters_msg) + - num_vls * sizeof(struct _vls_dctrs); + response_data_size = struct_size(req, port[0].vls, num_vls); if (response_data_size > sizeof(pmp->data)) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3232,8 +3230,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } - response_data_size = sizeof(struct opa_port_error_counters64_msg) + - num_vls * sizeof(struct _vls_ectrs); + response_data_size = struct_size(req, port[0].vls, num_vls); if (response_data_size > sizeof(pmp->data)) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index c96d193bb236..61aa5504d7c3 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -450,10 +450,6 @@ static int hfi1_pcie_caps; module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444); MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); -uint aspm_mode = ASPM_MODE_DISABLED; -module_param_named(aspm, aspm_mode, uint, 0444); -MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); - /** * tune_pcie_caps() - Code to adjust PCIe capabilities. 
* @dd: Valid device data structure diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 4e5c2d1b8cfa..79126b2b14ab 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1594,9 +1594,8 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) else sc_del_credit_return_intr(sc); trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl); - if (needint) { + if (needint) sc_return_credits(sc); - } } /** diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 4e0e9fc0a777..f8e733aa3bb8 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -348,7 +348,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) break; case IB_QPT_GSI: case IB_QPT_UD: - ah = ibah_to_rvtah(wqe->ud_wr.ah); + ah = rvt_get_swqe_ah(wqe); if (wqe->length > (1 << ah->log_pmtu)) return -EINVAL; if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) @@ -702,8 +702,8 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) sde ? sde->this_idx : 0, send_context, send_context ? send_context->sw_index : 0, - ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head, - ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail, + ib_cq_head(qp->ibqp.send_cq), + ib_cq_tail(qp->ibqp.send_cq), qp->pid, qp->s_state, qp->s_ack_state, diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 7c8cfb149da0..0477c14633ab 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1830,23 +1830,14 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) } while (qp->s_last != qp->s_acked) { - u32 s_last; - wqe = rvt_get_swqe_ptr(qp, qp->s_last); if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; trdma_clean_swqe(qp, wqe); rvt_qp_wqe_unreserve(qp, wqe); - s_last = qp->s_last; - trace_hfi1_qp_send_completion(qp, wqe, s_last); - if (++s_last >= qp->s_size) - s_last = 0; - qp->s_last = s_last; - /* see post_send() */ - barrier(); - rvt_put_qp_swqe(qp, wqe); - rvt_qp_swqe_complete(qp, + trace_hfi1_qp_send_completion(qp, wqe, qp->s_last); + rvt_qp_complete_swqe(qp, wqe, ib_hfi1_wc_opcode[wqe->wr.opcode], IB_WC_SUCCESS); @@ -1890,19 +1881,10 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, trace_hfi1_rc_completion(qp, wqe->lpsn); if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 || cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { - u32 s_last; - trdma_clean_swqe(qp, wqe); - rvt_put_qp_swqe(qp, wqe); rvt_qp_wqe_unreserve(qp, wqe); - s_last = qp->s_last; - trace_hfi1_qp_send_completion(qp, wqe, s_last); - if (++s_last >= qp->s_size) - s_last = 0; - qp->s_last = s_last; - /* see post_send() */ - barrier(); - rvt_qp_swqe_complete(qp, + trace_hfi1_qp_send_completion(qp, wqe, qp->s_last); + rvt_qp_complete_swqe(qp, wqe, ib_hfi1_wc_opcode[wqe->wr.opcode], IB_WC_SUCCESS); @@ -3026,8 +3008,7 @@ send_last: wc.dlid_path_bits = 0; wc.port_num = 0; /* Signal completion event if the solicited bit is set. 
*/ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - ib_bth_is_solicited(ohdr)); + rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr)); break; case OP(RDMA_WRITE_ONLY): diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index aa9c8d3ef87b..92acccaaaa86 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -475,7 +475,7 @@ static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd, * Must hold the qp s_lock and the exp_lock. * * Return: - * false if either of the conditions below are statisfied: + * false if either of the conditions below are satisfied: * 1. The list is empty or * 2. The indicated qp is at the head of the list and the * HFI1_S_WAIT_TID_SPACE bit is set in qp->s_flags. @@ -2024,7 +2024,6 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet, trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req); if (e->opcode == TID_OP(READ_REQ)) { struct ib_reth *reth; - u32 offset; u32 len; u32 rkey; u64 vaddr; @@ -2036,7 +2035,6 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet, * The requester always restarts from the start of the original * request. */ - offset = delta_psn(psn, e->psn) * qp->pmtu; len = be32_to_cpu(reth->length); if (psn != e->psn || len != req->total_len) goto unlock; @@ -4550,7 +4548,7 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) struct rvt_swqe *wqe; struct tid_rdma_request *req; struct tid_rdma_flow *flow; - u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn; + u32 aeth, psn, req_psn, ack_psn, resync_psn, ack_kpsn; unsigned long flags; u16 fidx; @@ -4754,7 +4752,6 @@ done: IB_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ flow = &req->flows[req->acked_tail]; - fspsn = full_flow_psn(flow, flow->flow_state.spsn); trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow); req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2])); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index d1372cc66de6..2f84290a88ca 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -79,6 +79,8 @@ __print_symbolic(opcode, \ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ ib_opcode_name(RC_COMPARE_SWAP), \ ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \ + ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \ ib_opcode_name(TID_RDMA_WRITE_REQ), \ ib_opcode_name(TID_RDMA_WRITE_RESP), \ ib_opcode_name(TID_RDMA_WRITE_DATA), \ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 4ed4fcfabd6c..0c77f18120ed 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -476,8 +476,7 @@ last_imm: wc.dlid_path_bits = 0; wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - ib_bth_is_solicited(ohdr)); + rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr)); break; case OP(RDMA_WRITE_FIRST): diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 4cb0fce5c096..e804af71b629 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -87,7 +87,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) rcu_read_lock(); qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp, - swqe->ud_wr.remote_qpn); + rvt_get_swqe_remote_qpn(swqe)); if (!qp) { ibp->rvp.n_pkt_drops++; rcu_read_unlock(); @@ -105,7 +105,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) goto drop; } - ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr; + ah_attr = rvt_get_swqe_ah_attr(swqe); ppd = ppd_from_ibp(ibp); if (qp->ibqp.qp_num > 1) { @@ -135,8 +135,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (qp->ibqp.qp_num) { u32 qkey; - qkey = (int)swqe->ud_wr.remote_qkey < 0 ? - sqp->qkey : swqe->ud_wr.remote_qkey; + qkey = (int)rvt_get_swqe_remote_qkey(swqe) < 0 ? + sqp->qkey : rvt_get_swqe_remote_qkey(swqe); if (unlikely(qkey != qp->qkey)) goto drop; /* silently drop per IBTA spec */ } @@ -240,7 +240,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) { if (sqp->ibqp.qp_type == IB_QPT_GSI || sqp->ibqp.qp_type == IB_QPT_SMI) - wc.pkey_index = swqe->ud_wr.pkey_index; + wc.pkey_index = rvt_get_swqe_pkey_index(swqe); else wc.pkey_index = sqp->s_pkey_index; } else { @@ -255,8 +255,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - swqe->wr.send_flags & IB_SEND_SOLICITED); + rvt_recv_cq(qp, &wc, swqe->wr.send_flags & IB_SEND_SOLICITED); ibp->rvp.n_loop_pkts++; bail_unlock: spin_unlock_irqrestore(&qp->r_lock, flags); @@ -283,20 +282,21 @@ static void hfi1_make_bth_deth(struct rvt_qp *qp, struct rvt_swqe *wqe, bth0 |= IB_BTH_SOLICITED; bth0 |= extra_bytes << 20; if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) - *pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); + *pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe)); else *pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); if (!bypass) bth0 |= *pkey; ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); + ohdr->bth[1] = cpu_to_be32(rvt_get_swqe_remote_qpn(wqe)); ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn)); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? - qp->qkey : wqe->ud_wr.remote_qkey); + ohdr->u.ud.deth[0] = + cpu_to_be32((int)rvt_get_swqe_remote_qkey(wqe) < 0 ? 
qp->qkey : + rvt_get_swqe_remote_qkey(wqe)); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); } @@ -316,7 +316,7 @@ void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; + ah_attr = rvt_get_swqe_ah_attr(wqe); extra_bytes = -wqe->length & 3; nwords = ((wqe->length + extra_bytes) >> 2) + SIZE_OF_CRC; @@ -380,7 +380,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; u32 dlid, slid, nwords, extra_bytes; - u32 dest_qp = wqe->ud_wr.remote_qpn; + u32 dest_qp = rvt_get_swqe_remote_qpn(wqe); u32 src_qp = qp->ibqp.qp_num; u16 len, pkey; u8 l4, sc5; @@ -388,7 +388,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; + ah_attr = rvt_get_swqe_ah_attr(wqe); /* * Build 16B Management Packet if either the destination @@ -450,7 +450,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (is_mgmt) { l4 = OPA_16B_L4_FM; - pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); + pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe)); hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt, dest_qp, src_qp); } else { @@ -515,7 +515,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /* Construct the header. */ ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; + ah_attr = rvt_get_swqe_ah_attr(wqe); priv->hdr_type = hfi1_get_hdr_type(ppd->lid, ah_attr); if ((!hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) || (rdma_ah_get_dlid(ah_attr) == be32_to_cpu(OPA_LID_PERMISSIVE))) { @@ -1061,7 +1061,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. 
*/ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, solicited); + rvt_recv_cq(qp, &wc, solicited); return; drop: diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 02eee8eff1db..b89a9b9aef7a 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -118,13 +118,10 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, size_t npages, bool dirty) { - size_t i; - - for (i = 0; i < npages; i++) { - if (dirty) - set_page_dirty_lock(p[i]); - put_page(p[i]); - } + if (dirty) + put_user_pages_dirty_lock(p, npages); + else + put_user_pages(p, npages); if (mm) { /* during close after signal, mm can be NULL */ atomic64_sub(npages, &mm->pinned_vm); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index bad3229bad37..c4b243f50c76 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1779,6 +1779,9 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, } static const struct ib_device_ops hfi1_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_HFI1, + .alloc_hw_stats = alloc_hw_stats, .alloc_rdma_netdev = hfi1_vnic_alloc_rn, .get_dev_fw_str = hfi1_get_dev_fw_str, @@ -1829,7 +1832,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) */ if (!ib_hfi1_sys_image_guid) ib_hfi1_sys_image_guid = ibdev->node_guid; - ibdev->owner = THIS_MODULE; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; @@ -1923,7 +1925,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &ib_hfi1_attr_group); - ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1); + ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) goto err_verbs_txreq; diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index 61cda7d00627..8bf847bcd8d3 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -8,25 +8,24 @@ config INFINIBAND_HNS is used in Hisilicon Hip06 and more further ICT SoC based on platform device. - To compile this driver as a module, choose M here: the module - will be called hns-roce. + To compile HIP06 or HIP08 driver as module, choose M here. config INFINIBAND_HNS_HIP06 - tristate "Hisilicon Hip06 Family RoCE support" + bool "Hisilicon Hip06 Family RoCE support" depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and Hip07 SoC. These RoCE engines are platform devices. - To compile this driver as a module, choose M here: the module - will be called hns-roce-hw-v1. + To compile this driver, choose Y here: if INFINIBAND_HNS is m, this + module will be called hns-roce-hw-v1 config INFINIBAND_HNS_HIP08 - tristate "Hisilicon Hip08 Family RoCE support" + bool "Hisilicon Hip08 Family RoCE support" depends on INFINIBAND_HNS && PCI && HNS3 ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC. The RoCE engine is a PCI device. - To compile this driver as a module, choose M here: the module - will be called hns-roce-hw-v2. + To compile this driver, choose Y here: if INFINIBAND_HNS is m, this + module will be called hns-roce-hw-v2. 
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index f22d9922cfee..e105945b94a1 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -5,11 +5,16 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 -obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o -obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o -hns-roce-hw-v1-objs := hns_roce_hw_v1.o -obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o -hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o + +ifdef CONFIG_INFINIBAND_HNS_HIP06 +hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs) +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o +endif + +ifdef CONFIG_INFINIBAND_HNS_HIP08 +hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs) +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o +endif diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index dac058d3df53..8c063c598d2a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -34,6 +34,7 @@ #include <linux/platform_device.h> #include <linux/vmalloc.h> #include "hns_roce_device.h" +#include <rdma/ib_umem.h> int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj) { @@ -67,7 +68,6 @@ void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj, { hns_roce_bitmap_free_range(bitmap, obj, 1, rr); } -EXPORT_SYMBOL_GPL(hns_roce_bitmap_free); int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, int align, unsigned long *obj) @@ -174,7 +174,6 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, kfree(buf->page_list); } } -EXPORT_SYMBOL_GPL(hns_roce_buf_free); int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, struct hns_roce_buf *buf, u32 page_shift) @@ -238,6 +237,104 @@ err_free: return -ENOMEM; } +int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, + int buf_cnt, int start, struct hns_roce_buf *buf) +{ + int i, end; + int total; + + end = start + buf_cnt; + if (end > buf->npages) { + dev_err(hr_dev->dev, + "invalid kmem region,offset %d,buf_cnt %d,total %d!\n", + start, buf_cnt, buf->npages); + return -EINVAL; + } + + total = 0; + for (i = start; i < end; i++) + if (buf->nbufs == 1) + bufs[total++] = buf->direct.map + + ((dma_addr_t)i << buf->page_shift); + else + bufs[total++] = buf->page_list[i].map; + + return total; +} + +int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, + int buf_cnt, int start, struct ib_umem *umem, + int page_shift) +{ + struct ib_block_iter biter; + int total = 0; + int idx = 0; + u64 addr; + + if (page_shift < PAGE_SHIFT) { + dev_err(hr_dev->dev, "invalid page shift %d!\n", page_shift); + return -EINVAL; + } + + /* convert system page cnt to hw page cnt */ + rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, + 1 << page_shift) { + addr = rdma_block_iter_dma_address(&biter); + if (idx >= start) { + bufs[total++] = addr; + if (total >= buf_cnt) + goto done; + } + idx++; + } + +done: + return total; +} + +void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum, + int offset, int buf_cnt) +{ + if (hopnum == HNS_ROCE_HOP_NUM_0) + region->hopnum = 0; + else + region->hopnum = hopnum; + + 
region->offset = offset; + region->count = buf_cnt; +} + +void hns_roce_free_buf_list(dma_addr_t **bufs, int region_cnt) +{ + int i; + + for (i = 0; i < region_cnt; i++) { + kfree(bufs[i]); + bufs[i] = NULL; + } +} + +int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions, + dma_addr_t **bufs, int region_cnt) +{ + struct hns_roce_buf_region *r; + int i; + + for (i = 0; i < region_cnt; i++) { + r = &regions[i]; + bufs[i] = kcalloc(r->count, sizeof(dma_addr_t), GFP_KERNEL); + if (!bufs[i]) + goto err_alloc; + } + + return 0; + +err_alloc: + hns_roce_free_buf_list(bufs, i); + + return -ENOMEM; +} + void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) { if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 2acf946d02e5..0cd09bf4d7ea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -103,7 +103,6 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, context->out_param = out_param; complete(&context->done); } -EXPORT_SYMBOL_GPL(hns_roce_cmd_event); /* this should be called with "use_events" */ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, @@ -162,7 +161,7 @@ static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, unsigned long timeout) { - int ret = 0; + int ret; down(&hr_dev->cmd.event_sem); ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, @@ -204,7 +203,6 @@ int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, return ret; } -EXPORT_SYMBOL_GPL(hns_roce_cmd_mbox); int hns_roce_cmd_init(struct hns_roce_dev *hr_dev) { @@ -291,7 +289,6 @@ struct hns_roce_cmd_mailbox return mailbox; } -EXPORT_SYMBOL_GPL(hns_roce_alloc_cmd_mailbox); void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox) @@ -302,4 +299,3 @@ void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma); kfree(mailbox); } -EXPORT_SYMBOL_GPL(hns_roce_free_cmd_mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 9caf35061721..4e50c22a2da4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -205,7 +205,6 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } -EXPORT_SYMBOL_GPL(hns_roce_free_cq); static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, struct ib_udata *udata, @@ -235,8 +234,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, &buf->hr_mtt); } else { ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem), - (*umem)->page_shift, - &buf->hr_mtt); + PAGE_SHIFT, &buf->hr_mtt); } if (ret) goto err_buf; @@ -300,15 +298,15 @@ static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, &buf->hr_buf); } -struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int hns_roce_ib_create_cq(struct ib_cq *ib_cq, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct device *dev = hr_dev->dev; struct hns_roce_ib_create_cq 
ucmd; struct hns_roce_ib_create_cq_resp resp = {}; - struct hns_roce_cq *hr_cq = NULL; + struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); struct hns_roce_uar *uar = NULL; int vector = attr->comp_vector; int cq_entries = attr->cqe; @@ -319,13 +317,9 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n", cq_entries, hr_dev->caps.max_cqes); - return ERR_PTR(-EINVAL); + return -EINVAL; } - hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL); - if (!hr_cq) - return ERR_PTR(-ENOMEM); - if (hr_dev->caps.min_cqes) cq_entries = max(cq_entries, hr_dev->caps.min_cqes); @@ -416,7 +410,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, goto err_cqc; } - return &hr_cq->ib_cq; + return 0; err_cqc: hns_roce_free_cq(hr_dev, hr_cq); @@ -428,9 +422,8 @@ err_dbmap: err_mtt: hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - if (udata) - ib_umem_release(hr_cq->umem); - else + ib_umem_release(hr_cq->umem); + if (!udata) hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe); @@ -439,47 +432,37 @@ err_db: hns_roce_free_db(hr_dev, &hr_cq->db); err_cq: - kfree(hr_cq); - return ERR_PTR(ret); + return ret; } -EXPORT_SYMBOL_GPL(hns_roce_ib_create_cq); -int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - int ret = 0; if (hr_dev->hw->destroy_cq) { - ret = hr_dev->hw->destroy_cq(ib_cq, udata); - } else { - hns_roce_free_cq(hr_dev, hr_cq); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - - if (udata) { - ib_umem_release(hr_cq->umem); - - if (hr_cq->db_en == 1) - hns_roce_db_unmap_user( - rdma_udata_to_drv_context( - udata, - struct hns_roce_ucontext, - ibucontext), - &hr_cq->db); - } else { - /* Free the buff of stored cq */ - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, - ib_cq->cqe); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) - hns_roce_free_db(hr_dev, &hr_cq->db); - } - - kfree(hr_cq); + hr_dev->hw->destroy_cq(ib_cq, udata); + return; } - return ret; + hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + + ib_umem_release(hr_cq->umem); + if (udata) { + if (hr_cq->db_en == 1) + hns_roce_db_unmap_user(rdma_udata_to_drv_context( + udata, + struct hns_roce_ucontext, + ibucontext), + &hr_cq->db); + } else { + /* Free the buff of stored cq */ + hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) + hns_roce_free_db(hr_dev, &hr_cq->db); + } } -EXPORT_SYMBOL_GPL(hns_roce_ib_destroy_cq); void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) { @@ -495,7 +478,6 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) ++cq->arm_sn; cq->comp(cq); } -EXPORT_SYMBOL_GPL(hns_roce_cq_completion); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) { @@ -517,7 +499,6 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) if (atomic_dec_and_test(&cq->refcount)) complete(&cq->free); } -EXPORT_SYMBOL_GPL(hns_roce_cq_event); int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) { diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index 0c6c1fe87705..627aa46ef683 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ 
-51,7 +51,6 @@ out: return ret; } -EXPORT_SYMBOL(hns_roce_db_map_user); void hns_roce_db_unmap_user(struct hns_roce_ucontext *context, struct hns_roce_db *db) @@ -67,7 +66,6 @@ void hns_roce_db_unmap_user(struct hns_roce_ucontext *context, mutex_unlock(&context->page_mutex); } -EXPORT_SYMBOL(hns_roce_db_unmap_user); static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir( struct device *dma_device) @@ -78,7 +76,8 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir( if (!pgdir) return NULL; - bitmap_fill(pgdir->order1, HNS_ROCE_DB_PER_PAGE / 2); + bitmap_fill(pgdir->order1, + HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT); pgdir->bits[0] = pgdir->order0; pgdir->bits[1] = pgdir->order1; pgdir->page = dma_alloc_coherent(dma_device, PAGE_SIZE, @@ -116,7 +115,7 @@ found: db->u.pgdir = pgdir; db->index = i; db->db_record = pgdir->page + db->index; - db->dma = pgdir->db_dma + db->index * 4; + db->dma = pgdir->db_dma + db->index * HNS_ROCE_DB_UNIT_SIZE; db->order = order; return 0; @@ -150,7 +149,6 @@ out: return ret; } -EXPORT_SYMBOL_GPL(hns_roce_alloc_db); void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db) { @@ -170,7 +168,8 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db) i >>= o; set_bit(i, db->u.pgdir->bits[o]); - if (bitmap_full(db->u.pgdir->order1, HNS_ROCE_DB_PER_PAGE / 2)) { + if (bitmap_full(db->u.pgdir->order1, + HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT)) { dma_free_coherent(hr_dev->dev, PAGE_SIZE, db->u.pgdir->page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); @@ -179,4 +178,3 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db) mutex_unlock(&hr_dev->pgdir_mutex); } -EXPORT_SYMBOL_GPL(hns_roce_free_db); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 563cf39df6d5..a548b28aab63 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -37,9 +37,12 @@ #define DRV_NAME "hns_roce" +/* hip08 is a pci device, it includes two version according pci version id */ +#define PCI_REVISION_ID_HIP08_A 0x20 +#define PCI_REVISION_ID_HIP08_B 0x21 + #define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') -#define MAC_ADDR_OCTET_NUM 6 #define HNS_ROCE_MAX_MSG_LEN 0x80000000 #define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) @@ -48,6 +51,10 @@ #define HNS_ROCE_BA_SIZE (32 * 4096) +#define BA_BYTE_LEN 8 + +#define BITS_PER_BYTE 8 + /* Hardware specification only for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 #define HNS_ROCE_MIN_WQE_NUM 0x20 @@ -55,6 +62,7 @@ /* Hardware specification only for v1 engine */ #define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 #define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000 +#define HNS_ROCE_MAX_SGE_NUM 2 #define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20 #define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \ @@ -64,6 +72,9 @@ #define HNS_ROCE_MAX_IRQ_NUM 128 +#define HNS_ROCE_SGE_IN_WQE 2 +#define HNS_ROCE_SGE_SHIFT 4 + #define EQ_ENABLE 1 #define EQ_DISABLE 0 @@ -81,6 +92,7 @@ #define HNS_ROCE_MAX_PORTS 6 #define HNS_ROCE_MAX_GID_NUM 16 #define HNS_ROCE_GID_SIZE 16 +#define HNS_ROCE_SGE_SIZE 16 #define HNS_ROCE_HOP_NUM_0 0xff @@ -111,6 +123,8 @@ #define PAGES_SHIFT_24 24 #define PAGES_SHIFT_32 32 +#define HNS_ROCE_PCI_BAR_NUM 2 + #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 @@ -213,6 +227,9 @@ enum hns_roce_mtt_type { MTT_TYPE_IDX }; +#define HNS_ROCE_DB_TYPE_COUNT 2 +#define HNS_ROCE_DB_UNIT_SIZE 4 + enum { HNS_ROCE_DB_PER_PAGE = PAGE_SIZE / 4 }; @@ -324,6 
+341,29 @@ struct hns_roce_mtt { enum hns_roce_mtt_type mtt_type; }; +struct hns_roce_buf_region { + int offset; /* page offset */ + u32 count; /* page count*/ + int hopnum; /* addressing hop num */ +}; + +#define HNS_ROCE_MAX_BT_REGION 3 +#define HNS_ROCE_MAX_BT_LEVEL 3 +struct hns_roce_hem_list { + struct list_head root_bt; + /* link all bt dma mem by hop config */ + struct list_head mid_bt[HNS_ROCE_MAX_BT_REGION][HNS_ROCE_MAX_BT_LEVEL]; + struct list_head btm_bt; /* link all bottom bt in @mid_bt */ + dma_addr_t root_ba; /* pointer to the root ba table */ + int bt_pg_shift; +}; + +/* memory translate region */ +struct hns_roce_mtr { + struct hns_roce_hem_list hem_list; + int buf_pg_shift; +}; + struct hns_roce_mw { struct ib_mw ibmw; u32 pdn; @@ -413,8 +453,8 @@ struct hns_roce_buf { struct hns_roce_db_pgdir { struct list_head list; DECLARE_BITMAP(order0, HNS_ROCE_DB_PER_PAGE); - DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / 2); - unsigned long *bits[2]; + DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT); + unsigned long *bits[HNS_ROCE_DB_TYPE_COUNT]; u32 *page; dma_addr_t db_dma; }; @@ -472,7 +512,7 @@ struct hns_roce_idx_que { u32 buf_size; struct ib_umem *umem; struct hns_roce_mtt mtt; - u64 *bitmap; + unsigned long *bitmap; }; struct hns_roce_srq { @@ -535,7 +575,7 @@ struct hns_roce_av { u8 hop_limit; __le32 sl_tclass_flowlabel; u8 dgid[HNS_ROCE_GID_SIZE]; - u8 mac[6]; + u8 mac[ETH_ALEN]; __le16 vlan; bool vlan_en; }; @@ -620,6 +660,14 @@ struct hns_roce_qp { struct ib_umem *umem; struct hns_roce_mtt mtt; + struct hns_roce_mtr mtr; + + /* this define must less than HNS_ROCE_MAX_BT_REGION */ +#define HNS_ROCE_WQE_REGION_MAX 3 + struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX]; + int region_cnt; + int wqe_bt_pg_shift; + u32 buff_size; struct mutex mutex; u8 port; @@ -830,6 +878,9 @@ struct hns_roce_caps { u32 mtt_ba_pg_sz; u32 mtt_buf_pg_sz; u32 mtt_hop_num; + u32 wqe_sq_hop_num; + u32 wqe_sge_hop_num; + u32 wqe_rq_hop_num; u32 sccc_ba_pg_sz; u32 sccc_buf_pg_sz; u32 sccc_hop_num; @@ -921,7 +972,7 @@ struct hns_roce_hw { int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, struct ib_udata *udata); - int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata); + void (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); @@ -940,6 +991,16 @@ struct hns_roce_hw { const struct ib_device_ops *hns_roce_dev_srq_ops; }; +enum hns_phy_state { + HNS_ROCE_PHY_SLEEP = 1, + HNS_ROCE_PHY_POLLING = 2, + HNS_ROCE_PHY_DISABLED = 3, + HNS_ROCE_PHY_TRAINING = 4, + HNS_ROCE_PHY_LINKUP = 5, + HNS_ROCE_PHY_LINKERR = 6, + HNS_ROCE_PHY_TEST = 7 +}; + struct hns_roce_dev { struct ib_device ib_dev; struct platform_device *pdev; @@ -962,7 +1023,7 @@ struct hns_roce_dev { struct hns_roce_caps caps; struct xarray qp_table_xa; - unsigned char dev_addr[HNS_ROCE_MAX_PORTS][MAC_ADDR_OCTET_NUM]; + unsigned char dev_addr[HNS_ROCE_MAX_PORTS][ETH_ALEN]; u64 sys_image_guid; u32 vendor_id; u32 vendor_part_id; @@ -1084,6 +1145,19 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct hns_roce_buf *buf); +void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, + int buf_pg_shift); +int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct 
hns_roce_mtr *mtr, + dma_addr_t **bufs, struct hns_roce_buf_region *regions, + int region_cnt); +void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, + struct hns_roce_mtr *mtr); + +/* hns roce hw need current block and next block addr from mtt */ +#define MTT_MIN_COUNT 2 +int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr); + int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev); @@ -1148,6 +1222,18 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem); +void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum, + int offset, int buf_cnt); +int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions, + dma_addr_t **bufs, int count); +void hns_roce_free_buf_list(dma_addr_t **bufs, int count); + +int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, + int buf_cnt, int start, struct hns_roce_buf *buf); +int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, + int buf_cnt, int start, struct ib_umem *umem, + int page_shift); + int hns_roce_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); @@ -1178,11 +1264,11 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, __be32 send_ieth(const struct ib_send_wr *wr); int to_hr_qp_type(int qp_type); -struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); +int hns_roce_ib_create_cq(struct ib_cq *ib_cq, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata); -int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); int hns_roce_db_map_user(struct hns_roce_ucontext *context, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 8e29dbb5b5fb..f4da5bd2884f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -56,7 +56,6 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) return false; } -EXPORT_SYMBOL_GPL(hns_roce_check_whether_mhop); static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 start_idx, u32 bt_chunk_num) @@ -165,7 +164,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, + PAGE_SHIFT); mhop->bt_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; mhop->hop_num = hr_dev->caps.mtt_hop_num; break; case HEM_TYPE_CQE: @@ -173,7 +172,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, + PAGE_SHIFT); mhop->bt_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; mhop->hop_num = hr_dev->caps.cqe_hop_num; break; case HEM_TYPE_SRQWQE: @@ -181,7 +180,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, + PAGE_SHIFT); mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; 
mhop->hop_num = hr_dev->caps.srqwqe_hop_num; break; case HEM_TYPE_IDX: @@ -189,7 +188,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, + PAGE_SHIFT); mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; mhop->hop_num = hr_dev->caps.idx_hop_num; break; default: @@ -206,7 +205,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, * MTT/CQE alloc hem for bt pages. */ bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num); - chunk_ba_num = mhop->bt_chunk_size / 8; + chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN; chunk_size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : mhop->bt_chunk_size; table_idx = (*obj & (table->num_obj - 1)) / @@ -234,7 +233,6 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, return 0; } -EXPORT_SYMBOL_GPL(hns_roce_calc_hem_mhop); static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages, @@ -376,18 +374,19 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; - end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies; - while (1) { - if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) { - if (!(time_before(jiffies, end))) { - dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); - spin_unlock_irqrestore(lock, flags); - return -EBUSY; - } - } else { + end = HW_SYNC_TIMEOUT_MSECS; + while (end) { + if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT)) break; - } + mdelay(HW_SYNC_SLEEP_TIME_INTERVAL); + end -= HW_SYNC_SLEEP_TIME_INTERVAL; + } + + if (end <= 0) { + dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); + spin_unlock_irqrestore(lock, flags); + return -EBUSY; } bt_cmd_l = (u32)bt_ba; @@ -435,7 +434,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, buf_chunk_size = mhop.buf_chunk_size; bt_chunk_size = mhop.bt_chunk_size; hop_num = mhop.hop_num; - chunk_ba_num = bt_chunk_size / 8; + chunk_ba_num = bt_chunk_size / BA_BYTE_LEN; bt_num = hns_roce_get_bt_num(table->type, hop_num); switch (bt_num) { @@ -620,7 +619,6 @@ out: mutex_unlock(&table->mutex); return ret; } -EXPORT_SYMBOL_GPL(hns_roce_table_get); static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, @@ -645,7 +643,7 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, bt_chunk_size = mhop.bt_chunk_size; hop_num = mhop.hop_num; - chunk_ba_num = bt_chunk_size / 8; + chunk_ba_num = bt_chunk_size / BA_BYTE_LEN; bt_num = hns_roce_get_bt_num(table->type, hop_num); switch (bt_num) { @@ -763,7 +761,6 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, mutex_unlock(&table->mutex); } -EXPORT_SYMBOL_GPL(hns_roce_table_put); void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, @@ -799,7 +796,7 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, i = mhop.l0_idx; j = mhop.l1_idx; if (mhop.hop_num == 2) - hem_idx = i * (mhop.bt_chunk_size / 8) + j; + hem_idx = i * (mhop.bt_chunk_size / BA_BYTE_LEN) + j; else if (mhop.hop_num == 1 || mhop.hop_num == HNS_ROCE_HOP_NUM_0) hem_idx = i; @@ -836,7 +833,6 @@ out: mutex_unlock(&table->mutex); return addr; } -EXPORT_SYMBOL_GPL(hns_roce_table_find); int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, @@ -999,7 +995,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, } obj_per_chunk = buf_chunk_size / obj_size; num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; - bt_chunk_num = bt_chunk_size / 
8; + bt_chunk_num = bt_chunk_size / BA_BYTE_LEN; if (type >= HEM_TYPE_MTT) num_bt_l0 = bt_chunk_num; @@ -1156,3 +1152,463 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) &hr_dev->mr_table.mtt_cqe_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); } + +struct roce_hem_item { + struct list_head list; /* link all hems in the same bt level */ + struct list_head sibling; /* link all hems in last hop for mtt */ + void *addr; + dma_addr_t dma_addr; + size_t count; /* max ba numbers */ + int start; /* start buf offset in this hem */ + int end; /* end buf offset in this hem */ +}; + +static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev, + int start, int end, + int count, bool exist_bt, + int bt_level) +{ + struct roce_hem_item *hem; + + hem = kzalloc(sizeof(*hem), GFP_KERNEL); + if (!hem) + return NULL; + + if (exist_bt) { + hem->addr = dma_alloc_coherent(hr_dev->dev, + count * BA_BYTE_LEN, + &hem->dma_addr, GFP_KERNEL); + if (!hem->addr) { + kfree(hem); + return NULL; + } + } + + hem->count = count; + hem->start = start; + hem->end = end; + INIT_LIST_HEAD(&hem->list); + INIT_LIST_HEAD(&hem->sibling); + + return hem; +} + +static void hem_list_free_item(struct hns_roce_dev *hr_dev, + struct roce_hem_item *hem, bool exist_bt) +{ + if (exist_bt) + dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN, + hem->addr, hem->dma_addr); + kfree(hem); +} + +static void hem_list_free_all(struct hns_roce_dev *hr_dev, + struct list_head *head, bool exist_bt) +{ + struct roce_hem_item *hem, *temp_hem; + + list_for_each_entry_safe(hem, temp_hem, head, list) { + list_del(&hem->list); + hem_list_free_item(hr_dev, hem, exist_bt); + } +} + +static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr, + u64 table_addr) +{ + *(u64 *)(base_addr) = table_addr; +} + +/* assign L0 table address to hem from root bt */ +static void hem_list_assign_bt(struct hns_roce_dev *hr_dev, + struct roce_hem_item *hem, void *cpu_addr, + u64 phy_addr) +{ + hem->addr = cpu_addr; + hem->dma_addr = (dma_addr_t)phy_addr; +} + +static inline bool hem_list_page_is_in_range(struct roce_hem_item *hem, + int offset) +{ + return (hem->start <= offset && offset <= hem->end); +} + +static struct roce_hem_item *hem_list_search_item(struct list_head *ba_list, + int page_offset) +{ + struct roce_hem_item *hem, *temp_hem; + struct roce_hem_item *found = NULL; + + list_for_each_entry_safe(hem, temp_hem, ba_list, list) { + if (hem_list_page_is_in_range(hem, page_offset)) { + found = hem; + break; + } + } + + return found; +} + +static bool hem_list_is_bottom_bt(int hopnum, int bt_level) +{ + /* + * hopnum base address table levels + * 0 L0(buf) + * 1 L0 -> buf + * 2 L0 -> L1 -> buf + * 3 L0 -> L1 -> L2 -> buf + */ + return bt_level >= (hopnum ? 
hopnum - 1 : hopnum); +} + +/** + * calc base address entries num + * @hopnum: num of mutihop addressing + * @bt_level: base address table level + * @unit: ba entries per bt page + */ +static u32 hem_list_calc_ba_range(int hopnum, int bt_level, int unit) +{ + u32 step; + int max; + int i; + + if (hopnum <= bt_level) + return 0; + /* + * hopnum bt_level range + * 1 0 unit + * ------------ + * 2 0 unit * unit + * 2 1 unit + * ------------ + * 3 0 unit * unit * unit + * 3 1 unit * unit + * 3 2 unit + */ + step = 1; + max = hopnum - bt_level; + for (i = 0; i < max; i++) + step = step * unit; + + return step; +} + +/** + * calc the root ba entries which could cover all regions + * @regions: buf region array + * @region_cnt: array size of @regions + * @unit: ba entries per bt page + */ +int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, + int region_cnt, int unit) +{ + struct hns_roce_buf_region *r; + int total = 0; + int step; + int i; + + for (i = 0; i < region_cnt; i++) { + r = (struct hns_roce_buf_region *)&regions[i]; + if (r->hopnum > 1) { + step = hem_list_calc_ba_range(r->hopnum, 1, unit); + if (step > 0) + total += (r->count + step - 1) / step; + } else { + total += r->count; + } + } + + return total; +} + +static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, + const struct hns_roce_buf_region *r, int unit, + int offset, struct list_head *mid_bt, + struct list_head *btm_bt) +{ + struct roce_hem_item *hem_ptrs[HNS_ROCE_MAX_BT_LEVEL] = { NULL }; + struct list_head temp_list[HNS_ROCE_MAX_BT_LEVEL]; + struct roce_hem_item *cur, *pre; + const int hopnum = r->hopnum; + int start_aligned; + int distance; + int ret = 0; + int max_ofs; + int level; + u32 step; + int end; + + if (hopnum <= 1) + return 0; + + if (hopnum > HNS_ROCE_MAX_BT_LEVEL) { + dev_err(hr_dev->dev, "invalid hopnum %d!\n", hopnum); + return -EINVAL; + } + + if (offset < r->offset) { + dev_err(hr_dev->dev, "invalid offset %d,min %d!\n", + offset, r->offset); + return -EINVAL; + } + + distance = offset - r->offset; + max_ofs = r->offset + r->count - 1; + for (level = 0; level < hopnum; level++) + INIT_LIST_HEAD(&temp_list[level]); + + /* config L1 bt to last bt and link them to corresponding parent */ + for (level = 1; level < hopnum; level++) { + cur = hem_list_search_item(&mid_bt[level], offset); + if (cur) { + hem_ptrs[level] = cur; + continue; + } + + step = hem_list_calc_ba_range(hopnum, level, unit); + if (step < 1) { + ret = -EINVAL; + goto err_exit; + } + + start_aligned = (distance / step) * step + r->offset; + end = min_t(int, start_aligned + step - 1, max_ofs); + cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit, + true, level); + if (!cur) { + ret = -ENOMEM; + goto err_exit; + } + hem_ptrs[level] = cur; + list_add(&cur->list, &temp_list[level]); + if (hem_list_is_bottom_bt(hopnum, level)) + list_add(&cur->sibling, &temp_list[0]); + + /* link bt to parent bt */ + if (level > 1) { + pre = hem_ptrs[level - 1]; + step = (cur->start - pre->start) / step * BA_BYTE_LEN; + hem_list_link_bt(hr_dev, pre->addr + step, + cur->dma_addr); + } + } + + list_splice(&temp_list[0], btm_bt); + for (level = 1; level < hopnum; level++) + list_splice(&temp_list[level], &mid_bt[level]); + + return 0; + +err_exit: + for (level = 1; level < hopnum; level++) + hem_list_free_all(hr_dev, &temp_list[level], true); + + return ret; +} + +static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_list *hem_list, int unit, + const struct hns_roce_buf_region *regions, + int region_cnt) 
+{ + struct roce_hem_item *hem, *temp_hem, *root_hem; + struct list_head temp_list[HNS_ROCE_MAX_BT_REGION]; + const struct hns_roce_buf_region *r; + struct list_head temp_root; + struct list_head temp_btm; + void *cpu_base; + u64 phy_base; + int ret = 0; + int offset; + int total; + int step; + int i; + + r = &regions[0]; + root_hem = hem_list_search_item(&hem_list->root_bt, r->offset); + if (root_hem) + return 0; + + INIT_LIST_HEAD(&temp_root); + total = r->offset; + /* indicate to last region */ + r = &regions[region_cnt - 1]; + root_hem = hem_list_alloc_item(hr_dev, total, r->offset + r->count - 1, + unit, true, 0); + if (!root_hem) + return -ENOMEM; + list_add(&root_hem->list, &temp_root); + + hem_list->root_ba = root_hem->dma_addr; + + INIT_LIST_HEAD(&temp_btm); + for (i = 0; i < region_cnt; i++) + INIT_LIST_HEAD(&temp_list[i]); + + total = 0; + for (i = 0; i < region_cnt && total < unit; i++) { + r = &regions[i]; + if (!r->count) + continue; + + /* all regions's mid[x][0] shared the root_bt's trunk */ + cpu_base = root_hem->addr + total * BA_BYTE_LEN; + phy_base = root_hem->dma_addr + total * BA_BYTE_LEN; + + /* if hopnum is 0 or 1, cut a new fake hem from the root bt + * which's address share to all regions. + */ + if (hem_list_is_bottom_bt(r->hopnum, 0)) { + hem = hem_list_alloc_item(hr_dev, r->offset, + r->offset + r->count - 1, + r->count, false, 0); + if (!hem) { + ret = -ENOMEM; + goto err_exit; + } + hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base); + list_add(&hem->list, &temp_list[i]); + list_add(&hem->sibling, &temp_btm); + total += r->count; + } else { + step = hem_list_calc_ba_range(r->hopnum, 1, unit); + if (step < 1) { + ret = -EINVAL; + goto err_exit; + } + /* if exist mid bt, link L1 to L0 */ + list_for_each_entry_safe(hem, temp_hem, + &hem_list->mid_bt[i][1], list) { + offset = hem->start / step * BA_BYTE_LEN; + hem_list_link_bt(hr_dev, cpu_base + offset, + hem->dma_addr); + total++; + } + } + } + + list_splice(&temp_btm, &hem_list->btm_bt); + list_splice(&temp_root, &hem_list->root_bt); + for (i = 0; i < region_cnt; i++) + list_splice(&temp_list[i], &hem_list->mid_bt[i][0]); + + return 0; + +err_exit: + for (i = 0; i < region_cnt; i++) + hem_list_free_all(hr_dev, &temp_list[i], false); + + hem_list_free_all(hr_dev, &temp_root, true); + + return ret; +} + +/* construct the base address table and link them by address hop config */ +int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_list *hem_list, + const struct hns_roce_buf_region *regions, + int region_cnt) +{ + const struct hns_roce_buf_region *r; + int ofs, end; + int ret = 0; + int unit; + int i; + + if (region_cnt > HNS_ROCE_MAX_BT_REGION) { + dev_err(hr_dev->dev, "invalid region region_cnt %d!\n", + region_cnt); + return -EINVAL; + } + + unit = (1 << hem_list->bt_pg_shift) / BA_BYTE_LEN; + for (i = 0; i < region_cnt; i++) { + r = &regions[i]; + if (!r->count) + continue; + + end = r->offset + r->count; + for (ofs = r->offset; ofs < end; ofs += unit) { + ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs, + hem_list->mid_bt[i], + &hem_list->btm_bt); + if (ret) { + dev_err(hr_dev->dev, + "alloc hem trunk fail ret=%d!\n", ret); + goto err_alloc; + } + } + } + + ret = hem_list_alloc_root_bt(hr_dev, hem_list, unit, regions, + region_cnt); + if (ret) + dev_err(hr_dev->dev, "alloc hem root fail ret=%d!\n", ret); + else + return 0; + +err_alloc: + hns_roce_hem_list_release(hr_dev, hem_list); + + return ret; +} + +void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_list 
*hem_list) +{ + int i, j; + + for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) + for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) + hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j], + j != 0); + + hem_list_free_all(hr_dev, &hem_list->root_bt, true); + INIT_LIST_HEAD(&hem_list->btm_bt); + hem_list->root_ba = 0; +} + +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, + int bt_page_order) +{ + int i, j; + + INIT_LIST_HEAD(&hem_list->root_bt); + INIT_LIST_HEAD(&hem_list->btm_bt); + for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) + for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) + INIT_LIST_HEAD(&hem_list->mid_bt[i][j]); + + hem_list->bt_pg_shift = bt_page_order; +} + +void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_list *hem_list, + int offset, int *mtt_cnt, u64 *phy_addr) +{ + struct list_head *head = &hem_list->btm_bt; + struct roce_hem_item *hem, *temp_hem; + void *cpu_base = NULL; + u64 phy_base = 0; + int nr = 0; + + list_for_each_entry_safe(hem, temp_hem, head, sibling) { + if (hem_list_page_is_in_range(hem, offset)) { + nr = offset - hem->start; + cpu_base = hem->addr + nr * BA_BYTE_LEN; + phy_base = hem->dma_addr + nr * BA_BYTE_LEN; + nr = hem->end + 1 - offset; + break; + } + } + + if (mtt_cnt) + *mtt_cnt = nr; + + if (phy_addr) + *phy_addr = phy_base; + + return cpu_base; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index d9d668992e49..f1ccb8f35fe5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -34,8 +34,8 @@ #ifndef _HNS_ROCE_HEM_H #define _HNS_ROCE_HEM_H -#define HW_SYNC_TIMEOUT_MSECS 500 #define HW_SYNC_SLEEP_TIME_INTERVAL 20 +#define HW_SYNC_TIMEOUT_MSECS (25 * HW_SYNC_SLEEP_TIME_INTERVAL) #define BT_CMD_SYNC_SHIFT 31 enum { @@ -133,6 +133,20 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop); bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type); +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, + int bt_page_order); +int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, + int region_cnt, int unit); +int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_list *hem_list, + const struct hns_roce_buf_region *regions, + int region_cnt); +void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_list *hem_list); +void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_list *hem_list, + int offset, int *mtt_cnt, u64 *phy_addr); + static inline void hns_roce_hem_first(struct hns_roce_hem *hem, struct hns_roce_hem_iter *iter) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 3afd3e9330e7..81e6dedb1e02 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -717,7 +717,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) union ib_gid dgid; u64 subnet_prefix; int attr_mask = 0; - int ret = -ENOMEM; + int ret; int i, j; u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 }; u8 phy_port; @@ -730,10 +730,16 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) /* Reserved cq for loop qp */ cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2; cq_init_attr.comp_vector = 0; - cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL); - if (IS_ERR(cq)) { - dev_err(dev, "Create cq for reserved loop qp failed!"); + + ibdev = &hr_dev->ib_dev; + cq = 
rdma_zalloc_drv_obj(ibdev, ib_cq); + if (!cq) return -ENOMEM; + + ret = hns_roce_ib_create_cq(cq, &cq_init_attr, NULL); + if (ret) { + dev_err(dev, "Create cq for reserved loop qp failed!"); + goto alloc_cq_failed; } free_mr->mr_free_cq = to_hr_cq(cq); free_mr->mr_free_cq->ib_cq.device = &hr_dev->ib_dev; @@ -743,7 +749,6 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) free_mr->mr_free_cq->ib_cq.cq_context = NULL; atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0); - ibdev = &hr_dev->ib_dev; pd = rdma_zalloc_drv_obj(ibdev, ib_pd); if (!pd) goto alloc_mem_failed; @@ -818,7 +823,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) attr.dest_qp_num = hr_qp->qpn; memcpy(rdma_ah_retrieve_dmac(&attr.ah_attr), hr_dev->dev_addr[port], - MAC_ADDR_OCTET_NUM); + ETH_ALEN); memcpy(&dgid.raw, &subnet_prefix, sizeof(u64)); memcpy(&dgid.raw[8], hr_dev->dev_addr[port], 3); @@ -865,9 +870,9 @@ alloc_pd_failed: kfree(pd); alloc_mem_failed: - if (hns_roce_ib_destroy_cq(cq, NULL)) - dev_err(dev, "Destroy cq for create_lp_qp failed!\n"); - + hns_roce_ib_destroy_cq(cq, NULL); +alloc_cq_failed: + kfree(cq); return ret; } @@ -894,10 +899,8 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) i, ret); } - ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); - if (ret) - dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret); - + hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); + kfree(&free_mr->mr_free_cq->ib_cq); hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL); kfree(&free_mr->mr_free_pd->ibpd); } @@ -966,8 +969,7 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev) struct hns_roce_free_mr *free_mr; struct hns_roce_v1_priv *priv; struct completion comp; - unsigned long end = - msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies; + unsigned long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS; priv = (struct hns_roce_v1_priv *)hr_dev->priv; free_mr = &priv->free_mr; @@ -987,10 +989,11 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev) queue_work(free_mr->free_mr_wq, &(lp_qp_work->work)); - while (time_before_eq(jiffies, end)) { + while (end) { if (try_wait_for_completion(&comp)) return 0; msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE); + end -= HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE; } lp_qp_work->comp_flag = 0; @@ -1104,8 +1107,7 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct hns_roce_free_mr *free_mr; struct hns_roce_v1_priv *priv; struct completion comp; - unsigned long end = - msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies; + unsigned long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS; unsigned long start = jiffies; int npages; int ret = 0; @@ -1135,10 +1137,11 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, queue_work(free_mr->free_mr_wq, &(mr_work->work)); - while (time_before_eq(jiffies, end)) { + while (end) { if (try_wait_for_completion(&comp)) goto free_mr; msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE); + end -= HNS_ROCE_V1_FREE_MR_WAIT_VALUE; } mr_work->comp_flag = 0; @@ -1161,8 +1164,7 @@ free_mr: hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, key_to_hw_index(mr->key), 0); - if (mr->umem) - ib_umem_release(mr->umem); + ib_umem_release(mr->umem); kfree(mr); @@ -1557,6 +1559,7 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) caps->reserved_mrws = 1; caps->reserved_uars = 0; caps->reserved_cqs = 0; + caps->reserved_qps = 12; /* 2 SQP per port, six ports total 12 */ caps->chunk_sz = HNS_ROCE_V1_TABLE_CHUNK_SIZE; for (i = 
0; i < caps->num_ports; i++) @@ -1742,11 +1745,14 @@ static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr) { + unsigned long flags; u32 *p = NULL; u8 gid_idx = 0; gid_idx = hns_get_gid_index(hr_dev, port, gid_index); + spin_lock_irqsave(&hr_dev->iboe.lock, flags); + p = (u32 *)&gid->raw[0]; roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_L_0_REG + (HNS_ROCE_V1_GID_NUM * gid_idx)); @@ -1763,6 +1769,8 @@ static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_H_0_REG + (HNS_ROCE_V1_GID_NUM * gid_idx)); + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + return 0; } @@ -2458,10 +2466,10 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; - end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies; + end = HW_SYNC_TIMEOUT_MSECS; while (1) { if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) { - if (!(time_before(jiffies, end))) { + if (!end) { dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, flags); @@ -2470,7 +2478,8 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, } else { break; } - msleep(HW_SYNC_SLEEP_TIME_INTERVAL); + mdelay(HW_SYNC_SLEEP_TIME_INTERVAL); + end -= HW_SYNC_SLEEP_TIME_INTERVAL; } bt_cmd_val[0] = (__le32)bt_ba; @@ -3633,9 +3642,8 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); - if (udata) - ib_umem_release(hr_qp->umem); - else { + ib_umem_release(hr_qp->umem); + if (!udata) { kfree(hr_qp->sq.wrid); kfree(hr_qp->rq.wrid); @@ -3649,7 +3657,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) return 0; } -static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); @@ -3658,7 +3666,6 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) u32 cqe_cnt_cur; u32 cq_buf_size; int wait_time = 0; - int ret = 0; hns_roce_free_cq(hr_dev, hr_cq); @@ -3680,7 +3687,6 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) if (wait_time > HNS_ROCE_MAX_FREE_CQ_WAIT_CNT) { dev_warn(dev, "Destroy cq 0x%lx timeout!\n", hr_cq->cqn); - ret = -ETIMEDOUT; break; } wait_time++; @@ -3688,17 +3694,12 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - if (ibcq->uobject) - ib_umem_release(hr_cq->umem); - else { + ib_umem_release(hr_cq->umem); + if (!udata) { /* Free the buff of stored cq */ cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz; hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf); } - - kfree(hr_cq); - - return ret; } static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not) @@ -3902,7 +3903,8 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, */ dma_rmb(); - dev_dbg(dev, "aeqe = %p, aeqe->asyn.event_type = 0x%lx\n", aeqe, + dev_dbg(dev, "aeqe = %pK, aeqe->asyn.event_type = 0x%lx\n", + aeqe, roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)); @@ -4265,7 +4267,6 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, } eq->buf_list[i].map = tmp_dma_addr; - memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE); } 
eq->cons_index = 0; roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b5392cb5b20f..b76e3beeafb8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1098,7 +1098,7 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, if (ret == CMD_RST_PRC_SUCCESS) return 0; if (ret == CMD_RST_PRC_EBUSY) - return ret; + return -EBUSY; ret = __hns_roce_cmq_send(hr_dev, desc, num); if (ret) { @@ -1106,7 +1106,7 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, if (retval == CMD_RST_PRC_SUCCESS) return 0; else if (retval == CMD_RST_PRC_EBUSY) - return retval; + return -EBUSY; } return ret; @@ -1130,6 +1130,45 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev) return 0; } +static void hns_roce_function_clear(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_func_clear *resp; + struct hns_roce_cmq_desc desc; + unsigned long end; + int ret; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR, false); + resp = (struct hns_roce_func_clear *)desc.data; + + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) { + dev_err(hr_dev->dev, "Func clear write failed, ret = %d.\n", + ret); + return; + } + + msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL); + end = HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS; + while (end) { + msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT); + end -= HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR, + true); + + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) + continue; + + if (roce_get_bit(resp->func_done, FUNC_CLEAR_RST_FUN_DONE_S)) { + hr_dev->is_reset = true; + return; + } + } + + dev_err(hr_dev->dev, "Func clear fail.\n"); +} + static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev) { struct hns_roce_query_fw_info *resp; @@ -1574,7 +1613,10 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->mtt_ba_pg_sz = 0; caps->mtt_buf_pg_sz = 0; caps->mtt_hop_num = HNS_ROCE_MTT_HOP_NUM; - caps->cqe_ba_pg_sz = 0; + caps->wqe_sq_hop_num = 2; + caps->wqe_sge_hop_num = 1; + caps->wqe_rq_hop_num = 2; + caps->cqe_ba_pg_sz = 6; caps->cqe_buf_pg_sz = 0; caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; caps->srqwqe_ba_pg_sz = 0; @@ -1774,7 +1816,6 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, goto err_alloc_buf_failed; link_tbl->pg_list[i].map = t; - memset(link_tbl->pg_list[i].buf, 0, buf_chk_sz); entry[i].blk_ba0 = (t >> 12) & 0xffffffff; roce_set_field(entry[i].blk_ba1_nxt_ptr, @@ -1891,6 +1932,9 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; + if (hr_dev->pci_dev->revision == 0x21) + hns_roce_function_clear(hr_dev); + hns_roce_free_link_table(hr_dev, &priv->tpq); hns_roce_free_link_table(hr_dev, &priv->tsq); } @@ -1974,7 +2018,7 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, unsigned long timeout) { struct device *dev = hr_dev->dev; - unsigned long end = 0; + unsigned long end; u32 status; end = msecs_to_jiffies(timeout) + jiffies; @@ -2340,15 +2384,10 @@ static void *get_srq_wqe(struct hns_roce_srq *srq, int n) static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) { - u32 bitmap_num; - int bit_num; - /* always called with interrupts disabled. 
*/ spin_lock(&srq->lock); - bitmap_num = wqe_index / (sizeof(u64) * 8); - bit_num = wqe_index % (sizeof(u64) * 8); - srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num); + bitmap_clear(srq->idx_que.bitmap, wqe_index, 1); srq->tail++; spin_unlock(&srq->lock); @@ -2977,7 +3016,7 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, { struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; - int ret = 0; + int ret; u16 op = 0xff; if (!hns_roce_check_whether_mhop(hr_dev, table->type)) @@ -3026,7 +3065,6 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, } static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, enum ib_qp_state cur_state, enum ib_qp_state new_state, struct hns_roce_v2_qp_context *context, @@ -3426,7 +3464,9 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, else roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sq.max_gs > 2 ? + V2_QPC_BYTE_4_SGE_SHIFT_S, + hr_qp->sq.max_gs > + HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, @@ -3520,6 +3560,31 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, } } +static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, int mtt_cnt, + u32 page_size) +{ + struct device *dev = hr_dev->dev; + + if (hr_qp->rq.wqe_cnt < 1) + return true; + + if (mtt_cnt < 1) { + dev_err(dev, "qp(0x%lx) rqwqe buf ba find failed\n", + hr_qp->qpn); + return false; + } + + if (mtt_cnt < MTT_MIN_COUNT && + (hr_qp->rq.offset + page_size) < hr_qp->buff_size) { + dev_err(dev, "qp(0x%lx) next rqwqe buf ba find failed\n", + hr_qp->qpn); + return false; + } + + return true; +} + static int modify_qp_init_to_rtr(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, struct hns_roce_v2_qp_context *context, @@ -3529,25 +3594,27 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct device *dev = hr_dev->dev; + u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle_3; dma_addr_t dma_handle_2; - dma_addr_t dma_handle; + u64 wqe_sge_ba; u32 page_size; u8 port_num; u64 *mtts_3; u64 *mtts_2; - u64 *mtts; + int count; u8 *dmac; u8 *smac; int port; /* Search qp buf's mtts */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (!mtts) { - dev_err(dev, "qp buf pa find failed\n"); - return -EINVAL; - } + page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, + hr_qp->rq.offset / page_size, mtts, + MTT_MIN_COUNT, &wqe_sge_ba); + if (!ibqp->srq) + if (!check_wqe_rq_mtt_count(hr_dev, hr_qp, count, page_size)) + return -EINVAL; /* Search IRRL's mtts */ mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, @@ -3571,7 +3638,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, } dmac = (u8 *)attr->ah_attr.roce.dmac; - context->wqe_sge_ba = (u32)(dma_handle >> 3); + context->wqe_sge_ba = (u32)(wqe_sge_ba >> 3); qpc_mask->wqe_sge_ba = 0; /* @@ -3581,22 +3648,23 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, * 0 at the same time, else set them to 0x1. 
*/ roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_WQE_SGE_BA_M, - V2_QPC_BYTE_12_WQE_SGE_BA_S, dma_handle >> (32 + 3)); + V2_QPC_BYTE_12_WQE_SGE_BA_S, wqe_sge_ba >> (32 + 3)); roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_WQE_SGE_BA_M, V2_QPC_BYTE_12_WQE_SGE_BA_S, 0); roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, V2_QPC_BYTE_12_SQ_HOP_NUM_S, - hr_dev->caps.mtt_hop_num == HNS_ROCE_HOP_NUM_0 ? - 0 : hr_dev->caps.mtt_hop_num); + hr_dev->caps.wqe_sq_hop_num == HNS_ROCE_HOP_NUM_0 ? + 0 : hr_dev->caps.wqe_sq_hop_num); roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0); roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_S, - ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ? - hr_dev->caps.mtt_hop_num : 0); + ((ibqp->qp_type == IB_QPT_GSI) || + hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? + hr_dev->caps.wqe_sge_hop_num : 0); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0); @@ -3604,8 +3672,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_S, - hr_dev->caps.mtt_hop_num == HNS_ROCE_HOP_NUM_0 ? - 0 : hr_dev->caps.mtt_hop_num); + hr_dev->caps.wqe_rq_hop_num == HNS_ROCE_HOP_NUM_0 ? + 0 : hr_dev->caps.wqe_rq_hop_num); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0); @@ -3613,7 +3681,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, - hr_dev->caps.mtt_ba_pg_sz + PG_SHIFT_OFFSET); + hr_qp->wqe_bt_pg_shift + PG_SHIFT_OFFSET); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0); @@ -3626,29 +3694,24 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - context->rq_cur_blk_addr = (u32)(mtts[hr_qp->rq.offset / page_size] - >> PAGE_ADDR_SHIFT); + context->rq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); qpc_mask->rq_cur_blk_addr = 0; roce_set_field(context->byte_92_srq_info, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, - mtts[hr_qp->rq.offset / page_size] - >> (32 + PAGE_ADDR_SHIFT)); + mtts[0] >> (32 + PAGE_ADDR_SHIFT)); roce_set_field(qpc_mask->byte_92_srq_info, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, 0); - context->rq_nxt_blk_addr = (u32)(mtts[hr_qp->rq.offset / page_size + 1] - >> PAGE_ADDR_SHIFT); + context->rq_nxt_blk_addr = (u32)(mtts[1] >> PAGE_ADDR_SHIFT); qpc_mask->rq_nxt_blk_addr = 0; roce_set_field(context->byte_104_rq_sge, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, - mtts[hr_qp->rq.offset / page_size + 1] - >> (32 + PAGE_ADDR_SHIFT)); + mtts[1] >> (32 + PAGE_ADDR_SHIFT)); roce_set_field(qpc_mask->byte_104_rq_sge, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, 0); @@ -3708,13 +3771,14 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0); - memcpy(&(context->dmac), dmac, 4); + memcpy(&(context->dmac), dmac, sizeof(u32)); roce_set_field(context->byte_52_udpspn_dmac, 
V2_QPC_BYTE_52_DMAC_M, V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4]))); qpc_mask->dmac = 0; roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, V2_QPC_BYTE_52_DMAC_S, 0); + /* mtu*(2^LP_PKTN_INI) should not bigger than 1 message length 64kb */ roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, V2_QPC_BYTE_56_LP_PKTN_INI_S, 4); roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, @@ -3756,6 +3820,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M, V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0); + /* rocee send 2^lp_sgen_ini segs every time */ roce_set_field(context->byte_168_irrl_idx, V2_QPC_BYTE_168_LP_SGEN_INI_M, V2_QPC_BYTE_168_LP_SGEN_INI_S, 3); @@ -3774,18 +3839,30 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct device *dev = hr_dev->dev; - dma_addr_t dma_handle; + u64 sge_cur_blk = 0; + u64 sq_cur_blk = 0; u32 page_size; - u64 *mtts; + int count; /* Search qp buf's mtts */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (!mtts) { - dev_err(dev, "qp buf pa find failed\n"); + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL); + if (count < 1) { + dev_err(dev, "qp(0x%lx) buf pa find failed\n", hr_qp->qpn); return -EINVAL; } + if (hr_qp->sge.offset) { + page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, + hr_qp->sge.offset / page_size, + &sge_cur_blk, 1, NULL); + if (count < 1) { + dev_err(dev, "qp(0x%lx) sge pa find failed\n", + hr_qp->qpn); + return -EINVAL; + } + } + /* Not support alternate path and path migration */ if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_PATH_MIG_STATE)) { @@ -3799,37 +3876,37 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, * we should set all bits of the relevant fields in context mask to * 0 at the same time, else set them to 0x1. */ - context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); + context->sq_cur_blk_addr = (u32)(sq_cur_blk >> PAGE_ADDR_SHIFT); roce_set_field(context->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, - mtts[0] >> (32 + PAGE_ADDR_SHIFT)); + sq_cur_blk >> (32 + PAGE_ADDR_SHIFT)); qpc_mask->sq_cur_blk_addr = 0; roce_set_field(qpc_mask->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0); - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - context->sq_cur_sge_blk_addr = - ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ? - ((u32)(mtts[hr_qp->sge.offset / page_size] - >> PAGE_ADDR_SHIFT)) : 0; + context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) || + hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? + ((u32)(sge_cur_blk >> + PAGE_ADDR_SHIFT)) : 0; roce_set_field(context->byte_184_irrl_idx, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, - ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ? - (mtts[hr_qp->sge.offset / page_size] >> + ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > + HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? 
+ (sge_cur_blk >> (32 + PAGE_ADDR_SHIFT)) : 0); qpc_mask->sq_cur_sge_blk_addr = 0; roce_set_field(qpc_mask->byte_184_irrl_idx, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 0); - context->rx_sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); + context->rx_sq_cur_blk_addr = (u32)(sq_cur_blk >> PAGE_ADDR_SHIFT); roce_set_field(context->byte_232_irrl_sge, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S, - mtts[0] >> (32 + PAGE_ADDR_SHIFT)); + sq_cur_blk >> (32 + PAGE_ADDR_SHIFT)); qpc_mask->rx_sq_cur_blk_addr = 0; roce_set_field(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, @@ -4144,7 +4221,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, roce_set_field(context->byte_224_retry_msg, V2_QPC_BYTE_224_RETRY_MSG_PSN_M, V2_QPC_BYTE_224_RETRY_MSG_PSN_S, - attr->sq_psn >> 16); + attr->sq_psn >> V2_QPC_BYTE_220_RETRY_MSG_PSN_S); roce_set_field(qpc_mask->byte_224_retry_msg, V2_QPC_BYTE_224_RETRY_MSG_PSN_M, V2_QPC_BYTE_224_RETRY_MSG_PSN_S, 0); @@ -4230,7 +4307,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, V2_QPC_BYTE_60_QP_ST_S, 0); /* SW pass context to HW */ - ret = hns_roce_v2_qp_modify(hr_dev, &hr_qp->mtt, cur_state, new_state, + ret = hns_roce_v2_qp_modify(hr_dev, cur_state, new_state, context, hr_qp); if (ret) { dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret); @@ -4374,11 +4451,12 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S); qp_attr->qp_access_flags = ((roce_get_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_RRE_S)) << 2) | - ((roce_get_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_RWE_S)) << 1) | - ((roce_get_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_ATE_S)) << 3); + V2_QPC_BYTE_76_RRE_S)) << V2_QP_RWE_S) | + ((roce_get_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_RWE_S)) << V2_QP_RRE_S) | + ((roce_get_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_ATE_S)) << V2_QP_ATE_S); + if (hr_qp->ibqp.qp_type == IB_QPT_RC || hr_qp->ibqp.qp_type == IB_QPT_UC) { struct ib_global_route *grh = @@ -4487,7 +4565,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, (hr_qp->ibqp.qp_type == IB_QPT_UD)) hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); - hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); + hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); if (udata) { struct hns_roce_ucontext *context = @@ -4501,7 +4579,6 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) hns_roce_db_unmap_user(context, &hr_qp->rdb); - ib_umem_release(hr_qp->umem); } else { kfree(hr_qp->sq.wrid); kfree(hr_qp->rq.wrid); @@ -4509,6 +4586,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, if (hr_qp->rq.wqe_cnt) hns_roce_free_db(hr_dev, &hr_qp->rdb); } + ib_umem_release(hr_qp->umem); if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && hr_qp->rq.wqe_cnt) { @@ -4682,7 +4760,6 @@ static void hns_roce_irq_work_handle(struct work_struct *work) dev_warn(dev, "Path migration failed.\n"); break; case HNS_ROCE_EVENT_TYPE_COMM_EST: - dev_info(dev, "Communication established.\n"); break; case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: dev_warn(dev, "Send queue drained.\n"); @@ -5151,8 +5228,8 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev, dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i], eq->l1_dma[i]); - for (j = 0; j < bt_chk_sz / 8; j++) { - idx = i * (bt_chk_sz / 8) + j; + for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) { + 
idx = i * (bt_chk_sz / BA_BYTE_LEN) + j; if ((i == eq->l0_last_num - 1) && j == eq->l1_last_num - 1) { eqe_alloc = (buf_chk_sz / eq->eqe_size) @@ -5368,9 +5445,9 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT); - ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1) - / buf_chk_sz; - bt_num = (ba_num + bt_chk_sz / 8 - 1) / (bt_chk_sz / 8); + ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size), + buf_chk_sz); + bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN); /* hop_num = 0 */ if (mhop_num == HNS_ROCE_HOP_NUM_0) { @@ -5387,8 +5464,6 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, eq->cur_eqe_ba = eq->l0_dma; eq->nxt_eqe_ba = 0; - memset(eq->bt_l0, 0, eq->entries * eq->eqe_size); - return 0; } @@ -5415,12 +5490,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, goto err_dma_alloc_l0; if (mhop_num == 1) { - if (ba_num > (bt_chk_sz / 8)) + if (ba_num > (bt_chk_sz / BA_BYTE_LEN)) dev_err(dev, "ba_num %d is too large for 1 hop\n", ba_num); /* alloc buf */ - for (i = 0; i < bt_chk_sz / 8; i++) { + for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) { if (eq_buf_cnt + 1 < ba_num) { size = buf_chk_sz; } else { @@ -5444,7 +5519,7 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, } else if (mhop_num == 2) { /* alloc L1 BT and buf */ - for (i = 0; i < bt_chk_sz / 8; i++) { + for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) { eq->bt_l1[i] = dma_alloc_coherent(dev, bt_chk_sz, &(eq->l1_dma[i]), GFP_KERNEL); @@ -5452,8 +5527,8 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, goto err_dma_alloc_l1; *(eq->bt_l0 + i) = eq->l1_dma[i]; - for (j = 0; j < bt_chk_sz / 8; j++) { - idx = i * bt_chk_sz / 8 + j; + for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) { + idx = i * bt_chk_sz / BA_BYTE_LEN + j; if (eq_buf_cnt + 1 < ba_num) { size = buf_chk_sz; } else { @@ -5498,8 +5573,8 @@ err_dma_alloc_l1: dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i], eq->l1_dma[i]); - for (j = 0; j < bt_chk_sz / 8; j++) { - idx = i * bt_chk_sz / 8 + j; + for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) { + idx = i * bt_chk_sz / BA_BYTE_LEN + j; dma_free_coherent(dev, buf_chk_sz, eq->buf[idx], eq->buf_dma[idx]); } @@ -5522,11 +5597,11 @@ err_dma_alloc_buf: dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i], eq->l1_dma[i]); - for (j = 0; j < bt_chk_sz / 8; j++) { + for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) { if (i == record_i && j >= record_j) break; - idx = i * bt_chk_sz / 8 + j; + idx = i * bt_chk_sz / BA_BYTE_LEN + j; dma_free_coherent(dev, buf_chk_sz, eq->buf[idx], eq->buf_dma[idx]); @@ -5972,18 +6047,19 @@ out: return ret; } -static int find_empty_entry(struct hns_roce_idx_que *idx_que) +static int find_empty_entry(struct hns_roce_idx_que *idx_que, + unsigned long size) { - int bit_num; - int i; + int wqe_idx; - /* bitmap[i] is set zero if all bits are allocated */ - for (i = 0; idx_que->bitmap[i] == 0; ++i) - ; - bit_num = ffs(idx_que->bitmap[i]); - idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1)); + if (unlikely(bitmap_full(idx_que->bitmap, size))) + return -ENOSPC; + + wqe_idx = find_first_zero_bit(idx_que->bitmap, size); - return i * sizeof(u64) * 8 + (bit_num - 1); + bitmap_set(idx_que->bitmap, wqe_idx, 1); + + return wqe_idx; } static void fill_idx_queue(struct hns_roce_idx_que *idx_que, @@ -6029,7 +6105,13 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, break; } - wqe_idx = 
find_empty_entry(&srq->idx_que); + wqe_idx = find_empty_entry(&srq->idx_que, srq->max); + if (wqe_idx < 0) { + ret = -ENOMEM; + *bad_wr = wr; + break; + } + fill_idx_queue(&srq->idx_que, ind, wqe_idx); wqe = get_srq_wqe(srq, wqe_idx); dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; @@ -6041,9 +6123,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } if (i < srq->max_gs) { - dseg->len = 0; - dseg->lkey = cpu_to_le32(0x100); - dseg->addr = 0; + dseg[i].len = 0; + dseg[i].lkey = cpu_to_le32(0x100); + dseg[i].addr = 0; } srq->wrid[wqe_idx] = wr->wr_id; @@ -6059,7 +6141,8 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, */ wmb(); - srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn; + srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | + (srq->srqn & V2_DB_BYTE_4_TAG_M); srq_db.parameter = srq->head; hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); @@ -6301,6 +6384,7 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) if (!hr_dev) return 0; + hr_dev->is_reset = true; hr_dev->active = false; hr_dev->dis_db = true; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index edfdbe2ce0db..478f5a5b7aa1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -54,7 +54,7 @@ #define HNS_ROCE_V2_MAX_CQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100 #define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 -#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000 +#define HNS_ROCE_V2_MAX_CQE_NUM 0x400000 #define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff @@ -241,6 +241,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_POST_MB = 0x8504, HNS_ROCE_OPC_QUERY_MB_ST = 0x8505, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, + HNS_ROCE_OPC_FUNC_CLEAR = 0x8508, HNS_ROCE_OPC_CLR_SCCC = 0x8509, HNS_ROCE_OPC_QUERY_SCCC = 0x850a, HNS_ROCE_OPC_RESET_SCCC = 0x850b, @@ -886,6 +887,10 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16 #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16) +#define V2_QP_RWE_S 1 /* rdma write enable */ +#define V2_QP_RRE_S 2 /* rdma read enable */ +#define V2_QP_ATE_S 3 /* rdma atomic enable */ + struct hns_roce_v2_cqe { __le32 byte_4; union { @@ -1226,6 +1231,22 @@ struct hns_roce_query_fw_info { __le32 rsv[5]; }; +struct hns_roce_func_clear { + __le32 rst_funcid_en; + __le32 func_done; + __le32 rsv[4]; +}; + +#define FUNC_CLEAR_RST_FUN_DONE_S 0 +/* Each physical function manages up to 248 virtual functions; + * it takes up to 100ms for each function to execute clear; + * if an abnormal reset occurs, it is executed twice at most; + * so it takes up to 249 * 2 * 100ms. 
+ */ +#define HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS (249 * 2 * 100) +#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40 +#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT 20 + struct hns_roce_cfg_llm_a { __le32 base_addr_l; __le32 base_addr_h; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 8da5f18bf820..1e4ba48f5613 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -57,17 +57,16 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) { return gid_index * hr_dev->caps.num_ports + port; } -EXPORT_SYMBOL_GPL(hns_get_gid_index); static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; u32 i = 0; - if (!memcmp(hr_dev->dev_addr[port], addr, MAC_ADDR_OCTET_NUM)) + if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN)) return 0; - for (i = 0; i < MAC_ADDR_OCTET_NUM; i++) + for (i = 0; i < ETH_ALEN; i++) hr_dev->dev_addr[port][i] = addr[i]; phy_port = hr_dev->iboe.phy_port[port]; @@ -78,18 +77,13 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); u8 port = attr->port_num - 1; - unsigned long flags; int ret; if (port >= hr_dev->caps.num_ports) return -EINVAL; - spin_lock_irqsave(&hr_dev->iboe.lock, flags); - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr); - spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - return ret; } @@ -98,18 +92,13 @@ static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); struct ib_gid_attr zattr = { }; u8 port = attr->port_num - 1; - unsigned long flags; int ret; if (port >= hr_dev->caps.num_ports) return -EINVAL; - spin_lock_irqsave(&hr_dev->iboe.lock, flags); - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr); - spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - return ret; } @@ -272,7 +261,8 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256; props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; - props->phys_state = (props->state == IB_PORT_ACTIVE) ? 5 : 3; + props->phys_state = (props->state == IB_PORT_ACTIVE) ? 
+ HNS_ROCE_PHY_LINKUP : HNS_ROCE_PHY_DISABLED; spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); @@ -319,7 +309,7 @@ static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask, static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { - int ret = 0; + int ret; struct hns_roce_ucontext *context = to_hr_ucontext(uctx); struct hns_roce_ib_alloc_ucontext_resp resp = {}; struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); @@ -423,6 +413,11 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) } static const struct ib_device_ops hns_roce_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_HNS, + .uverbs_abi_ver = 1, + .uverbs_no_driver_id_binding = 1, + .add_gid = hns_roce_add_gid, .alloc_pd = hns_roce_alloc_pd, .alloc_ucontext = hns_roce_alloc_ucontext, @@ -451,6 +446,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .reg_user_mr = hns_roce_reg_user_mr, INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, hns_roce_cq, ib_cq), INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext), }; @@ -489,14 +485,12 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev = &hr_dev->ib_dev; - ib_dev->owner = THIS_MODULE; ib_dev->node_type = RDMA_NODE_IB_CA; ib_dev->dev.parent = dev; ib_dev->phys_port_cnt = hr_dev->caps.num_ports; ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; - ib_dev->uverbs_abi_ver = 1; ib_dev->uverbs_cmd_mask = (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -545,7 +539,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); } - ib_dev->driver_id = RDMA_DRIVER_HNS; ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); ib_set_device_ops(ib_dev, &hns_roce_dev_ops); for (i = 0; i < hr_dev->caps.num_ports; i++) { @@ -980,7 +973,6 @@ error_failed_cmq_init: return ret; } -EXPORT_SYMBOL_GPL(hns_roce_init); void hns_roce_exit(struct hns_roce_dev *hr_dev) { @@ -1001,7 +993,6 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev) if (hr_dev->hw->reset) hr_dev->hw->reset(hr_dev, false); } -EXPORT_SYMBOL_GPL(hns_roce_exit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>"); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 6110ec408626..549e1a38dfe0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -47,7 +47,6 @@ unsigned long key_to_hw_index(u32 key) { return (key << 24) | (key >> 8); } -EXPORT_SYMBOL_GPL(key_to_hw_index); static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox, @@ -66,7 +65,6 @@ int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); } -EXPORT_SYMBOL_GPL(hns_roce_hw2sw_mpt); static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, unsigned long *seg) @@ -293,7 +291,6 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) break; } } -EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup); static void hns_roce_loop_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int err_loop_index, @@ -314,11 +311,11 @@ static void hns_roce_loop_free(struct hns_roce_dev *hr_dev, dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], mr->pbl_l1_dma_addr[i]); - for (j = 
0; j < pbl_bt_sz / 8; j++) { + for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { if (i == loop_i && j >= loop_j) break; - bt_idx = i * pbl_bt_sz / 8 + j; + bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j; dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l2[bt_idx], mr->pbl_l2_dma_addr[bt_idx]); @@ -329,8 +326,8 @@ static void hns_roce_loop_free(struct hns_roce_dev *hr_dev, dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], mr->pbl_l1_dma_addr[i]); - for (j = 0; j < pbl_bt_sz / 8; j++) { - bt_idx = i * pbl_bt_sz / 8 + j; + for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { + bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j; dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l2[bt_idx], mr->pbl_l2_dma_addr[bt_idx]); @@ -533,7 +530,7 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, { struct device *dev = hr_dev->dev; unsigned long index = 0; - int ret = 0; + int ret; /* Allocate a key for mr from mr_table */ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); @@ -559,7 +556,8 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, mr->pbl_l0_dma_addr = 0; } else { if (!hr_dev->caps.pbl_hop_num) { - mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, + mr->pbl_buf = dma_alloc_coherent(dev, + npages * BA_BYTE_LEN, &(mr->pbl_dma_addr), GFP_KERNEL); if (!mr->pbl_buf) @@ -590,9 +588,8 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev, if (mhop_num == HNS_ROCE_HOP_NUM_0) return; - /* hop_num = 1 */ if (mhop_num == 1) { - dma_free_coherent(dev, (unsigned int)(npages * 8), + dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN), mr->pbl_buf, mr->pbl_dma_addr); return; } @@ -603,12 +600,13 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev, if (mhop_num == 2) { for (i = 0; i < mr->l0_chunk_last_num; i++) { if (i == mr->l0_chunk_last_num - 1) { - npages_allocated = i * (pbl_bt_sz / 8); + npages_allocated = + i * (pbl_bt_sz / BA_BYTE_LEN); dma_free_coherent(dev, - (npages - npages_allocated) * 8, - mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); + (npages - npages_allocated) * BA_BYTE_LEN, + mr->pbl_bt_l1[i], + mr->pbl_l1_dma_addr[i]); break; } @@ -621,16 +619,17 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev, dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], mr->pbl_l1_dma_addr[i]); - for (j = 0; j < pbl_bt_sz / 8; j++) { - bt_idx = i * (pbl_bt_sz / 8) + j; + for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { + bt_idx = i * (pbl_bt_sz / BA_BYTE_LEN) + j; if ((i == mr->l0_chunk_last_num - 1) && j == mr->l1_chunk_last_num - 1) { npages_allocated = bt_idx * - (pbl_bt_sz / 8); + (pbl_bt_sz / BA_BYTE_LEN); dma_free_coherent(dev, - (npages - npages_allocated) * 8, + (npages - npages_allocated) * + BA_BYTE_LEN, mr->pbl_bt_l2[bt_idx], mr->pbl_l2_dma_addr[bt_idx]); @@ -675,7 +674,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, npages = ib_umem_page_count(mr->umem); if (!hr_dev->caps.pbl_hop_num) - dma_free_coherent(dev, (unsigned int)(npages * 8), + dma_free_coherent(dev, + (unsigned int)(npages * BA_BYTE_LEN), mr->pbl_buf, mr->pbl_dma_addr); else hns_roce_mhop_free(hr_dev, mr); @@ -1059,6 +1059,7 @@ static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev, for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { page_addr = sg_page_iter_dma_address(&sg_iter); if (!hr_dev->caps.pbl_hop_num) { + /* for hip06, page addr is aligned to 4K */ mr->pbl_buf[i++] = page_addr >> 12; } else if (hr_dev->caps.pbl_hop_num == 1) { mr->pbl_buf[i++] = page_addr; @@ -1069,7 +1070,7 @@ static int 
hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev, mr->pbl_bt_l2[i][j] = page_addr; j++; - if (j >= (pbl_bt_sz / 8)) { + if (j >= (pbl_bt_sz / BA_BYTE_LEN)) { i++; j = 0; } @@ -1117,7 +1118,8 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } else { u64 pbl_size = 1; - bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8; + bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / + BA_BYTE_LEN; for (i = 0; i < hr_dev->caps.pbl_hop_num; i++) pbl_size *= bt_size; if (n > pbl_size) { @@ -1293,9 +1295,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } else { hns_roce_mr_free(hr_dev, mr); - if (mr->umem) - ib_umem_release(mr->umem); - + ib_umem_release(mr->umem); kfree(mr); } @@ -1491,3 +1491,119 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) return 0; } + +void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, + int buf_pg_shift) +{ + hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift); + mtr->buf_pg_shift = buf_pg_shift; +} + +void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, + struct hns_roce_mtr *mtr) +{ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +} + +static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, + struct hns_roce_mtr *mtr, dma_addr_t *bufs, + struct hns_roce_buf_region *r) +{ + int offset; + int count; + int npage; + u64 *mtts; + int end; + int i; + + offset = r->offset; + end = offset + r->count; + npage = 0; + while (offset < end) { + mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, + offset, &count, NULL); + if (!mtts) + return -ENOBUFS; + + /* Save page addr, low 12 bits : 0 */ + for (i = 0; i < count; i++) { + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) + mtts[i] = cpu_to_le64(bufs[npage] >> + PAGE_ADDR_SHIFT); + else + mtts[i] = cpu_to_le64(bufs[npage]); + + npage++; + } + offset += count; + } + + return 0; +} + +int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t **bufs, struct hns_roce_buf_region *regions, + int region_cnt) +{ + struct hns_roce_buf_region *r; + int ret; + int i; + + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions, + region_cnt); + if (ret) + return ret; + + for (i = 0; i < region_cnt; i++) { + r = ®ions[i]; + ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r); + if (ret) { + dev_err(hr_dev->dev, + "write mtr[%d/%d] err %d,offset=%d.\n", + i, region_cnt, ret, r->offset); + goto err_write; + } + } + + return 0; + +err_write: + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); + + return ret; +} + +int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) +{ + u64 *mtts = mtt_buf; + int mtt_count; + int total = 0; + u64 *addr; + int npage; + int left; + + if (mtts == NULL || mtt_max < 1) + goto done; + + left = mtt_max; + while (left > 0) { + mtt_count = 0; + addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, + offset + total, + &mtt_count, NULL); + if (!addr || !mtt_count) + goto done; + + npage = min(mtt_count, left); + memcpy(&mtts[total], addr, BA_BYTE_LEN * npage); + left -= npage; + total += npage; + } + +done: + if (base_addr) + *base_addr = mtr->hem_list.root_ba; + + return total; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 813401384d78..912b89b4da34 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -83,18 +83,16 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } 
-EXPORT_SYMBOL_GPL(hns_roce_alloc_pd); void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn); } -EXPORT_SYMBOL_GPL(hns_roce_dealloc_pd); int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) { struct resource *res; - int ret = 0; + int ret; /* Using bitmap to manager UAR index */ ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 8db2817a249e..e0424029b058 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -64,7 +64,6 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) if (atomic_dec_and_test(&qp->refcount)) complete(&qp->free); } -EXPORT_SYMBOL_GPL(hns_roce_qp_event); static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, enum hns_roce_event type) @@ -139,7 +138,6 @@ enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) return HNS_ROCE_QP_NUM_STATE; } } -EXPORT_SYMBOL_GPL(to_hns_roce_state); static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, struct hns_roce_qp *hr_qp) @@ -242,7 +240,6 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) __xa_erase(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1)); xa_unlock_irqrestore(xa, flags); } -EXPORT_SYMBOL_GPL(hns_roce_qp_remove); void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { @@ -257,22 +254,19 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn); hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); - hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn); } } -EXPORT_SYMBOL_GPL(hns_roce_qp_free); void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, int cnt) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - if (base_qpn < SQP_NUM) + if (base_qpn < hr_dev->caps.reserved_qps) return; hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR); } -EXPORT_SYMBOL_GPL(hns_roce_release_range_qp); static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, bool is_user, int has_rq, @@ -392,8 +386,8 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.wqe_shift), PAGE_SIZE); } else { page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - hr_qp->sge.sge_cnt = - max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num); + hr_qp->sge.sge_cnt = ex_sge_num ? 
+ max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0; hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size) + HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << @@ -422,6 +416,91 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, return 0; } +static int split_wqe_buf_region(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_buf_region *regions, + int region_max, int page_shift) +{ + int page_size = 1 << page_shift; + bool is_extend_sge; + int region_cnt = 0; + int buf_size; + int buf_cnt; + + if (hr_qp->buff_size < 1 || region_max < 1) + return region_cnt; + + if (hr_qp->sge.sge_cnt > 0) + is_extend_sge = true; + else + is_extend_sge = false; + + /* sq region */ + if (is_extend_sge) + buf_size = hr_qp->sge.offset - hr_qp->sq.offset; + else + buf_size = hr_qp->rq.offset - hr_qp->sq.offset; + + if (buf_size > 0 && region_cnt < region_max) { + buf_cnt = DIV_ROUND_UP(buf_size, page_size); + hns_roce_init_buf_region(®ions[region_cnt], + hr_dev->caps.wqe_sq_hop_num, + hr_qp->sq.offset / page_size, + buf_cnt); + region_cnt++; + } + + /* sge region */ + if (is_extend_sge) { + buf_size = hr_qp->rq.offset - hr_qp->sge.offset; + if (buf_size > 0 && region_cnt < region_max) { + buf_cnt = DIV_ROUND_UP(buf_size, page_size); + hns_roce_init_buf_region(®ions[region_cnt], + hr_dev->caps.wqe_sge_hop_num, + hr_qp->sge.offset / page_size, + buf_cnt); + region_cnt++; + } + } + + /* rq region */ + buf_size = hr_qp->buff_size - hr_qp->rq.offset; + if (buf_size > 0) { + buf_cnt = DIV_ROUND_UP(buf_size, page_size); + hns_roce_init_buf_region(®ions[region_cnt], + hr_dev->caps.wqe_rq_hop_num, + hr_qp->rq.offset / page_size, + buf_cnt); + region_cnt++; + } + + return region_cnt; +} + +static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev, + struct hns_roce_buf_region *regions, + int region_cnt) +{ + int bt_pg_shift; + int ba_num; + int ret; + + bt_pg_shift = PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz; + + /* all root ba entries must in one bt page */ + do { + ba_num = (1 << bt_pg_shift) / BA_BYTE_LEN; + ret = hns_roce_hem_list_calc_root_ba(regions, region_cnt, + ba_num); + if (ret <= ba_num) + break; + + bt_pg_shift++; + } while (ret > ba_num); + + return bt_pg_shift - PAGE_SHIFT; +} + static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp) @@ -534,15 +613,17 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct ib_udata *udata, unsigned long sqpn, struct hns_roce_qp *hr_qp) { + dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { 0 }; struct device *dev = hr_dev->dev; struct hns_roce_ib_create_qp ucmd; struct hns_roce_ib_create_qp_resp resp = {}; struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context( udata, struct hns_roce_ucontext, ibucontext); + struct hns_roce_buf_region *r; unsigned long qpn = 0; - int ret = 0; u32 page_shift; - u32 npages; + int buf_count; + int ret; int i; mutex_init(&hr_qp->mutex); @@ -596,6 +677,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, init_attr->cap.max_recv_sge]; } + page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { dev_err(dev, "ib_copy_from_udata error for create qp\n"); @@ -617,32 +699,28 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, ret = PTR_ERR(hr_qp->umem); goto err_rq_sge_list; } - - hr_qp->mtt.mtt_type = MTT_TYPE_WQE; - page_shift = PAGE_SHIFT; - if (hr_dev->caps.mtt_buf_pg_sz) { - npages 
= (ib_umem_page_count(hr_qp->umem) + - (1 << hr_dev->caps.mtt_buf_pg_sz) - 1) / - (1 << hr_dev->caps.mtt_buf_pg_sz); - page_shift += hr_dev->caps.mtt_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, - page_shift, - &hr_qp->mtt); - } else { - ret = hns_roce_mtt_init(hr_dev, - ib_umem_page_count(hr_qp->umem), - page_shift, &hr_qp->mtt); - } + hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp, + hr_qp->regions, ARRAY_SIZE(hr_qp->regions), + page_shift); + ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list, + hr_qp->region_cnt); if (ret) { - dev_err(dev, "hns_roce_mtt_init error for create qp\n"); - goto err_buf; + dev_err(dev, "alloc buf_list error for create qp\n"); + goto err_alloc_list; } - ret = hns_roce_ib_umem_write_mtt(hr_dev, &hr_qp->mtt, - hr_qp->umem); - if (ret) { - dev_err(dev, "hns_roce_ib_umem_write_mtt error for create qp\n"); - goto err_mtt; + for (i = 0; i < hr_qp->region_cnt; i++) { + r = &hr_qp->regions[i]; + buf_count = hns_roce_get_umem_bufs(hr_dev, + buf_list[i], r->count, r->offset, + hr_qp->umem, page_shift); + if (buf_count != r->count) { + dev_err(dev, + "get umem buf err, expect %d,ret %d.\n", + r->count, buf_count); + ret = -ENOBUFS; + goto err_get_bufs; + } } if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && @@ -653,7 +731,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, &hr_qp->sdb); if (ret) { dev_err(dev, "sq record doorbell map failed!\n"); - goto err_mtt; + goto err_get_bufs; } /* indicate kernel supports sq record db */ @@ -715,7 +793,6 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } /* Allocate QP buf */ - page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, (1 << page_shift) * 2, &hr_qp->hr_buf, page_shift)) { @@ -723,21 +800,28 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, ret = -ENOMEM; goto err_db; } - - hr_qp->mtt.mtt_type = MTT_TYPE_WQE; - /* Write MTT */ - ret = hns_roce_mtt_init(hr_dev, hr_qp->hr_buf.npages, - hr_qp->hr_buf.page_shift, &hr_qp->mtt); + hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp, + hr_qp->regions, ARRAY_SIZE(hr_qp->regions), + page_shift); + ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list, + hr_qp->region_cnt); if (ret) { - dev_err(dev, "hns_roce_mtt_init error for kernel create qp\n"); - goto err_buf; + dev_err(dev, "alloc buf_list error for create qp!\n"); + goto err_alloc_list; } - ret = hns_roce_buf_write_mtt(hr_dev, &hr_qp->mtt, - &hr_qp->hr_buf); - if (ret) { - dev_err(dev, "hns_roce_buf_write_mtt error for kernel create qp\n"); - goto err_mtt; + for (i = 0; i < hr_qp->region_cnt; i++) { + r = &hr_qp->regions[i]; + buf_count = hns_roce_get_kmem_bufs(hr_dev, + buf_list[i], r->count, r->offset, + &hr_qp->hr_buf); + if (buf_count != r->count) { + dev_err(dev, + "get kmem buf err, expect %d,ret %d.\n", + r->count, buf_count); + ret = -ENOBUFS; + goto err_get_bufs; + } } hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), @@ -761,6 +845,17 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } } + hr_qp->wqe_bt_pg_shift = calc_wqe_bt_page_shift(hr_dev, hr_qp->regions, + hr_qp->region_cnt); + hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift, + page_shift); + ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, + hr_qp->regions, hr_qp->region_cnt); + if (ret) { + dev_err(dev, "mtr attach error for create qp\n"); + goto err_mtr; + } + if (init_attr->qp_type == IB_QPT_GSI && hr_dev->hw_rev == HNS_ROCE_HW_VER1) { /* In v1 engine, GSI QP 
context in RoCE engine's register */ @@ -796,6 +891,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } hr_qp->event = hns_roce_ib_qp_event; + hns_roce_free_buf_list(buf_list, hr_qp->region_cnt); return 0; @@ -810,6 +906,9 @@ err_qpn: if (!sqpn) hns_roce_release_range_qp(hr_dev, qpn, 1); +err_mtr: + hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); + err_wrid: if (udata) { if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && @@ -829,14 +928,13 @@ err_sq_dbmap: hns_roce_qp_has_sq(init_attr)) hns_roce_db_unmap_user(uctx, &hr_qp->sdb); -err_mtt: - hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); +err_get_bufs: + hns_roce_free_buf_list(buf_list, hr_qp->region_cnt); -err_buf: - if (hr_qp->umem) - ib_umem_release(hr_qp->umem); - else +err_alloc_list: + if (!hr_qp->umem) hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + ib_umem_release(hr_qp->umem); err_db: if (!udata && hns_roce_qp_has_rq(init_attr) && @@ -923,7 +1021,6 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, return &hr_qp->ibqp; } -EXPORT_SYMBOL_GPL(hns_roce_create_qp); int to_hr_qp_type(int qp_type) { @@ -942,7 +1039,6 @@ int to_hr_qp_type(int qp_type) return transport_type; } -EXPORT_SYMBOL_GPL(to_hr_qp_type); int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) @@ -1062,7 +1158,6 @@ void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } -EXPORT_SYMBOL_GPL(hns_roce_lock_cqs); void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) __releases(&send_cq->lock) @@ -1079,7 +1174,6 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, spin_unlock_irq(&recv_cq->lock); } } -EXPORT_SYMBOL_GPL(hns_roce_unlock_cqs); static void *get_wqe(struct hns_roce_qp *hr_qp, int offset) { @@ -1091,20 +1185,17 @@ void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n) { return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift)); } -EXPORT_SYMBOL_GPL(get_recv_wqe); void *get_send_wqe(struct hns_roce_qp *hr_qp, int n) { return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift)); } -EXPORT_SYMBOL_GPL(get_send_wqe); void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n) { return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); } -EXPORT_SYMBOL_GPL(get_send_extend_sge); bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, struct ib_cq *ib_cq) @@ -1123,7 +1214,6 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, return cur + nreq >= hr_wq->max_post; } -EXPORT_SYMBOL_GPL(hns_roce_wq_overflow); int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) { @@ -1135,11 +1225,7 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) mutex_init(&qp_table->scc_mutex); xa_init(&hr_dev->qp_table_xa); - /* In hw v1, a port include two SQP, six ports total 12 */ - if (hr_dev->caps.max_sq_sg <= 2) - reserved_from_bot = SQP_NUM; - else - reserved_from_bot = hr_dev->caps.reserved_qps; + reserved_from_bot = hr_dev->caps.reserved_qps; ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, hr_dev->caps.num_qps - 1, reserved_from_bot, diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index b3421b1f21e0..38bb548eaa6d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -30,7 +30,6 @@ void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type) if 
(atomic_dec_and_test(&srq->refcount)) complete(&srq->free); } -EXPORT_SYMBOL_GPL(hns_roce_srq_event); static void hns_roce_ib_srq_event(struct hns_roce_srq *srq, enum hns_roce_event event_type) @@ -181,28 +180,19 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_idx_que *idx_que = &srq->idx_que; - u32 bitmap_num; - int i; - bitmap_num = HNS_ROCE_ALOGN_UP(srq->max, 8 * sizeof(u64)); - - idx_que->bitmap = kcalloc(1, bitmap_num / 8, GFP_KERNEL); + idx_que->bitmap = bitmap_zalloc(srq->max, GFP_KERNEL); if (!idx_que->bitmap) return -ENOMEM; - bitmap_num = bitmap_num / (8 * sizeof(u64)); - idx_que->buf_size = srq->idx_que.buf_size; if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2, &idx_que->idx_buf, page_shift)) { - kfree(idx_que->bitmap); + bitmap_free(idx_que->bitmap); return -ENOMEM; } - for (i = 0; i < bitmap_num; i++) - idx_que->bitmap[i] = ~(0UL); - return 0; } @@ -264,8 +254,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, } else ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(srq->umem), - srq->umem->page_shift, - &srq->mtt); + PAGE_SHIFT, &srq->mtt); if (ret) goto err_buf; @@ -291,10 +280,9 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = hns_roce_mtt_init(hr_dev, npages, page_shift, &srq->idx_que.mtt); } else { - ret = hns_roce_mtt_init(hr_dev, - ib_umem_page_count(srq->idx_que.umem), - srq->idx_que.umem->page_shift, - &srq->idx_que.mtt); + ret = hns_roce_mtt_init( + hr_dev, ib_umem_page_count(srq->idx_que.umem), + PAGE_SHIFT, &srq->idx_que.mtt); } if (ret) { @@ -391,21 +379,19 @@ err_idx_buf: hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); err_idx_mtt: - if (udata) - ib_umem_release(srq->idx_que.umem); + ib_umem_release(srq->idx_que.umem); err_create_idx: hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, &srq->idx_que.idx_buf); - kfree(srq->idx_que.bitmap); + bitmap_free(srq->idx_que.bitmap); err_srq_mtt: hns_roce_mtt_cleanup(hr_dev, &srq->mtt); err_buf: - if (udata) - ib_umem_release(srq->umem); - else + ib_umem_release(srq->umem); + if (!udata) hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); return ret; @@ -419,15 +405,15 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) hns_roce_srq_free(hr_dev, srq); hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - if (ibsrq->uobject) { + if (udata) { hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - ib_umem_release(srq->idx_que.umem); - ib_umem_release(srq->umem); } else { kvfree(srq->wrid); hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift, &srq->buf); } + ib_umem_release(srq->idx_que.umem); + ib_umem_release(srq->umem); } int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 700a5d06b60c..2d6a378e8560 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -4279,11 +4279,11 @@ static void i40iw_qhash_ctrl(struct i40iw_device *iwdev, /* if not found then add a child listener if interface is going up */ if (!ifup) return; - child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC); + child_listen_node = kmemdup(parent_listen_node, + sizeof(*child_listen_node), GFP_ATOMIC); if (!child_listen_node) return; node_allocated = true; - memcpy(child_listen_node, parent_listen_node, sizeof(*child_listen_node)); memcpy(child_listen_node->loc_addr, ipaddr, ipv4 ? 
4 : 16); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 5689d742bafb..d169a8031375 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -772,6 +772,8 @@ static int i40iw_query_qp(struct ib_qp *ibqp, struct i40iw_qp *iwqp = to_iwqp(ibqp); struct i40iw_sc_qp *qp = &iwqp->sc_qp; + attr->qp_state = iwqp->ibqp_state; + attr->cur_qp_state = attr->qp_state; attr->qp_access_flags = 0; attr->cap.max_send_wr = qp->qp_uk.sq_size; attr->cap.max_recv_wr = qp->qp_uk.rq_size; @@ -1064,44 +1066,38 @@ void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq) * @ib_cq: cq pointer * @udata: user data or NULL for kernel object */ -static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct i40iw_cq *iwcq; struct i40iw_device *iwdev; struct i40iw_sc_cq *cq; - if (!ib_cq) { - i40iw_pr_err("ib_cq == NULL\n"); - return 0; - } - iwcq = to_iwcq(ib_cq); iwdev = to_iwdev(ib_cq->device); cq = &iwcq->sc_cq; i40iw_cq_wq_destroy(iwdev, cq); cq_free_resources(iwdev, iwcq); - kfree(iwcq); i40iw_rem_devusecount(iwdev); - return 0; } /** * i40iw_create_cq - create cq - * @ibdev: device pointer from stack + * @ibcq: CQ allocated * @attr: attributes for cq * @udata: user data */ -static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +static int i40iw_create_cq(struct ib_cq *ibcq, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; struct i40iw_device *iwdev = to_iwdev(ibdev); - struct i40iw_cq *iwcq; + struct i40iw_cq *iwcq = to_iwcq(ibcq); struct i40iw_pbl *iwpbl; u32 cq_num = 0; struct i40iw_sc_cq *cq; struct i40iw_sc_dev *dev = &iwdev->sc_dev; - struct i40iw_cq_init_info info; + struct i40iw_cq_init_info info = {}; enum i40iw_status_code status; struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; @@ -1111,22 +1107,16 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, int entries = attr->cqe; if (iwdev->closing) - return ERR_PTR(-ENODEV); + return -ENODEV; if (entries > iwdev->max_cqe) - return ERR_PTR(-EINVAL); - - iwcq = kzalloc(sizeof(*iwcq), GFP_KERNEL); - if (!iwcq) - return ERR_PTR(-ENOMEM); - - memset(&info, 0, sizeof(info)); + return -EINVAL; err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_cqs, iwdev->max_cq, &cq_num, &iwdev->next_cq); if (err_code) - goto error; + return err_code; cq = &iwcq->sc_cq; cq->back_cq = (void *)iwcq; @@ -1233,15 +1223,13 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, } i40iw_add_devusecount(iwdev); - return (struct ib_cq *)iwcq; + return 0; cq_destroy: i40iw_cq_wq_destroy(iwdev, cq); cq_free_resources: cq_free_resources(iwdev, iwcq); -error: - kfree(iwcq); - return ERR_PTR(err_code); + return err_code; } /** @@ -2018,8 +2006,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) struct cqp_commands_info *cqp_info; u32 stag_idx; - if (iwmr->region) - ib_umem_release(iwmr->region); + ib_umem_release(iwmr->region); if (iwmr->type != IW_MEMREG_TYPE_MEM) { /* region is released. only test for userness. 
*/ @@ -2655,6 +2642,11 @@ static int i40iw_query_pkey(struct ib_device *ibdev, } static const struct ib_device_ops i40iw_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_I40IW, + /* NOTE: Older kernels wrongly use 0 for the uverbs_abi_ver */ + .uverbs_abi_ver = I40IW_ABI_VER, + .alloc_hw_stats = i40iw_alloc_hw_stats, .alloc_mr = i40iw_alloc_mr, .alloc_pd = i40iw_alloc_pd, @@ -2694,6 +2686,7 @@ static const struct ib_device_ops i40iw_dev_ops = { .reg_user_mr = i40iw_reg_user_mr, .req_notify_cq = i40iw_req_notify_cq, INIT_RDMA_OBJ_SIZE(ib_pd, i40iw_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_cq, i40iw_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_ucontext, i40iw_ucontext, ibucontext), }; @@ -2712,7 +2705,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev i40iw_pr_err("iwdev == NULL\n"); return NULL; } - iwibdev->ibdev.owner = THIS_MODULE; iwdev->iwibdev = iwibdev; iwibdev->iwdev = iwdev; @@ -2771,9 +2763,6 @@ void i40iw_port_ibevent(struct i40iw_device *iwdev) */ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev) { - if (!iwibdev) - return; - ib_unregister_device(&iwibdev->ibdev); wait_event_timeout(iwibdev->iwdev->close_wq, !atomic64_read(&iwibdev->iwdev->use_count), @@ -2795,7 +2784,6 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) return -ENOMEM; iwibdev = iwdev->iwibdev; rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group); - iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW; ret = ib_register_device(&iwibdev->ibdev, "i40iw%d"); if (ret) goto error; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 022a0b4ea452..a7d238d312f0 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -172,14 +172,14 @@ err_buf: } #define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION -struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; int vector = attr->comp_vector; struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct mlx4_ib_cq *cq; + struct mlx4_ib_cq *cq = to_mcq(ibcq); struct mlx4_uar *uar; void *buf_addr; int err; @@ -187,14 +187,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, udata, struct mlx4_ib_ucontext, ibucontext); if (entries < 1 || entries > dev->dev->caps.max_cqes) - return ERR_PTR(-EINVAL); + return -EINVAL; if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) - return ERR_PTR(-EINVAL); - - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); + return -EINVAL; entries = roundup_pow_of_two(entries + 1); cq->ibcq.cqe = entries - 1; @@ -269,7 +265,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, goto err_cq_free; } - return &cq->ibcq; + return 0; err_cq_free: mlx4_cq_free(dev->dev, &cq->mcq); @@ -281,19 +277,15 @@ err_dbmap: err_mtt: mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt); - if (udata) - ib_umem_release(cq->umem); - else + ib_umem_release(cq->umem); + if (!udata) mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); err_db: if (!udata) mlx4_db_free(dev->dev, &cq->db); - err_cq: - kfree(cq); - - return ERR_PTR(err); + return err; } static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, @@ -475,18 +467,15 @@ err_buf: kfree(cq->resize_buf); cq->resize_buf = NULL; - if (cq->resize_umem) { - ib_umem_release(cq->resize_umem); - cq->resize_umem = NULL; - } 
- + ib_umem_release(cq->resize_umem); + cq->resize_umem = NULL; out: mutex_unlock(&cq->resize_mutex); return err; } -int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(cq->device); struct mlx4_ib_cq *mcq = to_mcq(cq); @@ -501,15 +490,11 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) struct mlx4_ib_ucontext, ibucontext), &mcq->db); - ib_umem_release(mcq->umem); } else { mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe); mlx4_db_free(dev->dev, &mcq->db); } - - kfree(mcq); - - return 0; + ib_umem_release(mcq->umem); } static void dump_cqe(void *cqe) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 25d09d53b51c..8790101facb7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1089,7 +1089,8 @@ static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx, if (!dev->ib_active) return -EAGAIN; - if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { + if (ibdev->ops.uverbs_abi_ver == + MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { resp_v3.qp_tab_size = dev->dev->caps.num_qps; resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size; resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; @@ -1111,7 +1112,7 @@ static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx, INIT_LIST_HEAD(&context->wqn_ranges_list); mutex_init(&context->wqn_ranges_mutex); - if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) + if (ibdev->ops.uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); else err = ib_copy_to_udata(udata, &resp, sizeof(resp)); @@ -2509,6 +2510,10 @@ static void get_fw_ver_str(struct ib_device *device, char *str) } static const struct ib_device_ops mlx4_ib_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_MLX4, + .uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION, + .add_gid = mlx4_ib_add_gid, .alloc_mr = mlx4_ib_alloc_mr, .alloc_pd = mlx4_ib_alloc_pd, @@ -2560,6 +2565,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = { .resize_cq = mlx4_ib_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, mlx4_ib_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext), @@ -2642,7 +2648,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->dev = dev; ibdev->bond_next_port = 0; - ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; ibdev->num_ports = num_ports; @@ -2651,11 +2656,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; - if (dev->caps.userspace_caps) - ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; - else - ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION; - ibdev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -2729,6 +2729,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); } + if (!dev->caps.userspace_caps) + ibdev->ib_dev.ops.uverbs_abi_ver = + MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION; + mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); @@ -2839,7 +2843,6 @@ static void *mlx4_ib_add(struct 
mlx4_dev *dev) goto err_steer_free_bitmap; rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group); - ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4; if (ib_register_device(&ibdev->ib_dev, "mlx4_%d")) goto err_diag_counters; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 26897102057d..eb53bb4c0c91 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -743,10 +743,9 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); -struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); -int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); @@ -907,7 +906,7 @@ void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); +void mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 355205a28544..753479285ce9 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -258,7 +258,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, int *num_of_mtts) { u64 block_shift = MLX4_MAX_MTT_SHIFT; - u64 min_shift = umem->page_shift; + u64 min_shift = PAGE_SHIFT; u64 last_block_aligned_end = 0; u64 current_block_start = 0; u64 first_block_start = 0; @@ -295,8 +295,8 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, * in access to the wrong data. 
*/ misalignment_bits = - (start_va & (~(((u64)(BIT(umem->page_shift))) - 1ULL))) - ^ current_block_start; + (start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^ + current_block_start; block_shift = min(alignment_of(misalignment_bits), block_shift); } @@ -368,8 +368,7 @@ end: } static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start, - u64 length, u64 virt_addr, - int access_flags) + u64 length, int access_flags) { /* * Force registering the memory as writable if the underlying pages @@ -415,8 +414,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = - mlx4_get_umem_mr(udata, start, length, virt_addr, access_flags); + mr->umem = mlx4_get_umem_mr(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; @@ -505,7 +503,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); ib_umem_release(mmr->umem); - mmr->umem = mlx4_get_umem_mr(udata, start, length, virt_addr, + mmr->umem = mlx4_get_umem_mr(udata, start, length, mr_access_flags); if (IS_ERR(mmr->umem)) { err = PTR_ERR(mmr->umem); @@ -514,7 +512,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, goto release_mpt_entry; } n = ib_umem_page_count(mmr->umem); - shift = mmr->umem->page_shift; + shift = PAGE_SHIFT; err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr, virt_addr, length, n, shift, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 5221c0794d1d..82aff2f2fdc2 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1207,10 +1207,9 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &qp->mtt); err_buf: - if (qp->umem) - ib_umem_release(qp->umem); - else + if (!qp->umem) mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); + ib_umem_release(qp->umem); err_db: if (!udata && qp_has_rq(init_attr)) @@ -1421,7 +1420,6 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_ib_db_unmap_user(mcontext, &qp->db); } - ib_umem_release(qp->umem); } else { kvfree(qp->sq.wrid); kvfree(qp->rq.wrid); @@ -1432,6 +1430,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, if (qp->rq.wqe_cnt) mlx4_db_free(dev->dev, &qp->db); } + ib_umem_release(qp->umem); del_gid_entries(qp); } @@ -4248,7 +4247,7 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, return err; } -int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) +void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibwq->device); struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); @@ -4259,8 +4258,6 @@ int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, udata); kfree(qp); - - return 0; } struct ib_rwq_ind_table diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 4bf2946b9759..848db7264cc9 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -115,7 +115,7 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, return PTR_ERR(srq->umem); err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem), - srq->umem->page_shift, &srq->mtt); + PAGE_SHIFT, &srq->mtt); if (err) goto err_buf; @@ -204,10 +204,9 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &srq->mtt); err_buf: - if (srq->umem) - ib_umem_release(srq->umem); - else + if (!srq->umem) mlx4_buf_free(dev->dev, buf_size, &srq->buf); + ib_umem_release(srq->umem); 
err_db: if (!udata) @@ -275,13 +274,13 @@ void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) struct mlx4_ib_ucontext, ibucontext), &msrq->db); - ib_umem_release(msrq->umem); } else { kvfree(msrq->wrid); mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift, &msrq->buf); mlx4_db_free(dev->dev, &msrq->db); } + ib_umem_release(msrq->umem); } void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 4efbbd2fce0c..45f48cde6b9d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -884,15 +884,15 @@ static void notify_soft_wc_handler(struct work_struct *work) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; int vector = attr->comp_vector; struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_cq *cq = to_mcq(ibcq); u32 out[MLX5_ST_SZ_DW(create_cq_out)]; - struct mlx5_ib_cq *cq; int uninitialized_var(index); int uninitialized_var(inlen); u32 *cqb = NULL; @@ -904,18 +904,14 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, if (entries < 0 || (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))) - return ERR_PTR(-EINVAL); + return -EINVAL; if (check_cq_create_flags(attr->flags)) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; entries = roundup_pow_of_two(entries + 1); if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) - return ERR_PTR(-EINVAL); - - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); + return -EINVAL; cq->ibcq.cqe = entries - 1; mutex_init(&cq->resize_mutex); @@ -930,13 +926,13 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size, &index, &inlen); if (err) - goto err_create; + return err; } else { cqe_size = cache_line_size() == 128 ? 
128 : 64; err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, &index, &inlen); if (err) - goto err_create; + return err; INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } @@ -981,7 +977,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, kvfree(cqb); - return &cq->ibcq; + return 0; err_cmd: mlx5_core_destroy_cq(dev->mdev, &cq->mcq); @@ -992,14 +988,10 @@ err_cqb: destroy_cq_user(cq, udata); else destroy_cq_kernel(dev, cq); - -err_create: - kfree(cq); - - return ERR_PTR(err); + return err; } -int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); @@ -1009,10 +1001,6 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) destroy_cq_user(mcq, udata); else destroy_cq_kernel(dev, mcq); - - kfree(mcq); - - return 0; } static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn) @@ -1138,11 +1126,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return 0; } -static void un_resize_user(struct mlx5_ib_cq *cq) -{ - ib_umem_release(cq->resize_umem); -} - static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, int entries, int cqe_size) { @@ -1165,12 +1148,6 @@ ex: return err; } -static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) -{ - free_cq_buf(dev, cq->resize_buf); - cq->resize_buf = NULL; -} - static int copy_resize_cqes(struct mlx5_ib_cq *cq) { struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); @@ -1351,10 +1328,11 @@ ex_alloc: kvfree(in); ex_resize: - if (udata) - un_resize_user(cq); - else - un_resize_kernel(dev, cq); + ib_umem_release(cq->resize_umem); + if (!udata) { + free_cq_buf(dev, cq->resize_buf); + cq->resize_buf = NULL; + } ex: mutex_unlock(&cq->resize_mutex); return err; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 931f587dfb8f..ec4370f99381 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -14,13 +14,17 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include "mlx5_ib.h" +#include <linux/xarray.h> #define UVERBS_MODULE_NAME mlx5_ib #include <rdma/uverbs_named_ioctl.h> +static void dispatch_event_fd(struct list_head *fd_list, const void *data); + enum devx_obj_flags { DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0, DEVX_OBJ_FLAGS_DCT = 1 << 1, + DEVX_OBJ_FLAGS_CQ = 1 << 2, }; struct devx_async_data { @@ -33,9 +37,61 @@ struct devx_async_data { struct mlx5_ib_uapi_devx_async_cmd_hdr hdr; }; +struct devx_async_event_data { + struct list_head list; /* headed in ev_file->event_list */ + struct mlx5_ib_uapi_devx_async_event_hdr hdr; +}; + +/* first level XA value data structure */ +struct devx_event { + struct xarray object_ids; /* second XA level, Key = object id */ + struct list_head unaffiliated_list; +}; + +/* second level XA value data structure */ +struct devx_obj_event { + struct rcu_head rcu; + struct list_head obj_sub_list; +}; + +struct devx_event_subscription { + struct list_head file_list; /* headed in ev_file-> + * subscribed_events_list + */ + struct list_head xa_list; /* headed in devx_event->unaffiliated_list or + * devx_obj_event->obj_sub_list + */ + struct list_head obj_list; /* headed in devx_object */ + struct list_head event_list; /* headed in ev_file->event_list or in + * temp list via subscription + */ + + u8 is_cleaned:1; + u32 xa_key_level1; + u32 xa_key_level2; + struct rcu_head rcu; + u64 cookie; + struct 
devx_async_event_file *ev_file; + struct file *filp; /* Upon hot unplug we need a direct access to */ + struct eventfd_ctx *eventfd; +}; + +struct devx_async_event_file { + struct ib_uobject uobj; + /* Head of events that are subscribed to this FD */ + struct list_head subscribed_events_list; + spinlock_t lock; + wait_queue_head_t poll_wait; + struct list_head event_list; + struct mlx5_ib_dev *dev; + u8 omit_data:1; + u8 is_overflow_err:1; + u8 is_destroyed:1; +}; + #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) struct devx_obj { - struct mlx5_core_dev *mdev; + struct mlx5_ib_dev *ib_dev; u64 obj_id; u32 dinlen; /* destroy inbox length */ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; @@ -43,7 +99,9 @@ struct devx_obj { union { struct mlx5_ib_devx_mr devx_mr; struct mlx5_core_dct core_dct; + struct mlx5_core_cq core_cq; }; + struct list_head event_sub; /* holds devx_event_subscription entries */ }; struct devx_umem { @@ -149,6 +207,127 @@ bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id) return false; } +static bool is_legacy_unaffiliated_event_num(u16 event_num) +{ + switch (event_num) { + case MLX5_EVENT_TYPE_PORT_CHANGE: + return true; + default: + return false; + } +} + +static bool is_legacy_obj_event_num(u16 event_num) +{ + switch (event_num) { + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + case MLX5_EVENT_TYPE_CQ_ERROR: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_DCT_DRAINED: + case MLX5_EVENT_TYPE_COMP: + return true; + default: + return false; + } +} + +static u16 get_legacy_obj_type(u16 opcode) +{ + switch (opcode) { + case MLX5_CMD_OP_CREATE_RQ: + return MLX5_EVENT_QUEUE_TYPE_RQ; + case MLX5_CMD_OP_CREATE_QP: + return MLX5_EVENT_QUEUE_TYPE_QP; + case MLX5_CMD_OP_CREATE_SQ: + return MLX5_EVENT_QUEUE_TYPE_SQ; + case MLX5_CMD_OP_CREATE_DCT: + return MLX5_EVENT_QUEUE_TYPE_DCT; + default: + return 0; + } +} + +static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num) +{ + u16 opcode; + + opcode = (obj->obj_id >> 32) & 0xffff; + + if (is_legacy_obj_event_num(event_num)) + return get_legacy_obj_type(opcode); + + switch (opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + return (obj->obj_id >> 48); + case MLX5_CMD_OP_CREATE_RQ: + return MLX5_OBJ_TYPE_RQ; + case MLX5_CMD_OP_CREATE_QP: + return MLX5_OBJ_TYPE_QP; + case MLX5_CMD_OP_CREATE_SQ: + return MLX5_OBJ_TYPE_SQ; + case MLX5_CMD_OP_CREATE_DCT: + return MLX5_OBJ_TYPE_DCT; + case MLX5_CMD_OP_CREATE_TIR: + return MLX5_OBJ_TYPE_TIR; + case MLX5_CMD_OP_CREATE_TIS: + return MLX5_OBJ_TYPE_TIS; + case MLX5_CMD_OP_CREATE_PSV: + return MLX5_OBJ_TYPE_PSV; + case MLX5_OBJ_TYPE_MKEY: + return MLX5_OBJ_TYPE_MKEY; + case MLX5_CMD_OP_CREATE_RMP: + return MLX5_OBJ_TYPE_RMP; + case MLX5_CMD_OP_CREATE_XRC_SRQ: + return MLX5_OBJ_TYPE_XRC_SRQ; + case MLX5_CMD_OP_CREATE_XRQ: + return MLX5_OBJ_TYPE_XRQ; + case MLX5_CMD_OP_CREATE_RQT: + return MLX5_OBJ_TYPE_RQT; + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + return MLX5_OBJ_TYPE_FLOW_COUNTER; + case MLX5_CMD_OP_CREATE_CQ: + return MLX5_OBJ_TYPE_CQ; + default: + return 0; + } +} + +static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe) +{ + switch (event_type) { + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + case 
MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return eqe->data.qp_srq.type; + case MLX5_EVENT_TYPE_CQ_ERROR: + return 0; + case MLX5_EVENT_TYPE_DCT_DRAINED: + return MLX5_EVENT_QUEUE_TYPE_DCT; + default: + return MLX5_GET(affiliated_event_header, &eqe->data, obj_type); + } +} + +static u32 get_dec_obj_id(u64 obj_id) +{ + return (obj_id & 0xffffffff); +} + /* * As the obj_id in the firmware is not globally unique the object type * must be considered upon checking for a valid object id. @@ -715,12 +894,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in) return c->devx_uid; } -static bool devx_is_general_cmd(void *in) + +static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); - if (opcode >= MLX5_CMD_OP_GENERAL_START && - opcode < MLX5_CMD_OP_GENERAL_END) + /* Pass all cmds for vhca_tunnel as general, tracking is done in FW */ + if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) && + MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) || + (opcode >= MLX5_CMD_OP_GENERAL_START && + opcode < MLX5_CMD_OP_GENERAL_END)) return true; switch (opcode) { @@ -846,7 +1029,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( return uid; /* Only white list of some general HCA commands are allowed for this method. */ - if (!devx_is_general_cmd(cmd_in)) + if (!devx_is_general_cmd(cmd_in, dev)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1111,33 +1294,72 @@ static void devx_free_indirect_mkey(struct rcu_head *rcu) */ static void devx_cleanup_mkey(struct devx_obj *obj) { - xa_erase(&obj->mdev->priv.mkey_table, + xa_erase(&obj->ib_dev->mdev->priv.mkey_table, mlx5_base_mkey(obj->devx_mr.mmkey.key)); } +static void devx_cleanup_subscription(struct mlx5_ib_dev *dev, + struct devx_event_subscription *sub) +{ + struct devx_event *event; + struct devx_obj_event *xa_val_level2; + + if (sub->is_cleaned) + return; + + sub->is_cleaned = 1; + list_del_rcu(&sub->xa_list); + + if (list_empty(&sub->obj_list)) + return; + + list_del_rcu(&sub->obj_list); + /* check whether key level 1 for this obj_sub_list is empty */ + event = xa_load(&dev->devx_event_table.event_xa, + sub->xa_key_level1); + WARN_ON(!event); + + xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2); + if (list_empty(&xa_val_level2->obj_sub_list)) { + xa_erase(&event->object_ids, + sub->xa_key_level2); + kfree_rcu(xa_val_level2, rcu); + } +} + static int devx_obj_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs) { u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + struct mlx5_devx_event_table *devx_event_table; struct devx_obj *obj = uobject->object; + struct devx_event_subscription *sub_entry, *tmp; + struct mlx5_ib_dev *dev; int ret; + dev = mlx5_udata_to_mdev(&attrs->driver_udata); if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) devx_cleanup_mkey(obj); if (obj->flags & DEVX_OBJ_FLAGS_DCT) - ret = mlx5_core_destroy_dct(obj->mdev, &obj->core_dct); + ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); + else if (obj->flags & DEVX_OBJ_FLAGS_CQ) + ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq); else - ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, - sizeof(out)); + ret = 
mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, + obj->dinlen, out, sizeof(out)); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - struct mlx5_ib_dev *dev = - mlx5_udata_to_mdev(&attrs->driver_udata); + devx_event_table = &dev->devx_event_table; + + mutex_lock(&devx_event_table->event_xa_lock); + list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list) + devx_cleanup_subscription(dev, sub_entry); + mutex_unlock(&devx_event_table->event_xa_lock); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, devx_free_indirect_mkey); return ret; @@ -1147,6 +1369,29 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, return ret; } +static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) +{ + struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq); + struct mlx5_devx_event_table *table; + struct devx_event *event; + struct devx_obj_event *obj_event; + u32 obj_id = mcq->cqn; + + table = &obj->ib_dev->devx_event_table; + rcu_read_lock(); + event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP); + if (!event) + goto out; + + obj_event = xa_load(&event->object_ids, obj_id); + if (!obj_event) + goto out; + + dispatch_event_fd(&obj_event->obj_sub_list, eqe); +out: + rcu_read_unlock(); +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( struct uverbs_attr_bundle *attrs) { @@ -1169,6 +1414,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( u32 obj_id; u16 opcode; + if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id)) + return -EINVAL; + uid = devx_get_uid(c, cmd_in); if (uid < 0) return uid; @@ -1198,6 +1446,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( err = mlx5_core_create_dct(dev->mdev, &obj->core_dct, cmd_in, cmd_in_len, cmd_out, cmd_out_len); + } else if (opcode == MLX5_CMD_OP_CREATE_CQ) { + obj->flags |= DEVX_OBJ_FLAGS_CQ; + obj->core_cq.comp = devx_cq_comp; + err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, + cmd_in, cmd_in_len, cmd_out, + cmd_out_len); } else { err = mlx5_cmd_exec(dev->mdev, cmd_in, cmd_in_len, @@ -1208,7 +1462,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( goto obj_free; uobj->object = obj; - obj->mdev = dev->mdev; + INIT_LIST_HEAD(&obj->event_sub); + obj->ib_dev = dev; devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); @@ -1235,9 +1490,11 @@ err_copy: devx_cleanup_mkey(obj); obj_destroy: if (obj->flags & DEVX_OBJ_FLAGS_DCT) - mlx5_core_destroy_dct(obj->mdev, &obj->core_dct); + mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); + else if (obj->flags & DEVX_OBJ_FLAGS_CQ) + mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq); else - mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, + mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); obj_free: kfree(obj); @@ -1259,6 +1516,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( int err; int uid; + if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id)) + return -EINVAL; + uid = devx_get_uid(c, cmd_in); if (uid < 0) return uid; @@ -1301,6 +1561,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( int uid; struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device); + if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id)) + return -EINVAL; + uid = devx_get_uid(c, cmd_in); if (uid < 0) return uid; @@ -1365,6 +1628,38 @@ static int 
UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)( return 0; } +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE); + struct devx_async_event_file *ev_file; + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + u32 flags; + int err; + + err = uverbs_get_flags32(&flags, attrs, + MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS, + MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA); + + if (err) + return err; + + ev_file = container_of(uobj, struct devx_async_event_file, + uobj); + spin_lock_init(&ev_file->lock); + INIT_LIST_HEAD(&ev_file->event_list); + init_waitqueue_head(&ev_file->poll_wait); + if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA) + ev_file->omit_data = 1; + INIT_LIST_HEAD(&ev_file->subscribed_events_list); + ev_file->dev = dev; + get_device(&dev->ib_dev.dev); + return 0; +} + static void devx_query_callback(int status, struct mlx5_async_work *context) { struct devx_async_data *async_data = @@ -1406,6 +1701,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( struct devx_async_cmd_event_file *ev_file; struct devx_async_data *async_data; + if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id)) + return -EINVAL; + uid = devx_get_uid(c, cmd_in); if (uid < 0) return uid; @@ -1474,6 +1772,331 @@ sub_bytes: return err; } +static void +subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table, + u32 key_level1, + bool is_level2, + u32 key_level2) +{ + struct devx_event *event; + struct devx_obj_event *xa_val_level2; + + /* Level 1 is valid for future use, no need to free */ + if (!is_level2) + return; + + event = xa_load(&devx_event_table->event_xa, key_level1); + WARN_ON(!event); + + xa_val_level2 = xa_load(&event->object_ids, + key_level2); + if (list_empty(&xa_val_level2->obj_sub_list)) { + xa_erase(&event->object_ids, + key_level2); + kfree_rcu(xa_val_level2, rcu); + } +} + +static int +subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, + u32 key_level1, + bool is_level2, + u32 key_level2) +{ + struct devx_obj_event *obj_event; + struct devx_event *event; + int err; + + event = xa_load(&devx_event_table->event_xa, key_level1); + if (!event) { + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return -ENOMEM; + + INIT_LIST_HEAD(&event->unaffiliated_list); + xa_init(&event->object_ids); + + err = xa_insert(&devx_event_table->event_xa, + key_level1, + event, + GFP_KERNEL); + if (err) { + kfree(event); + return err; + } + } + + if (!is_level2) + return 0; + + obj_event = xa_load(&event->object_ids, key_level2); + if (!obj_event) { + obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL); + if (!obj_event) + /* Level1 is valid for future use, no need to free */ + return -ENOMEM; + + err = xa_insert(&event->object_ids, + key_level2, + obj_event, + GFP_KERNEL); + if (err) + return err; + INIT_LIST_HEAD(&obj_event->obj_sub_list); + } + + return 0; +} + +static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list, + struct devx_obj *obj) +{ + int i; + + for (i = 0; i < num_events; i++) { + if (obj) { + if (!is_legacy_obj_event_num(event_type_num_list[i])) + return false; + } else if (!is_legacy_unaffiliated_event_num( + event_type_num_list[i])) { + return false; + } + } + + return true; +} + +#define 
MAX_SUPP_EVENT_NUM 255 +static bool is_valid_events(struct mlx5_core_dev *dev, + int num_events, u16 *event_type_num_list, + struct devx_obj *obj) +{ + __be64 *aff_events; + __be64 *unaff_events; + int mask_entry; + int mask_bit; + int i; + + if (MLX5_CAP_GEN(dev, event_cap)) { + aff_events = MLX5_CAP_DEV_EVENT(dev, + user_affiliated_events); + unaff_events = MLX5_CAP_DEV_EVENT(dev, + user_unaffiliated_events); + } else { + return is_valid_events_legacy(num_events, event_type_num_list, + obj); + } + + for (i = 0; i < num_events; i++) { + if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM) + return false; + + mask_entry = event_type_num_list[i] / 64; + mask_bit = event_type_num_list[i] % 64; + + if (obj) { + /* CQ completion */ + if (event_type_num_list[i] == 0) + continue; + + if (!(be64_to_cpu(aff_events[mask_entry]) & + (1ull << mask_bit))) + return false; + + continue; + } + + if (!(be64_to_cpu(unaff_events[mask_entry]) & + (1ull << mask_bit))) + return false; + } + + return true; +} + +#define MAX_NUM_EVENTS 16 +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *devx_uobj = uverbs_attr_get_uobject( + attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE); + struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + struct ib_uobject *fd_uobj; + struct devx_obj *obj = NULL; + struct devx_async_event_file *ev_file; + struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table; + u16 *event_type_num_list; + struct devx_event_subscription *event_sub, *tmp_sub; + struct list_head sub_list; + int redirect_fd; + bool use_eventfd = false; + int num_events; + int num_alloc_xa_entries = 0; + u16 obj_type = 0; + u64 cookie = 0; + u32 obj_id = 0; + int err; + int i; + + if (!c->devx_uid) + return -EINVAL; + + if (!IS_ERR(devx_uobj)) { + obj = (struct devx_obj *)devx_uobj->object; + if (obj) + obj_id = get_dec_obj_id(obj->obj_id); + } + + fd_uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE); + if (IS_ERR(fd_uobj)) + return PTR_ERR(fd_uobj); + + ev_file = container_of(fd_uobj, struct devx_async_event_file, + uobj); + + if (uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) { + err = uverbs_copy_from(&redirect_fd, attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM); + if (err) + return err; + + use_eventfd = true; + } + + if (uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) { + if (use_eventfd) + return -EINVAL; + + err = uverbs_copy_from(&cookie, attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE); + if (err) + return err; + } + + num_events = uverbs_attr_ptr_get_array_size( + attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST, + sizeof(u16)); + + if (num_events < 0) + return num_events; + + if (num_events > MAX_NUM_EVENTS) + return -EINVAL; + + event_type_num_list = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST); + + if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj)) + return -EINVAL; + + INIT_LIST_HEAD(&sub_list); + + /* Protect from concurrent subscriptions to same XA entries to allow + * both to succeed + */ + mutex_lock(&devx_event_table->event_xa_lock); + for (i = 0; i < num_events; i++) { + u32 key_level1; + + if (obj) + obj_type = get_dec_obj_type(obj, + event_type_num_list[i]); + key_level1 = event_type_num_list[i] | obj_type << 16; + + err = 
subscribe_event_xa_alloc(devx_event_table, + key_level1, + obj, + obj_id); + if (err) + goto err; + + num_alloc_xa_entries++; + event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL); + if (!event_sub) + goto err; + + list_add_tail(&event_sub->event_list, &sub_list); + if (use_eventfd) { + event_sub->eventfd = + eventfd_ctx_fdget(redirect_fd); + + if (IS_ERR(event_sub)) { + err = PTR_ERR(event_sub->eventfd); + event_sub->eventfd = NULL; + goto err; + } + } + + event_sub->cookie = cookie; + event_sub->ev_file = ev_file; + event_sub->filp = fd_uobj->object; + /* May be needed upon cleanup the devx object/subscription */ + event_sub->xa_key_level1 = key_level1; + event_sub->xa_key_level2 = obj_id; + INIT_LIST_HEAD(&event_sub->obj_list); + } + + /* Once all the allocations and the XA data insertions were done we + * can go ahead and add all the subscriptions to the relevant lists + * without concern of a failure. + */ + list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) { + struct devx_event *event; + struct devx_obj_event *obj_event; + + list_del_init(&event_sub->event_list); + + spin_lock_irq(&ev_file->lock); + list_add_tail_rcu(&event_sub->file_list, + &ev_file->subscribed_events_list); + spin_unlock_irq(&ev_file->lock); + + event = xa_load(&devx_event_table->event_xa, + event_sub->xa_key_level1); + WARN_ON(!event); + + if (!obj) { + list_add_tail_rcu(&event_sub->xa_list, + &event->unaffiliated_list); + continue; + } + + obj_event = xa_load(&event->object_ids, obj_id); + WARN_ON(!obj_event); + list_add_tail_rcu(&event_sub->xa_list, + &obj_event->obj_sub_list); + list_add_tail_rcu(&event_sub->obj_list, + &obj->event_sub); + } + + mutex_unlock(&devx_event_table->event_xa_lock); + return 0; + +err: + list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) { + list_del(&event_sub->event_list); + + subscribe_event_xa_dealloc(devx_event_table, + event_sub->xa_key_level1, + obj, + obj_id); + + if (event_sub->eventfd) + eventfd_ctx_put(event_sub->eventfd); + + kfree(event_sub); + } + + mutex_unlock(&devx_event_table->event_xa_lock); + return err; +} + static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, struct uverbs_attr_bundle *attrs, struct devx_umem *obj) @@ -1621,6 +2244,203 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, return 0; } +static bool is_unaffiliated_event(struct mlx5_core_dev *dev, + unsigned long event_type) +{ + __be64 *unaff_events; + int mask_entry; + int mask_bit; + + if (!MLX5_CAP_GEN(dev, event_cap)) + return is_legacy_unaffiliated_event_num(event_type); + + unaff_events = MLX5_CAP_DEV_EVENT(dev, + user_unaffiliated_events); + WARN_ON(event_type > MAX_SUPP_EVENT_NUM); + + mask_entry = event_type / 64; + mask_bit = event_type % 64; + + if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit))) + return false; + + return true; +} + +static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data) +{ + struct mlx5_eqe *eqe = data; + u32 obj_id = 0; + + switch (event_type) { + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + break; + case MLX5_EVENT_TYPE_DCT_DRAINED: + obj_id = be32_to_cpu(eqe->data.dct.dctn) & 
0xffffff; + break; + case MLX5_EVENT_TYPE_CQ_ERROR: + obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; + break; + default: + obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id); + break; + } + + return obj_id; +} + +static int deliver_event(struct devx_event_subscription *event_sub, + const void *data) +{ + struct devx_async_event_file *ev_file; + struct devx_async_event_data *event_data; + unsigned long flags; + + ev_file = event_sub->ev_file; + + if (ev_file->omit_data) { + spin_lock_irqsave(&ev_file->lock, flags); + if (!list_empty(&event_sub->event_list)) { + spin_unlock_irqrestore(&ev_file->lock, flags); + return 0; + } + + list_add_tail(&event_sub->event_list, &ev_file->event_list); + spin_unlock_irqrestore(&ev_file->lock, flags); + wake_up_interruptible(&ev_file->poll_wait); + return 0; + } + + event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe), + GFP_ATOMIC); + if (!event_data) { + spin_lock_irqsave(&ev_file->lock, flags); + ev_file->is_overflow_err = 1; + spin_unlock_irqrestore(&ev_file->lock, flags); + return -ENOMEM; + } + + event_data->hdr.cookie = event_sub->cookie; + memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe)); + + spin_lock_irqsave(&ev_file->lock, flags); + list_add_tail(&event_data->list, &ev_file->event_list); + spin_unlock_irqrestore(&ev_file->lock, flags); + wake_up_interruptible(&ev_file->poll_wait); + + return 0; +} + +static void dispatch_event_fd(struct list_head *fd_list, + const void *data) +{ + struct devx_event_subscription *item; + + list_for_each_entry_rcu(item, fd_list, xa_list) { + if (!get_file_rcu(item->filp)) + continue; + + if (item->eventfd) { + eventfd_signal(item->eventfd, 1); + fput(item->filp); + continue; + } + + deliver_event(item, data); + fput(item->filp); + } +} + +static int devx_event_notifier(struct notifier_block *nb, + unsigned long event_type, void *data) +{ + struct mlx5_devx_event_table *table; + struct mlx5_ib_dev *dev; + struct devx_event *event; + struct devx_obj_event *obj_event; + u16 obj_type = 0; + bool is_unaffiliated; + u32 obj_id; + + /* Explicit filtering to kernel events which may occur frequently */ + if (event_type == MLX5_EVENT_TYPE_CMD || + event_type == MLX5_EVENT_TYPE_PAGE_REQUEST) + return NOTIFY_OK; + + table = container_of(nb, struct mlx5_devx_event_table, devx_nb.nb); + dev = container_of(table, struct mlx5_ib_dev, devx_event_table); + is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type); + + if (!is_unaffiliated) + obj_type = get_event_obj_type(event_type, data); + + rcu_read_lock(); + event = xa_load(&table->event_xa, event_type | (obj_type << 16)); + if (!event) { + rcu_read_unlock(); + return NOTIFY_DONE; + } + + if (is_unaffiliated) { + dispatch_event_fd(&event->unaffiliated_list, data); + rcu_read_unlock(); + return NOTIFY_OK; + } + + obj_id = devx_get_obj_id_from_event(event_type, data); + obj_event = xa_load(&event->object_ids, obj_id); + if (!obj_event) { + rcu_read_unlock(); + return NOTIFY_DONE; + } + + dispatch_event_fd(&obj_event->obj_sub_list, data); + + rcu_read_unlock(); + return NOTIFY_OK; +} + +void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_devx_event_table *table = &dev->devx_event_table; + + xa_init(&table->event_xa); + mutex_init(&table->event_xa_lock); + MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY); + mlx5_eq_notifier_register(dev->mdev, &table->devx_nb); +} + +void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_devx_event_table *table = 
&dev->devx_event_table; + struct devx_event_subscription *sub, *tmp; + struct devx_event *event; + void *entry; + unsigned long id; + + mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb); + mutex_lock(&dev->devx_event_table.event_xa_lock); + xa_for_each(&table->event_xa, id, entry) { + event = entry; + list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list, + xa_list) + devx_cleanup_subscription(dev, sub); + kfree(entry); + } + mutex_unlock(&dev->devx_event_table.event_xa_lock); + xa_destroy(&table->event_xa); +} + static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { @@ -1719,6 +2539,149 @@ static const struct file_operations devx_async_cmd_event_fops = { .llseek = no_llseek, }; +static ssize_t devx_async_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct devx_async_event_file *ev_file = filp->private_data; + struct devx_event_subscription *event_sub; + struct devx_async_event_data *uninitialized_var(event); + int ret = 0; + size_t eventsz; + bool omit_data; + void *event_data; + + omit_data = ev_file->omit_data; + + spin_lock_irq(&ev_file->lock); + + if (ev_file->is_overflow_err) { + ev_file->is_overflow_err = 0; + spin_unlock_irq(&ev_file->lock); + return -EOVERFLOW; + } + + if (ev_file->is_destroyed) { + spin_unlock_irq(&ev_file->lock); + return -EIO; + } + + while (list_empty(&ev_file->event_list)) { + spin_unlock_irq(&ev_file->lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(ev_file->poll_wait, + (!list_empty(&ev_file->event_list) || + ev_file->is_destroyed))) { + return -ERESTARTSYS; + } + + spin_lock_irq(&ev_file->lock); + if (ev_file->is_destroyed) { + spin_unlock_irq(&ev_file->lock); + return -EIO; + } + } + + if (omit_data) { + event_sub = list_first_entry(&ev_file->event_list, + struct devx_event_subscription, + event_list); + eventsz = sizeof(event_sub->cookie); + event_data = &event_sub->cookie; + } else { + event = list_first_entry(&ev_file->event_list, + struct devx_async_event_data, list); + eventsz = sizeof(struct mlx5_eqe) + + sizeof(struct mlx5_ib_uapi_devx_async_event_hdr); + event_data = &event->hdr; + } + + if (eventsz > count) { + spin_unlock_irq(&ev_file->lock); + return -EINVAL; + } + + if (omit_data) + list_del_init(&event_sub->event_list); + else + list_del(&event->list); + + spin_unlock_irq(&ev_file->lock); + + if (copy_to_user(buf, event_data, eventsz)) + /* This points to an application issue, not a kernel concern */ + ret = -EFAULT; + else + ret = eventsz; + + if (!omit_data) + kfree(event); + return ret; +} + +static __poll_t devx_async_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct devx_async_event_file *ev_file = filp->private_data; + __poll_t pollflags = 0; + + poll_wait(filp, &ev_file->poll_wait, wait); + + spin_lock_irq(&ev_file->lock); + if (ev_file->is_destroyed) + pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + else if (!list_empty(&ev_file->event_list)) + pollflags = EPOLLIN | EPOLLRDNORM; + spin_unlock_irq(&ev_file->lock); + + return pollflags; +} + +static int devx_async_event_close(struct inode *inode, struct file *filp) +{ + struct devx_async_event_file *ev_file = filp->private_data; + struct devx_event_subscription *event_sub, *event_sub_tmp; + struct devx_async_event_data *entry, *tmp; + + mutex_lock(&ev_file->dev->devx_event_table.event_xa_lock); + /* delete the subscriptions which are related to this FD */ + list_for_each_entry_safe(event_sub, event_sub_tmp, + 
&ev_file->subscribed_events_list, file_list) { + devx_cleanup_subscription(ev_file->dev, event_sub); + if (event_sub->eventfd) + eventfd_ctx_put(event_sub->eventfd); + + list_del_rcu(&event_sub->file_list); + /* subscription may not be used by the read API any more */ + kfree_rcu(event_sub, rcu); + } + + mutex_unlock(&ev_file->dev->devx_event_table.event_xa_lock); + + /* free the pending events allocation */ + if (!ev_file->omit_data) { + spin_lock_irq(&ev_file->lock); + list_for_each_entry_safe(entry, tmp, + &ev_file->event_list, list) + kfree(entry); /* read can't come any more */ + spin_unlock_irq(&ev_file->lock); + } + + uverbs_close_fd(filp); + put_device(&ev_file->dev->ib_dev.dev); + return 0; +} + +static const struct file_operations devx_async_event_fops = { + .owner = THIS_MODULE, + .read = devx_async_event_read, + .poll = devx_async_event_poll, + .release = devx_async_event_close, + .llseek = no_llseek, +}; + static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj, enum rdma_remove_reason why) { @@ -1738,6 +2701,21 @@ static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj, return 0; }; +static int devx_hot_unplug_async_event_file(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + struct devx_async_event_file *ev_file = + container_of(uobj, struct devx_async_event_file, + uobj); + + spin_lock_irq(&ev_file->lock); + ev_file->is_destroyed = 1; + spin_unlock_irq(&ev_file->lock); + + wake_up_interruptible(&ev_file->poll_wait); + return 0; +}; + DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_UMEM_REG, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, @@ -1869,10 +2847,32 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_TYPE(u64), UA_MANDATORY)); +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT, + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE, + MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST, + UVERBS_ATTR_MIN_SIZE(sizeof(u16)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE, + UVERBS_ATTR_TYPE(u64), + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL)); + DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), - &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN)); + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)); DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup), @@ -1903,6 +2903,24 @@ DECLARE_UVERBS_NAMED_OBJECT( O_RDONLY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)); +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC, + UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE, + MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS, + enum mlx5_ib_uapi_devx_create_event_channel_flags, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, + UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file), + devx_hot_unplug_async_event_file, + &devx_async_event_fops, "[devx_async_event]", + O_RDONLY), + 
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)); + static bool devx_is_supported(struct ib_device *device) { struct mlx5_ib_dev *dev = to_mdev(device); @@ -1923,5 +2941,8 @@ const struct uapi_definition mlx5_ib_devx_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), {}, }; diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 6c529e6f3a01..348c1df69cdc 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -200,19 +200,33 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt, vl_15_dropped); } -static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num, +static int process_pma_cmd(struct mlx5_ib_dev *dev, u8 port_num, const struct ib_mad *in_mad, struct ib_mad *out_mad) { - int err; + struct mlx5_core_dev *mdev; + bool native_port = true; + u8 mdev_port_num; void *out_cnt; + int err; + mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); + if (!mdev) { + /* Fail to get the native port, likely due to 2nd port is still + * unaffiliated. In such case default to 1st port and attached + * PF device. + */ + native_port = false; + mdev = dev->mdev; + mdev_port_num = 1; + } /* Declaring support of extended counters */ if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { struct ib_class_port_info cpi = {}; cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; memcpy((out_mad->data + 40), &cpi, sizeof(cpi)); - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + goto done; } if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) { @@ -221,11 +235,13 @@ static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num, int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out); out_cnt = kvzalloc(sz, GFP_KERNEL); - if (!out_cnt) - return IB_MAD_RESULT_FAILURE; + if (!out_cnt) { + err = IB_MAD_RESULT_FAILURE; + goto done; + } err = mlx5_core_query_vport_counter(mdev, 0, 0, - port_num, out_cnt, sz); + mdev_port_num, out_cnt, sz); if (!err) pma_cnt_ext_assign(pma_cnt_ext, out_cnt); } else { @@ -234,20 +250,23 @@ static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num, int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); out_cnt = kvzalloc(sz, GFP_KERNEL); - if (!out_cnt) - return IB_MAD_RESULT_FAILURE; + if (!out_cnt) { + err = IB_MAD_RESULT_FAILURE; + goto done; + } - err = mlx5_core_query_ib_ppcnt(mdev, port_num, + err = mlx5_core_query_ib_ppcnt(mdev, mdev_port_num, out_cnt, sz); if (!err) pma_cnt_assign(pma_cnt, out_cnt); - } - + } kvfree(out_cnt); - if (err) - return IB_MAD_RESULT_FAILURE; - - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + err = err ? 
IB_MAD_RESULT_FAILURE : + IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +done: + if (native_port) + mlx5_ib_put_native_port_mdev(dev, port_num); + return err; } int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -259,8 +278,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - struct mlx5_core_dev *mdev; - u8 mdev_port_num; int ret; if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || @@ -269,19 +286,14 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, memset(out_mad->data, 0, sizeof(out_mad->data)); - mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); - if (!mdev) - return IB_MAD_RESULT_FAILURE; - - if (MLX5_CAP_GEN(mdev, vport_counters) && + if (MLX5_CAP_GEN(dev->mdev, vport_counters) && in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { - ret = process_pma_cmd(mdev, mdev_port_num, in_mad, out_mad); + ret = process_pma_cmd(dev, port_num, in_mad, out_mad); } else { ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, in_mad, out_mad); } - mlx5_ib_put_native_port_mdev(dev, port_num); return ret; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ba312bf59c7a..c2a5780cb394 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -52,6 +52,7 @@ #include <linux/mlx5/port.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> +#include <linux/mlx5/eswitch.h> #include <linux/list.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> @@ -888,7 +889,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { - props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; + props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER; /* At this stage no support for signature handover */ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | IB_PROT_T10DIF_TYPE_2 | @@ -1008,6 +1009,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); + props->max_pi_fast_reg_page_list_len = + props->max_fast_reg_page_list_len / 2; get_atomic_caps_qp(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); @@ -1043,15 +1046,19 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } if (MLX5_CAP_GEN(mdev, tag_matching)) { - props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE; props->tm_caps.max_num_tags = (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1; - props->tm_caps.flags = IB_TM_CAP_RC; props->tm_caps.max_ops = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); props->tm_caps.max_sge = MLX5_TM_MAX_SGE; } + if (MLX5_CAP_GEN(mdev, tag_matching) && + MLX5_CAP_GEN(mdev, rndv_offload_rc)) { + props->tm_caps.flags = IB_TM_CAP_RNDV_RC; + props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE; + } + if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) { props->cq_caps.max_cq_moderation_count = MLX5_MAX_CQ_COUNT; @@ -3257,11 +3264,14 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, int max_table_size; int num_entries; int num_groups; + bool esw_encap; u32 flags = 0; int priority; max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 
log_max_ft_size)); + esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != + DEVLINK_ESWITCH_ENCAP_MODE_NONE; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { enum mlx5_flow_namespace_type fn_type; @@ -3274,10 +3284,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, if (ft_type == MLX5_IB_FT_RX) { fn_type = MLX5_FLOW_NAMESPACE_BYPASS; prio = &dev->flow_db->prios[priority]; - if (!dev->is_rep && + if (!dev->is_rep && !esw_encap && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (!dev->is_rep && + if (!dev->is_rep && !esw_encap && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, reformat_l3_tunnel_to_l2)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; @@ -3287,7 +3297,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, log_max_ft_size)); fn_type = MLX5_FLOW_NAMESPACE_EGRESS; prio = &dev->flow_db->egress_prios[priority]; - if (!dev->is_rep && + if (!dev->is_rep && !esw_encap && MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } @@ -3923,6 +3933,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, struct mlx5_flow_namespace *ns = NULL; struct mlx5_ib_flow_prio *prio = NULL; int max_table_size = 0; + bool esw_encap; u32 flags = 0; int priority; @@ -3931,22 +3942,30 @@ _get_flow_table(struct mlx5_ib_dev *dev, else priority = ib_prio_to_core_prio(fs_matcher->priority, false); + esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != + DEVLINK_ESWITCH_ENCAP_MODE_NONE; if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2)) + reformat_l3_tunnel_to_l2) && + !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { max_table_size = BIT( MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { max_table_size = BIT( MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && + esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; priority = FDB_BYPASS_PATH; } @@ -4711,7 +4730,7 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) int err = -ENOMEM; struct ib_udata uhw = {.inlen = 0, .outlen = 0}; - pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); + pprops = kzalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) goto out; @@ -4725,7 +4744,6 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) goto out; } - memset(pprops, 0, sizeof(*pprops)); err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); if (err) { mlx5_ib_warn(dev, "query_port %d failed %d\n", @@ -4926,18 +4944,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) if (ret) goto error0; - devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL); - if (IS_ERR(devr->c0)) { - ret = PTR_ERR(devr->c0); + devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq); + if (!devr->c0) { + ret = -ENOMEM; goto error1; } - devr->c0->device = &dev->ib_dev; - 
devr->c0->uobject = NULL; - devr->c0->comp_handler = NULL; - devr->c0->event_handler = NULL; - devr->c0->cq_context = NULL; + + devr->c0->device = &dev->ib_dev; atomic_set(&devr->c0->usecnt, 0); + ret = mlx5_ib_create_cq(devr->c0, &cq_attr, NULL); + if (ret) + goto err_create_cq; + devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); if (IS_ERR(devr->x0)) { ret = PTR_ERR(devr->x0); @@ -5029,6 +5048,8 @@ error3: mlx5_ib_dealloc_xrcd(devr->x0, NULL); error2: mlx5_ib_destroy_cq(devr->c0, NULL); +err_create_cq: + kfree(devr->c0); error1: mlx5_ib_dealloc_pd(devr->p0, NULL); error0: @@ -5047,6 +5068,7 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) mlx5_ib_dealloc_xrcd(devr->x0, NULL); mlx5_ib_dealloc_xrcd(devr->x1, NULL); mlx5_ib_destroy_cq(devr->c0, NULL); + kfree(devr->c0); mlx5_ib_dealloc_pd(devr->p0, NULL); kfree(devr->p0); @@ -5459,7 +5481,8 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, struct mlx5_ib_port *port, - struct rdma_hw_stats *stats) + struct rdma_hw_stats *stats, + u16 set_id) { int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); void *out; @@ -5470,9 +5493,7 @@ static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, if (!out) return -ENOMEM; - ret = mlx5_core_query_q_counter(mdev, - port->cnts.set_id, 0, - out, outlen); + ret = mlx5_core_query_q_counter(mdev, set_id, 0, out, outlen); if (ret) goto free; @@ -5532,7 +5553,8 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, port->cnts.num_ext_ppcnt_counters; /* q_counters are per IB device, query the master mdev */ - ret = mlx5_ib_query_q_counters(dev->mdev, port, stats); + ret = mlx5_ib_query_q_counters(dev->mdev, port, stats, + port->cnts.set_id); if (ret) return ret; @@ -5568,6 +5590,68 @@ done: return num_counters; } +static struct rdma_hw_stats * +mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + struct mlx5_ib_port *port = &dev->port[counter->port - 1]; + + /* Q counters are in the beginning of all counters */ + return rdma_alloc_hw_stats_struct(port->cnts.names, + port->cnts.num_q_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + struct mlx5_ib_port *port = &dev->port[counter->port - 1]; + + return mlx5_ib_query_q_counters(dev->mdev, port, + counter->stats, counter->id); +} + +static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, + struct ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + u16 cnt_set_id = 0; + int err; + + if (!counter->id) { + err = mlx5_cmd_alloc_q_counter(dev->mdev, + &cnt_set_id, + MLX5_SHARED_RESOURCE_UID); + if (err) + return err; + counter->id = cnt_set_id; + } + + err = mlx5_ib_qp_set_counter(qp, counter); + if (err) + goto fail_set_counter; + + return 0; + +fail_set_counter: + mlx5_core_dealloc_q_counter(dev->mdev, cnt_set_id); + counter->id = 0; + + return err; +} + +static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) +{ + return mlx5_ib_qp_set_counter(qp, NULL); +} + +static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + + return mlx5_core_dealloc_q_counter(dev->mdev, counter->id); +} + static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, enum rdma_netdev_t type, struct rdma_netdev_alloc_params *params) @@ -6079,7 +6163,6 @@ static int 
mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); - dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->ib_dev.phys_port_cnt = dev->num_ports; @@ -6159,8 +6242,13 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) } static const struct ib_device_ops mlx5_ib_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_MLX5, + .uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION, + .add_gid = mlx5_ib_add_gid, .alloc_mr = mlx5_ib_alloc_mr, + .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity, .alloc_pd = mlx5_ib_alloc_pd, .alloc_ucontext = mlx5_ib_alloc_ucontext, .attach_mcast = mlx5_ib_mcg_attach, @@ -6190,6 +6278,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, .map_mr_sg = mlx5_ib_map_mr_sg, + .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi, .mmap = mlx5_ib_mmap, .modify_cq = mlx5_ib_modify_cq, .modify_device = mlx5_ib_modify_device, @@ -6214,6 +6303,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .resize_cq = mlx5_ib_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), @@ -6256,7 +6346,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; int err; - dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION; dev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -6325,7 +6414,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops); - dev->ib_dev.driver_id = RDMA_DRIVER_MLX5; ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) @@ -6340,6 +6428,8 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) mutex_init(&dev->lb.mutex); + dev->ib_dev.use_cq_dim = true; + return 0; } @@ -6487,6 +6577,11 @@ static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { .alloc_hw_stats = mlx5_ib_alloc_hw_stats, .get_hw_stats = mlx5_ib_get_hw_stats, + .counter_bind_qp = mlx5_ib_counter_bind_qp, + .counter_unbind_qp = mlx5_ib_counter_unbind_qp, + .counter_dealloc = mlx5_ib_counter_dealloc, + .counter_alloc_stats = mlx5_ib_counter_alloc_stats, + .counter_update_stats = mlx5_ib_counter_update_stats, }; static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) @@ -6607,15 +6702,19 @@ static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) int uid; uid = mlx5_ib_devx_create(dev, false); - if (uid > 0) + if (uid > 0) { dev->devx_whitelist_uid = uid; + mlx5_ib_devx_init_event_table(dev); + } return 0; } static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) { - if (dev->devx_whitelist_uid) + if (dev->devx_whitelist_uid) { + mlx5_ib_devx_cleanup_event_table(dev); mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); + } } void __mlx5_ib_remove(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 9f90be296ee0..fe1a76d8531c 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ 
b/drivers/infiniband/hw/mlx5/mem.c @@ -55,9 +55,10 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int i = 0; struct scatterlist *sg; int entry; - unsigned long page_shift = umem->page_shift; if (umem->is_odp) { + unsigned int page_shift = to_ib_umem_odp(umem)->page_shift; + *ncont = ib_umem_page_count(umem); *count = *ncont << (page_shift - PAGE_SHIFT); *shift = page_shift; @@ -67,15 +68,15 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, return; } - addr = addr >> page_shift; + addr = addr >> PAGE_SHIFT; tmp = (unsigned long)addr; m = find_first_bit(&tmp, BITS_PER_LONG); if (max_page_shift) - m = min_t(unsigned long, max_page_shift - page_shift, m); + m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> page_shift; - pfn = sg_dma_address(sg) >> page_shift; + len = sg_dma_len(sg) >> PAGE_SHIFT; + pfn = sg_dma_address(sg) >> PAGE_SHIFT; if (base + p != pfn) { /* If either the offset or the new * base are unaligned update m @@ -107,7 +108,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, *ncont = 0; } - *shift = page_shift + m; + *shift = PAGE_SHIFT + m; *count = i; } @@ -140,8 +141,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, size_t offset, size_t num_pages, __be64 *pas, int access_flags) { - unsigned long umem_page_shift = umem->page_shift; - int shift = page_shift - umem_page_shift; + int shift = page_shift - PAGE_SHIFT; int mask = (1 << shift) - 1; int i, k, idx; u64 cur = 0; @@ -165,7 +165,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, i = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> umem_page_shift; + len = sg_dma_len(sg) >> PAGE_SHIFT; base = sg_dma_address(sg); /* Skip elements below offset */ @@ -184,7 +184,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, for (; k < len; k++) { if (!(i & mask)) { - cur = base + (k << umem_page_shift); + cur = base + (k << PAGE_SHIFT); cur |= access_flags; idx = (i >> shift) - offset; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index ee73dc122d28..c482f19958b3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -431,9 +431,6 @@ struct mlx5_ib_qp { int create_type; - /* Store signature errors */ - bool signature_en; - struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; @@ -442,6 +439,10 @@ struct mlx5_ib_qp { u32 flags_en; /* storage for qp sub type when core qp type is IB_QPT_DRIVER */ enum ib_qp_type qp_sub_type; + /* A flag to indicate if there's a new counter is configured + * but not take effective + */ + u32 counter_pending; }; struct mlx5_ib_cq_buf { @@ -587,6 +588,9 @@ struct mlx5_ib_mr { void *descs; dma_addr_t desc_map; int ndescs; + int data_length; + int meta_ndescs; + int meta_length; int max_descs; int desc_size; int access_mode; @@ -605,6 +609,13 @@ struct mlx5_ib_mr { int access_flags; /* Needed for rereg MR */ struct mlx5_ib_mr *parent; + /* Needed for IB_MR_TYPE_INTEGRITY */ + struct mlx5_ib_mr *pi_mr; + struct mlx5_ib_mr *klm_mr; + struct mlx5_ib_mr *mtt_mr; + u64 data_iova; + u64 pi_iova; + atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; struct mlx5_async_work cb_work; @@ -929,6 +940,13 @@ struct mlx5_ib_pf_eq { mempool_t *pool; }; +struct mlx5_devx_event_table { + struct mlx5_nb devx_nb; + /* serialize updating the event_xa */ + 
struct mutex event_xa_lock; + struct xarray event_xa; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -978,6 +996,7 @@ struct mlx5_ib_dev { u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; struct mlx5_async_ctx async_ctx; + struct mlx5_devx_event_table devx_event_table; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1115,10 +1134,9 @@ int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, int buflen, size_t *bc); int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, int buflen, size_t *bc); -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); -int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); @@ -1148,8 +1166,15 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata); +struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_sg, + u32 max_num_meta_sg); int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); +int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, @@ -1201,7 +1226,7 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); +void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, @@ -1311,6 +1336,8 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); +void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev); +void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev); const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); extern const struct uapi_definition mlx5_ib_devx_defs[]; extern const struct uapi_definition mlx5_ib_flow_defs[]; @@ -1328,6 +1355,8 @@ static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) { return -EOPNOTSUPP; } static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} +static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {} +static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {} static inline bool 
mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) { @@ -1443,4 +1472,6 @@ void mlx5_ib_put_xlt_emergency_page(void); int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, bool dyn_bfreg); + +int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 83b452d977d4..20ece6e0b2fc 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1507,10 +1507,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, return 0; err: - if (mr->umem) { - ib_umem_release(mr->umem); - mr->umem = NULL; - } + ib_umem_release(mr->umem); + mr->umem = NULL; + clean_mr(dev, mr); return err; } @@ -1606,8 +1605,9 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ if (umem_odp->page_list) - mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem), - ib_umem_end(umem)); + mlx5_ib_invalidate_range(umem_odp, + ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); else mlx5_ib_free_implicit_mr(mr); /* @@ -1629,28 +1629,85 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) * remove the DMA mapping. */ mlx5_mr_cache_free(dev, mr); - if (umem) { - ib_umem_release(umem); + ib_umem_release(umem); + if (umem) atomic_sub(npages, &dev->mdev->priv.reg_pages); - } + if (!mr->allocated_from_cache) kfree(mr); } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { - dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr)); + struct mlx5_ib_mr *mmr = to_mmr(ibmr); + + if (ibmr->type == IB_MR_TYPE_INTEGRITY) { + dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr); + dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); + } + + dereg_mr(to_mdev(ibmr->device), mmr); + return 0; } -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) +static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, + int access_mode, int page_shift) +{ + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, translations_octword_size, ndescs); + MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, log_page_size, page_shift); +} + +static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int ndescs, int desc_size, int page_shift, + int access_mode, u32 *in, int inlen) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + int err; + + mr->access_mode = access_mode; + mr->desc_size = desc_size; + mr->max_descs = ndescs; + + err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); + if (err) + return err; + + mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); + + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + if (err) + goto err_free_descs; + + mr->mmkey.type = MLX5_MKEY_MR; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + + return 0; + +err_free_descs: + mlx5_free_priv_descs(mr); + return err; +} + +static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, + u32 max_num_sg, u32 max_num_meta_sg, + int desc_size, int access_mode) +{ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - int ndescs = 
ALIGN(max_num_sg, 4); + int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); + int page_shift = 0; struct mlx5_ib_mr *mr; - void *mkc; u32 *in; int err; @@ -1658,99 +1715,168 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, if (!mr) return ERR_PTR(-ENOMEM); + mr->ibmr.pd = pd; + mr->ibmr.device = pd->device; + in = kzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_free; } + if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) + page_shift = PAGE_SHIFT; + + err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift, + access_mode, in, inlen); + if (err) + goto err_free_in; + + mr->umem = NULL; + kfree(in); + + return mr; + +err_free_in: + kfree(in); +err_free: + kfree(mr); + return ERR_PTR(err); +} + +static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int ndescs, u32 *in, int inlen) +{ + return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt), + PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in, + inlen); +} + +static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int ndescs, u32 *in, int inlen) +{ + return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm), + 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); +} + +static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int max_num_sg, int max_num_meta_sg, + u32 *in, int inlen) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + u32 psv_index[2]; + void *mkc; + int err; + + mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); + if (!mr->sig) + return -ENOMEM; + + /* create mem & wire PSVs */ + err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); + if (err) + goto err_free_sig; + + mr->sig->psv_memory.psv_idx = psv_index[0]; + mr->sig->psv_wire.psv_idx = psv_index[1]; + + mr->sig->sig_status_checked = true; + mr->sig->sig_err_exists = false; + /* Next UMR, Arm SIGERR */ + ++mr->sig->sigerr_count; + mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, + sizeof(struct mlx5_klm), + MLX5_MKC_ACCESS_MODE_KLMS); + if (IS_ERR(mr->klm_mr)) { + err = PTR_ERR(mr->klm_mr); + goto err_destroy_psv; + } + mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, + sizeof(struct mlx5_mtt), + MLX5_MKC_ACCESS_MODE_MTT); + if (IS_ERR(mr->mtt_mr)) { + err = PTR_ERR(mr->mtt_mr); + goto err_free_klm_mr; + } + + /* Set bsf descriptors for mkey */ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, translations_octword_size, ndescs); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, bsf_en, 1); + MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); - if (mr_type == IB_MR_TYPE_MEM_REG) { - mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); - err = mlx5_alloc_priv_descs(pd->device, mr, - ndescs, sizeof(struct mlx5_mtt)); - if (err) - goto err_free_in; + err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0, + MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); + if (err) + goto err_free_mtt_mr; - mr->desc_size = sizeof(struct mlx5_mtt); - mr->max_descs = ndescs; - } else if (mr_type == IB_MR_TYPE_SG_GAPS) { - mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; + return 0; - err = mlx5_alloc_priv_descs(pd->device, mr, - ndescs, sizeof(struct mlx5_klm)); - if (err) - goto err_free_in; - mr->desc_size = sizeof(struct mlx5_klm); - mr->max_descs = ndescs; - } else if (mr_type == IB_MR_TYPE_SIGNATURE) { - u32 psv_index[2]; - - MLX5_SET(mkc, mkc, bsf_en, 
1); - MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); - mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); - if (!mr->sig) { - err = -ENOMEM; - goto err_free_in; - } +err_free_mtt_mr: + dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); + mr->mtt_mr = NULL; +err_free_klm_mr: + dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr); + mr->klm_mr = NULL; +err_destroy_psv: + if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); +err_free_sig: + kfree(mr->sig); - /* create mem & wire PSVs */ - err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, - 2, psv_index); - if (err) - goto err_free_sig; + return err; +} + +static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, u32 max_num_sg, + u32 max_num_meta_sg) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + int ndescs = ALIGN(max_num_sg, 4); + struct mlx5_ib_mr *mr; + u32 *in; + int err; - mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; - mr->sig->psv_memory.psv_idx = psv_index[0]; - mr->sig->psv_wire.psv_idx = psv_index[1]; + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); - mr->sig->sig_status_checked = true; - mr->sig->sig_err_exists = false; - /* Next UMR, Arm SIGERR */ - ++mr->sig->sigerr_count; - } else { + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + mr->ibmr.device = pd->device; + mr->umem = NULL; + + switch (mr_type) { + case IB_MR_TYPE_MEM_REG: + err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen); + break; + case IB_MR_TYPE_SG_GAPS: + err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen); + break; + case IB_MR_TYPE_INTEGRITY: + err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg, + max_num_meta_sg, in, inlen); + break; + default: mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); err = -EINVAL; - goto err_free_in; } - MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, umr_en, 1); - - mr->ibmr.device = pd->device; - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) - goto err_destroy_psv; + goto err_free_in; - mr->mmkey.type = MLX5_MKEY_MR; - mr->ibmr.lkey = mr->mmkey.key; - mr->ibmr.rkey = mr->mmkey.key; - mr->umem = NULL; kfree(in); return &mr->ibmr; -err_destroy_psv: - if (mr->sig) { - if (mlx5_core_destroy_psv(dev->mdev, - mr->sig->psv_memory.psv_idx)) - mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", - mr->sig->psv_memory.psv_idx); - if (mlx5_core_destroy_psv(dev->mdev, - mr->sig->psv_wire.psv_idx)) - mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", - mr->sig->psv_wire.psv_idx); - } - mlx5_free_priv_descs(mr); -err_free_sig: - kfree(mr->sig); err_free_in: kfree(in); err_free: @@ -1758,6 +1884,19 @@ err_free: return ERR_PTR(err); } +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) +{ + return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); +} + +struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_sg, u32 max_num_meta_sg) +{ + return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg, + max_num_meta_sg); +} + struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct 
ib_udata *udata) { @@ -1887,16 +2026,53 @@ done: } static int +mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + unsigned int sg_offset = 0; + int n = 0; + + mr->meta_length = 0; + if (data_sg_nents == 1) { + n++; + mr->ndescs = 1; + if (data_sg_offset) + sg_offset = *data_sg_offset; + mr->data_length = sg_dma_len(data_sg) - sg_offset; + mr->data_iova = sg_dma_address(data_sg) + sg_offset; + if (meta_sg_nents == 1) { + n++; + mr->meta_ndescs = 1; + if (meta_sg_offset) + sg_offset = *meta_sg_offset; + else + sg_offset = 0; + mr->meta_length = sg_dma_len(meta_sg) - sg_offset; + mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; + } + ibmr->length = mr->data_length + mr->meta_length; + } + + return n; +} + +static int mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, struct scatterlist *sgl, unsigned short sg_nents, - unsigned int *sg_offset_p) + unsigned int *sg_offset_p, + struct scatterlist *meta_sgl, + unsigned short meta_sg_nents, + unsigned int *meta_sg_offset_p) { struct scatterlist *sg = sgl; struct mlx5_klm *klms = mr->descs; unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; u32 lkey = mr->ibmr.pd->local_dma_lkey; - int i; + int i, j = 0; mr->ibmr.iova = sg_dma_address(sg) + sg_offset; mr->ibmr.length = 0; @@ -1911,12 +2087,36 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, sg_offset = 0; } - mr->ndescs = i; if (sg_offset_p) *sg_offset_p = sg_offset; - return i; + mr->ndescs = i; + mr->data_length = mr->ibmr.length; + + if (meta_sg_nents) { + sg = meta_sgl; + sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0; + for_each_sg(meta_sgl, sg, meta_sg_nents, j) { + if (unlikely(i + j >= mr->max_descs)) + break; + klms[i + j].va = cpu_to_be64(sg_dma_address(sg) + + sg_offset); + klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) - + sg_offset); + klms[i + j].key = cpu_to_be32(lkey); + mr->ibmr.length += sg_dma_len(sg) - sg_offset; + + sg_offset = 0; + } + if (meta_sg_offset_p) + *meta_sg_offset_p = sg_offset; + + mr->meta_ndescs = j; + mr->meta_length = mr->ibmr.length - mr->data_length; + } + + return i + j; } static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) @@ -1933,6 +2133,181 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) return 0; } +static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + __be64 *descs; + + if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs)) + return -ENOMEM; + + descs = mr->descs; + descs[mr->ndescs + mr->meta_ndescs++] = + cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); + + return 0; +} + +static int +mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct mlx5_ib_mr *pi_mr = mr->mtt_mr; + int n; + + pi_mr->ndescs = 0; + pi_mr->meta_ndescs = 0; + pi_mr->meta_length = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + pi_mr->ibmr.page_size = ibmr->page_size; + n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset, + mlx5_set_page); + if (n != data_sg_nents) + return n; + + pi_mr->data_iova = pi_mr->ibmr.iova; + pi_mr->data_length = pi_mr->ibmr.length; + pi_mr->ibmr.length = pi_mr->data_length; + ibmr->length = 
pi_mr->data_length; + + if (meta_sg_nents) { + u64 page_mask = ~((u64)ibmr->page_size - 1); + u64 iova = pi_mr->data_iova; + + n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents, + meta_sg_offset, mlx5_set_page_pi); + + pi_mr->meta_length = pi_mr->ibmr.length; + /* + * PI address for the HW is the offset of the metadata address + * relative to the first data page address. + * It equals to first data page address + size of data pages + + * metadata offset at the first metadata page + */ + pi_mr->pi_iova = (iova & page_mask) + + pi_mr->ndescs * ibmr->page_size + + (pi_mr->ibmr.iova & ~page_mask); + /* + * In order to use one MTT MR for data and metadata, we register + * also the gaps between the end of the data and the start of + * the metadata (the sig MR will verify that the HW will access + * to right addresses). This mapping is safe because we use + * internal mkey for the registration. + */ + pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova; + pi_mr->ibmr.iova = iova; + ibmr->length += pi_mr->meta_length; + } + + ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + return n; +} + +static int +mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct mlx5_ib_mr *pi_mr = mr->klm_mr; + int n; + + pi_mr->ndescs = 0; + pi_mr->meta_ndescs = 0; + pi_mr->meta_length = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset, + meta_sg, meta_sg_nents, meta_sg_offset); + + ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + /* This is zero-based memory region */ + pi_mr->data_iova = 0; + pi_mr->ibmr.iova = 0; + pi_mr->pi_iova = pi_mr->data_length; + ibmr->length = pi_mr->ibmr.length; + + return n; +} + +int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct mlx5_ib_mr *pi_mr = NULL; + int n; + + WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); + + mr->ndescs = 0; + mr->data_length = 0; + mr->data_iova = 0; + mr->meta_ndescs = 0; + mr->pi_iova = 0; + /* + * As a performance optimization, if possible, there is no need to + * perform UMR operation to register the data/metadata buffers. + * First try to map the sg lists to PA descriptors with local_dma_lkey. + * Fallback to UMR only in case of a failure. + */ + n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, meta_sg_nents, + meta_sg_offset); + if (n == data_sg_nents + meta_sg_nents) + goto out; + /* + * As a performance optimization, if possible, there is no need to map + * the sg lists to KLM descriptors. First try to map the sg lists to MTT + * descriptors and fallback to KLM only in case of a failure. + * It's more efficient for the HW to work with MTT descriptors + * (especially in high load). + * Use KLM (indirect access) only if it's mandatory. 
+ */ + pi_mr = mr->mtt_mr; + n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, meta_sg_nents, + meta_sg_offset); + if (n == data_sg_nents + meta_sg_nents) + goto out; + + pi_mr = mr->klm_mr; + n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, meta_sg_nents, + meta_sg_offset); + if (unlikely(n != data_sg_nents + meta_sg_nents)) + return -ENOMEM; + +out: + /* This is zero-based memory region */ + ibmr->iova = 0; + mr->pi_mr = pi_mr; + if (pi_mr) + ibmr->sig_attrs->meta_length = pi_mr->meta_length; + else + ibmr->sig_attrs->meta_length = mr->meta_length; + + return 0; +} + int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { @@ -1946,7 +2321,8 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, DMA_TO_DEVICE); if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) - n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset); + n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0, + NULL); else n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx5_set_page); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 831c450b271a..5b642d81e617 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -150,7 +150,7 @@ static struct ib_umem_odp *odp_lookup(u64 start, u64 length, if (!rb) goto not_found; odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (ib_umem_start(&odp->umem) > start + length) + if (ib_umem_start(odp) > start + length) goto not_found; } not_found: @@ -200,7 +200,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, static void mr_leaf_free_action(struct work_struct *work) { struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); - int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT; + int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; mr->parent = NULL; @@ -224,7 +224,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) - 1; u64 idx = 0, blk_start_idx = 0; - struct ib_umem *umem; int in_block = 0; u64 addr; @@ -232,15 +231,14 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, pr_err("invalidation called on NULL umem or non-ODP umem\n"); return; } - umem = &umem_odp->umem; mr = umem_odp->private; if (!mr || !mr->ibmr.pd) return; - start = max_t(u64, ib_umem_start(umem), start); - end = min_t(u64, ib_umem_end(umem), end); + start = max_t(u64, ib_umem_start(umem_odp), start); + end = min_t(u64, ib_umem_end(umem_odp), end); /* * Iteration one - zap the HW's MTTs. The notifiers_count ensures that @@ -249,8 +247,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, * but they will write 0s as well, so no difference in the end result. */ - for (addr = start; addr < end; addr += BIT(umem->page_shift)) { - idx = (addr - ib_umem_start(umem)) >> umem->page_shift; + for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) { + idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; /* * Strive to write the MTTs in chunks, but avoid overwriting * non-existing MTTs. 
The huristic here can be improved to @@ -544,13 +542,12 @@ static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end, void *cookie) { struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie; - struct ib_umem *umem = &umem_odp->umem; if (mr->parent != imr) return 0; - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem), - ib_umem_end(umem)); + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); if (umem_odp->dying) return 0; @@ -602,9 +599,9 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, } next_mr: - size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt); + size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt); - page_shift = mr->umem->page_shift; + page_shift = odp->page_shift; page_mask = ~(BIT(page_shift) - 1); start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 768c7e81f688..2a97619ed603 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -34,6 +34,7 @@ #include <rdma/ib_umem.h> #include <rdma/ib_cache.h> #include <rdma/ib_user_verbs.h> +#include <rdma/rdma_counter.h> #include <linux/mlx5/fs.h> #include "mlx5_ib.h" #include "ib_rep.h" @@ -442,9 +443,9 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) } size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN && + if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN && ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE) - return MLX5_SIG_WQE_SIZE; + return MLX5_SIG_WQE_SIZE; else return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } @@ -496,9 +497,6 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) - qp->signature_en = true; - wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { @@ -790,8 +788,7 @@ static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, atomic_dec(&dev->delay_drop.rqs_cnt); mlx5_ib_db_unmap_user(context, &rwq->db); - if (rwq->umem) - ib_umem_release(rwq->umem); + ib_umem_release(rwq->umem); } static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, @@ -977,8 +974,7 @@ err_free: kvfree(*in); err_umem: - if (ubuffer->umem) - ib_umem_release(ubuffer->umem); + ib_umem_release(ubuffer->umem); err_bfreg: if (bfregn != MLX5_IB_INVALID_BFREG) @@ -997,8 +993,7 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, ibucontext); mlx5_ib_db_unmap_user(context, &qp->db); - if (base->ubuffer.umem) - ib_umem_release(base->ubuffer.umem); + ib_umem_release(base->ubuffer.umem); /* * Free only the BFREGs which are handled by the kernel. 
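The umem-release hunks just above all follow one pattern: the explicit "if (...->umem)" guard is dropped and ib_umem_release() is called unconditionally, which only works if ib_umem_release() itself tolerates a NULL umem. A minimal sketch of that calling convention (hypothetical helper, not the driver's exact code; it assumes the NULL-tolerant ib_umem_release() from <rdma/ib_umem.h> that these hunks rely on):

	#include <rdma/ib_umem.h>

	/* Hypothetical unwind helper: with a NULL-tolerant ib_umem_release()
	 * the caller no longer needs the "if (umem)" check carried by the
	 * removed lines above, so error and teardown paths collapse into a
	 * single unconditional call. */
	static void example_release_umem(struct ib_umem *umem)
	{
		ib_umem_release(umem);	/* assumed to be a no-op for umem == NULL */
	}

The payoff is kfree()-style symmetry: every unwind path can release the umem without branching, which is why the guards disappear from destroy_user_rq(), the create_user_rq() error path, and destroy_qp_user() in the hunks above.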
@@ -1042,7 +1037,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, void *qpc; int err; - if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | + if (init_attr->create_flags & ~(IB_QP_CREATE_INTEGRITY_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_IPOIB_UD_LSO | IB_QP_CREATE_NETIF_QP | @@ -3386,6 +3381,35 @@ static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, return tx_port_affinity; } +static int __mlx5_ib_qp_set_counter(struct ib_qp *qp, + struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_qp_context context = {}; + struct mlx5_ib_port *mibport = NULL; + struct mlx5_ib_qp_base *base; + u32 set_id; + + if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) + return 0; + + if (counter) { + set_id = counter->id; + } else { + mibport = &dev->port[mqp->port - 1]; + set_id = mibport->cnts.set_id; + } + + base = &mqp->trans_qp.base; + context.qp_counter_set_usr_page &= cpu_to_be32(0xffffff); + context.qp_counter_set_usr_page |= cpu_to_be32(set_id << 24); + return mlx5_core_qp_modify(dev->mdev, + MLX5_CMD_OP_RTS2RTS_QP, + MLX5_QP_OPTPAR_COUNTER_SET_ID, + &context, &base->mqp); +} + static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, @@ -3439,6 +3463,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_port *mibport = NULL; enum mlx5_qp_state mlx5_cur, mlx5_new; enum mlx5_qp_optpar optpar; + u32 set_id = 0; int mlx5_st; int err; u16 op; @@ -3601,8 +3626,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, port_num = 0; mibport = &dev->port[port_num]; + if (ibqp->counter) + set_id = ibqp->counter->id; + else + set_id = mibport->cnts.set_id; context->qp_counter_set_usr_page |= - cpu_to_be32((u32)(mibport->cnts.set_id) << 24); + cpu_to_be32(set_id << 24); } if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) @@ -3630,7 +3659,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, raw_qp_param.operation = op; if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { - raw_qp_param.rq_q_ctr_id = mibport->cnts.set_id; + raw_qp_param.rq_q_ctr_id = set_id; raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; } @@ -3707,6 +3736,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->db.db[MLX5_SND_DBR] = 0; } + if ((new_state == IB_QPS_RTS) && qp->counter_pending) { + err = __mlx5_ib_qp_set_counter(ibqp, ibqp->counter); + if (!err) + qp->counter_pending = 0; + } + out: kfree(context); return err; @@ -4170,15 +4205,13 @@ static __be64 sig_mkey_mask(void) } static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, - struct mlx5_ib_mr *mr, bool umr_inline) + struct mlx5_ib_mr *mr, u8 flags) { - int size = mr->ndescs * mr->desc_size; + int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; memset(umr, 0, sizeof(*umr)); - umr->flags = MLX5_UMR_CHECK_NOT_FREE; - if (umr_inline) - umr->flags |= MLX5_UMR_INLINE; + umr->flags = flags; umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); umr->mkey_mask = frwr_mkey_mask(); } @@ -4305,7 +4338,7 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, struct mlx5_ib_mr *mr, u32 key, int access) { - int ndescs = ALIGN(mr->ndescs, 8) >> 1; + int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1; memset(seg, 0, sizeof(*seg)); @@ -4356,7 +4389,7 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, struct mlx5_ib_mr *mr, struct mlx5_ib_pd *pd) { - int bcount = mr->desc_size * mr->ndescs; + int 
bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs); dseg->addr = cpu_to_be64(mr->desc_map); dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); @@ -4549,23 +4582,37 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, return 0; } -static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, - struct mlx5_ib_qp *qp, void **seg, - int *size, void **cur_edge) +static int set_sig_data_segment(const struct ib_send_wr *send_wr, + struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { - struct ib_sig_attrs *sig_attrs = wr->sig_attrs; - struct ib_mr *sig_mr = wr->sig_mr; struct mlx5_bsf *bsf; - u32 data_len = wr->wr.sg_list->length; - u32 data_key = wr->wr.sg_list->lkey; - u64 data_va = wr->wr.sg_list->addr; + u32 data_len; + u32 data_key; + u64 data_va; + u32 prot_len = 0; + u32 prot_key = 0; + u64 prot_va = 0; + bool prot = false; int ret; int wqe_size; + struct mlx5_ib_mr *mr = to_mmr(sig_mr); + struct mlx5_ib_mr *pi_mr = mr->pi_mr; + + data_len = pi_mr->data_length; + data_key = pi_mr->ibmr.lkey; + data_va = pi_mr->data_iova; + if (pi_mr->meta_ndescs) { + prot_len = pi_mr->meta_length; + prot_key = pi_mr->ibmr.lkey; + prot_va = pi_mr->pi_iova; + prot = true; + } - if (!wr->prot || - (data_key == wr->prot->lkey && - data_va == wr->prot->addr && - data_len == wr->prot->length)) { + if (!prot || (data_key == prot_key && data_va == prot_va && + data_len == prot_len)) { /** * Source domain doesn't contain signature information * or data and protection are interleaved in memory. @@ -4599,8 +4646,6 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, struct mlx5_stride_block_ctrl_seg *sblock_ctrl; struct mlx5_stride_block_entry *data_sentry; struct mlx5_stride_block_entry *prot_sentry; - u32 prot_key = wr->prot->lkey; - u64 prot_va = wr->prot->addr; u16 block_size = sig_attrs->mem.sig.dif.pi_interval; int prot_size; @@ -4650,17 +4695,15 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, } static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, - const struct ib_sig_handover_wr *wr, u32 size, - u32 length, u32 pdn) + struct ib_mr *sig_mr, int access_flags, + u32 size, u32 length, u32 pdn) { - struct ib_mr *sig_mr = wr->sig_mr; u32 sig_key = sig_mr->rkey; u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; memset(seg, 0, sizeof(*seg)); - seg->flags = get_umr_flags(wr->access_flags) | - MLX5_MKC_ACCESS_MODE_KLMS; + seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); @@ -4680,49 +4723,50 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, umr->mkey_mask = sig_mkey_mask(); } - -static int set_sig_umr_wr(const struct ib_send_wr *send_wr, - struct mlx5_ib_qp *qp, void **seg, int *size, - void **cur_edge) +static int set_pi_umr_wr(const struct ib_send_wr *send_wr, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { - const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); - struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); + const struct ib_reg_wr *wr = reg_wr(send_wr); + struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr); + struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr; + struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs; u32 pdn = get_pd(qp)->pdn; u32 xlt_size; int region_len, ret; - if (unlikely(wr->wr.num_sge != 1) || - unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) || - 
unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + if (unlikely(send_wr->num_sge != 0) || + unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) || unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; /* length of the protected region, data + protection */ - region_len = wr->wr.sg_list->length; - if (wr->prot && - (wr->prot->lkey != wr->wr.sg_list->lkey || - wr->prot->addr != wr->wr.sg_list->addr || - wr->prot->length != wr->wr.sg_list->length)) - region_len += wr->prot->length; + region_len = pi_mr->ibmr.length; /** * KLM octoword size - if protection was provided * then we use strided block format (3 octowords), * else we use single KLM (1 octoword) **/ - xlt_size = wr->prot ? 0x30 : sizeof(struct mlx5_klm); + if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE) + xlt_size = 0x30; + else + xlt_size = sizeof(struct mlx5_klm); set_sig_umr_segment(*seg, xlt_size); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn); + set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len, + pdn); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - ret = set_sig_data_segment(wr, qp, seg, size, cur_edge); + ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size, + cur_edge); if (ret) return ret; @@ -4759,12 +4803,14 @@ static int set_psv_wr(struct ib_sig_domain *domain, static int set_reg_wr(struct mlx5_ib_qp *qp, const struct ib_reg_wr *wr, - void **seg, int *size, void **cur_edge) + void **seg, int *size, void **cur_edge, + bool check_not_free) { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - size_t mr_list_size = mr->ndescs * mr->desc_size; + int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; + u8 flags = 0; if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), @@ -4772,7 +4818,12 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, return -EINVAL; } - set_reg_umr_seg(*seg, mr, umr_inline); + if (check_not_free) + flags |= MLX5_UMR_CHECK_NOT_FREE; + if (umr_inline) + flags |= MLX5_UMR_INLINE; + + set_reg_umr_seg(*seg, mr, flags); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); @@ -4898,8 +4949,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = dev->mdev; + struct ib_reg_wr reg_pi_wr; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; + struct mlx5_ib_mr *pi_mr; + struct mlx5_ib_mr pa_pi_mr; + struct ib_sig_attrs *sig_attrs; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; void *cur_edge; @@ -4953,7 +5008,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - if (wr->opcode == IB_WR_REG_MR) { + if (wr->opcode == IB_WR_REG_MR || + wr->opcode == IB_WR_REG_MR_INTEGRITY) { fence = dev->umr_fence; next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; } else { @@ -5003,7 +5059,7 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = 
cpu_to_be32(reg_wr(wr)->key); err = set_reg_wr(qp, reg_wr(wr), &seg, &size, - &cur_edge); + &cur_edge, true); if (err) { *bad_wr = wr; goto out; @@ -5011,26 +5067,82 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, num_sge = 0; break; - case IB_WR_REG_SIG_MR: - qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; - mr = to_mmr(sig_handover_wr(wr)->sig_mr); - + case IB_WR_REG_MR_INTEGRITY: + qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY; + + mr = to_mmr(reg_wr(wr)->mr); + pi_mr = mr->pi_mr; + + if (pi_mr) { + memset(®_pi_wr, 0, + sizeof(struct ib_reg_wr)); + + reg_pi_wr.mr = &pi_mr->ibmr; + reg_pi_wr.access = reg_wr(wr)->access; + reg_pi_wr.key = pi_mr->ibmr.rkey; + + ctrl->imm = cpu_to_be32(reg_pi_wr.key); + /* UMR for data + prot registration */ + err = set_reg_wr(qp, ®_pi_wr, &seg, + &size, &cur_edge, + false); + if (err) { + *bad_wr = wr; + goto out; + } + finish_wqe(qp, ctrl, seg, size, + cur_edge, idx, wr->wr_id, + nreq, fence, + MLX5_OPCODE_UMR); + + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, &cur_edge, + nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + } else { + memset(&pa_pi_mr, 0, + sizeof(struct mlx5_ib_mr)); + /* No UMR, use local_dma_lkey */ + pa_pi_mr.ibmr.lkey = + mr->ibmr.pd->local_dma_lkey; + + pa_pi_mr.ndescs = mr->ndescs; + pa_pi_mr.data_length = mr->data_length; + pa_pi_mr.data_iova = mr->data_iova; + if (mr->meta_ndescs) { + pa_pi_mr.meta_ndescs = + mr->meta_ndescs; + pa_pi_mr.meta_length = + mr->meta_length; + pa_pi_mr.pi_iova = mr->pi_iova; + } + + pa_pi_mr.ibmr.length = mr->ibmr.length; + mr->pi_mr = &pa_pi_mr; + } ctrl->imm = cpu_to_be32(mr->ibmr.rkey); - err = set_sig_umr_wr(wr, qp, &seg, &size, - &cur_edge); + /* UMR for sig MR */ + err = set_pi_umr_wr(wr, qp, &seg, &size, + &cur_edge); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, fence, MLX5_OPCODE_UMR); + /* * SET_PSV WQEs are not signaled and solicited * on error */ + sig_attrs = mr->ibmr.sig_attrs; err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, nreq, false, true); @@ -5040,19 +5152,18 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, *bad_wr = wr; goto out; } - - err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem, - mr->sig->psv_memory.psv_idx, &seg, - &size); + err = set_psv_wr(&sig_attrs->mem, + mr->sig->psv_memory.psv_idx, + &seg, &size); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, fence, + wr->wr_id, nreq, next_fence, MLX5_OPCODE_SET_PSV); + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, nreq, false, true); @@ -5062,20 +5173,20 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, *bad_wr = wr; goto out; } - - err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, - mr->sig->psv_wire.psv_idx, &seg, - &size); + err = set_psv_wr(&sig_attrs->wire, + mr->sig->psv_wire.psv_idx, + &seg, &size); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, fence, + wr->wr_id, nreq, next_fence, MLX5_OPCODE_SET_PSV); - qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + + qp->next_fence = + MLX5_FENCE_MODE_INITIATOR_SMALL; num_sge = 0; goto skip_psv; @@ -6047,7 +6158,7 @@ err: return ERR_PTR(err); } -int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) +void mlx5_ib_destroy_wq(struct ib_wq *wq, 
struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); @@ -6055,8 +6166,6 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); destroy_user_rq(dev, wq->pd, rwq, udata); kfree(rwq); - - return 0; } struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, @@ -6367,3 +6476,34 @@ void mlx5_ib_drain_rq(struct ib_qp *qp) handle_drain_completion(cq, &rdrain, dev); } + +/** + * Bind a qp to a counter. If @counter is NULL then bind the qp to + * the default counter + */ +int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter) +{ + struct mlx5_ib_qp *mqp = to_mqp(qp); + int err = 0; + + mutex_lock(&mqp->mutex); + if (mqp->state == IB_QPS_RESET) { + qp->counter = counter; + goto out; + } + + if (mqp->state == IB_QPS_RTS) { + err = __mlx5_ib_qp_set_counter(qp, counter); + if (!err) + qp->counter = counter; + + goto out; + } + + mqp->counter_pending = 1; + qp->counter = counter; + +out: + mutex_unlock(&mqp->mutex); + return err; +} diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index aaf10dd5364d..aef1d274a14e 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -214,8 +214,6 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, dma_unmap_addr_set(&buf->direct, mapping, t); - memset(buf->direct.buf, 0, size); - while (t & ((1 << shift) - 1)) { --shift; npages *= 2; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 8ff0e90d7564..edccfd6e178f 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -482,7 +482,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); if (ret < 0) { - put_page(pages[0]); + put_user_page(pages[0]); goto out; } @@ -490,7 +490,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, mthca_uarc_virt(dev, uar, i)); if (ret) { pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); - put_page(sg_page(&db_tab->page[i].mem)); + put_user_page(sg_page(&db_tab->page[i].mem)); goto out; } @@ -556,7 +556,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, if (db_tab->page[i].uvirt) { mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1); pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); - put_page(sg_page(&db_tab->page[i].mem)); + put_user_page(sg_page(&db_tab->page[i].mem)); } } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 4f40dfedf920..23554d8bf241 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -601,10 +601,11 @@ static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) return 0; } -static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +static int mthca_create_cq(struct ib_cq *ibcq, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; struct mthca_create_cq ucmd; struct mthca_cq *cq; @@ -614,20 +615,20 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, udata, struct mthca_ucontext, ibucontext); if 
(attr->flags) - return ERR_PTR(-EINVAL); + return -EINVAL; if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) - return ERR_PTR(-EINVAL); + return -EINVAL; if (udata) { - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) - return ERR_PTR(-EFAULT); + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; err = mthca_map_user_db(to_mdev(ibdev), &context->uar, context->db_tab, ucmd.set_db_index, ucmd.set_db_page); if (err) - return ERR_PTR(err); + return err; err = mthca_map_user_db(to_mdev(ibdev), &context->uar, context->db_tab, ucmd.arm_db_index, @@ -636,11 +637,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, goto err_unmap_set; } - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) { - err = -ENOMEM; - goto err_unmap_arm; - } + cq = to_mcq(ibcq); if (udata) { cq->buf.mr.ibmr.lkey = ucmd.lkey; @@ -655,20 +652,17 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, udata ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, cq); if (err) - goto err_free; + goto err_unmap_arm; if (udata && ib_copy_to_udata(udata, &cq->cqn, sizeof(__u32))) { mthca_free_cq(to_mdev(ibdev), cq); err = -EFAULT; - goto err_free; + goto err_unmap_arm; } cq->resize_buf = NULL; - return &cq->ibcq; - -err_free: - kfree(cq); + return 0; err_unmap_arm: if (udata) @@ -680,7 +674,7 @@ err_unmap_set: mthca_unmap_user_db(to_mdev(ibdev), &context->uar, context->db_tab, ucmd.set_db_index); - return ERR_PTR(err); + return err; } static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq, @@ -804,7 +798,7 @@ out: return ret; } -static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { if (udata) { struct mthca_ucontext *context = @@ -823,9 +817,6 @@ static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) to_mcq(cq)->set_ci_db_index); } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); - kfree(cq); - - return 0; } static inline u32 convert_access(int acc) @@ -962,8 +953,7 @@ static int mthca_dereg_mr(struct ib_mr *mr, struct ib_udata *udata) struct mthca_mr *mmr = to_mmr(mr); mthca_free_mr(to_mdev(mr->device), mmr); - if (mmr->umem) - ib_umem_release(mmr->umem); + ib_umem_release(mmr->umem); kfree(mmr); return 0; @@ -1153,6 +1143,11 @@ static void get_dev_fw_str(struct ib_device *device, char *str) } static const struct ib_device_ops mthca_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_MTHCA, + .uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION, + .uverbs_no_driver_id_binding = 1, + .alloc_pd = mthca_alloc_pd, .alloc_ucontext = mthca_alloc_ucontext, .attach_mcast = mthca_multicast_attach, @@ -1185,6 +1180,7 @@ static const struct ib_device_ops mthca_dev_ops = { .resize_cq = mthca_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, mthca_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, mthca_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, mthca_ucontext, ibucontext), }; @@ -1243,9 +1239,6 @@ int mthca_register_device(struct mthca_dev *dev) if (ret) return ret; - dev->ib_dev.owner = THIS_MODULE; - - dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION; dev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -1303,7 +1296,6 @@ int mthca_register_device(struct mthca_dev *dev) mutex_init(&dev->cap_mask_mutex); rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group); - dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA; ret = ib_register_device(&dev->ib_dev, "mthca%d"); if (ret) return ret; diff 
--git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig deleted file mode 100644 index 8245353e8106..000000000000 --- a/drivers/infiniband/hw/nes/Kconfig +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config INFINIBAND_NES - tristate "NetEffect RNIC Driver" - depends on PCI && INET - select LIBCRC32C - ---help--- - This is the RDMA Network Interface Card (RNIC) driver for - NetEffect Ethernet Cluster Server Adapters. - -config INFINIBAND_NES_DEBUG - bool "Verbose debugging output" - depends on INFINIBAND_NES - default n - ---help--- - This option enables debug messages from the NetEffect RNIC - driver. Select this if you are diagnosing a problem. diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile deleted file mode 100644 index 239689ad1f9e..000000000000 --- a/drivers/infiniband/hw/nes/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o - -iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o nes_mgt.o diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c deleted file mode 100644 index 29b324726ea6..000000000000 --- a/drivers/infiniband/hw/nes/nes.c +++ /dev/null @@ -1,1211 +0,0 @@ -/* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/ethtool.h> -#include <linux/mii.h> -#include <linux/if_vlan.h> -#include <linux/crc32.h> -#include <linux/in.h> -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/if_arp.h> -#include <linux/highmem.h> -#include <linux/slab.h> -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/byteorder.h> -#include <rdma/ib_smi.h> -#include <rdma/ib_verbs.h> -#include <rdma/ib_pack.h> -#include <rdma/iw_cm.h> - -#include "nes.h" - -#include <net/netevent.h> -#include <net/neighbour.h> -#include <linux/route.h> -#include <net/ip_fib.h> - -MODULE_AUTHOR("NetEffect"); -MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver"); -MODULE_LICENSE("Dual BSD/GPL"); - -int interrupt_mod_interval = 0; - -/* Interoperability */ -int mpa_version = 1; -module_param(mpa_version, int, 0644); -MODULE_PARM_DESC(mpa_version, "MPA version to be used int MPA Req/Resp (0 or 1)"); - -/* Interoperability */ -int disable_mpa_crc = 0; -module_param(disable_mpa_crc, int, 0644); -MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC"); - -unsigned int nes_drv_opt = NES_DRV_OPT_DISABLE_INT_MOD | NES_DRV_OPT_ENABLE_PAU; -module_param(nes_drv_opt, int, 0644); -MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters"); - -unsigned int nes_debug_level = 0; -module_param_named(debug_level, nes_debug_level, uint, 0644); -MODULE_PARM_DESC(debug_level, "Enable debug output level"); - -unsigned int wqm_quanta = 0x10000; -module_param(wqm_quanta, int, 0644); -MODULE_PARM_DESC(wqm_quanta, "WQM quanta"); - -static bool limit_maxrdreqsz; -module_param(limit_maxrdreqsz, bool, 0644); -MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes"); - -LIST_HEAD(nes_adapter_list); -static LIST_HEAD(nes_dev_list); - -atomic_t qps_destroyed; - -static unsigned int ee_flsh_adapter; -static unsigned int sysfs_nonidx_addr; -static unsigned int sysfs_idx_addr; - -static const struct pci_device_id nes_pci_table[] = { - { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020), }, - { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR), }, - {0} -}; - -MODULE_DEVICE_TABLE(pci, nes_pci_table); - -static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *); -static int nes_net_event(struct notifier_block *, unsigned long, void *); -static int nes_notifiers_registered; - - -static struct notifier_block nes_inetaddr_notifier = { - .notifier_call = nes_inetaddr_event -}; - -static struct notifier_block nes_net_notifier = { - .notifier_call = nes_net_event -}; - -/** - * nes_inetaddr_event - */ -static int nes_inetaddr_event(struct notifier_block *notifier, - unsigned long event, void *ptr) -{ - struct in_ifaddr *ifa = ptr; - struct net_device *event_netdev = ifa->ifa_dev->dev; - struct nes_device *nesdev; - struct net_device *netdev; - struct net_device *upper_dev; - struct nes_vnic *nesvnic; - unsigned int is_bonded; - - nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %pI4, netmask %pI4.\n", - &ifa->ifa_address, &ifa->ifa_mask); - list_for_each_entry(nesdev, &nes_dev_list, list) { - nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p. 
(%s)\n", - nesdev, nesdev->netdev[0]->name); - netdev = nesdev->netdev[0]; - nesvnic = netdev_priv(netdev); - upper_dev = netdev_master_upper_dev_get(netdev); - is_bonded = netif_is_bond_slave(netdev) && - (upper_dev == event_netdev); - if ((netdev == event_netdev) || is_bonded) { - if (nesvnic->rdma_enabled == 0) { - nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since" - " RDMA is not enabled.\n", - netdev->name); - return NOTIFY_OK; - } - /* we have ifa->ifa_address/mask here if we need it */ - switch (event) { - case NETDEV_DOWN: - nes_debug(NES_DBG_NETDEV, "event:DOWN\n"); - nes_write_indexed(nesdev, - NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), 0); - - nes_manage_arp_cache(netdev, netdev->dev_addr, - ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE); - nesvnic->local_ipaddr = 0; - if (is_bonded) - continue; - else - return NOTIFY_OK; - break; - case NETDEV_UP: - nes_debug(NES_DBG_NETDEV, "event:UP\n"); - - if (nesvnic->local_ipaddr != 0) { - nes_debug(NES_DBG_NETDEV, "Interface already has local_ipaddr\n"); - return NOTIFY_OK; - } - /* fall through */ - case NETDEV_CHANGEADDR: - /* Add the address to the IP table */ - if (upper_dev) { - struct in_device *in; - - rcu_read_lock(); - in = __in_dev_get_rcu(upper_dev); - if (in) { - struct in_ifaddr *ifa; - - ifa = rcu_dereference(in->ifa_list); - if (ifa) - nesvnic->local_ipaddr = ifa->ifa_address; - } - rcu_read_unlock(); - } else { - nesvnic->local_ipaddr = ifa->ifa_address; - } - - nes_write_indexed(nesdev, - NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), - ntohl(nesvnic->local_ipaddr)); - nes_manage_arp_cache(netdev, netdev->dev_addr, - ntohl(nesvnic->local_ipaddr), NES_ARP_ADD); - if (is_bonded) - continue; - else - return NOTIFY_OK; - break; - default: - break; - } - } - } - - return NOTIFY_DONE; -} - - -/** - * nes_net_event - */ -static int nes_net_event(struct notifier_block *notifier, - unsigned long event, void *ptr) -{ - struct neighbour *neigh = ptr; - struct nes_device *nesdev; - struct net_device *netdev; - struct nes_vnic *nesvnic; - - switch (event) { - case NETEVENT_NEIGH_UPDATE: - list_for_each_entry(nesdev, &nes_dev_list, list) { - /* nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p.\n", nesdev); */ - netdev = nesdev->netdev[0]; - nesvnic = netdev_priv(netdev); - if (netdev == neigh->dev) { - if (nesvnic->rdma_enabled == 0) { - nes_debug(NES_DBG_NETDEV, "Skipping device %s since no RDMA\n", - netdev->name); - } else { - if (neigh->nud_state & NUD_VALID) { - nes_manage_arp_cache(neigh->dev, neigh->ha, - ntohl(*(__be32 *)neigh->primary_key), NES_ARP_ADD); - } else { - nes_manage_arp_cache(neigh->dev, neigh->ha, - ntohl(*(__be32 *)neigh->primary_key), NES_ARP_DELETE); - } - } - return NOTIFY_OK; - } - } - break; - default: - nes_debug(NES_DBG_NETDEV, "NETEVENT_ %lu undefined\n", event); - break; - } - - return NOTIFY_DONE; -} - - -/** - * nes_add_ref - */ -void nes_add_ref(struct ib_qp *ibqp) -{ - struct nes_qp *nesqp; - - nesqp = to_nesqp(ibqp); - nes_debug(NES_DBG_QP, "Bumping refcount for QP%u. 
Pre-inc value = %u\n", - ibqp->qp_num, atomic_read(&nesqp->refcount)); - atomic_inc(&nesqp->refcount); -} - -static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request) -{ - unsigned long flags; - struct nes_qp *nesqp = cqp_request->cqp_callback_pointer; - struct nes_adapter *nesadapter = nesdev->nesadapter; - - atomic_inc(&qps_destroyed); - - /* Free the control structures */ - - if (nesqp->pbl_vbase) { - pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, - nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - nesadapter->free_256pbl++; - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase); - nesqp->pbl_vbase = NULL; - - } else { - pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, - nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase); - } - nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id); - - nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; - kfree(nesqp->allocated_buffer); - -} - -/** - * nes_rem_ref - */ -void nes_rem_ref(struct ib_qp *ibqp) -{ - u64 u64temp; - struct nes_qp *nesqp; - struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_cqp_request *cqp_request; - u32 opcode; - - nesqp = to_nesqp(ibqp); - - if (atomic_read(&nesqp->refcount) == 0) { - printk(KERN_INFO PFX "%s: Reference count already 0 for QP%d, last aeq = 0x%04X.\n", - __func__, ibqp->qp_num, nesqp->last_aeq); - BUG(); - } - - if (atomic_dec_and_test(&nesqp->refcount)) { - if (nesqp->pau_mode) - nes_destroy_pau_qp(nesdev, nesqp); - - /* Destroy the QP */ - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); - return; - } - cqp_request->waiting = 0; - cqp_request->callback = 1; - cqp_request->cqp_callback = nes_cqp_rem_ref_callback; - cqp_request->cqp_callback_pointer = nesqp; - cqp_wqe = &cqp_request->cqp_wqe; - - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - opcode = NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_IWARP; - - if (nesqp->hte_added) { - opcode |= NES_CQP_QP_DEL_HTE; - nesqp->hte_added = 0; - } - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); - u64temp = (u64)nesqp->nesqp_context_pbase; - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); - nes_post_cqp_request(nesdev, cqp_request); - } -} - - -/** - * nes_get_qp - */ -struct ib_qp *nes_get_qp(struct ib_device *device, int qpn) -{ - struct nes_vnic *nesvnic = to_nesvnic(device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - - if ((qpn < NES_FIRST_QPN) || (qpn >= (NES_FIRST_QPN + nesadapter->max_qp))) - return NULL; - - return &nesadapter->qp_table[qpn - NES_FIRST_QPN]->ibqp; -} - - -/** - * nes_print_macaddr - */ -static void nes_print_macaddr(struct net_device *netdev) -{ - nes_debug(NES_DBG_INIT, "%s: %pM, IRQ %u\n", - netdev->name, netdev->dev_addr, netdev->irq); -} - -/** - * nes_interrupt - handle interrupts - */ -static irqreturn_t nes_interrupt(int irq, void *dev_id) -{ - struct nes_device *nesdev = (struct nes_device *)dev_id; - int handled = 0; - u32 int_mask; - u32 int_req; - u32 int_stat; - u32 intf_int_stat; - u32 timer_stat; - - if (nesdev->msi_enabled) { - /* No need to read the interrupt 
pending register if msi is enabled */ - handled = 1; - } else { - if (unlikely(nesdev->nesadapter->hw_rev == NE020_REV)) { - /* Master interrupt enable provides synchronization for kicking off bottom half - when interrupt sharing is going on */ - int_mask = nes_read32(nesdev->regs + NES_INT_MASK); - if (int_mask & 0x80000000) { - /* Check interrupt status to see if this might be ours */ - int_stat = nes_read32(nesdev->regs + NES_INT_STAT); - int_req = nesdev->int_req; - if (int_stat&int_req) { - /* if interesting CEQ or AEQ is pending, claim the interrupt */ - if ((int_stat&int_req) & (~(NES_INT_TIMER|NES_INT_INTF))) { - handled = 1; - } else { - if (((int_stat & int_req) & NES_INT_TIMER) == NES_INT_TIMER) { - /* Timer might be running but might be for another function */ - timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT); - if ((timer_stat & nesdev->timer_int_req) != 0) { - handled = 1; - } - } - if ((((int_stat & int_req) & NES_INT_INTF) == NES_INT_INTF) && - (handled == 0)) { - intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT); - if ((intf_int_stat & nesdev->intf_int_req) != 0) { - handled = 1; - } - } - } - if (handled) { - nes_write32(nesdev->regs+NES_INT_MASK, int_mask & (~0x80000000)); - int_mask = nes_read32(nesdev->regs+NES_INT_MASK); - /* Save off the status to save an additional read */ - nesdev->int_stat = int_stat; - nesdev->napi_isr_ran = 1; - } - } - } - } else { - handled = nes_read32(nesdev->regs+NES_INT_PENDING); - } - } - - if (handled) { - - if (nes_napi_isr(nesdev) == 0) { - tasklet_schedule(&nesdev->dpc_tasklet); - - } - return IRQ_HANDLED; - } else { - return IRQ_NONE; - } -} - - -/** - * nes_probe - Device initialization - */ -static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) -{ - struct net_device *netdev = NULL; - struct nes_device *nesdev = NULL; - int ret = 0; - void __iomem *mmio_regs = NULL; - u8 hw_rev; - - printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n", - DRV_VERSION, pci_name(pcidev)); - - ret = pci_enable_device(pcidev); - if (ret) { - printk(KERN_ERR PFX "Unable to enable PCI device. (%s)\n", pci_name(pcidev)); - goto bail0; - } - - nes_debug(NES_DBG_INIT, "BAR0 (@0x%08lX) size = 0x%lX bytes\n", - (long unsigned int)pci_resource_start(pcidev, BAR_0), - (long unsigned int)pci_resource_len(pcidev, BAR_0)); - nes_debug(NES_DBG_INIT, "BAR1 (@0x%08lX) size = 0x%lX bytes\n", - (long unsigned int)pci_resource_start(pcidev, BAR_1), - (long unsigned int)pci_resource_len(pcidev, BAR_1)); - - /* Make sure PCI base addr are MMIO */ - if (!(pci_resource_flags(pcidev, BAR_0) & IORESOURCE_MEM) || - !(pci_resource_flags(pcidev, BAR_1) & IORESOURCE_MEM)) { - printk(KERN_ERR PFX "PCI regions not an MMIO resource\n"); - ret = -ENODEV; - goto bail1; - } - - /* Reserve PCI I/O and memory resources */ - ret = pci_request_regions(pcidev, DRV_NAME); - if (ret) { - printk(KERN_ERR PFX "Unable to request regions. 
(%s)\n", pci_name(pcidev)); - goto bail1; - } - - if ((sizeof(dma_addr_t) > 4)) { - ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64)); - if (ret < 0) { - printk(KERN_ERR PFX "64b DMA mask configuration failed\n"); - goto bail2; - } - ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(64)); - if (ret) { - printk(KERN_ERR PFX "64b DMA consistent mask configuration failed\n"); - goto bail2; - } - } else { - ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32)); - if (ret < 0) { - printk(KERN_ERR PFX "32b DMA mask configuration failed\n"); - goto bail2; - } - ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(32)); - if (ret) { - printk(KERN_ERR PFX "32b DMA consistent mask configuration failed\n"); - goto bail2; - } - } - - pci_set_master(pcidev); - - /* Allocate hardware structure */ - nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL); - if (!nesdev) { - ret = -ENOMEM; - goto bail2; - } - - nes_debug(NES_DBG_INIT, "Allocated nes device at %p\n", nesdev); - nesdev->pcidev = pcidev; - pci_set_drvdata(pcidev, nesdev); - - pci_read_config_byte(pcidev, 0x0008, &hw_rev); - nes_debug(NES_DBG_INIT, "hw_rev=%u\n", hw_rev); - - spin_lock_init(&nesdev->indexed_regs_lock); - - /* Remap the PCI registers in adapter BAR0 to kernel VA space */ - mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), - pci_resource_len(pcidev, BAR_0)); - if (mmio_regs == NULL) { - printk(KERN_ERR PFX "Unable to remap BAR0\n"); - ret = -EIO; - goto bail3; - } - nesdev->regs = mmio_regs; - nesdev->index_reg = 0x50 + (PCI_FUNC(pcidev->devfn)*8) + mmio_regs; - - /* Ensure interrupts are disabled */ - nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff); - - if (nes_drv_opt & NES_DRV_OPT_ENABLE_MSI) { - if (!pci_enable_msi(nesdev->pcidev)) { - nesdev->msi_enabled = 1; - nes_debug(NES_DBG_INIT, "MSI is enabled for device %s\n", - pci_name(pcidev)); - } else { - nes_debug(NES_DBG_INIT, "MSI is disabled by linux for device %s\n", - pci_name(pcidev)); - } - } else { - nes_debug(NES_DBG_INIT, "MSI not requested due to driver options for device %s\n", - pci_name(pcidev)); - } - - nesdev->csr_start = pci_resource_start(nesdev->pcidev, BAR_0); - nesdev->doorbell_region = pci_resource_start(nesdev->pcidev, BAR_1); - - /* Init the adapter */ - nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev); - if (!nesdev->nesadapter) { - printk(KERN_ERR PFX "Unable to initialize adapter.\n"); - ret = -ENOMEM; - goto bail5; - } - nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; - nesdev->nesadapter->wqm_quanta = wqm_quanta; - - /* nesdev->base_doorbell_index = - nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */ - nesdev->base_doorbell_index = 1; - nesdev->doorbell_start = nesdev->nesadapter->doorbell_start; - if (nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { - switch (PCI_FUNC(nesdev->pcidev->devfn) % - nesdev->nesadapter->port_count) { - case 1: - nesdev->mac_index = 2; - break; - case 2: - nesdev->mac_index = 1; - break; - case 3: - nesdev->mac_index = 3; - break; - case 0: - default: - nesdev->mac_index = 0; - } - } else { - nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % - nesdev->nesadapter->port_count; - } - - if ((limit_maxrdreqsz || - ((nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_GLADIUS) && - (hw_rev == NE020_REV1))) && - (pcie_get_readrq(pcidev) > 256)) { - if (pcie_set_readrq(pcidev, 256)) - printk(KERN_ERR PFX "Unable to set max read request" - " to 256 bytes\n"); - else - nes_debug(NES_DBG_INIT, "Max read request size set" - " to 256 bytes\n"); - } - - 
tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev); - - /* bring up the Control QP */ - if (nes_init_cqp(nesdev)) { - ret = -ENODEV; - goto bail6; - } - - /* Arm the CCQ */ - nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | - PCI_FUNC(nesdev->pcidev->devfn)); - nes_read32(nesdev->regs+NES_CQE_ALLOC); - - /* Enable the interrupts */ - nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) | - (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16)); - if (PCI_FUNC(nesdev->pcidev->devfn) < 4) { - nesdev->int_req |= (1 << (PCI_FUNC(nesdev->mac_index)+24)); - } - - /* TODO: This really should be the first driver to load, not function 0 */ - if (PCI_FUNC(nesdev->pcidev->devfn) == 0) { - /* pick up PCI and critical errors if the first driver to load */ - nesdev->intf_int_req = NES_INTF_INT_PCIERR | NES_INTF_INT_CRITERR; - nesdev->int_req |= NES_INT_INTF; - } else { - nesdev->intf_int_req = 0; - } - nesdev->intf_int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16)); - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, 0); - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 0); - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS2, 0x00001265); - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS4, 0x18021804); - - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS3, 0x17801790); - - /* deal with both periodic and one_shot */ - nesdev->timer_int_req = 0x101 << PCI_FUNC(nesdev->pcidev->devfn); - nesdev->nesadapter->timer_int_req |= nesdev->timer_int_req; - nes_debug(NES_DBG_INIT, "setting int_req for function %u, nesdev = 0x%04X, adapter = 0x%04X\n", - PCI_FUNC(nesdev->pcidev->devfn), - nesdev->timer_int_req, nesdev->nesadapter->timer_int_req); - - nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); - - list_add_tail(&nesdev->list, &nes_dev_list); - - /* Request an interrupt line for the driver */ - ret = request_irq(pcidev->irq, nes_interrupt, IRQF_SHARED, DRV_NAME, nesdev); - if (ret) { - printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n", - pci_name(pcidev), pcidev->irq); - goto bail65; - } - - nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); - - if (nes_notifiers_registered == 0) { - register_inetaddr_notifier(&nes_inetaddr_notifier); - register_netevent_notifier(&nes_net_notifier); - } - nes_notifiers_registered++; - - INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status); - - /* Initialize network devices */ - netdev = nes_netdev_init(nesdev, mmio_regs); - if (netdev == NULL) { - ret = -ENOMEM; - goto bail7; - } - - /* Register network device */ - ret = register_netdev(netdev); - if (ret) { - printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret); - nes_netdev_destroy(netdev); - goto bail7; - } - - nes_print_macaddr(netdev); - - nesdev->netdev_count++; - nesdev->nesadapter->netdev_count++; - - printk(KERN_INFO PFX "%s: NetEffect RNIC driver successfully loaded.\n", - pci_name(pcidev)); - return 0; - - bail7: - printk(KERN_ERR PFX "bail7\n"); - while (nesdev->netdev_count > 0) { - nesdev->netdev_count--; - nesdev->nesadapter->netdev_count--; - - unregister_netdev(nesdev->netdev[nesdev->netdev_count]); - nes_netdev_destroy(nesdev->netdev[nesdev->netdev_count]); - } - - nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n", - nesdev->netdev_count, nesdev->nesadapter->netdev_count); - - nes_notifiers_registered--; - if (nes_notifiers_registered == 0) { - unregister_netevent_notifier(&nes_net_notifier); - unregister_inetaddr_notifier(&nes_inetaddr_notifier); - } - - list_del(&nesdev->list); - 
nes_destroy_cqp(nesdev); - - bail65: - printk(KERN_ERR PFX "bail65\n"); - free_irq(pcidev->irq, nesdev); - if (nesdev->msi_enabled) { - pci_disable_msi(pcidev); - } - bail6: - printk(KERN_ERR PFX "bail6\n"); - tasklet_kill(&nesdev->dpc_tasklet); - /* Deallocate the Adapter Structure */ - nes_destroy_adapter(nesdev->nesadapter); - - bail5: - printk(KERN_ERR PFX "bail5\n"); - iounmap(nesdev->regs); - - bail3: - printk(KERN_ERR PFX "bail3\n"); - kfree(nesdev); - - bail2: - pci_release_regions(pcidev); - - bail1: - pci_disable_device(pcidev); - - bail0: - return ret; -} - - -/** - * nes_remove - unload from kernel - */ -static void nes_remove(struct pci_dev *pcidev) -{ - struct nes_device *nesdev = pci_get_drvdata(pcidev); - struct net_device *netdev; - int netdev_index = 0; - unsigned long flags; - - if (nesdev->netdev_count) { - netdev = nesdev->netdev[netdev_index]; - if (netdev) { - netif_stop_queue(netdev); - unregister_netdev(netdev); - nes_netdev_destroy(netdev); - - nesdev->netdev[netdev_index] = NULL; - nesdev->netdev_count--; - nesdev->nesadapter->netdev_count--; - } - } - - nes_notifiers_registered--; - if (nes_notifiers_registered == 0) { - unregister_netevent_notifier(&nes_net_notifier); - unregister_inetaddr_notifier(&nes_inetaddr_notifier); - } - - list_del(&nesdev->list); - nes_destroy_cqp(nesdev); - - free_irq(pcidev->irq, nesdev); - tasklet_kill(&nesdev->dpc_tasklet); - - spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); - if (nesdev->link_recheck) { - spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); - cancel_delayed_work_sync(&nesdev->work); - } else { - spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); - } - - /* Deallocate the Adapter Structure */ - nes_destroy_adapter(nesdev->nesadapter); - - if (nesdev->msi_enabled) { - pci_disable_msi(pcidev); - } - - iounmap(nesdev->regs); - kfree(nesdev); - - /* nes_debug(NES_DBG_SHUTDOWN, "calling pci_release_regions.\n"); */ - pci_release_regions(pcidev); - pci_disable_device(pcidev); - pci_set_drvdata(pcidev, NULL); -} - - -static ssize_t adapter_show(struct device_driver *ddp, char *buf) -{ - unsigned int devfn = 0xffffffff; - unsigned char bus_number = 0xff; - unsigned int i = 0; - struct nes_device *nesdev; - - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - devfn = nesdev->pcidev->devfn; - bus_number = nesdev->pcidev->bus->number; - break; - } - i++; - } - - return snprintf(buf, PAGE_SIZE, "%x:%x\n", bus_number, devfn); -} - -static ssize_t adapter_store(struct device_driver *ddp, - const char *buf, size_t count) -{ - char *p = (char *)buf; - - ee_flsh_adapter = simple_strtoul(p, &p, 10); - return strnlen(buf, count); -} - -static ssize_t eeprom_cmd_show(struct device_driver *ddp, char *buf) -{ - u32 eeprom_cmd = 0xdead; - u32 i = 0; - struct nes_device *nesdev; - - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - eeprom_cmd = nes_read32(nesdev->regs + NES_EEPROM_COMMAND); - break; - } - i++; - } - return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_cmd); -} - -static ssize_t eeprom_cmd_store(struct device_driver *ddp, - const char *buf, size_t count) -{ - char *p = (char *)buf; - u32 val; - u32 i = 0; - struct nes_device *nesdev; - - if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { - val = simple_strtoul(p, &p, 16); - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - nes_write32(nesdev->regs + NES_EEPROM_COMMAND, val); - break; - } - i++; - } - } - return strnlen(buf, count); -} - 
-static ssize_t eeprom_data_show(struct device_driver *ddp, char *buf) -{ - u32 eeprom_data = 0xdead; - u32 i = 0; - struct nes_device *nesdev; - - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - eeprom_data = nes_read32(nesdev->regs + NES_EEPROM_DATA); - break; - } - i++; - } - - return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_data); -} - -static ssize_t eeprom_data_store(struct device_driver *ddp, - const char *buf, size_t count) -{ - char *p = (char *)buf; - u32 val; - u32 i = 0; - struct nes_device *nesdev; - - if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { - val = simple_strtoul(p, &p, 16); - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - nes_write32(nesdev->regs + NES_EEPROM_DATA, val); - break; - } - i++; - } - } - return strnlen(buf, count); -} - -static ssize_t flash_cmd_show(struct device_driver *ddp, char *buf) -{ - u32 flash_cmd = 0xdead; - u32 i = 0; - struct nes_device *nesdev; - - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - flash_cmd = nes_read32(nesdev->regs + NES_FLASH_COMMAND); - break; - } - i++; - } - - return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_cmd); -} - -static ssize_t flash_cmd_store(struct device_driver *ddp, - const char *buf, size_t count) -{ - char *p = (char *)buf; - u32 val; - u32 i = 0; - struct nes_device *nesdev; - - if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { - val = simple_strtoul(p, &p, 16); - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - nes_write32(nesdev->regs + NES_FLASH_COMMAND, val); - break; - } - i++; - } - } - return strnlen(buf, count); -} - -static ssize_t flash_data_show(struct device_driver *ddp, char *buf) -{ - u32 flash_data = 0xdead; - u32 i = 0; - struct nes_device *nesdev; - - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - flash_data = nes_read32(nesdev->regs + NES_FLASH_DATA); - break; - } - i++; - } - - return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_data); -} - -static ssize_t flash_data_store(struct device_driver *ddp, - const char *buf, size_t count) -{ - char *p = (char *)buf; - u32 val; - u32 i = 0; - struct nes_device *nesdev; - - if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { - val = simple_strtoul(p, &p, 16); - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - nes_write32(nesdev->regs + NES_FLASH_DATA, val); - break; - } - i++; - } - } - return strnlen(buf, count); -} - -static ssize_t nonidx_addr_show(struct device_driver *ddp, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_nonidx_addr); -} - -static ssize_t nonidx_addr_store(struct device_driver *ddp, - const char *buf, size_t count) -{ - char *p = (char *)buf; - - if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') - sysfs_nonidx_addr = simple_strtoul(p, &p, 16); - - return strnlen(buf, count); -} - -static ssize_t nonidx_data_show(struct device_driver *ddp, char *buf) -{ - u32 nonidx_data = 0xdead; - u32 i = 0; - struct nes_device *nesdev; - - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - nonidx_data = nes_read32(nesdev->regs + sysfs_nonidx_addr); - break; - } - i++; - } - - return snprintf(buf, PAGE_SIZE, "0x%x\n", nonidx_data); -} - -static ssize_t nonidx_data_store(struct device_driver *ddp, - const char *buf, size_t count) -{ - char *p = (char *)buf; - u32 val; - u32 i = 0; - struct nes_device *nesdev; - - if (p[1] == 'x' || p[1] == 
'X' || p[0] == 'x' || p[0] == 'X') { - val = simple_strtoul(p, &p, 16); - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - nes_write32(nesdev->regs + sysfs_nonidx_addr, val); - break; - } - i++; - } - } - return strnlen(buf, count); -} - -static ssize_t idx_addr_show(struct device_driver *ddp, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_idx_addr); -} - -static ssize_t idx_addr_store(struct device_driver *ddp, - const char *buf, size_t count) -{ - char *p = (char *)buf; - - if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') - sysfs_idx_addr = simple_strtoul(p, &p, 16); - - return strnlen(buf, count); -} - -static ssize_t idx_data_show(struct device_driver *ddp, char *buf) -{ - u32 idx_data = 0xdead; - u32 i = 0; - struct nes_device *nesdev; - - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - idx_data = nes_read_indexed(nesdev, sysfs_idx_addr); - break; - } - i++; - } - - return snprintf(buf, PAGE_SIZE, "0x%x\n", idx_data); -} - -static ssize_t idx_data_store(struct device_driver *ddp, - const char *buf, size_t count) -{ - char *p = (char *)buf; - u32 val; - u32 i = 0; - struct nes_device *nesdev; - - if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') { - val = simple_strtoul(p, &p, 16); - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - nes_write_indexed(nesdev, sysfs_idx_addr, val); - break; - } - i++; - } - } - return strnlen(buf, count); -} - -static ssize_t wqm_quanta_show(struct device_driver *ddp, char *buf) -{ - u32 wqm_quanta_value = 0xdead; - u32 i = 0; - struct nes_device *nesdev; - - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - wqm_quanta_value = nesdev->nesadapter->wqm_quanta; - break; - } - i++; - } - - return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta_value); -} - -static ssize_t wqm_quanta_store(struct device_driver *ddp, const char *buf, - size_t count) -{ - unsigned long wqm_quanta_value; - u32 wqm_config1; - u32 i = 0; - struct nes_device *nesdev; - - if (kstrtoul(buf, 0, &wqm_quanta_value) < 0) - return -EINVAL; - - list_for_each_entry(nesdev, &nes_dev_list, list) { - if (i == ee_flsh_adapter) { - nesdev->nesadapter->wqm_quanta = wqm_quanta_value; - wqm_config1 = nes_read_indexed(nesdev, - NES_IDX_WQM_CONFIG1); - nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, - ((wqm_quanta_value << 1) | - (wqm_config1 & 0x00000001))); - break; - } - i++; - } - return strnlen(buf, count); -} - -static DRIVER_ATTR_RW(adapter); -static DRIVER_ATTR_RW(eeprom_cmd); -static DRIVER_ATTR_RW(eeprom_data); -static DRIVER_ATTR_RW(flash_cmd); -static DRIVER_ATTR_RW(flash_data); -static DRIVER_ATTR_RW(nonidx_addr); -static DRIVER_ATTR_RW(nonidx_data); -static DRIVER_ATTR_RW(idx_addr); -static DRIVER_ATTR_RW(idx_data); -static DRIVER_ATTR_RW(wqm_quanta); - -static struct attribute *nes_attrs[] = { - &driver_attr_adapter.attr, - &driver_attr_eeprom_cmd.attr, - &driver_attr_eeprom_data.attr, - &driver_attr_flash_cmd.attr, - &driver_attr_flash_data.attr, - &driver_attr_nonidx_addr.attr, - &driver_attr_nonidx_data.attr, - &driver_attr_idx_addr.attr, - &driver_attr_idx_data.attr, - &driver_attr_wqm_quanta.attr, - NULL, -}; -ATTRIBUTE_GROUPS(nes); - -static struct pci_driver nes_pci_driver = { - .name = DRV_NAME, - .id_table = nes_pci_table, - .probe = nes_probe, - .remove = nes_remove, - .groups = nes_groups, -}; - - -/** - * nes_init_module - module initialization entry point - */ -static int __init nes_init_module(void) 
-{ - int retval; - - retval = nes_cm_start(); - if (retval) { - printk(KERN_ERR PFX "Unable to start NetEffect iWARP CM.\n"); - return retval; - } - return pci_register_driver(&nes_pci_driver); -} - - -/** - * nes_exit_module - module unload entry point - */ -static void __exit nes_exit_module(void) -{ - nes_cm_stop(); - - pci_unregister_driver(&nes_pci_driver); -} - - -module_init(nes_init_module); -module_exit(nes_exit_module); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h deleted file mode 100644 index a895fe980d10..000000000000 --- a/drivers/infiniband/hw/nes/nes.h +++ /dev/null @@ -1,574 +0,0 @@ -/* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __NES_H -#define __NES_H - -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/interrupt.h> -#include <linux/spinlock.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/pci.h> -#include <linux/dma-mapping.h> -#include <linux/workqueue.h> -#include <linux/slab.h> -#include <asm/io.h> -#include <linux/crc32c.h> - -#include <rdma/ib_smi.h> -#include <rdma/ib_verbs.h> -#include <rdma/ib_pack.h> -#include <rdma/rdma_cm.h> -#include <rdma/iw_cm.h> -#include <rdma/rdma_netlink.h> -#include <rdma/iw_portmap.h> - -#define NES_SEND_FIRST_WRITE - -#define QUEUE_DISCONNECTS - -#define DRV_NAME "iw_nes" -#define DRV_VERSION "1.5.0.1" -#define PFX DRV_NAME ": " - -/* - * NetEffect PCI vendor id and NE010 PCI device id. 
- */ -#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */ -#define PCI_VENDOR_ID_NETEFFECT 0x1678 -#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100 -#define PCI_DEVICE_ID_NETEFFECT_NE020_KR 0x0110 -#endif - -#define NE020_REV 4 -#define NE020_REV1 5 - -#define BAR_0 0 -#define BAR_1 2 - -#define RX_BUF_SIZE (1536 + 8) -#define NES_REG0_SIZE (4 * 1024) -#define NES_TX_TIMEOUT (6*HZ) -#define NES_FIRST_QPN 64 -#define NES_SW_CONTEXT_ALIGN 1024 - -#define NES_MAX_MTU 9000 - -#define NES_NIC_MAX_NICS 16 -#define NES_MAX_ARP_TABLE_SIZE 4096 - -#define NES_NIC_CEQ_SIZE 8 -/* NICs will be on a separate CQ */ -#define NES_CCEQ_SIZE ((nesadapter->max_cq / nesadapter->port_count) - 32) - -#define NES_MAX_PORT_COUNT 4 - -#define MAX_DPC_ITERATIONS 128 - -#define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001 -#define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002 -#define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004 -#define NES_DRV_OPT_DISABLE_INTF 0x00000008 -#define NES_DRV_OPT_ENABLE_MSI 0x00000010 -#define NES_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020 -#define NES_DRV_OPT_SUPRESS_OPTION_BC 0x00000040 -#define NES_DRV_OPT_NO_INLINE_DATA 0x00000080 -#define NES_DRV_OPT_DISABLE_INT_MOD 0x00000100 -#define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200 -#define NES_DRV_OPT_ENABLE_PAU 0x00000400 - -#define NES_AEQ_EVENT_TIMEOUT 2500 -#define NES_DISCONNECT_EVENT_TIMEOUT 2000 - -/* debug levels */ -/* must match userspace */ -#define NES_DBG_HW 0x00000001 -#define NES_DBG_INIT 0x00000002 -#define NES_DBG_ISR 0x00000004 -#define NES_DBG_PHY 0x00000008 -#define NES_DBG_NETDEV 0x00000010 -#define NES_DBG_CM 0x00000020 -#define NES_DBG_CM1 0x00000040 -#define NES_DBG_NIC_RX 0x00000080 -#define NES_DBG_NIC_TX 0x00000100 -#define NES_DBG_CQP 0x00000200 -#define NES_DBG_MMAP 0x00000400 -#define NES_DBG_MR 0x00000800 -#define NES_DBG_PD 0x00001000 -#define NES_DBG_CQ 0x00002000 -#define NES_DBG_QP 0x00004000 -#define NES_DBG_MOD_QP 0x00008000 -#define NES_DBG_AEQ 0x00010000 -#define NES_DBG_IW_RX 0x00020000 -#define NES_DBG_IW_TX 0x00040000 -#define NES_DBG_SHUTDOWN 0x00080000 -#define NES_DBG_PAU 0x00100000 -#define NES_DBG_NLMSG 0x00200000 -#define NES_DBG_RSVD1 0x10000000 -#define NES_DBG_RSVD2 0x20000000 -#define NES_DBG_RSVD3 0x40000000 -#define NES_DBG_RSVD4 0x80000000 -#define NES_DBG_ALL 0xffffffff - -#ifdef CONFIG_INFINIBAND_NES_DEBUG -#define nes_debug(level, fmt, args...) \ -do { \ - if (level & nes_debug_level) \ - printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \ -} while (0) - -#define NES_EVENT_TIMEOUT 1200000 -#else -#define nes_debug(level, fmt, args...) 
no_printk(fmt, ##args) - -#define NES_EVENT_TIMEOUT 100000 -#endif - -#include "nes_hw.h" -#include "nes_verbs.h" -#include "nes_context.h" -#include <rdma/nes-abi.h> -#include "nes_cm.h" -#include "nes_mgt.h" - -extern int interrupt_mod_interval; -extern int nes_if_count; -extern int mpa_version; -extern int disable_mpa_crc; -extern unsigned int nes_drv_opt; -extern unsigned int nes_debug_level; -extern unsigned int wqm_quanta; -extern struct list_head nes_adapter_list; - -extern atomic_t cm_connects; -extern atomic_t cm_accepts; -extern atomic_t cm_disconnects; -extern atomic_t cm_closes; -extern atomic_t cm_connecteds; -extern atomic_t cm_connect_reqs; -extern atomic_t cm_rejects; -extern atomic_t mod_qp_timouts; -extern atomic_t qps_created; -extern atomic_t qps_destroyed; -extern atomic_t sw_qps_destroyed; -extern u32 mh_detected; -extern u32 mh_pauses_sent; -extern u32 cm_packets_sent; -extern u32 cm_packets_bounced; -extern u32 cm_packets_created; -extern u32 cm_packets_received; -extern u32 cm_packets_dropped; -extern u32 cm_packets_retrans; -extern atomic_t cm_listens_created; -extern atomic_t cm_listens_destroyed; -extern u32 cm_backlog_drops; -extern atomic_t cm_loopbacks; -extern atomic_t cm_nodes_created; -extern atomic_t cm_nodes_destroyed; -extern atomic_t cm_accel_dropped_pkts; -extern atomic_t cm_resets_recvd; -extern atomic_t pau_qps_created; -extern atomic_t pau_qps_destroyed; - -extern u32 int_mod_timer_init; -extern u32 int_mod_cq_depth_256; -extern u32 int_mod_cq_depth_128; -extern u32 int_mod_cq_depth_32; -extern u32 int_mod_cq_depth_24; -extern u32 int_mod_cq_depth_16; -extern u32 int_mod_cq_depth_4; -extern u32 int_mod_cq_depth_1; - -struct nes_device { - struct nes_adapter *nesadapter; - void __iomem *regs; - void __iomem *index_reg; - struct pci_dev *pcidev; - struct net_device *netdev[NES_NIC_MAX_NICS]; - u64 link_status_interrupts; - struct tasklet_struct dpc_tasklet; - spinlock_t indexed_regs_lock; - unsigned long csr_start; - unsigned long doorbell_region; - unsigned long doorbell_start; - unsigned long mac_tx_errors; - unsigned long mac_pause_frames_sent; - unsigned long mac_pause_frames_received; - unsigned long mac_rx_errors; - unsigned long mac_rx_crc_errors; - unsigned long mac_rx_symbol_err_frames; - unsigned long mac_rx_jabber_frames; - unsigned long mac_rx_oversized_frames; - unsigned long mac_rx_short_frames; - unsigned long port_rx_discards; - unsigned long port_tx_discards; - unsigned int mac_index; - unsigned int nes_stack_start; - - /* Control Structures */ - void *cqp_vbase; - dma_addr_t cqp_pbase; - u32 cqp_mem_size; - u8 ceq_index; - u8 nic_ceq_index; - struct nes_hw_cqp cqp; - struct nes_hw_cq ccq; - struct list_head cqp_avail_reqs; - struct list_head cqp_pending_reqs; - struct nes_cqp_request *nes_cqp_requests; - - u32 int_req; - u32 int_stat; - u32 timer_int_req; - u32 timer_only_int_count; - u32 intf_int_req; - u32 last_mac_tx_pauses; - u32 last_used_chunks_tx; - struct list_head list; - - u16 base_doorbell_index; - u16 currcq_count; - u16 deepcq_count; - u8 iw_status; - u8 msi_enabled; - u8 netdev_count; - u8 napi_isr_ran; - u8 disable_rx_flow_control; - u8 disable_tx_flow_control; - - struct delayed_work work; - u8 link_recheck; -}; - -/* Receive skb private area - must fit in skb->cb area */ -struct nes_rskb_cb { - u64 busaddr; - u32 maplen; - u32 seqnum; - u8 *data_start; - struct nes_qp *nesqp; -}; - -static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad) -{ - u32 crc_value; - crc_value = crc32c(~0, (void *)nes_quad, sizeof 
(struct nes_v4_quad)); - - /* - * With commit ef19454b ("[LIB] crc32c: Keep intermediate crc - * state in cpu order"), behavior of crc32c changes on - * big-endian platforms. Our algorithm expects the previous - * behavior; otherwise we have RDMA connection establishment - * issue on big-endian. - */ - return cpu_to_le32(crc_value); -} - -static inline void -set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value) -{ - wqe_words[index] = cpu_to_le32((u32) value); - wqe_words[index + 1] = cpu_to_le32(upper_32_bits(value)); -} - -static inline void -set_wqe_32bit_value(__le32 *wqe_words, u32 index, u32 value) -{ - wqe_words[index] = cpu_to_le32(value); -} - -static inline void -nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev) -{ - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = 0; - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0; - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; - cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0; - cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_LEN_IDX] = 0; - cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_LOW_IDX] = 0; - cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_LOW_IDX] = 0; - cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_HIGH_IDX] = 0; -} - -static inline void -nes_fill_init_qp_wqe(struct nes_hw_qp_wqe *wqe, struct nes_qp *nesqp, u32 head) -{ - u32 value; - value = ((u32)((unsigned long) nesqp)) | head; - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX, - (u32)(upper_32_bits((unsigned long)(nesqp)))); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, value); -} - -/* Read from memory-mapped device */ -static inline u32 nes_read_indexed(struct nes_device *nesdev, u32 reg_index) -{ - unsigned long flags; - void __iomem *addr = nesdev->index_reg; - u32 value; - - spin_lock_irqsave(&nesdev->indexed_regs_lock, flags); - - writel(reg_index, addr); - value = readl((void __iomem *)addr + 4); - - spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags); - return value; -} - -static inline u32 nes_read32(const void __iomem *addr) -{ - return readl(addr); -} - -static inline u16 nes_read16(const void __iomem *addr) -{ - return readw(addr); -} - -static inline u8 nes_read8(const void __iomem *addr) -{ - return readb(addr); -} - -/* Write to memory-mapped device */ -static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u32 val) -{ - unsigned long flags; - void __iomem *addr = nesdev->index_reg; - - spin_lock_irqsave(&nesdev->indexed_regs_lock, flags); - - writel(reg_index, addr); - writel(val, (void __iomem *)addr + 4); - - spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags); -} - -static inline void nes_write32(void __iomem *addr, u32 val) -{ - writel(val, addr); -} - -static inline void nes_write16(void __iomem *addr, u16 val) -{ - writew(val, addr); -} - -static inline void nes_write8(void __iomem *addr, u8 val) -{ - writeb(val, addr); -} - -enum nes_resource { - NES_RESOURCE_MW = 1, - NES_RESOURCE_FAST_MR, - NES_RESOURCE_PHYS_MR, - NES_RESOURCE_USER_MR, - NES_RESOURCE_PD, - NES_RESOURCE_QP, - NES_RESOURCE_CQ, - NES_RESOURCE_ARP -}; - -static inline int nes_alloc_resource(struct nes_adapter *nesadapter, - unsigned long *resource_array, u32 max_resources, - u32 *req_resource_num, u32 *next, enum nes_resource resource_type) -{ - unsigned long flags; - u32 resource_num; - - spin_lock_irqsave(&nesadapter->resource_lock, flags); - - resource_num = find_next_zero_bit(resource_array, 
max_resources, *next); - if (resource_num >= max_resources) { - resource_num = find_first_zero_bit(resource_array, max_resources); - if (resource_num >= max_resources) { - printk(KERN_ERR PFX "%s: No available resources [type=%u].\n", __func__, resource_type); - spin_unlock_irqrestore(&nesadapter->resource_lock, flags); - return -EMFILE; - } - } - set_bit(resource_num, resource_array); - *next = resource_num+1; - if (*next == max_resources) { - *next = 0; - } - spin_unlock_irqrestore(&nesadapter->resource_lock, flags); - *req_resource_num = resource_num; - - return 0; -} - -static inline int nes_is_resource_allocated(struct nes_adapter *nesadapter, - unsigned long *resource_array, u32 resource_num) -{ - unsigned long flags; - int bit_is_set; - - spin_lock_irqsave(&nesadapter->resource_lock, flags); - - bit_is_set = test_bit(resource_num, resource_array); - nes_debug(NES_DBG_HW, "resource_num %u is%s allocated.\n", - resource_num, (bit_is_set ? "": " not")); - spin_unlock_irqrestore(&nesadapter->resource_lock, flags); - - return bit_is_set; -} - -static inline void nes_free_resource(struct nes_adapter *nesadapter, - unsigned long *resource_array, u32 resource_num) -{ - unsigned long flags; - - spin_lock_irqsave(&nesadapter->resource_lock, flags); - clear_bit(resource_num, resource_array); - spin_unlock_irqrestore(&nesadapter->resource_lock, flags); -} - -static inline struct nes_vnic *to_nesvnic(struct ib_device *ibdev) -{ - return container_of(ibdev, struct nes_ib_device, ibdev)->nesvnic; -} - -static inline struct nes_pd *to_nespd(struct ib_pd *ibpd) -{ - return container_of(ibpd, struct nes_pd, ibpd); -} - -static inline struct nes_ucontext *to_nesucontext(struct ib_ucontext *ibucontext) -{ - return container_of(ibucontext, struct nes_ucontext, ibucontext); -} - -static inline struct nes_mr *to_nesmr(struct ib_mr *ibmr) -{ - return container_of(ibmr, struct nes_mr, ibmr); -} - -static inline struct nes_mr *to_nesmr_from_ibfmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct nes_mr, ibfmr); -} - -static inline struct nes_mr *to_nesmw(struct ib_mw *ibmw) -{ - return container_of(ibmw, struct nes_mr, ibmw); -} - -static inline struct nes_fmr *to_nesfmr(struct nes_mr *nesmr) -{ - return container_of(nesmr, struct nes_fmr, nesmr); -} - -static inline struct nes_cq *to_nescq(struct ib_cq *ibcq) -{ - return container_of(ibcq, struct nes_cq, ibcq); -} - -static inline struct nes_qp *to_nesqp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct nes_qp, ibqp); -} - - - -/* nes.c */ -void nes_add_ref(struct ib_qp *); -void nes_rem_ref(struct ib_qp *); -struct ib_qp *nes_get_qp(struct ib_device *, int); - - -/* nes_hw.c */ -struct nes_adapter *nes_init_adapter(struct nes_device *, u8); -void nes_nic_init_timer_defaults(struct nes_device *, u8); -void nes_destroy_adapter(struct nes_adapter *); -int nes_init_cqp(struct nes_device *); -int nes_init_phy(struct nes_device *); -int nes_init_nic_qp(struct nes_device *, struct net_device *); -void nes_destroy_nic_qp(struct nes_vnic *); -int nes_napi_isr(struct nes_device *); -void nes_dpc(unsigned long); -void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *); -void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *); -int nes_destroy_cqp(struct nes_device *); -int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); -void nes_recheck_link_status(struct work_struct *work); -void nes_terminate_timeout(struct timer_list *t); - -/* nes_nic.c */ -struct net_device *nes_netdev_init(struct nes_device *, void __iomem *); 
-void nes_netdev_destroy(struct net_device *); -int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); - -/* nes_cm.c */ -void *nes_cm_create(struct net_device *); -int nes_cm_recv(struct sk_buff *, struct net_device *); -void nes_update_arp(unsigned char *, u32, u32, u16, u16); -void nes_manage_arp_cache(struct net_device *, unsigned char *, u32, u32); -void nes_sock_release(struct nes_qp *, unsigned long *); -void flush_wqes(struct nes_device *nesdev, struct nes_qp *, u32, u32); -int nes_manage_apbvt(struct nes_vnic *, u32, u32, u32); -int nes_cm_disconn(struct nes_qp *); -void nes_cm_disconn_worker(void *); - -/* nes_verbs.c */ -int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32, u32); -int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *); -struct nes_ib_device *nes_init_ofa_device(struct net_device *); -void nes_port_ibevent(struct nes_vnic *nesvnic); -void nes_destroy_ofa_device(struct nes_ib_device *); -int nes_register_ofa_device(struct nes_ib_device *); - -/* nes_util.c */ -int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *); -void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16); -void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *); -void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16); -void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16); -struct nes_cqp_request *nes_get_cqp_request(struct nes_device *); -void nes_free_cqp_request(struct nes_device *nesdev, - struct nes_cqp_request *cqp_request); -void nes_put_cqp_request(struct nes_device *nesdev, - struct nes_cqp_request *cqp_request); -void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *); -int nes_arp_table(struct nes_device *, u32, u8 *, u32); -void nes_mh_fix(struct timer_list *t); -void nes_clc(struct timer_list *t); -void nes_dump_mem(unsigned int, void *, int); -u32 nes_crc32(u32, u32, u32, u32, u8 *, u32, u32, u32); - -#endif /* __NES_H */ diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c deleted file mode 100644 index 62bf986eba67..000000000000 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ /dev/null @@ -1,3992 +0,0 @@ -/* - * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - - -#define TCPOPT_TIMESTAMP 8 - -#include <linux/atomic.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <linux/init.h> -#include <linux/if_arp.h> -#include <linux/if_vlan.h> -#include <linux/notifier.h> -#include <linux/net.h> -#include <linux/types.h> -#include <linux/timer.h> -#include <linux/time.h> -#include <linux/delay.h> -#include <linux/etherdevice.h> -#include <linux/netdevice.h> -#include <linux/random.h> -#include <linux/list.h> -#include <linux/threads.h> -#include <linux/highmem.h> -#include <linux/slab.h> -#include <net/arp.h> -#include <net/neighbour.h> -#include <net/route.h> -#include <net/ip_fib.h> -#include <net/secure_seq.h> -#include <net/tcp.h> -#include <linux/fcntl.h> - -#include "nes.h" - -u32 cm_packets_sent; -u32 cm_packets_bounced; -u32 cm_packets_dropped; -u32 cm_packets_retrans; -u32 cm_packets_created; -u32 cm_packets_received; -atomic_t cm_listens_created; -atomic_t cm_listens_destroyed; -u32 cm_backlog_drops; -atomic_t cm_loopbacks; -atomic_t cm_nodes_created; -atomic_t cm_nodes_destroyed; -atomic_t cm_accel_dropped_pkts; -atomic_t cm_resets_recvd; - -static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *); -static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, struct nes_vnic *, struct nes_cm_info *); -static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *); -static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, struct nes_vnic *, u16, void *, struct nes_cm_info *); -static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); -static int mini_cm_accept(struct nes_cm_core *, struct nes_cm_node *); -static int mini_cm_reject(struct nes_cm_core *, struct nes_cm_node *); -static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); -static int mini_cm_dealloc_core(struct nes_cm_core *); -static int mini_cm_get(struct nes_cm_core *); -static int mini_cm_set(struct nes_cm_core *, u32, u32); - -static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, void *, u32, void *, u32, u8); -static int add_ref_cm_node(struct nes_cm_node *); -static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); - -static int nes_cm_disconn_true(struct nes_qp *); -static int nes_cm_post_event(struct nes_cm_event *event); -static int nes_disconnect(struct nes_qp *nesqp, int abrupt); -static void nes_disconnect_worker(struct work_struct *work); - -static int send_mpa_request(struct nes_cm_node *, struct sk_buff *); -static int send_mpa_reject(struct nes_cm_node *); -static int send_syn(struct nes_cm_node *, u32, struct sk_buff *); -static int send_reset(struct nes_cm_node *, struct sk_buff *); -static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb); -static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb); -static void process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *); - -static void active_open_err(struct nes_cm_node *, struct sk_buff *, int); -static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int); -static void cleanup_retrans_entry(struct nes_cm_node *); -static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *); -static void free_retrans_entry(struct 
nes_cm_node *cm_node); -static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, struct sk_buff *skb, int optionsize, int passive); - -/* CM event handler functions */ -static void cm_event_connected(struct nes_cm_event *); -static void cm_event_connect_error(struct nes_cm_event *); -static void cm_event_reset(struct nes_cm_event *); -static void cm_event_mpa_req(struct nes_cm_event *); -static void cm_event_mpa_reject(struct nes_cm_event *); -static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node); - -/* MPA build functions */ -static int cm_build_mpa_frame(struct nes_cm_node *, u8 **, u16 *, u8 *, u8); -static void build_mpa_v2(struct nes_cm_node *, void *, u8); -static void build_mpa_v1(struct nes_cm_node *, void *, u8); -static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **); - -static void print_core(struct nes_cm_core *core); -static void record_ird_ord(struct nes_cm_node *, u16, u16); - -/* External CM API Interface */ -/* instance of function pointers for client API */ -/* set address of this instance to cm_core->cm_ops at cm_core alloc */ -static const struct nes_cm_ops nes_cm_api = { - .accelerated = mini_cm_accelerated, - .listen = mini_cm_listen, - .stop_listener = mini_cm_del_listen, - .connect = mini_cm_connect, - .close = mini_cm_close, - .accept = mini_cm_accept, - .reject = mini_cm_reject, - .recv_pkt = mini_cm_recv_pkt, - .destroy_cm_core = mini_cm_dealloc_core, - .get = mini_cm_get, - .set = mini_cm_set -}; - -static struct nes_cm_core *g_cm_core; - -atomic_t cm_connects; -atomic_t cm_accepts; -atomic_t cm_disconnects; -atomic_t cm_closes; -atomic_t cm_connecteds; -atomic_t cm_connect_reqs; -atomic_t cm_rejects; - -int nes_add_ref_cm_node(struct nes_cm_node *cm_node) -{ - return add_ref_cm_node(cm_node); -} - -int nes_rem_ref_cm_node(struct nes_cm_node *cm_node) -{ - return rem_ref_cm_node(cm_node->cm_core, cm_node); -} -/** - * create_event - */ -static struct nes_cm_event *create_event(struct nes_cm_node * cm_node, - enum nes_cm_event_type type) -{ - struct nes_cm_event *event; - - if (!cm_node->cm_id) - return NULL; - - /* allocate an empty event */ - event = kzalloc(sizeof(*event), GFP_ATOMIC); - - if (!event) - return NULL; - - event->type = type; - event->cm_node = cm_node; - event->cm_info.rem_addr = cm_node->rem_addr; - event->cm_info.loc_addr = cm_node->loc_addr; - event->cm_info.rem_port = cm_node->rem_port; - event->cm_info.loc_port = cm_node->loc_port; - event->cm_info.cm_id = cm_node->cm_id; - - nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, " - "dst_addr=%08x[%x], src_addr=%08x[%x]\n", - cm_node, event, type, event->cm_info.loc_addr, - event->cm_info.loc_port, event->cm_info.rem_addr, - event->cm_info.rem_port); - - nes_cm_post_event(event); - return event; -} - - -/** - * send_mpa_request - */ -static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) -{ - u8 start_addr = 0; - u8 *start_ptr = &start_addr; - u8 **start_buff = &start_ptr; - u16 buff_len = 0; - - if (!skb) { - nes_debug(NES_DBG_CM, "skb set to NULL\n"); - return -1; - } - - /* send an MPA Request frame */ - cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REQUEST); - form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK); - - return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); -} - - - -static int send_mpa_reject(struct nes_cm_node *cm_node) -{ - struct sk_buff *skb = NULL; - u8 start_addr = 0; - u8 *start_ptr = &start_addr; - u8 **start_buff = &start_ptr; - u16 
buff_len = 0; - struct ietf_mpa_v1 *mpa_frame; - - skb = dev_alloc_skb(MAX_CM_BUFFER); - if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); - return -ENOMEM; - } - - /* send an MPA reject frame */ - cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY); - mpa_frame = (struct ietf_mpa_v1 *)*start_buff; - mpa_frame->flags |= IETF_MPA_FLAGS_REJECT; - form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN); - - cm_node->state = NES_CM_STATE_FIN_WAIT1; - return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); -} - - -/** - * recv_mpa - process a received TCP pkt, we are expecting an - * IETF MPA frame - */ -static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, - u32 len) -{ - struct ietf_mpa_v1 *mpa_frame; - struct ietf_mpa_v2 *mpa_v2_frame; - struct ietf_rtr_msg *rtr_msg; - int mpa_hdr_len; - int priv_data_len; - - *type = NES_MPA_REQUEST_ACCEPT; - - /* assume req frame is in tcp data payload */ - if (len < sizeof(struct ietf_mpa_v1)) { - nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len); - return -EINVAL; - } - - /* points to the beginning of the frame, which could be MPA V1 or V2 */ - mpa_frame = (struct ietf_mpa_v1 *)buffer; - mpa_hdr_len = sizeof(struct ietf_mpa_v1); - priv_data_len = ntohs(mpa_frame->priv_data_len); - - /* make sure mpa private data len is less than 512 bytes */ - if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) { - nes_debug(NES_DBG_CM, "The received Length of Private" - " Data field exceeds 512 octets\n"); - return -EINVAL; - } - /* - * make sure MPA receiver interoperate with the - * received MPA version and MPA key information - * - */ - if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) { - nes_debug(NES_DBG_CM, "The received mpa version" - " is not supported\n"); - return -EINVAL; - } - /* - * backwards compatibility only - */ - if (mpa_frame->rev > cm_node->mpa_frame_rev) { - nes_debug(NES_DBG_CM, "The received mpa version" - " can not be interoperated\n"); - return -EINVAL; - } else { - cm_node->mpa_frame_rev = mpa_frame->rev; - } - - if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { - if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) { - nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n"); - return -EINVAL; - } - } else { - if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) { - nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n"); - return -EINVAL; - } - } - - if (priv_data_len + mpa_hdr_len != len) { - nes_debug(NES_DBG_CM, "The received ietf buffer was not right" - " complete (%x + %x != %x)\n", - priv_data_len, mpa_hdr_len, len); - return -EINVAL; - } - /* make sure it does not exceed the max size */ - if (len > MAX_CM_BUFFER) { - nes_debug(NES_DBG_CM, "The received ietf buffer was too large" - " (%x + %x != %x)\n", - priv_data_len, mpa_hdr_len, len); - return -EINVAL; - } - - cm_node->mpa_frame_size = priv_data_len; - - switch (mpa_frame->rev) { - case IETF_MPA_V2: { - u16 ird_size; - u16 ord_size; - u16 rtr_ctrl_ird; - u16 rtr_ctrl_ord; - - mpa_v2_frame = (struct ietf_mpa_v2 *)buffer; - mpa_hdr_len += IETF_RTR_MSG_SIZE; - cm_node->mpa_frame_size -= IETF_RTR_MSG_SIZE; - rtr_msg = &mpa_v2_frame->rtr_msg; - - /* parse rtr message */ - rtr_ctrl_ird = ntohs(rtr_msg->ctrl_ird); - rtr_ctrl_ord = ntohs(rtr_msg->ctrl_ord); - ird_size = rtr_ctrl_ird & IETF_NO_IRD_ORD; - ord_size = rtr_ctrl_ord & IETF_NO_IRD_ORD; - - if (!(rtr_ctrl_ird & IETF_PEER_TO_PEER)) { - /* send reset */ - return -EINVAL; - } - if 
(ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) - cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD; - - if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) { - /* responder */ - if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { - /* we are still negotiating */ - if (ord_size > NES_MAX_IRD) { - cm_node->ird_size = NES_MAX_IRD; - } else { - cm_node->ird_size = ord_size; - if (ord_size == 0 && - (rtr_ctrl_ord & IETF_RDMA0_READ)) { - cm_node->ird_size = 1; - nes_debug(NES_DBG_CM, - "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n", - __func__, ord_size); - } - } - if (ird_size > NES_MAX_ORD) - cm_node->ord_size = NES_MAX_ORD; - else - cm_node->ord_size = ird_size; - } else { /* initiator */ - if (ord_size > NES_MAX_IRD) { - nes_debug(NES_DBG_CM, - "%s: Unable to support the requested (ord =%u)\n", - __func__, ord_size); - return -EINVAL; - } - cm_node->ird_size = ord_size; - - if (ird_size > NES_MAX_ORD) { - cm_node->ord_size = NES_MAX_ORD; - } else { - if (ird_size == 0 && - (rtr_ctrl_ord & IETF_RDMA0_READ)) { - nes_debug(NES_DBG_CM, - "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n", - __func__, ird_size); - return -EINVAL; - } else { - cm_node->ord_size = ird_size; - } - } - } - } - - if (rtr_ctrl_ord & IETF_RDMA0_READ) { - cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; - - } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) { - cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO; - } else { /* Not supported RDMA0 operation */ - return -EINVAL; - } - break; - } - case IETF_MPA_V1: - default: - break; - } - - /* copy entire MPA frame to our cm_node's frame */ - memcpy(cm_node->mpa_frame_buf, buffer + mpa_hdr_len, cm_node->mpa_frame_size); - - if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT) - *type = NES_MPA_REQUEST_REJECT; - return 0; -} - - -/** - * form_cm_frame - get a free packet and build empty frame Use - * node info to build. 
- */ -static void form_cm_frame(struct sk_buff *skb, - struct nes_cm_node *cm_node, void *options, u32 optionsize, - void *data, u32 datasize, u8 flags) -{ - struct tcphdr *tcph; - struct iphdr *iph; - struct ethhdr *ethh; - u8 *buf; - u16 packetsize = sizeof(*iph); - - packetsize += sizeof(*tcph); - packetsize += optionsize + datasize; - - skb_trim(skb, 0); - memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph)); - - buf = skb_put(skb, packetsize + ETH_HLEN); - - ethh = (struct ethhdr *)buf; - buf += ETH_HLEN; - - iph = (struct iphdr *)buf; - buf += sizeof(*iph); - tcph = (struct tcphdr *)buf; - skb_reset_mac_header(skb); - skb_set_network_header(skb, ETH_HLEN); - skb_set_transport_header(skb, ETH_HLEN + sizeof(*iph)); - buf += sizeof(*tcph); - - skb->ip_summed = CHECKSUM_PARTIAL; - if (!(cm_node->netdev->features & NETIF_F_IP_CSUM)) - skb->ip_summed = CHECKSUM_NONE; - skb->protocol = htons(0x800); - skb->data_len = 0; - skb->mac_len = ETH_HLEN; - - memcpy(ethh->h_dest, cm_node->rem_mac, ETH_ALEN); - memcpy(ethh->h_source, cm_node->loc_mac, ETH_ALEN); - ethh->h_proto = htons(0x0800); - - iph->version = IPVERSION; - iph->ihl = 5; /* 5 * 4Byte words, IP headr len */ - iph->tos = 0; - iph->tot_len = htons(packetsize); - iph->id = htons(++cm_node->tcp_cntxt.loc_id); - - iph->frag_off = htons(0x4000); - iph->ttl = 0x40; - iph->protocol = 0x06; /* IPPROTO_TCP */ - - iph->saddr = htonl(cm_node->loc_addr); - iph->daddr = htonl(cm_node->rem_addr); - - tcph->source = htons(cm_node->loc_port); - tcph->dest = htons(cm_node->rem_port); - tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num); - - if (flags & SET_ACK) { - cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt; - tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num); - tcph->ack = 1; - } else { - tcph->ack_seq = 0; - } - - if (flags & SET_SYN) { - cm_node->tcp_cntxt.loc_seq_num++; - tcph->syn = 1; - } else { - cm_node->tcp_cntxt.loc_seq_num += datasize; - } - - if (flags & SET_FIN) { - cm_node->tcp_cntxt.loc_seq_num++; - tcph->fin = 1; - } - - if (flags & SET_RST) - tcph->rst = 1; - - tcph->doff = (u16)((sizeof(*tcph) + optionsize + 3) >> 2); - tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd); - tcph->urg_ptr = 0; - if (optionsize) - memcpy(buf, options, optionsize); - buf += optionsize; - if (datasize) - memcpy(buf, data, datasize); - - skb_shinfo(skb)->nr_frags = 0; - cm_packets_created++; -} - -/** - * print_core - dump a cm core - */ -static void print_core(struct nes_cm_core *core) -{ - nes_debug(NES_DBG_CM, "---------------------------------------------\n"); - nes_debug(NES_DBG_CM, "CM Core -- (core = %p )\n", core); - if (!core) - return; - nes_debug(NES_DBG_CM, "---------------------------------------------\n"); - - nes_debug(NES_DBG_CM, "State : %u \n", core->state); - - nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt)); - nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt)); - - nes_debug(NES_DBG_CM, "core : %p \n", core); - - nes_debug(NES_DBG_CM, "-------------- end core ---------------\n"); -} - -static void record_ird_ord(struct nes_cm_node *cm_node, - u16 conn_ird, u16 conn_ord) -{ - if (conn_ird > NES_MAX_IRD) - conn_ird = NES_MAX_IRD; - - if (conn_ord > NES_MAX_ORD) - conn_ord = NES_MAX_ORD; - - cm_node->ird_size = conn_ird; - cm_node->ord_size = conn_ord; -} - -/** - * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame - */ -static int cm_build_mpa_frame(struct nes_cm_node *cm_node, u8 **start_buff, - u16 *buff_len, u8 *pci_mem, u8 mpa_key) -{ - 
int ret = 0; - - *start_buff = (pci_mem) ? pci_mem : &cm_node->mpa_frame_buf[0]; - - switch (cm_node->mpa_frame_rev) { - case IETF_MPA_V1: - *start_buff = (u8 *)*start_buff + sizeof(struct ietf_rtr_msg); - *buff_len = sizeof(struct ietf_mpa_v1) + cm_node->mpa_frame_size; - build_mpa_v1(cm_node, *start_buff, mpa_key); - break; - case IETF_MPA_V2: - *buff_len = sizeof(struct ietf_mpa_v2) + cm_node->mpa_frame_size; - build_mpa_v2(cm_node, *start_buff, mpa_key); - break; - default: - ret = -EINVAL; - } - return ret; -} - -/** - * build_mpa_v2 - build a MPA V2 frame - */ -static void build_mpa_v2(struct nes_cm_node *cm_node, - void *start_addr, u8 mpa_key) -{ - struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr; - struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg; - u16 ctrl_ird; - u16 ctrl_ord; - - /* initialize the upper 5 bytes of the frame */ - build_mpa_v1(cm_node, start_addr, mpa_key); - mpa_frame->flags |= IETF_MPA_V2_FLAG; /* set a bit to indicate MPA V2 */ - mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE); - - /* initialize RTR msg */ - if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { - ctrl_ird = IETF_NO_IRD_ORD; - ctrl_ord = IETF_NO_IRD_ORD; - } else { - ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD; - ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD; - } - ctrl_ird |= IETF_PEER_TO_PEER; - - switch (mpa_key) { - case MPA_KEY_REQUEST: - ctrl_ord |= IETF_RDMA0_WRITE; - ctrl_ord |= IETF_RDMA0_READ; - break; - case MPA_KEY_REPLY: - switch (cm_node->send_rdma0_op) { - case SEND_RDMA_WRITE_ZERO: - ctrl_ord |= IETF_RDMA0_WRITE; - break; - case SEND_RDMA_READ_ZERO: - ctrl_ord |= IETF_RDMA0_READ; - break; - } - } - rtr_msg->ctrl_ird = htons(ctrl_ird); - rtr_msg->ctrl_ord = htons(ctrl_ord); -} - -/** - * build_mpa_v1 - build a MPA V1 frame - */ -static void build_mpa_v1(struct nes_cm_node *cm_node, void *start_addr, u8 mpa_key) -{ - struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr; - - switch (mpa_key) { - case MPA_KEY_REQUEST: - memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE); - break; - case MPA_KEY_REPLY: - memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); - break; - } - mpa_frame->flags = IETF_MPA_FLAGS_CRC; - mpa_frame->rev = cm_node->mpa_frame_rev; - mpa_frame->priv_data_len = htons(cm_node->mpa_frame_size); -} - -static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_addr) -{ - u64 u64temp; - struct nes_qp *nesqp = *nesqp_addr; - struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0]; - - u64temp = (unsigned long)nesqp->nesuqp_addr; - u64temp |= NES_SW_CONTEXT_ALIGN >> 1; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp); - - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; - - switch (cm_node->send_rdma0_op) { - case SEND_RDMA_WRITE_ZERO: - nes_debug(NES_DBG_CM, "Sending first write.\n"); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = - cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; - break; - - case SEND_RDMA_READ_ZERO: - default: - if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) - WARN(1, "Unsupported RDMA0 len operation=%u\n", - cm_node->send_rdma0_op); - nes_debug(NES_DBG_CM, "Sending first rdma operation.\n"); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = - cpu_to_le32(NES_IWARP_SQ_OP_RDMAR); - wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX] = 1; - 
wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_STAG_IDX] = 1; - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 1; - break; - } - - if (nesqp->sq_kmapped) { - nesqp->sq_kmapped = 0; - kunmap(nesqp->page); - } - - /*use the reserved spot on the WQ for the extra first WQE*/ - nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU | - NES_QPCONTEXT_ORDIRD_ALSMM)); - nesqp->skip_lsmm = 1; - nesqp->hwqp.sq_tail = 0; -} - -/** - * schedule_nes_timer - * note - cm_node needs to be protected before calling this. Encase in: - * rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node); - */ -int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, - enum nes_timer_type type, int send_retrans, - int close_when_complete) -{ - unsigned long flags; - struct nes_cm_core *cm_core = cm_node->cm_core; - struct nes_timer_entry *new_send; - int ret = 0; - - new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); - if (!new_send) - return -ENOMEM; - - /* new_send->timetosend = currenttime */ - new_send->retrycount = NES_DEFAULT_RETRYS; - new_send->retranscount = NES_DEFAULT_RETRANS; - new_send->skb = skb; - new_send->timetosend = jiffies; - new_send->type = type; - new_send->netdev = cm_node->netdev; - new_send->send_retrans = send_retrans; - new_send->close_when_complete = close_when_complete; - - if (type == NES_TIMER_TYPE_CLOSE) { - new_send->timetosend += (HZ / 10); - if (cm_node->recv_entry) { - kfree(new_send); - WARN_ON(1); - return -EINVAL; - } - cm_node->recv_entry = new_send; - } - - if (type == NES_TIMER_TYPE_SEND) { - new_send->seq_num = ntohl(tcp_hdr(skb)->seq); - refcount_inc(&new_send->skb->users); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - cm_node->send_entry = new_send; - add_ref_cm_node(cm_node); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; - - ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); - if (ret != NETDEV_TX_OK) { - nes_debug(NES_DBG_CM, "Error sending packet %p " - "(jiffies = %lu)\n", new_send, jiffies); - new_send->timetosend = jiffies; - ret = NETDEV_TX_OK; - } else { - cm_packets_sent++; - if (!send_retrans) { - cleanup_retrans_entry(cm_node); - if (close_when_complete) - rem_ref_cm_node(cm_core, cm_node); - return ret; - } - } - } - - if (!timer_pending(&cm_core->tcp_timer)) - mod_timer(&cm_core->tcp_timer, new_send->timetosend); - - return ret; -} - -static void nes_retrans_expired(struct nes_cm_node *cm_node) -{ - struct iw_cm_id *cm_id = cm_node->cm_id; - enum nes_cm_node_state state = cm_node->state; - cm_node->state = NES_CM_STATE_CLOSED; - - switch (state) { - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_CLOSING: - rem_ref_cm_node(cm_node->cm_core, cm_node); - break; - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_FIN_WAIT1: - if (cm_node->cm_id) - cm_id->rem_ref(cm_id); - send_reset(cm_node, NULL); - break; - default: - add_ref_cm_node(cm_node); - send_reset(cm_node, NULL); - create_event(cm_node, NES_CM_EVENT_ABORTED); - } -} - -static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node) -{ - struct nes_timer_entry *recv_entry = cm_node->recv_entry; - struct iw_cm_id *cm_id = cm_node->cm_id; - struct nes_qp *nesqp; - unsigned long qplockflags; - - if (!recv_entry) - return; - nesqp = (struct nes_qp *)recv_entry->skb; - if (nesqp) { - spin_lock_irqsave(&nesqp->lock, qplockflags); - if 
(nesqp->cm_id) { - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " - "refcount = %d: HIT A " - "NES_TIMER_TYPE_CLOSE with something " - "to do!!!\n", nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - nesqp->ibqp_state = IB_QPS_ERR; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_cm_disconn(nesqp); - } else { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " - "refcount = %d: HIT A " - "NES_TIMER_TYPE_CLOSE with nothing " - "to do!!!\n", nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - } - } else if (rem_node) { - /* TIME_WAIT state */ - rem_ref_cm_node(cm_node->cm_core, cm_node); - } - if (cm_node->cm_id) - cm_id->rem_ref(cm_id); - kfree(recv_entry); - cm_node->recv_entry = NULL; -} - -/** - * nes_cm_timer_tick - */ -static void nes_cm_timer_tick(struct timer_list *unused) -{ - unsigned long flags; - unsigned long nexttimeout = jiffies + NES_LONG_TIME; - struct nes_cm_node *cm_node; - struct nes_timer_entry *send_entry, *recv_entry; - struct list_head *list_core_temp; - struct list_head *list_node; - struct nes_cm_core *cm_core = g_cm_core; - u32 settimer = 0; - unsigned long timetosend; - int ret = NETDEV_TX_OK; - - struct list_head timer_list; - - INIT_LIST_HEAD(&timer_list); - spin_lock_irqsave(&cm_core->ht_lock, flags); - - list_for_each_safe(list_node, list_core_temp, - &cm_core->connected_nodes) { - cm_node = container_of(list_node, struct nes_cm_node, list); - if ((cm_node->recv_entry) || (cm_node->send_entry)) { - add_ref_cm_node(cm_node); - list_add(&cm_node->timer_entry, &timer_list); - } - } - spin_unlock_irqrestore(&cm_core->ht_lock, flags); - - list_for_each_safe(list_node, list_core_temp, &timer_list) { - cm_node = container_of(list_node, struct nes_cm_node, - timer_entry); - recv_entry = cm_node->recv_entry; - - if (recv_entry) { - if (time_after(recv_entry->timetosend, jiffies)) { - if (nexttimeout > recv_entry->timetosend || - !settimer) { - nexttimeout = recv_entry->timetosend; - settimer = 1; - } - } else { - handle_recv_entry(cm_node, 1); - } - } - - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - do { - send_entry = cm_node->send_entry; - if (!send_entry) - break; - if (time_after(send_entry->timetosend, jiffies)) { - if (cm_node->state != NES_CM_STATE_TSA) { - if ((nexttimeout > - send_entry->timetosend) || - !settimer) { - nexttimeout = - send_entry->timetosend; - settimer = 1; - } - } else { - free_retrans_entry(cm_node); - } - break; - } - - if ((cm_node->state == NES_CM_STATE_TSA) || - (cm_node->state == NES_CM_STATE_CLOSED)) { - free_retrans_entry(cm_node); - break; - } - - if (!send_entry->retranscount || - !send_entry->retrycount) { - cm_packets_dropped++; - free_retrans_entry(cm_node); - - spin_unlock_irqrestore( - &cm_node->retrans_list_lock, flags); - nes_retrans_expired(cm_node); - cm_node->state = NES_CM_STATE_CLOSED; - spin_lock_irqsave(&cm_node->retrans_list_lock, - flags); - break; - } - refcount_inc(&send_entry->skb->users); - cm_packets_retrans++; - nes_debug(NES_DBG_CM, "Retransmitting send_entry %p " - "for node %p, jiffies = %lu, time to send = " - "%lu, retranscount = %u, send_entry->seq_num = " - "0x%08X, cm_node->tcp_cntxt.rem_ack_num = " - "0x%08X\n", send_entry, cm_node, jiffies, - send_entry->timetosend, - send_entry->retranscount, - send_entry->seq_num, - cm_node->tcp_cntxt.rem_ack_num); - - spin_unlock_irqrestore(&cm_node->retrans_list_lock, - flags); - ret = 
nes_nic_cm_xmit(send_entry->skb, cm_node->netdev); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - if (ret != NETDEV_TX_OK) { - nes_debug(NES_DBG_CM, "rexmit failed for " - "node=%p\n", cm_node); - cm_packets_bounced++; - send_entry->retrycount--; - nexttimeout = jiffies + NES_SHORT_TIME; - settimer = 1; - break; - } else { - cm_packets_sent++; - } - nes_debug(NES_DBG_CM, "Packet Sent: retrans count = " - "%u, retry count = %u.\n", - send_entry->retranscount, - send_entry->retrycount); - if (send_entry->send_retrans) { - send_entry->retranscount--; - timetosend = (NES_RETRY_TIMEOUT << - (NES_DEFAULT_RETRANS - send_entry->retranscount)); - - send_entry->timetosend = jiffies + - min(timetosend, NES_MAX_TIMEOUT); - if (nexttimeout > send_entry->timetosend || - !settimer) { - nexttimeout = send_entry->timetosend; - settimer = 1; - } - } else { - int close_when_complete; - close_when_complete = - send_entry->close_when_complete; - nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n", - cm_node, cm_node->state); - free_retrans_entry(cm_node); - if (close_when_complete) - rem_ref_cm_node(cm_node->cm_core, - cm_node); - } - } while (0); - - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - rem_ref_cm_node(cm_node->cm_core, cm_node); - } - - if (settimer) { - if (!timer_pending(&cm_core->tcp_timer)) - mod_timer(&cm_core->tcp_timer, nexttimeout); - } -} - - -/** - * send_syn - */ -static int send_syn(struct nes_cm_node *cm_node, u32 sendack, - struct sk_buff *skb) -{ - int ret; - int flags = SET_SYN; - char optionsbuffer[sizeof(struct option_mss) + - sizeof(struct option_windowscale) + sizeof(struct option_base) + - TCP_OPTIONS_PADDING]; - - int optionssize = 0; - /* Sending MSS option */ - union all_known_options *options; - - if (!cm_node) - return -EINVAL; - - options = (union all_known_options *)&optionsbuffer[optionssize]; - options->as_mss.optionnum = OPTION_NUMBER_MSS; - options->as_mss.length = sizeof(struct option_mss); - options->as_mss.mss = htons(cm_node->tcp_cntxt.mss); - optionssize += sizeof(struct option_mss); - - options = (union all_known_options *)&optionsbuffer[optionssize]; - options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE; - options->as_windowscale.length = sizeof(struct option_windowscale); - options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; - optionssize += sizeof(struct option_windowscale); - - if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) { - options = (union all_known_options *)&optionsbuffer[optionssize]; - options->as_base.optionnum = OPTION_NUMBER_WRITE0; - options->as_base.length = sizeof(struct option_base); - optionssize += sizeof(struct option_base); - /* we need the size to be a multiple of 4 */ - options = (union all_known_options *)&optionsbuffer[optionssize]; - options->as_end = 1; - optionssize += 1; - options = (union all_known_options *)&optionsbuffer[optionssize]; - options->as_end = 1; - optionssize += 1; - } - - options = (union all_known_options *)&optionsbuffer[optionssize]; - options->as_end = OPTION_NUMBER_END; - optionssize += 1; - - if (!skb) - skb = dev_alloc_skb(MAX_CM_BUFFER); - if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); - return -1; - } - - if (sendack) - flags |= SET_ACK; - - form_cm_frame(skb, cm_node, optionsbuffer, optionssize, NULL, 0, flags); - ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); - - return ret; -} - - -/** - * send_reset - */ -static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb) -{ - int ret; - int 
flags = SET_RST | SET_ACK; - - if (!skb) - skb = dev_alloc_skb(MAX_CM_BUFFER); - if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); - return -ENOMEM; - } - - form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags); - ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1); - - return ret; -} - - -/** - * send_ack - */ -static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb) -{ - int ret; - - if (!skb) - skb = dev_alloc_skb(MAX_CM_BUFFER); - - if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); - return -1; - } - - form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK); - ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 0); - - return ret; -} - - -/** - * send_fin - */ -static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb) -{ - int ret; - - /* if we didn't get a frame get one */ - if (!skb) - skb = dev_alloc_skb(MAX_CM_BUFFER); - - if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); - return -1; - } - - form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK | SET_FIN); - ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); - - return ret; -} - - -/** - * find_node - find a cm node that matches the reference cm node - */ -static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, - u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr) -{ - unsigned long flags; - struct list_head *hte; - struct nes_cm_node *cm_node; - - /* get a handle on the hte */ - hte = &cm_core->connected_nodes; - - /* walk list and find cm_node associated with this session ID */ - spin_lock_irqsave(&cm_core->ht_lock, flags); - list_for_each_entry(cm_node, hte, list) { - /* compare quad, return node handle if a match */ - nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? 
%x:%x\n", - cm_node->loc_addr, cm_node->loc_port, - loc_addr, loc_port, - cm_node->rem_addr, cm_node->rem_port, - rem_addr, rem_port); - if ((cm_node->loc_addr == loc_addr) && - (cm_node->loc_port == loc_port) && - (cm_node->rem_addr == rem_addr) && - (cm_node->rem_port == rem_port)) { - add_ref_cm_node(cm_node); - spin_unlock_irqrestore(&cm_core->ht_lock, flags); - return cm_node; - } - } - spin_unlock_irqrestore(&cm_core->ht_lock, flags); - - /* no owner node */ - return NULL; -} - - -/** - * find_listener - find a cm node listening on this addr-port pair - */ -static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, - nes_addr_t dst_addr, u16 dst_port, - enum nes_cm_listener_state listener_state) -{ - unsigned long flags; - struct nes_cm_listener *listen_node; - nes_addr_t listen_addr; - u16 listen_port; - - /* walk list and find cm_node associated with this session ID */ - spin_lock_irqsave(&cm_core->listen_list_lock, flags); - list_for_each_entry(listen_node, &cm_core->listen_list.list, list) { - listen_addr = listen_node->loc_addr; - listen_port = listen_node->loc_port; - - /* compare node pair, return node handle if a match */ - if (((listen_addr == dst_addr) || - listen_addr == 0x00000000) && - (listen_port == dst_port) && - (listener_state & listen_node->listener_state)) { - atomic_inc(&listen_node->ref_count); - spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); - return listen_node; - } - } - spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); - - /* no listener */ - return NULL; -} - -/** - * add_hte_node - add a cm node to the hash table - */ -static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) -{ - unsigned long flags; - struct list_head *hte; - - if (!cm_node || !cm_core) - return -EINVAL; - - nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n", - cm_node); - - spin_lock_irqsave(&cm_core->ht_lock, flags); - - /* get a handle on the hash table element (list head for this slot) */ - hte = &cm_core->connected_nodes; - list_add_tail(&cm_node->list, hte); - atomic_inc(&cm_core->ht_node_cnt); - - spin_unlock_irqrestore(&cm_core->ht_lock, flags); - - return 0; -} - - -/** - * mini_cm_dec_refcnt_listen - */ -static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, - struct nes_cm_listener *listener, int free_hanging_nodes) -{ - int ret = -EINVAL; - int err = 0; - unsigned long flags; - struct list_head *list_pos = NULL; - struct list_head *list_temp = NULL; - struct nes_cm_node *cm_node = NULL; - struct list_head reset_list; - - nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, " - "refcnt=%d\n", listener, free_hanging_nodes, - atomic_read(&listener->ref_count)); - /* free non-accelerated child nodes for this listener */ - INIT_LIST_HEAD(&reset_list); - if (free_hanging_nodes) { - spin_lock_irqsave(&cm_core->ht_lock, flags); - list_for_each_safe(list_pos, list_temp, - &g_cm_core->connected_nodes) { - cm_node = container_of(list_pos, struct nes_cm_node, - list); - if ((cm_node->listener == listener) && - (!cm_node->accelerated)) { - add_ref_cm_node(cm_node); - list_add(&cm_node->reset_entry, &reset_list); - } - } - spin_unlock_irqrestore(&cm_core->ht_lock, flags); - } - - list_for_each_safe(list_pos, list_temp, &reset_list) { - cm_node = container_of(list_pos, struct nes_cm_node, - reset_entry); - { - struct nes_cm_node *loopback = cm_node->loopbackpartner; - enum nes_cm_node_state old_state; - if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) { - rem_ref_cm_node(cm_node->cm_core, cm_node); - 
} else { - if (!loopback) { - cleanup_retrans_entry(cm_node); - err = send_reset(cm_node, NULL); - if (err) { - cm_node->state = - NES_CM_STATE_CLOSED; - WARN_ON(1); - } else { - old_state = cm_node->state; - cm_node->state = NES_CM_STATE_LISTENER_DESTROYED; - if (old_state != NES_CM_STATE_MPAREQ_RCVD) - rem_ref_cm_node( - cm_node->cm_core, - cm_node); - } - } else { - struct nes_cm_event event; - - event.cm_node = loopback; - event.cm_info.rem_addr = - loopback->rem_addr; - event.cm_info.loc_addr = - loopback->loc_addr; - event.cm_info.rem_port = - loopback->rem_port; - event.cm_info.loc_port = - loopback->loc_port; - event.cm_info.cm_id = loopback->cm_id; - add_ref_cm_node(loopback); - loopback->state = NES_CM_STATE_CLOSED; - cm_event_connect_error(&event); - cm_node->state = NES_CM_STATE_LISTENER_DESTROYED; - - rem_ref_cm_node(cm_node->cm_core, - cm_node); - - } - } - } - } - - spin_lock_irqsave(&cm_core->listen_list_lock, flags); - if (!atomic_dec_return(&listener->ref_count)) { - list_del(&listener->list); - - /* decrement our listen node count */ - atomic_dec(&cm_core->listen_node_cnt); - - spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); - - if (listener->nesvnic) { - nes_manage_apbvt(listener->nesvnic, - listener->loc_port, - PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), - NES_MANAGE_APBVT_DEL); - - nes_debug(NES_DBG_NLMSG, - "Delete APBVT loc_port = %04X\n", - listener->loc_port); - } - - nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); - - kfree(listener); - listener = NULL; - ret = 0; - atomic_inc(&cm_listens_destroyed); - } else { - spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); - } - if (listener) { - if (atomic_read(&listener->pend_accepts_cnt) > 0) - nes_debug(NES_DBG_CM, "destroying listener (%p)" - " with non-zero pending accepts=%u\n", - listener, atomic_read(&listener->pend_accepts_cnt)); - } - - return ret; -} - - -/** - * mini_cm_del_listen - */ -static int mini_cm_del_listen(struct nes_cm_core *cm_core, - struct nes_cm_listener *listener) -{ - listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE; - listener->cm_id = NULL; /* going to be destroyed pretty soon */ - return mini_cm_dec_refcnt_listen(cm_core, listener, 1); -} - - -/** - * mini_cm_accelerated - */ -static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, - struct nes_cm_node *cm_node) -{ - cm_node->accelerated = true; - - if (cm_node->accept_pend) { - BUG_ON(!cm_node->listener); - atomic_dec(&cm_node->listener->pend_accepts_cnt); - cm_node->accept_pend = 0; - BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); - } - - if (!timer_pending(&cm_core->tcp_timer)) - mod_timer(&cm_core->tcp_timer, (jiffies + NES_SHORT_TIME)); - - return 0; -} - - -/** - * nes_addr_resolve_neigh - */ -static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex) -{ - struct rtable *rt; - struct neighbour *neigh; - int rc = arpindex; - struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; - __be32 dst_ipaddr = htonl(dst_ip); - - rt = ip_route_output(&init_net, dst_ipaddr, nesvnic->local_ipaddr, 0, 0); - if (IS_ERR(rt)) { - printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n", - __func__, dst_ip); - return rc; - } - - neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr); - - rcu_read_lock(); - if (neigh) { - if (neigh->nud_state & NUD_VALID) { - nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X" - " is %pM, Gateway is 0x%08X \n", dst_ip, - neigh->ha, ntohl(rt->rt_gw4)); - - if (arpindex >= 0) { - if 
(ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) { - /* Mac address same as in nes_arp_table */ - goto out; - } - - nes_manage_arp_cache(nesvnic->netdev, - nesadapter->arp_table[arpindex].mac_addr, - dst_ip, NES_ARP_DELETE); - } - - nes_manage_arp_cache(nesvnic->netdev, neigh->ha, - dst_ip, NES_ARP_ADD); - rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL, - NES_ARP_RESOLVE); - } else { - neigh_event_send(neigh, NULL); - } - } -out: - rcu_read_unlock(); - - if (neigh) - neigh_release(neigh); - - ip_rt_put(rt); - return rc; -} - -/** - * make_cm_node - create a new instance of a cm node - */ -static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct nes_cm_info *cm_info, - struct nes_cm_listener *listener) -{ - struct nes_cm_node *cm_node; - int oldarpindex = 0; - int arpindex = 0; - struct nes_device *nesdev; - struct nes_adapter *nesadapter; - - /* create an hte and cm_node for this instance */ - cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC); - if (!cm_node) - return NULL; - - /* set our node specific transport info */ - if (listener) { - cm_node->loc_addr = listener->loc_addr; - cm_node->loc_port = listener->loc_port; - } else { - cm_node->loc_addr = cm_info->loc_addr; - cm_node->loc_port = cm_info->loc_port; - } - cm_node->rem_addr = cm_info->rem_addr; - cm_node->rem_port = cm_info->rem_port; - - cm_node->mpa_frame_rev = mpa_version; - cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; - cm_node->mpav2_ird_ord = 0; - cm_node->ird_size = 0; - cm_node->ord_size = 0; - - nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n", - &cm_node->loc_addr, cm_node->loc_port, - &cm_node->rem_addr, cm_node->rem_port); - cm_node->listener = listener; - if (listener) - cm_node->tos = listener->tos; - cm_node->netdev = nesvnic->netdev; - cm_node->cm_id = cm_info->cm_id; - memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); - - nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener, - cm_node->cm_id); - - spin_lock_init(&cm_node->retrans_list_lock); - - cm_node->loopbackpartner = NULL; - atomic_set(&cm_node->ref_count, 1); - /* associate our parent CM core */ - cm_node->cm_core = cm_core; - cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID; - cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; - cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> - NES_CM_DEFAULT_RCV_WND_SCALE; - cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr), - htonl(cm_node->rem_addr), - htons(cm_node->loc_port), - htons(cm_node->rem_port)); - cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - - sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; - cm_node->tcp_cntxt.rcv_nxt = 0; - /* get a unique session ID , add thread_id to an upcounter to handle race */ - atomic_inc(&cm_core->node_cnt); - cm_node->conn_type = cm_info->conn_type; - cm_node->apbvt_set = 0; - cm_node->accept_pend = 0; - - cm_node->nesvnic = nesvnic; - /* get some device handles, for arp lookup */ - nesdev = nesvnic->nesdev; - nesadapter = nesdev->nesadapter; - - cm_node->loopbackpartner = NULL; - - /* get the mac addr for the remote node */ - oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, - NULL, NES_ARP_RESOLVE); - arpindex = nes_addr_resolve_neigh(nesvnic, cm_node->rem_addr, - oldarpindex); - if (arpindex < 0) { - kfree(cm_node); - return NULL; - } - - /* copy the mac addr to node context */ - memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN); - 
nes_debug(NES_DBG_CM, "Remote mac addr from arp table: %pM\n", - cm_node->rem_mac); - - add_hte_node(cm_core, cm_node); - atomic_inc(&cm_nodes_created); - - return cm_node; -} - - -/** - * add_ref_cm_node - destroy an instance of a cm node - */ -static int add_ref_cm_node(struct nes_cm_node *cm_node) -{ - atomic_inc(&cm_node->ref_count); - return 0; -} - - -/** - * rem_ref_cm_node - destroy an instance of a cm node - */ -static int rem_ref_cm_node(struct nes_cm_core *cm_core, - struct nes_cm_node *cm_node) -{ - unsigned long flags; - struct nes_qp *nesqp; - - if (!cm_node) - return -EINVAL; - - spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags); - if (atomic_dec_return(&cm_node->ref_count)) { - spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags); - return 0; - } - list_del(&cm_node->list); - atomic_dec(&cm_core->ht_node_cnt); - spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags); - - /* if the node is destroyed before connection was accelerated */ - if (!cm_node->accelerated && cm_node->accept_pend) { - BUG_ON(!cm_node->listener); - atomic_dec(&cm_node->listener->pend_accepts_cnt); - BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); - } - WARN_ON(cm_node->send_entry); - if (cm_node->recv_entry) - handle_recv_entry(cm_node, 0); - if (cm_node->listener) { - mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0); - } else { - if (cm_node->apbvt_set && cm_node->nesvnic) { - nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, - PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn), - NES_MANAGE_APBVT_DEL); - } - nes_debug(NES_DBG_NLMSG, "Delete APBVT loc_port = %04X\n", - cm_node->loc_port); - } - - atomic_dec(&cm_core->node_cnt); - atomic_inc(&cm_nodes_destroyed); - nesqp = cm_node->nesqp; - if (nesqp) { - nesqp->cm_node = NULL; - nes_rem_ref(&nesqp->ibqp); - cm_node->nesqp = NULL; - } - - kfree(cm_node); - return 0; -} - -/** - * process_options - */ -static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, - u32 optionsize, u32 syn_packet) -{ - u32 tmp; - u32 offset = 0; - union all_known_options *all_options; - char got_mss_option = 0; - - while (offset < optionsize) { - all_options = (union all_known_options *)(optionsloc + offset); - switch (all_options->as_base.optionnum) { - case OPTION_NUMBER_END: - offset = optionsize; - break; - case OPTION_NUMBER_NONE: - offset += 1; - continue; - case OPTION_NUMBER_MSS: - nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d " - "Size: %d\n", __func__, - all_options->as_mss.length, offset, optionsize); - got_mss_option = 1; - if (all_options->as_mss.length != 4) { - return 1; - } else { - tmp = ntohs(all_options->as_mss.mss); - if (tmp > 0 && tmp < - cm_node->tcp_cntxt.mss) - cm_node->tcp_cntxt.mss = tmp; - } - break; - case OPTION_NUMBER_WINDOW_SCALE: - cm_node->tcp_cntxt.snd_wscale = - all_options->as_windowscale.shiftcount; - break; - default: - nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", - all_options->as_base.optionnum); - break; - } - offset += all_options->as_base.length; - } - if ((!got_mss_option) && (syn_packet)) - cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS; - return 0; -} - -static void drop_packet(struct sk_buff *skb) -{ - atomic_inc(&cm_accel_dropped_pkts); - dev_kfree_skb_any(skb); -} - -static void handle_fin_pkt(struct nes_cm_node *cm_node) -{ - nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. 
" - "refcnt=%d\n", cm_node, cm_node->state, - atomic_read(&cm_node->ref_count)); - switch (cm_node->state) { - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_MPAREJ_RCVD: - cm_node->tcp_cntxt.rcv_nxt++; - cleanup_retrans_entry(cm_node); - cm_node->state = NES_CM_STATE_LAST_ACK; - send_fin(cm_node, NULL); - break; - case NES_CM_STATE_MPAREQ_SENT: - create_event(cm_node, NES_CM_EVENT_ABORTED); - cm_node->tcp_cntxt.rcv_nxt++; - cleanup_retrans_entry(cm_node); - cm_node->state = NES_CM_STATE_CLOSED; - add_ref_cm_node(cm_node); - send_reset(cm_node, NULL); - break; - case NES_CM_STATE_FIN_WAIT1: - cm_node->tcp_cntxt.rcv_nxt++; - cleanup_retrans_entry(cm_node); - cm_node->state = NES_CM_STATE_CLOSING; - send_ack(cm_node, NULL); - /* Wait for ACK as this is simultaneous close.. - * After we receive ACK, do not send anything.. - * Just rm the node.. Done.. */ - break; - case NES_CM_STATE_FIN_WAIT2: - cm_node->tcp_cntxt.rcv_nxt++; - cleanup_retrans_entry(cm_node); - cm_node->state = NES_CM_STATE_TIME_WAIT; - send_ack(cm_node, NULL); - schedule_nes_timer(cm_node, NULL, NES_TIMER_TYPE_CLOSE, 1, 0); - break; - case NES_CM_STATE_TIME_WAIT: - cm_node->tcp_cntxt.rcv_nxt++; - cleanup_retrans_entry(cm_node); - cm_node->state = NES_CM_STATE_CLOSED; - rem_ref_cm_node(cm_node->cm_core, cm_node); - break; - case NES_CM_STATE_TSA: - default: - nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n", - cm_node, cm_node->state); - break; - } -} - - -static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct tcphdr *tcph) -{ - - int reset = 0; /* whether to send reset in case of err.. */ - atomic_inc(&cm_resets_recvd); - nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u." 
- " refcnt=%d\n", cm_node, cm_node->state, - atomic_read(&cm_node->ref_count)); - cleanup_retrans_entry(cm_node); - switch (cm_node->state) { - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_MPAREQ_SENT: - nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " - "listener=%p state=%d\n", __func__, __LINE__, cm_node, - cm_node->listener, cm_node->state); - switch (cm_node->mpa_frame_rev) { - case IETF_MPA_V2: - cm_node->mpa_frame_rev = IETF_MPA_V1; - /* send a syn and goto syn sent state */ - cm_node->state = NES_CM_STATE_SYN_SENT; - if (send_syn(cm_node, 0, NULL)) { - active_open_err(cm_node, skb, reset); - } - break; - case IETF_MPA_V1: - default: - active_open_err(cm_node, skb, reset); - break; - } - break; - case NES_CM_STATE_MPAREQ_RCVD: - atomic_inc(&cm_node->passive_state); - dev_kfree_skb_any(skb); - break; - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_LISTENING: - nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__); - passive_open_err(cm_node, skb, reset); - break; - case NES_CM_STATE_TSA: - active_open_err(cm_node, skb, reset); - break; - case NES_CM_STATE_CLOSED: - drop_packet(skb); - break; - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_LAST_ACK: - cm_node->cm_id->rem_ref(cm_node->cm_id); - /* fall through */ - case NES_CM_STATE_TIME_WAIT: - cm_node->state = NES_CM_STATE_CLOSED; - rem_ref_cm_node(cm_node->cm_core, cm_node); - drop_packet(skb); - break; - default: - drop_packet(skb); - break; - } -} - - -static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb) -{ - int ret = 0; - int datasize = skb->len; - u8 *dataloc = skb->data; - - enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN; - u32 res_type; - - ret = parse_mpa(cm_node, dataloc, &res_type, datasize); - if (ret) { - nes_debug(NES_DBG_CM, "didn't like MPA Request\n"); - if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) { - nes_debug(NES_DBG_CM, "%s[%u] create abort for " - "cm_node=%p listener=%p state=%d\n", __func__, - __LINE__, cm_node, cm_node->listener, - cm_node->state); - active_open_err(cm_node, skb, 1); - } else { - passive_open_err(cm_node, skb, 1); - } - return; - } - - switch (cm_node->state) { - case NES_CM_STATE_ESTABLISHED: - if (res_type == NES_MPA_REQUEST_REJECT) - /*BIG problem as we are receiving the MPA.. So should - * not be REJECT.. This is Passive Open.. 
We can - * only receive it Reject for Active Open...*/ - WARN_ON(1); - cm_node->state = NES_CM_STATE_MPAREQ_RCVD; - type = NES_CM_EVENT_MPA_REQ; - atomic_set(&cm_node->passive_state, - NES_PASSIVE_STATE_INDICATED); - break; - case NES_CM_STATE_MPAREQ_SENT: - cleanup_retrans_entry(cm_node); - if (res_type == NES_MPA_REQUEST_REJECT) { - type = NES_CM_EVENT_MPA_REJECT; - cm_node->state = NES_CM_STATE_MPAREJ_RCVD; - } else { - type = NES_CM_EVENT_CONNECTED; - cm_node->state = NES_CM_STATE_TSA; - } - send_ack(cm_node, NULL); - break; - default: - WARN_ON(1); - break; - } - dev_kfree_skb_any(skb); - create_event(cm_node, type); -} - -static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb) -{ - switch (cm_node->state) { - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_MPAREQ_SENT: - nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " - "listener=%p state=%d\n", __func__, __LINE__, cm_node, - cm_node->listener, cm_node->state); - active_open_err(cm_node, skb, 1); - break; - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_SYN_RCVD: - passive_open_err(cm_node, skb, 1); - break; - case NES_CM_STATE_TSA: - default: - drop_packet(skb); - } -} - -static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph, - struct sk_buff *skb) -{ - int err; - - err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num)) ? 0 : 1; - if (err) - active_open_err(cm_node, skb, 1); - - return err; -} - -static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph, - struct sk_buff *skb) -{ - int err = 0; - u32 seq; - u32 ack_seq; - u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num; - u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; - u32 rcv_wnd; - - seq = ntohl(tcph->seq); - ack_seq = ntohl(tcph->ack_seq); - rcv_wnd = cm_node->tcp_cntxt.rcv_wnd; - if (ack_seq != loc_seq_num) - err = 1; - else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd))) - err = 1; - if (err) { - nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " - "listener=%p state=%d\n", __func__, __LINE__, cm_node, - cm_node->listener, cm_node->state); - indicate_pkt_err(cm_node, skb); - nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X " - "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt, - rcv_wnd); - } - return err; -} - -/* - * handle_syn_pkt() is for Passive node. The syn packet is received when a node - * is created with a listener or it may comein as rexmitted packet which in - * that case will be just dropped. 
- */ -static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct tcphdr *tcph) -{ - int ret; - u32 inc_sequence; - int optionsize; - - optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); - skb_trim(skb, 0); - inc_sequence = ntohl(tcph->seq); - - switch (cm_node->state) { - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_MPAREQ_SENT: - /* Rcvd syn on active open connection*/ - active_open_err(cm_node, skb, 1); - break; - case NES_CM_STATE_LISTENING: - /* Passive OPEN */ - if (atomic_read(&cm_node->listener->pend_accepts_cnt) > - cm_node->listener->backlog) { - nes_debug(NES_DBG_CM, "drop syn due to backlog " - "pressure \n"); - cm_backlog_drops++; - passive_open_err(cm_node, skb, 0); - break; - } - ret = handle_tcp_options(cm_node, tcph, skb, optionsize, - 1); - if (ret) { - passive_open_err(cm_node, skb, 0); - /* drop pkt */ - break; - } - cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; - BUG_ON(cm_node->send_entry); - cm_node->accept_pend = 1; - atomic_inc(&cm_node->listener->pend_accepts_cnt); - - cm_node->state = NES_CM_STATE_SYN_RCVD; - send_syn(cm_node, 1, skb); - break; - case NES_CM_STATE_CLOSED: - cleanup_retrans_entry(cm_node); - add_ref_cm_node(cm_node); - send_reset(cm_node, skb); - break; - case NES_CM_STATE_TSA: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_MPAREQ_RCVD: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_CLOSING: - case NES_CM_STATE_UNKNOWN: - default: - drop_packet(skb); - break; - } -} - -static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct tcphdr *tcph) -{ - int ret; - u32 inc_sequence; - int optionsize; - - optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); - skb_trim(skb, 0); - inc_sequence = ntohl(tcph->seq); - switch (cm_node->state) { - case NES_CM_STATE_SYN_SENT: - cleanup_retrans_entry(cm_node); - /* active open */ - if (check_syn(cm_node, tcph, skb)) - return; - cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); - /* setup options */ - ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0); - if (ret) { - nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n", - cm_node); - break; - } - cleanup_retrans_entry(cm_node); - cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; - send_mpa_request(cm_node, skb); - cm_node->state = NES_CM_STATE_MPAREQ_SENT; - break; - case NES_CM_STATE_MPAREQ_RCVD: - /* passive open, so should not be here */ - passive_open_err(cm_node, skb, 1); - break; - case NES_CM_STATE_LISTENING: - cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); - cleanup_retrans_entry(cm_node); - cm_node->state = NES_CM_STATE_CLOSED; - send_reset(cm_node, skb); - break; - case NES_CM_STATE_CLOSED: - cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); - cleanup_retrans_entry(cm_node); - add_ref_cm_node(cm_node); - send_reset(cm_node, skb); - break; - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_TSA: - case NES_CM_STATE_CLOSING: - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_MPAREQ_SENT: - default: - drop_packet(skb); - break; - } -} - -static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct tcphdr *tcph) -{ - int datasize = 0; - u32 inc_sequence; - int ret = 0; - int optionsize; - - optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); - - if (check_seq(cm_node, tcph, skb)) - return -EINVAL; - - skb_pull(skb, tcph->doff << 2); - inc_sequence = ntohl(tcph->seq); - datasize = 
skb->len; - switch (cm_node->state) { - case NES_CM_STATE_SYN_RCVD: - /* Passive OPEN */ - cleanup_retrans_entry(cm_node); - ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1); - if (ret) - break; - cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); - cm_node->state = NES_CM_STATE_ESTABLISHED; - if (datasize) { - cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; - handle_rcv_mpa(cm_node, skb); - } else { /* rcvd ACK only */ - dev_kfree_skb_any(skb); - } - break; - case NES_CM_STATE_ESTABLISHED: - /* Passive OPEN */ - cleanup_retrans_entry(cm_node); - if (datasize) { - cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; - handle_rcv_mpa(cm_node, skb); - } else { - drop_packet(skb); - } - break; - case NES_CM_STATE_MPAREQ_SENT: - cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); - if (datasize) { - cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; - handle_rcv_mpa(cm_node, skb); - } else { /* Could be just an ack pkt.. */ - dev_kfree_skb_any(skb); - } - break; - case NES_CM_STATE_LISTENING: - cleanup_retrans_entry(cm_node); - cm_node->state = NES_CM_STATE_CLOSED; - send_reset(cm_node, skb); - break; - case NES_CM_STATE_CLOSED: - cleanup_retrans_entry(cm_node); - add_ref_cm_node(cm_node); - send_reset(cm_node, skb); - break; - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_CLOSING: - cleanup_retrans_entry(cm_node); - cm_node->state = NES_CM_STATE_CLOSED; - cm_node->cm_id->rem_ref(cm_node->cm_id); - rem_ref_cm_node(cm_node->cm_core, cm_node); - drop_packet(skb); - break; - case NES_CM_STATE_FIN_WAIT1: - cleanup_retrans_entry(cm_node); - drop_packet(skb); - cm_node->state = NES_CM_STATE_FIN_WAIT2; - break; - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_TSA: - case NES_CM_STATE_MPAREQ_RCVD: - case NES_CM_STATE_UNKNOWN: - default: - cleanup_retrans_entry(cm_node); - drop_packet(skb); - break; - } - return ret; -} - - - -static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, - struct sk_buff *skb, int optionsize, int passive) -{ - u8 *optionsloc = (u8 *)&tcph[1]; - - if (optionsize) { - if (process_options(cm_node, optionsloc, optionsize, - (u32)tcph->syn)) { - nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", - __func__, cm_node); - if (passive) - passive_open_err(cm_node, skb, 1); - else - active_open_err(cm_node, skb, 1); - return 1; - } - } - - cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) << - cm_node->tcp_cntxt.snd_wscale; - - if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) - cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; - return 0; -} - -/* - * active_open_err() will send reset() if flag set.. - * It will also send ABORT event. - */ -static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, - int reset) -{ - cleanup_retrans_entry(cm_node); - if (reset) { - nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, " - "state=%d\n", cm_node, cm_node->state); - add_ref_cm_node(cm_node); - send_reset(cm_node, skb); - } else { - dev_kfree_skb_any(skb); - } - - cm_node->state = NES_CM_STATE_CLOSED; - create_event(cm_node, NES_CM_EVENT_ABORTED); -} - -/* - * passive_open_err() will either do a reset() or will free up the skb and - * remove the cm_node. 
- */ -static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, - int reset) -{ - cleanup_retrans_entry(cm_node); - cm_node->state = NES_CM_STATE_CLOSED; - if (reset) { - nes_debug(NES_DBG_CM, "passive_open_err sending RST for " - "cm_node=%p state =%d\n", cm_node, cm_node->state); - send_reset(cm_node, skb); - } else { - dev_kfree_skb_any(skb); - rem_ref_cm_node(cm_node->cm_core, cm_node); - } -} - -/* - * free_retrans_entry() routines assumes that the retrans_list_lock has - * been acquired before calling. - */ -static void free_retrans_entry(struct nes_cm_node *cm_node) -{ - struct nes_timer_entry *send_entry; - - send_entry = cm_node->send_entry; - if (send_entry) { - cm_node->send_entry = NULL; - dev_kfree_skb_any(send_entry->skb); - kfree(send_entry); - rem_ref_cm_node(cm_node->cm_core, cm_node); - } -} - -static void cleanup_retrans_entry(struct nes_cm_node *cm_node) -{ - unsigned long flags; - - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - free_retrans_entry(cm_node); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); -} - -/** - * process_packet - * Returns skb if to be freed, else it will return NULL if already used.. - */ -static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct nes_cm_core *cm_core) -{ - enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN; - struct tcphdr *tcph = tcp_hdr(skb); - u32 fin_set = 0; - int ret = 0; - - skb_pull(skb, ip_hdr(skb)->ihl << 2); - - nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d " - "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn, - tcph->ack, tcph->rst, tcph->fin); - - if (tcph->rst) { - pkt_type = NES_PKT_TYPE_RST; - } else if (tcph->syn) { - pkt_type = NES_PKT_TYPE_SYN; - if (tcph->ack) - pkt_type = NES_PKT_TYPE_SYNACK; - } else if (tcph->ack) { - pkt_type = NES_PKT_TYPE_ACK; - } - if (tcph->fin) - fin_set = 1; - - switch (pkt_type) { - case NES_PKT_TYPE_SYN: - handle_syn_pkt(cm_node, skb, tcph); - break; - case NES_PKT_TYPE_SYNACK: - handle_synack_pkt(cm_node, skb, tcph); - break; - case NES_PKT_TYPE_ACK: - ret = handle_ack_pkt(cm_node, skb, tcph); - if (fin_set && !ret) - handle_fin_pkt(cm_node); - break; - case NES_PKT_TYPE_RST: - handle_rst_pkt(cm_node, skb, tcph); - break; - default: - if ((fin_set) && (!check_seq(cm_node, tcph, skb))) - handle_fin_pkt(cm_node); - drop_packet(skb); - break; - } -} - -/** - * mini_cm_listen - create a listen node with params - */ -static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) -{ - struct nes_cm_listener *listener; - unsigned long flags; - - nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n", - cm_info->loc_addr, cm_info->loc_port); - - /* cannot have multiple matching listeners */ - listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port, - NES_CM_LISTENER_EITHER_STATE); - - if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) { - /* find automatically incs ref count ??? 
*/ - atomic_dec(&listener->ref_count); - nes_debug(NES_DBG_CM, "Not creating listener since it already exists\n"); - return NULL; - } - - if (!listener) { - /* create a CM listen node (1/2 node to compare incoming traffic to) */ - listener = kzalloc(sizeof(*listener), GFP_ATOMIC); - if (!listener) - return NULL; - - listener->loc_addr = cm_info->loc_addr; - listener->loc_port = cm_info->loc_port; - listener->reused_node = 0; - - atomic_set(&listener->ref_count, 1); - } - /* pasive case */ - /* find already inc'ed the ref count */ - else { - listener->reused_node = 1; - } - - listener->cm_id = cm_info->cm_id; - atomic_set(&listener->pend_accepts_cnt, 0); - listener->cm_core = cm_core; - listener->nesvnic = nesvnic; - atomic_inc(&cm_core->node_cnt); - - listener->conn_type = cm_info->conn_type; - listener->backlog = cm_info->backlog; - listener->listener_state = NES_CM_LISTENER_ACTIVE_STATE; - - if (!listener->reused_node) { - spin_lock_irqsave(&cm_core->listen_list_lock, flags); - list_add(&listener->list, &cm_core->listen_list.list); - spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); - atomic_inc(&cm_core->listen_node_cnt); - } - - nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x," - " listener = %p, backlog = %d, cm_id = %p.\n", - cm_info->loc_addr, cm_info->loc_port, - listener, listener->backlog, listener->cm_id); - - return listener; -} - - -/** - * mini_cm_connect - make a connection node with params - */ -static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, u16 private_data_len, - void *private_data, struct nes_cm_info *cm_info) -{ - int ret = 0; - struct nes_cm_node *cm_node; - struct nes_cm_listener *loopbackremotelistener; - struct nes_cm_node *loopbackremotenode; - struct nes_cm_info loopback_cm_info; - u8 *start_buff; - - /* create a CM connection node */ - cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL); - if (!cm_node) - return NULL; - - /* set our node side to client (active) side */ - cm_node->tcp_cntxt.client = 1; - cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; - - if (cm_info->loc_addr == cm_info->rem_addr) { - loopbackremotelistener = find_listener(cm_core, - cm_node->loc_addr, cm_node->rem_port, - NES_CM_LISTENER_ACTIVE_STATE); - if (loopbackremotelistener == NULL) { - create_event(cm_node, NES_CM_EVENT_ABORTED); - } else { - loopback_cm_info = *cm_info; - loopback_cm_info.loc_port = cm_info->rem_port; - loopback_cm_info.rem_port = cm_info->loc_port; - loopback_cm_info.loc_port = - cm_info->rem_port; - loopback_cm_info.rem_port = - cm_info->loc_port; - loopback_cm_info.cm_id = loopbackremotelistener->cm_id; - loopbackremotenode = make_cm_node(cm_core, nesvnic, - &loopback_cm_info, loopbackremotelistener); - if (!loopbackremotenode) { - rem_ref_cm_node(cm_node->cm_core, cm_node); - return NULL; - } - atomic_inc(&cm_loopbacks); - loopbackremotenode->loopbackpartner = cm_node; - loopbackremotenode->tcp_cntxt.rcv_wscale = - NES_CM_DEFAULT_RCV_WND_SCALE; - cm_node->loopbackpartner = loopbackremotenode; - memcpy(loopbackremotenode->mpa_frame_buf, private_data, - private_data_len); - loopbackremotenode->mpa_frame_size = private_data_len; - - /* we are done handling this state. 
*/ - /* set node to a TSA state */ - cm_node->state = NES_CM_STATE_TSA; - cm_node->tcp_cntxt.rcv_nxt = - loopbackremotenode->tcp_cntxt.loc_seq_num; - loopbackremotenode->tcp_cntxt.rcv_nxt = - cm_node->tcp_cntxt.loc_seq_num; - cm_node->tcp_cntxt.max_snd_wnd = - loopbackremotenode->tcp_cntxt.rcv_wnd; - loopbackremotenode->tcp_cntxt.max_snd_wnd = - cm_node->tcp_cntxt.rcv_wnd; - cm_node->tcp_cntxt.snd_wnd = - loopbackremotenode->tcp_cntxt.rcv_wnd; - loopbackremotenode->tcp_cntxt.snd_wnd = - cm_node->tcp_cntxt.rcv_wnd; - cm_node->tcp_cntxt.snd_wscale = - loopbackremotenode->tcp_cntxt.rcv_wscale; - loopbackremotenode->tcp_cntxt.snd_wscale = - cm_node->tcp_cntxt.rcv_wscale; - loopbackremotenode->state = NES_CM_STATE_MPAREQ_RCVD; - create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ); - } - return cm_node; - } - - start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2); - cm_node->mpa_frame_size = private_data_len; - - memcpy(start_buff, private_data, private_data_len); - - /* send a syn and goto syn sent state */ - cm_node->state = NES_CM_STATE_SYN_SENT; - ret = send_syn(cm_node, 0, NULL); - - if (ret) { - /* error in sending the syn free up the cm_node struct */ - nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest " - "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n", - cm_node->rem_addr, cm_node->rem_port, cm_node, - cm_node->cm_id); - rem_ref_cm_node(cm_node->cm_core, cm_node); - cm_node = NULL; - } - - if (cm_node) { - nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X," - "port=0x%04x, cm_node=%p, cm_id = %p.\n", - cm_node->rem_addr, cm_node->rem_port, cm_node, - cm_node->cm_id); - } - - return cm_node; -} - - -/** - * mini_cm_accept - accept a connection - * This function is never called - */ -static int mini_cm_accept(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) -{ - return 0; -} - - -/** - * mini_cm_reject - reject and teardown a connection - */ -static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) -{ - int ret = 0; - int err = 0; - int passive_state; - struct nes_cm_event event; - struct iw_cm_id *cm_id = cm_node->cm_id; - struct nes_cm_node *loopback = cm_node->loopbackpartner; - - nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n", - __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state); - - if (cm_node->tcp_cntxt.client) - return ret; - cleanup_retrans_entry(cm_node); - - if (!loopback) { - passive_state = atomic_add_return(1, &cm_node->passive_state); - if (passive_state == NES_SEND_RESET_EVENT) { - cm_node->state = NES_CM_STATE_CLOSED; - rem_ref_cm_node(cm_core, cm_node); - } else { - if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) { - rem_ref_cm_node(cm_core, cm_node); - } else { - ret = send_mpa_reject(cm_node); - if (ret) { - cm_node->state = NES_CM_STATE_CLOSED; - err = send_reset(cm_node, NULL); - if (err) - WARN_ON(1); - } else { - cm_id->add_ref(cm_id); - } - } - } - } else { - cm_node->cm_id = NULL; - if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) { - rem_ref_cm_node(cm_core, cm_node); - rem_ref_cm_node(cm_core, loopback); - } else { - event.cm_node = loopback; - event.cm_info.rem_addr = loopback->rem_addr; - event.cm_info.loc_addr = loopback->loc_addr; - event.cm_info.rem_port = loopback->rem_port; - event.cm_info.loc_port = loopback->loc_port; - event.cm_info.cm_id = loopback->cm_id; - cm_event_mpa_reject(&event); - rem_ref_cm_node(cm_core, cm_node); - loopback->state = NES_CM_STATE_CLOSING; - - cm_id = loopback->cm_id; - rem_ref_cm_node(cm_core, loopback); - 
cm_id->rem_ref(cm_id); - } - } - - return ret; -} - - -/** - * mini_cm_close - */ -static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) -{ - int ret = 0; - - if (!cm_core || !cm_node) - return -EINVAL; - - switch (cm_node->state) { - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_MPAREQ_SENT: - case NES_CM_STATE_MPAREQ_RCVD: - cleanup_retrans_entry(cm_node); - send_reset(cm_node, NULL); - break; - case NES_CM_STATE_CLOSE_WAIT: - cm_node->state = NES_CM_STATE_LAST_ACK; - send_fin(cm_node, NULL); - break; - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_TIME_WAIT: - case NES_CM_STATE_CLOSING: - ret = -1; - break; - case NES_CM_STATE_LISTENING: - cleanup_retrans_entry(cm_node); - send_reset(cm_node, NULL); - break; - case NES_CM_STATE_MPAREJ_RCVD: - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - case NES_CM_STATE_CLOSED: - case NES_CM_STATE_LISTENER_DESTROYED: - ret = rem_ref_cm_node(cm_core, cm_node); - break; - case NES_CM_STATE_TSA: - if (cm_node->send_entry) - printk(KERN_ERR "ERROR Close got called from STATE_TSA " - "send_entry=%p\n", cm_node->send_entry); - ret = rem_ref_cm_node(cm_core, cm_node); - break; - } - return ret; -} - - -/** - * recv_pkt - recv an ETHERNET packet, and process it through CM - * node state machine - */ -static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct sk_buff *skb) -{ - struct nes_cm_node *cm_node = NULL; - struct nes_cm_listener *listener = NULL; - struct iphdr *iph; - struct tcphdr *tcph; - struct nes_cm_info nfo; - int skb_handled = 1; - __be32 tmp_daddr, tmp_saddr; - - if (!skb) - return 0; - if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) - return 0; - - iph = (struct iphdr *)skb->data; - tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr)); - - nfo.loc_addr = ntohl(iph->daddr); - nfo.loc_port = ntohs(tcph->dest); - nfo.rem_addr = ntohl(iph->saddr); - nfo.rem_port = ntohs(tcph->source); - - tmp_daddr = cpu_to_be32(iph->daddr); - tmp_saddr = cpu_to_be32(iph->saddr); - - nes_debug(NES_DBG_CM, "Received packet: dest=%pI4:0x%04X src=%pI4:0x%04X\n", - &tmp_daddr, tcph->dest, &tmp_saddr, tcph->source); - - do { - cm_node = find_node(cm_core, - nfo.rem_port, nfo.rem_addr, - nfo.loc_port, nfo.loc_addr); - - if (!cm_node) { - /* Only type of packet accepted are for */ - /* the PASSIVE open (syn only) */ - if ((!tcph->syn) || (tcph->ack)) { - skb_handled = 0; - break; - } - listener = find_listener(cm_core, nfo.loc_addr, - nfo.loc_port, - NES_CM_LISTENER_ACTIVE_STATE); - if (!listener) { - nfo.cm_id = NULL; - nfo.conn_type = 0; - nes_debug(NES_DBG_CM, "Unable to find listener for the pkt\n"); - skb_handled = 0; - break; - } - nfo.cm_id = listener->cm_id; - nfo.conn_type = listener->conn_type; - cm_node = make_cm_node(cm_core, nesvnic, &nfo, - listener); - if (!cm_node) { - nes_debug(NES_DBG_CM, "Unable to allocate " - "node\n"); - cm_packets_dropped++; - atomic_dec(&listener->ref_count); - dev_kfree_skb_any(skb); - break; - } - if (!tcph->rst && !tcph->fin) { - cm_node->state = NES_CM_STATE_LISTENING; - } else { - cm_packets_dropped++; - rem_ref_cm_node(cm_core, cm_node); - dev_kfree_skb_any(skb); - break; - } - add_ref_cm_node(cm_node); - } else if (cm_node->state == NES_CM_STATE_TSA) { - if (cm_node->nesqp->pau_mode) - nes_queue_mgt_skbs(skb, nesvnic, 
cm_node->nesqp); - else { - rem_ref_cm_node(cm_core, cm_node); - atomic_inc(&cm_accel_dropped_pkts); - dev_kfree_skb_any(skb); - } - break; - } - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(*tcph)); - skb->len = ntohs(iph->tot_len); - process_packet(cm_node, skb, cm_core); - rem_ref_cm_node(cm_core, cm_node); - } while (0); - return skb_handled; -} - - -/** - * nes_cm_alloc_core - allocate a top level instance of a cm core - */ -static struct nes_cm_core *nes_cm_alloc_core(void) -{ - struct nes_cm_core *cm_core; - - /* setup the CM core */ - /* alloc top level core control structure */ - cm_core = kzalloc(sizeof(*cm_core), GFP_KERNEL); - if (!cm_core) - return NULL; - - INIT_LIST_HEAD(&cm_core->connected_nodes); - timer_setup(&cm_core->tcp_timer, nes_cm_timer_tick, 0); - - cm_core->mtu = NES_CM_DEFAULT_MTU; - cm_core->state = NES_CM_STATE_INITED; - cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS; - - atomic_set(&cm_core->events_posted, 0); - - cm_core->api = &nes_cm_api; - - spin_lock_init(&cm_core->ht_lock); - spin_lock_init(&cm_core->listen_list_lock); - - INIT_LIST_HEAD(&cm_core->listen_list.list); - - nes_debug(NES_DBG_CM, "Init CM Core completed -- cm_core=%p\n", cm_core); - - nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n"); - cm_core->event_wq = alloc_ordered_workqueue("nesewq", 0); - if (!cm_core->event_wq) - goto out_free_cmcore; - cm_core->post_event = nes_cm_post_event; - nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n"); - cm_core->disconn_wq = alloc_ordered_workqueue("nesdwq", 0); - if (!cm_core->disconn_wq) - goto out_free_wq; - - print_core(cm_core); - return cm_core; - -out_free_wq: - destroy_workqueue(cm_core->event_wq); -out_free_cmcore: - kfree(cm_core); - return NULL; -} - - -/** - * mini_cm_dealloc_core - deallocate a top level instance of a cm core - */ -static int mini_cm_dealloc_core(struct nes_cm_core *cm_core) -{ - nes_debug(NES_DBG_CM, "De-Alloc CM Core (%p)\n", cm_core); - - if (!cm_core) - return -EINVAL; - - barrier(); - - if (timer_pending(&cm_core->tcp_timer)) - del_timer(&cm_core->tcp_timer); - - destroy_workqueue(cm_core->event_wq); - destroy_workqueue(cm_core->disconn_wq); - nes_debug(NES_DBG_CM, "\n"); - kfree(cm_core); - - return 0; -} - - -/** - * mini_cm_get - */ -static int mini_cm_get(struct nes_cm_core *cm_core) -{ - return cm_core->state; -} - - -/** - * mini_cm_set - */ -static int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value) -{ - int ret = 0; - - switch (type) { - case NES_CM_SET_PKT_SIZE: - cm_core->mtu = value; - break; - case NES_CM_SET_FREE_PKT_Q_SIZE: - cm_core->free_tx_pkt_max = value; - break; - default: - /* unknown set option */ - ret = -EINVAL; - } - - return ret; -} - - -/** - * nes_cm_init_tsa_conn setup HW; MPA frames must be - * successfully exchanged when this is called - */ -static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_node) -{ - int ret = 0; - - if (!nesqp) - return -EINVAL; - - nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 | - NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG | - NES_QPCONTEXT_MISC_DROS); - - if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale) - nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE); - - nesqp->nesqp_context->misc2 |= cpu_to_le32(64 << NES_QPCONTEXT_MISC2_TTL_SHIFT); - - nesqp->nesqp_context->misc2 |= cpu_to_le32( - cm_node->tos << NES_QPCONTEXT_MISC2_TOS_SHIFT); - - nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16); - - 
nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32( - (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT); - - nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( - (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) & - NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK); - - nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( - (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) & - NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK); - - nesqp->nesqp_context->keepalive = cpu_to_le32(0x80); - nesqp->nesqp_context->ts_recent = 0; - nesqp->nesqp_context->ts_age = 0; - nesqp->nesqp_context->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); - nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd); - nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); - nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd << - cm_node->tcp_cntxt.rcv_wscale); - nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); - nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); - nesqp->nesqp_context->srtt = 0; - nesqp->nesqp_context->rttvar = cpu_to_le32(0x6); - nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000); - nesqp->nesqp_context->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss); - nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); - nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); - nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd); - - nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X," - " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n", - nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt), - le32_to_cpu(nesqp->nesqp_context->snd_nxt), - cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale), - le32_to_cpu(nesqp->nesqp_context->rcv_wnd), - le32_to_cpu(nesqp->nesqp_context->misc)); - nes_debug(NES_DBG_CM, " snd_wnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd)); - nes_debug(NES_DBG_CM, " snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd)); - nes_debug(NES_DBG_CM, " max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd)); - - nes_debug(NES_DBG_CM, "Change cm_node state to TSA\n"); - cm_node->state = NES_CM_STATE_TSA; - - return ret; -} - - -/** - * nes_cm_disconn - */ -int nes_cm_disconn(struct nes_qp *nesqp) -{ - struct disconn_work *work; - - work = kzalloc(sizeof *work, GFP_ATOMIC); - if (!work) - return -ENOMEM; /* Timer will clean up */ - - nes_add_ref(&nesqp->ibqp); - work->nesqp = nesqp; - INIT_WORK(&work->work, nes_disconnect_worker); - queue_work(g_cm_core->disconn_wq, &work->work); - return 0; -} - - -/** - * nes_disconnect_worker - */ -static void nes_disconnect_worker(struct work_struct *work) -{ - struct disconn_work *dwork = container_of(work, struct disconn_work, work); - struct nes_qp *nesqp = dwork->nesqp; - - kfree(dwork); - nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n", - nesqp->last_aeq, nesqp->hwqp.qp_id); - nes_cm_disconn_true(nesqp); - nes_rem_ref(&nesqp->ibqp); -} - - -/** - * nes_cm_disconn_true - */ -static int nes_cm_disconn_true(struct nes_qp *nesqp) -{ - unsigned long flags; - int ret = 0; - struct iw_cm_id *cm_id; - struct iw_cm_event cm_event; - struct nes_vnic *nesvnic; - u16 last_ae; - u8 original_hw_tcp_state; - u8 original_ibqp_state; - int disconn_status = 0; - int issue_disconn = 0; - int issue_close = 0; - 
int issue_flush = 0; - u32 flush_q = NES_CQP_FLUSH_RQ; - struct ib_event ibevent; - - if (!nesqp) { - nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n"); - return -1; - } - - spin_lock_irqsave(&nesqp->lock, flags); - cm_id = nesqp->cm_id; - /* make sure we havent already closed this connection */ - if (!cm_id) { - nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n", - nesqp->hwqp.qp_id); - spin_unlock_irqrestore(&nesqp->lock, flags); - return -1; - } - - nesvnic = to_nesvnic(nesqp->ibqp.device); - nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id); - - original_hw_tcp_state = nesqp->hw_tcp_state; - original_ibqp_state = nesqp->ibqp_state; - last_ae = nesqp->last_aeq; - - if (nesqp->term_flags) { - issue_disconn = 1; - issue_close = 1; - nesqp->cm_id = NULL; - del_timer(&nesqp->terminate_timer); - if (nesqp->flush_issued == 0) { - nesqp->flush_issued = 1; - issue_flush = 1; - } - } else if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || - ((original_ibqp_state == IB_QPS_RTS) && - (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { - issue_disconn = 1; - if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) - disconn_status = -ECONNRESET; - } - - if (((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) || - (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) || - (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) || - (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { - issue_close = 1; - nesqp->cm_id = NULL; - if (nesqp->flush_issued == 0) { - nesqp->flush_issued = 1; - issue_flush = 1; - } - } - - spin_unlock_irqrestore(&nesqp->lock, flags); - - if ((issue_flush) && (nesqp->destroyed == 0)) { - /* Flush the queue(s) */ - if (nesqp->hw_iwarp_state >= NES_AEQE_IWARP_STATE_TERMINATE) - flush_q |= NES_CQP_FLUSH_SQ; - flush_wqes(nesvnic->nesdev, nesqp, flush_q, 1); - - if (nesqp->term_flags) { - ibevent.device = nesqp->ibqp.device; - ibevent.event = nesqp->terminate_eventtype; - ibevent.element.qp = &nesqp->ibqp; - if (nesqp->ibqp.event_handler) - nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); - } - } - - if ((cm_id) && (cm_id->event_handler)) { - if (issue_disconn) { - atomic_inc(&cm_disconnects); - cm_event.event = IW_CM_EVENT_DISCONNECT; - cm_event.status = disconn_status; - cm_event.local_addr = cm_id->m_local_addr; - cm_event.remote_addr = cm_id->m_remote_addr; - cm_event.private_data = NULL; - cm_event.private_data_len = 0; - - nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event" - " for QP%u, SQ Head = %u, SQ Tail = %u. 
" - "cm_id = %p, refcount = %u.\n", - nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, - nesqp->hwqp.sq_tail, cm_id, - atomic_read(&nesqp->refcount)); - - ret = cm_id->event_handler(cm_id, &cm_event); - if (ret) - nes_debug(NES_DBG_CM, "OFA CM event_handler " - "returned, ret=%d\n", ret); - } - - if (issue_close) { - atomic_inc(&cm_closes); - nes_disconnect(nesqp, 1); - - cm_id->provider_data = nesqp; - /* Send up the close complete event */ - cm_event.event = IW_CM_EVENT_CLOSE; - cm_event.status = 0; - cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->m_local_addr; - cm_event.remote_addr = cm_id->m_remote_addr; - cm_event.private_data = NULL; - cm_event.private_data_len = 0; - - ret = cm_id->event_handler(cm_id, &cm_event); - if (ret) - nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); - - cm_id->rem_ref(cm_id); - } - } - - return 0; -} - - -/** - * nes_disconnect - */ -static int nes_disconnect(struct nes_qp *nesqp, int abrupt) -{ - int ret = 0; - struct nes_vnic *nesvnic; - struct nes_device *nesdev; - struct nes_ib_device *nesibdev; - - nesvnic = to_nesvnic(nesqp->ibqp.device); - if (!nesvnic) - return -EINVAL; - - nesdev = nesvnic->nesdev; - nesibdev = nesvnic->nesibdev; - - nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", - netdev_refcnt_read(nesvnic->netdev)); - - if (nesqp->active_conn) { - - /* indicate this connection is NOT active */ - nesqp->active_conn = 0; - } else { - /* Need to free the Last Streaming Mode Message */ - if (nesqp->ietf_frame) { - if (nesqp->lsmm_mr) - nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr, - NULL); - pci_free_consistent(nesdev->pcidev, - nesqp->private_data_len + nesqp->ietf_frame_size, - nesqp->ietf_frame, nesqp->ietf_frame_pbase); - } - } - - /* close the CM node down if it is still active */ - if (nesqp->cm_node) { - nes_debug(NES_DBG_CM, "Call close API\n"); - - g_cm_core->api->close(g_cm_core, nesqp->cm_node); - } - - return ret; -} - - -/** - * nes_accept - */ -int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - u64 u64temp; - struct ib_qp *ibqp; - struct nes_qp *nesqp; - struct nes_vnic *nesvnic; - struct nes_device *nesdev; - struct nes_cm_node *cm_node; - struct nes_adapter *adapter; - struct ib_qp_attr attr; - struct iw_cm_event cm_event; - struct nes_hw_qp_wqe *wqe; - struct nes_v4_quad nes_quad; - u32 crc_value; - int ret; - int passive_state; - struct ib_mr *ibmr = NULL; - struct nes_pd *nespd; - u64 tagged_offset; - u8 mpa_frame_offset = 0; - struct ietf_mpa_v2 *mpa_v2_frame; - u8 start_addr = 0; - u8 *start_ptr = &start_addr; - u8 **start_buff = &start_ptr; - u16 buff_len = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - - ibqp = nes_get_qp(cm_id->device, conn_param->qpn); - if (!ibqp) - return -EINVAL; - - /* get all our handles */ - nesqp = to_nesqp(ibqp); - nesvnic = to_nesvnic(nesqp->ibqp.device); - nesdev = nesvnic->nesdev; - adapter = nesdev->nesadapter; - - cm_node = (struct nes_cm_node *)cm_id->provider_data; - nes_debug(NES_DBG_CM, "nes_accept: cm_node= %p nesvnic=%p, netdev=%p," - "%s\n", cm_node, nesvnic, nesvnic->netdev, - nesvnic->netdev->name); - - if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) { - if (cm_node->loopbackpartner) - rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner); - rem_ref_cm_node(cm_node->cm_core, cm_node); - return -EINVAL; - } - - passive_state = atomic_add_return(1, &cm_node->passive_state); - if (passive_state == 
NES_SEND_RESET_EVENT) { - rem_ref_cm_node(cm_node->cm_core, cm_node); - return -ECONNRESET; - } - /* associate the node with the QP */ - nesqp->cm_node = (void *)cm_node; - cm_node->nesqp = nesqp; - - - nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n", - nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener); - atomic_inc(&cm_accepts); - - nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", - netdev_refcnt_read(nesvnic->netdev)); - - nesqp->ietf_frame_size = sizeof(struct ietf_mpa_v2); - /* allocate the ietf frame and space for private data */ - nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, - nesqp->ietf_frame_size + conn_param->private_data_len, - &nesqp->ietf_frame_pbase); - - if (!nesqp->ietf_frame) { - nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n"); - return -ENOMEM; - } - mpa_v2_frame = (struct ietf_mpa_v2 *)nesqp->ietf_frame; - - if (cm_node->mpa_frame_rev == IETF_MPA_V1) - mpa_frame_offset = 4; - - if (cm_node->mpa_frame_rev == IETF_MPA_V1 || - cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { - record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); - } - - memcpy(mpa_v2_frame->priv_data, conn_param->private_data, - conn_param->private_data_len); - - cm_build_mpa_frame(cm_node, start_buff, &buff_len, nesqp->ietf_frame, MPA_KEY_REPLY); - nesqp->private_data_len = conn_param->private_data_len; - - /* setup our first outgoing iWarp send WQE (the IETF frame response) */ - wqe = &nesqp->hwqp.sq_vbase[0]; - - if (raddr->sin_addr.s_addr != laddr->sin_addr.s_addr) { - u64temp = (unsigned long)nesqp; - nespd = nesqp->nespd; - tagged_offset = (u64)(unsigned long)*start_buff; - ibmr = nes_reg_phys_mr(&nespd->ibpd, - nesqp->ietf_frame_pbase + mpa_frame_offset, - buff_len, IB_ACCESS_LOCAL_WRITE, - &tagged_offset); - if (IS_ERR(ibmr)) { - nes_debug(NES_DBG_CM, "Unable to register memory region" - "for lSMM for cm_node = %p \n", - cm_node); - pci_free_consistent(nesdev->pcidev, - nesqp->private_data_len + nesqp->ietf_frame_size, - nesqp->ietf_frame, nesqp->ietf_frame_pbase); - return PTR_ERR(ibmr); - } - - ibmr->pd = &nespd->ibpd; - ibmr->device = nespd->ibpd.device; - nesqp->lsmm_mr = ibmr; - - u64temp |= NES_SW_CONTEXT_ALIGN >> 1; - set_wqe_64bit_value(wqe->wqe_words, - NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, - u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = - cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | - NES_IWARP_SQ_WQE_WRPDU); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = - cpu_to_le32(buff_len); - set_wqe_64bit_value(wqe->wqe_words, - NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, - (u64)(unsigned long)(*start_buff)); - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = - cpu_to_le32(buff_len); - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey; - if (nesqp->sq_kmapped) { - nesqp->sq_kmapped = 0; - kunmap(nesqp->page); - } - - nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU); - } else { - nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU); - } - nesqp->skip_lsmm = 1; - - /* Cache the cm_id in the qp */ - nesqp->cm_id = cm_id; - cm_node->cm_id = cm_id; - - /* nesqp->cm_node = (void *)cm_id->provider_data; */ - cm_id->provider_data = nesqp; - nesqp->active_conn = 0; - - if (cm_node->state == NES_CM_STATE_TSA) - nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n", - cm_node); - - nes_cm_init_tsa_conn(nesqp, cm_node); - - nesqp->nesqp_context->tcpPorts[0] = - cpu_to_le16(cm_node->loc_port); - nesqp->nesqp_context->tcpPorts[1] = - 
cpu_to_le16(cm_node->rem_port); - - nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr); - - nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << - NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); - - nesqp->nesqp_context->arp_index_vlan |= - cpu_to_le32(nes_arp_table(nesdev, - le32_to_cpu(nesqp->nesqp_context->ip0), NULL, - NES_ARP_RESOLVE) << 16); - - nesqp->nesqp_context->ts_val_delta = cpu_to_le32( - jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); - - nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); - - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( - ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); - nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32((u32)cm_node->ord_size); - - memset(&nes_quad, 0, sizeof(nes_quad)); - nes_quad.DstIpAdrIndex = - cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = htonl(cm_node->rem_addr); - nes_quad.TcpPorts[0] = htons(cm_node->rem_port); - nes_quad.TcpPorts[1] = htons(cm_node->loc_port); - - /* Produce hash key */ - crc_value = get_crc_value(&nes_quad); - nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); - nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", - nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); - - nesqp->hte_index &= adapter->hte_index_mask; - nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); - - cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); - - nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = " - "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + " - "private data length=%u.\n", nesqp->hwqp.qp_id, - ntohl(raddr->sin_addr.s_addr), ntohs(raddr->sin_port), - ntohl(laddr->sin_addr.s_addr), ntohs(laddr->sin_port), - le32_to_cpu(nesqp->nesqp_context->rcv_nxt), - le32_to_cpu(nesqp->nesqp_context->snd_nxt), - buff_len); - - /* notify OF layer that accept event was successful */ - cm_id->add_ref(cm_id); - nes_add_ref(&nesqp->ibqp); - - cm_event.event = IW_CM_EVENT_ESTABLISHED; - cm_event.status = 0; - cm_event.provider_data = (void *)nesqp; - cm_event.local_addr = cm_id->m_local_addr; - cm_event.remote_addr = cm_id->m_remote_addr; - cm_event.private_data = NULL; - cm_event.private_data_len = 0; - cm_event.ird = cm_node->ird_size; - cm_event.ord = cm_node->ord_size; - - ret = cm_id->event_handler(cm_id, &cm_event); - attr.qp_state = IB_QPS_RTS; - nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); - if (cm_node->loopbackpartner) { - cm_node->loopbackpartner->mpa_frame_size = - nesqp->private_data_len; - /* copy entire MPA frame to our cm_node's frame */ - memcpy(cm_node->loopbackpartner->mpa_frame_buf, - conn_param->private_data, conn_param->private_data_len); - create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED); - } - if (ret) - printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " - "ret=%d\n", __func__, __LINE__, ret); - - return 0; -} - - -/** - * nes_reject - */ -int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) -{ - struct nes_cm_node *cm_node; - struct nes_cm_node *loopback; - struct nes_cm_core *cm_core; - u8 *start_buff; - - atomic_inc(&cm_rejects); - cm_node = (struct nes_cm_node *)cm_id->provider_data; - loopback = cm_node->loopbackpartner; - cm_core = cm_node->cm_core; - cm_node->cm_id = cm_id; - - if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER) - return -EINVAL; - - if (loopback) { - memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len); - loopback->mpa_frame.priv_data_len = pdata_len; - loopback->mpa_frame_size 
= pdata_len; - } else { - start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2); - cm_node->mpa_frame_size = pdata_len; - memcpy(start_buff, pdata, pdata_len); - } - return cm_core->api->reject(cm_core, cm_node); -} - - -/** - * nes_connect - * setup and launch cm connect node - */ -int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - struct ib_qp *ibqp; - struct nes_qp *nesqp; - struct nes_vnic *nesvnic; - struct nes_device *nesdev; - struct nes_cm_node *cm_node; - struct nes_cm_info cm_info; - int apbvt_set = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - - if (cm_id->remote_addr.ss_family != AF_INET) - return -ENOSYS; - ibqp = nes_get_qp(cm_id->device, conn_param->qpn); - if (!ibqp) - return -EINVAL; - nesqp = to_nesqp(ibqp); - if (!nesqp) - return -EINVAL; - nesvnic = to_nesvnic(nesqp->ibqp.device); - if (!nesvnic) - return -EINVAL; - nesdev = nesvnic->nesdev; - if (!nesdev) - return -EINVAL; - - if (!laddr->sin_port || !raddr->sin_port) - return -EINVAL; - - nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = " - "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id, - ntohl(nesvnic->local_ipaddr), ntohl(raddr->sin_addr.s_addr), - ntohs(raddr->sin_port), ntohl(laddr->sin_addr.s_addr), - ntohs(laddr->sin_port)); - - atomic_inc(&cm_connects); - nesqp->active_conn = 1; - - /* cache the cm_id in the qp */ - nesqp->cm_id = cm_id; - cm_id->provider_data = nesqp; - nesqp->private_data_len = conn_param->private_data_len; - - nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); - nes_debug(NES_DBG_CM, "mpa private data len =%u\n", - conn_param->private_data_len); - - /* set up the connection params for the node */ - cm_info.loc_addr = ntohl(laddr->sin_addr.s_addr); - cm_info.loc_port = ntohs(laddr->sin_port); - cm_info.rem_addr = ntohl(raddr->sin_addr.s_addr); - cm_info.rem_port = ntohs(raddr->sin_port); - cm_info.cm_id = cm_id; - cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; - - if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) { - nes_manage_apbvt(nesvnic, cm_info.loc_port, - PCI_FUNC(nesdev->pcidev->devfn), - NES_MANAGE_APBVT_ADD); - apbvt_set = 1; - } - - cm_id->add_ref(cm_id); - - /* create a connect CM node connection */ - cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, - conn_param->private_data_len, (void *)conn_param->private_data, - &cm_info); - if (!cm_node) { - if (apbvt_set) - nes_manage_apbvt(nesvnic, cm_info.loc_port, - PCI_FUNC(nesdev->pcidev->devfn), - NES_MANAGE_APBVT_DEL); - - nes_debug(NES_DBG_NLMSG, "Delete loc_port = %04X\n", - cm_info.loc_port); - cm_id->rem_ref(cm_id); - return -ENOMEM; - } - - record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); - if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && - cm_node->ord_size == 0) - cm_node->ord_size = 1; - - cm_node->apbvt_set = apbvt_set; - cm_node->tos = cm_id->tos; - nesqp->cm_node = cm_node; - cm_node->nesqp = nesqp; - nes_add_ref(&nesqp->ibqp); - - return 0; -} - - -/** - * nes_create_listen - */ -int nes_create_listen(struct iw_cm_id *cm_id, int backlog) -{ - struct nes_vnic *nesvnic; - struct nes_cm_listener *cm_node; - struct nes_cm_info cm_info; - int err; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; - - nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n", - cm_id, ntohs(laddr->sin_port)); - - if (cm_id->m_local_addr.ss_family != AF_INET) - return -ENOSYS; - nesvnic 
= to_nesvnic(cm_id->device); - if (!nesvnic) - return -EINVAL; - - nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", - nesvnic, nesvnic->netdev, nesvnic->netdev->name); - - nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n", - nesvnic->local_ipaddr, laddr->sin_addr.s_addr); - - /* setup listen params in our api call struct */ - cm_info.loc_addr = ntohl(nesvnic->local_ipaddr); - cm_info.loc_port = ntohs(laddr->sin_port); - cm_info.backlog = backlog; - cm_info.cm_id = cm_id; - - cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; - - cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); - if (!cm_node) { - printk(KERN_ERR "%s[%u] Error returned from listen API call\n", - __func__, __LINE__); - return -ENOMEM; - } - - cm_id->provider_data = cm_node; - cm_node->tos = cm_id->tos; - - if (!cm_node->reused_node) { - err = nes_manage_apbvt(nesvnic, cm_node->loc_port, - PCI_FUNC(nesvnic->nesdev->pcidev->devfn), - NES_MANAGE_APBVT_ADD); - if (err) { - printk(KERN_ERR "nes_manage_apbvt call returned %d.\n", - err); - g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); - return err; - } - atomic_inc(&cm_listens_created); - } - - cm_id->add_ref(cm_id); - cm_id->provider_data = (void *)cm_node; - - - return 0; -} - - -/** - * nes_destroy_listen - */ -int nes_destroy_listen(struct iw_cm_id *cm_id) -{ - if (cm_id->provider_data) - g_cm_core->api->stop_listener(g_cm_core, cm_id->provider_data); - else - nes_debug(NES_DBG_CM, "cm_id->provider_data was NULL\n"); - - cm_id->rem_ref(cm_id); - - return 0; -} - - -/** - * nes_cm_recv - */ -int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice) -{ - int rc = 0; - - cm_packets_received++; - if ((g_cm_core) && (g_cm_core->api)) - rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb); - else - nes_debug(NES_DBG_CM, "Unable to process packet for CM," - " cm is not setup properly.\n"); - - return rc; -} - - -/** - * nes_cm_start - * Start and init a cm core module - */ -int nes_cm_start(void) -{ - nes_debug(NES_DBG_CM, "\n"); - /* create the primary CM core, pass this handle to subsequent core inits */ - g_cm_core = nes_cm_alloc_core(); - if (g_cm_core) - return 0; - else - return -ENOMEM; -} - - -/** - * nes_cm_stop - * stop and dealloc all cm core instances - */ -int nes_cm_stop(void) -{ - g_cm_core->api->destroy_cm_core(g_cm_core); - return 0; -} - - -/** - * cm_event_connected - * handle a connected event, setup QPs and HW - */ -static void cm_event_connected(struct nes_cm_event *event) -{ - struct nes_qp *nesqp; - struct nes_vnic *nesvnic; - struct nes_device *nesdev; - struct nes_cm_node *cm_node; - struct nes_adapter *nesadapter; - struct ib_qp_attr attr; - struct iw_cm_id *cm_id; - struct iw_cm_event cm_event; - struct nes_v4_quad nes_quad; - u32 crc_value; - int ret; - struct sockaddr_in *laddr; - struct sockaddr_in *raddr; - struct sockaddr_in *cm_event_laddr; - - /* get all our handles */ - cm_node = event->cm_node; - cm_id = cm_node->cm_id; - nes_debug(NES_DBG_CM, "cm_event_connected - %p - cm_id = %p\n", cm_node, cm_id); - nesqp = (struct nes_qp *)cm_id->provider_data; - nesvnic = to_nesvnic(nesqp->ibqp.device); - nesdev = nesvnic->nesdev; - nesadapter = nesdev->nesadapter; - laddr = (struct sockaddr_in *)&cm_id->m_local_addr; - raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr; - - if (nesqp->destroyed) - return; - atomic_inc(&cm_connecteds); - nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on" - " local 
port 0x%04X. jiffies = %lu.\n", - nesqp->hwqp.qp_id, ntohl(raddr->sin_addr.s_addr), - ntohs(raddr->sin_port), ntohs(laddr->sin_port), jiffies); - - nes_cm_init_tsa_conn(nesqp, cm_node); - - /* set the QP tsa context */ - nesqp->nesqp_context->tcpPorts[0] = - cpu_to_le16(cm_node->loc_port); - nesqp->nesqp_context->tcpPorts[1] = - cpu_to_le16(cm_node->rem_port); - nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr); - - nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << - NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); - nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( - nes_arp_table(nesdev, - le32_to_cpu(nesqp->nesqp_context->ip0), - NULL, NES_ARP_RESOLVE) << 16); - nesqp->nesqp_context->ts_val_delta = cpu_to_le32( - jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); - nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); - nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32((u32)1 << - NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); - nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32((u32)cm_node->ord_size); - - /* Adjust tail for not having a LSMM */ - /*nesqp->hwqp.sq_tail = 1;*/ - - build_rdma0_msg(cm_node, &nesqp); - - nes_write32(nesdev->regs + NES_WQE_ALLOC, - (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); - - memset(&nes_quad, 0, sizeof(nes_quad)); - - nes_quad.DstIpAdrIndex = - cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = htonl(cm_node->rem_addr); - nes_quad.TcpPorts[0] = htons(cm_node->rem_port); - nes_quad.TcpPorts[1] = htons(cm_node->loc_port); - - /* Produce hash key */ - crc_value = get_crc_value(&nes_quad); - nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); - nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n", - nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); - - nesqp->hte_index &= nesadapter->hte_index_mask; - nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); - - nesqp->ietf_frame = &cm_node->mpa_frame; - nesqp->private_data_len = (u8)cm_node->mpa_frame_size; - cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); - - /* notify OF layer we successfully created the requested connection */ - cm_event.event = IW_CM_EVENT_CONNECT_REPLY; - cm_event.status = 0; - cm_event.provider_data = cm_id->provider_data; - cm_event_laddr->sin_family = AF_INET; - cm_event_laddr->sin_port = laddr->sin_port; - cm_event.remote_addr = cm_id->m_remote_addr; - - cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size; - cm_event.ird = cm_node->ird_size; - cm_event.ord = cm_node->ord_size; - - cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr); - ret = cm_id->event_handler(cm_id, &cm_event); - nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); - - if (ret) - printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " - "ret=%d\n", __func__, __LINE__, ret); - attr.qp_state = IB_QPS_RTS; - nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); - - nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. 
jiffies = " - "%lu\n", nesqp->hwqp.qp_id, jiffies); - - return; -} - - -/** - * cm_event_connect_error - */ -static void cm_event_connect_error(struct nes_cm_event *event) -{ - struct nes_qp *nesqp; - struct iw_cm_id *cm_id; - struct iw_cm_event cm_event; - /* struct nes_cm_info cm_info; */ - int ret; - - if (!event->cm_node) - return; - - cm_id = event->cm_node->cm_id; - if (!cm_id) - return; - - nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id); - nesqp = cm_id->provider_data; - - if (!nesqp) - return; - - /* notify OF layer about this connection error event */ - /* cm_id->rem_ref(cm_id); */ - nesqp->cm_id = NULL; - cm_id->provider_data = NULL; - cm_event.event = IW_CM_EVENT_CONNECT_REPLY; - cm_event.status = -ECONNRESET; - cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->m_local_addr; - cm_event.remote_addr = cm_id->m_remote_addr; - cm_event.private_data = NULL; - cm_event.private_data_len = 0; - -#ifdef CONFIG_INFINIBAND_NES_DEBUG - { - struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *) - &cm_event.local_addr; - struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *) - &cm_event.remote_addr; - nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remote_addr=%08x\n", - cm_event_laddr->sin_addr.s_addr, cm_event_raddr->sin_addr.s_addr); - } -#endif - - ret = cm_id->event_handler(cm_id, &cm_event); - nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); - if (ret) - printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " - "ret=%d\n", __func__, __LINE__, ret); - cm_id->rem_ref(cm_id); - - rem_ref_cm_node(event->cm_node->cm_core, event->cm_node); - return; -} - - -/** - * cm_event_reset - */ -static void cm_event_reset(struct nes_cm_event *event) -{ - struct nes_qp *nesqp; - struct iw_cm_id *cm_id; - struct iw_cm_event cm_event; - /* struct nes_cm_info cm_info; */ - int ret; - - if (!event->cm_node) - return; - - if (!event->cm_node->cm_id) - return; - - cm_id = event->cm_node->cm_id; - - nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id); - nesqp = cm_id->provider_data; - if (!nesqp) - return; - - nesqp->cm_id = NULL; - /* cm_id->provider_data = NULL; */ - cm_event.event = IW_CM_EVENT_DISCONNECT; - cm_event.status = -ECONNRESET; - cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->m_local_addr; - cm_event.remote_addr = cm_id->m_remote_addr; - cm_event.private_data = NULL; - cm_event.private_data_len = 0; - - cm_id->add_ref(cm_id); - ret = cm_id->event_handler(cm_id, &cm_event); - atomic_inc(&cm_closes); - cm_event.event = IW_CM_EVENT_CLOSE; - cm_event.status = 0; - cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr = cm_id->m_local_addr; - cm_event.remote_addr = cm_id->m_remote_addr; - cm_event.private_data = NULL; - cm_event.private_data_len = 0; - nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node); - ret = cm_id->event_handler(cm_id, &cm_event); - - nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); - - - /* notify OF layer about this connection error event */ - cm_id->rem_ref(cm_id); - - return; -} - - -/** - * cm_event_mpa_req - */ -static void cm_event_mpa_req(struct nes_cm_event *event) -{ - struct iw_cm_id *cm_id; - struct iw_cm_event cm_event; - int ret; - struct nes_cm_node *cm_node; - struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *) - &cm_event.local_addr; - struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *) - &cm_event.remote_addr; - - cm_node = event->cm_node; - if 
(!cm_node) - return; - cm_id = cm_node->cm_id; - - atomic_inc(&cm_connect_reqs); - nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", - cm_node, cm_id, jiffies); - - cm_event.event = IW_CM_EVENT_CONNECT_REQUEST; - cm_event.status = 0; - cm_event.provider_data = (void *)cm_node; - - cm_event_laddr->sin_family = AF_INET; - cm_event_laddr->sin_port = htons(event->cm_info.loc_port); - cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr); - - cm_event_raddr->sin_family = AF_INET; - cm_event_raddr->sin_port = htons(event->cm_info.rem_port); - cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); - cm_event.private_data = cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8)cm_node->mpa_frame_size; - if (cm_node->mpa_frame_rev == IETF_MPA_V1) { - cm_event.ird = NES_MAX_IRD; - cm_event.ord = NES_MAX_ORD; - } else { - cm_event.ird = cm_node->ird_size; - cm_event.ord = cm_node->ord_size; - } - - ret = cm_id->event_handler(cm_id, &cm_event); - if (ret) - printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); - return; -} - - -static void cm_event_mpa_reject(struct nes_cm_event *event) -{ - struct iw_cm_id *cm_id; - struct iw_cm_event cm_event; - struct nes_cm_node *cm_node; - int ret; - struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *) - &cm_event.local_addr; - struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *) - &cm_event.remote_addr; - - cm_node = event->cm_node; - if (!cm_node) - return; - cm_id = cm_node->cm_id; - - atomic_inc(&cm_connect_reqs); - nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", - cm_node, cm_id, jiffies); - - cm_event.event = IW_CM_EVENT_CONNECT_REPLY; - cm_event.status = -ECONNREFUSED; - cm_event.provider_data = cm_id->provider_data; - - cm_event_laddr->sin_family = AF_INET; - cm_event_laddr->sin_port = htons(event->cm_info.loc_port); - cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr); - - cm_event_raddr->sin_family = AF_INET; - cm_event_raddr->sin_port = htons(event->cm_info.rem_port); - cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); - - cm_event.private_data = cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8)cm_node->mpa_frame_size; - - nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, " - "remove_addr=%08x\n", - cm_event_laddr->sin_addr.s_addr, - cm_event_raddr->sin_addr.s_addr); - - ret = cm_id->event_handler(cm_id, &cm_event); - if (ret) - printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); - - return; -} - - -static void nes_cm_event_handler(struct work_struct *); - -/** - * nes_cm_post_event - * post an event to the cm event handler - */ -static int nes_cm_post_event(struct nes_cm_event *event) -{ - atomic_inc(&event->cm_node->cm_core->events_posted); - add_ref_cm_node(event->cm_node); - event->cm_info.cm_id->add_ref(event->cm_info.cm_id); - INIT_WORK(&event->event_work, nes_cm_event_handler); - nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n", - event->cm_node, event); - - queue_work(event->cm_node->cm_core->event_wq, &event->event_work); - - nes_debug(NES_DBG_CM, "Exit\n"); - return 0; -} - - -/** - * nes_cm_event_handler - * worker function to handle cm events - * will free instance of nes_cm_event - */ -static void nes_cm_event_handler(struct work_struct *work) -{ - struct nes_cm_event *event = container_of(work, struct nes_cm_event, - event_work); - struct nes_cm_core *cm_core; - - if ((!event) || (!event->cm_node) || 
(!event->cm_node->cm_core)) - return; - - cm_core = event->cm_node->cm_core; - nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n", - event, event->type, atomic_read(&cm_core->events_posted)); - - switch (event->type) { - case NES_CM_EVENT_MPA_REQ: - cm_event_mpa_req(event); - nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n", - event->cm_node); - break; - case NES_CM_EVENT_RESET: - nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n", - event->cm_node); - cm_event_reset(event); - break; - case NES_CM_EVENT_CONNECTED: - if ((!event->cm_node->cm_id) || - (event->cm_node->state != NES_CM_STATE_TSA)) - break; - cm_event_connected(event); - nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); - break; - case NES_CM_EVENT_MPA_REJECT: - if ((!event->cm_node->cm_id) || - (event->cm_node->state == NES_CM_STATE_TSA)) - break; - cm_event_mpa_reject(event); - nes_debug(NES_DBG_CM, "CM Event: REJECT\n"); - break; - - case NES_CM_EVENT_ABORTED: - if ((!event->cm_node->cm_id) || - (event->cm_node->state == NES_CM_STATE_TSA)) - break; - cm_event_connect_error(event); - nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); - break; - case NES_CM_EVENT_DROPPED_PKT: - nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n"); - break; - default: - nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n"); - break; - } - - atomic_dec(&cm_core->events_posted); - event->cm_info.cm_id->rem_ref(event->cm_info.cm_id); - rem_ref_cm_node(cm_core, event->cm_node); - kfree(event); - - return; -} diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h deleted file mode 100644 index b9cc02b4e8d5..000000000000 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - */ - -#ifndef NES_CM_H -#define NES_CM_H - -#define QUEUE_EVENTS - -#define NES_MANAGE_APBVT_DEL 0 -#define NES_MANAGE_APBVT_ADD 1 - -#define NES_MPA_REQUEST_ACCEPT 1 -#define NES_MPA_REQUEST_REJECT 2 - -/* IETF MPA -- defines, enums, structs */ -#define IEFT_MPA_KEY_REQ "MPA ID Req Frame" -#define IEFT_MPA_KEY_REP "MPA ID Rep Frame" -#define IETF_MPA_KEY_SIZE 16 -#define IETF_MPA_VERSION 1 -#define IETF_MAX_PRIV_DATA_LEN 512 -#define IETF_MPA_FRAME_SIZE 20 -#define IETF_RTR_MSG_SIZE 4 -#define IETF_MPA_V2_FLAG 0x10 - -/* IETF RTR MSG Fields */ -#define IETF_PEER_TO_PEER 0x8000 -#define IETF_FLPDU_ZERO_LEN 0x4000 -#define IETF_RDMA0_WRITE 0x8000 -#define IETF_RDMA0_READ 0x4000 -#define IETF_NO_IRD_ORD 0x3FFF -#define NES_MAX_IRD 0x40 -#define NES_MAX_ORD 0x7F - -enum ietf_mpa_flags { - IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ - IETF_MPA_FLAGS_CRC = 0x40, /* receive Markers */ - IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */ -}; - -struct ietf_mpa_v1 { - u8 key[IETF_MPA_KEY_SIZE]; - u8 flags; - u8 rev; - __be16 priv_data_len; - u8 priv_data[0]; -}; - -#define ietf_mpa_req_resp_frame ietf_mpa_frame - -struct ietf_rtr_msg { - __be16 ctrl_ird; - __be16 ctrl_ord; -}; - -struct ietf_mpa_v2 { - u8 key[IETF_MPA_KEY_SIZE]; - u8 flags; - u8 rev; - __be16 priv_data_len; - struct ietf_rtr_msg rtr_msg; - u8 priv_data[0]; -}; - -struct nes_v4_quad { - u32 rsvd0; - __le32 DstIpAdrIndex; /* Only most significant 5 bits are valid */ - __be32 SrcIpadr; - __be16 TcpPorts[2]; /* src is low, dest is high */ -}; - -struct nes_cm_node; -enum nes_timer_type { - NES_TIMER_TYPE_SEND, - NES_TIMER_TYPE_RECV, - NES_TIMER_NODE_CLEANUP, - NES_TIMER_TYPE_CLOSE, -}; - -#define NES_PASSIVE_STATE_INDICATED 0 -#define NES_DO_NOT_SEND_RESET_EVENT 1 -#define NES_SEND_RESET_EVENT 2 - -#define MAX_NES_IFS 4 - -#define SET_ACK 1 -#define SET_SYN 2 -#define SET_FIN 4 -#define SET_RST 8 - -#define TCP_OPTIONS_PADDING 3 - -struct option_base { - u8 optionnum; - u8 length; -}; - -enum option_numbers { - OPTION_NUMBER_END, - OPTION_NUMBER_NONE, - OPTION_NUMBER_MSS, - OPTION_NUMBER_WINDOW_SCALE, - OPTION_NUMBER_SACK_PERM, - OPTION_NUMBER_SACK, - OPTION_NUMBER_WRITE0 = 0xbc -}; - -struct option_mss { - u8 optionnum; - u8 length; - __be16 mss; -}; - -struct option_windowscale { - u8 optionnum; - u8 length; - u8 shiftcount; -}; - -union all_known_options { - char as_end; - struct option_base as_base; - struct option_mss as_mss; - struct option_windowscale as_windowscale; -}; - -struct nes_timer_entry { - struct list_head list; - unsigned long timetosend; /* jiffies */ - struct sk_buff *skb; - u32 type; - u32 retrycount; - u32 retranscount; - u32 context; - u32 seq_num; - u32 send_retrans; - int close_when_complete; - struct net_device *netdev; -}; - -#define NES_DEFAULT_RETRYS 64 -#define NES_DEFAULT_RETRANS 8 -#ifdef CONFIG_INFINIBAND_NES_DEBUG -#define NES_RETRY_TIMEOUT (1000*HZ/1000) -#else -#define NES_RETRY_TIMEOUT (3000*HZ/1000) -#endif -#define NES_SHORT_TIME (10) -#define NES_LONG_TIME (2000*HZ/1000) -#define NES_MAX_TIMEOUT ((unsigned long) (12*HZ)) - -#define NES_CM_HASHTABLE_SIZE 1024 -#define NES_CM_TCP_TIMER_INTERVAL 3000 -#define NES_CM_DEFAULT_MTU 1540 -#define NES_CM_DEFAULT_FRAME_CNT 10 -#define NES_CM_THREAD_STACK_SIZE 256 -#define NES_CM_DEFAULT_RCV_WND 64240 // before we know that window scaling is allowed -#define NES_CM_DEFAULT_RCV_WND_SCALED 256960 // after we know that window scaling is allowed -#define NES_CM_DEFAULT_RCV_WND_SCALE 2 -#define NES_CM_DEFAULT_FREE_PKTS 0x000A -#define 
NES_CM_FREE_PKT_LO_WATERMARK 2 - -#define NES_CM_DEFAULT_MSS 536 - -#define NES_CM_DEF_SEQ 0x159bf75f -#define NES_CM_DEF_LOCAL_ID 0x3b47 - -#define NES_CM_DEF_SEQ2 0x18ed5740 -#define NES_CM_DEF_LOCAL_ID2 0xb807 -#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_RTR_MSG_SIZE + IETF_MAX_PRIV_DATA_LEN) - -typedef u32 nes_addr_t; - -#define nes_cm_tsa_context nes_qp_context - -struct nes_qp; - -/* cm node transition states */ -enum nes_cm_node_state { - NES_CM_STATE_UNKNOWN, - NES_CM_STATE_INITED, - NES_CM_STATE_LISTENING, - NES_CM_STATE_SYN_RCVD, - NES_CM_STATE_SYN_SENT, - NES_CM_STATE_ONE_SIDE_ESTABLISHED, - NES_CM_STATE_ESTABLISHED, - NES_CM_STATE_ACCEPTING, - NES_CM_STATE_MPAREQ_SENT, - NES_CM_STATE_MPAREQ_RCVD, - NES_CM_STATE_MPAREJ_RCVD, - NES_CM_STATE_TSA, - NES_CM_STATE_FIN_WAIT1, - NES_CM_STATE_FIN_WAIT2, - NES_CM_STATE_CLOSE_WAIT, - NES_CM_STATE_TIME_WAIT, - NES_CM_STATE_LAST_ACK, - NES_CM_STATE_CLOSING, - NES_CM_STATE_LISTENER_DESTROYED, - NES_CM_STATE_CLOSED -}; - -enum mpa_frame_version { - IETF_MPA_V1 = 1, - IETF_MPA_V2 = 2 -}; - -enum mpa_frame_key { - MPA_KEY_REQUEST, - MPA_KEY_REPLY -}; - -enum send_rdma0 { - SEND_RDMA_READ_ZERO = 1, - SEND_RDMA_WRITE_ZERO = 2 -}; - -enum nes_tcpip_pkt_type { - NES_PKT_TYPE_UNKNOWN, - NES_PKT_TYPE_SYN, - NES_PKT_TYPE_SYNACK, - NES_PKT_TYPE_ACK, - NES_PKT_TYPE_FIN, - NES_PKT_TYPE_RST -}; - - -/* type of nes connection */ -enum nes_cm_conn_type { - NES_CM_IWARP_CONN_TYPE, -}; - -/* CM context params */ -struct nes_cm_tcp_context { - u8 client; - - u32 loc_seq_num; - u32 loc_ack_num; - u32 rem_ack_num; - u32 rcv_nxt; - - u32 loc_id; - u32 rem_id; - - u32 snd_wnd; - u32 max_snd_wnd; - - u32 rcv_wnd; - u32 mss; - u8 snd_wscale; - u8 rcv_wscale; - - struct nes_cm_tsa_context tsa_cntxt; -}; - - -enum nes_cm_listener_state { - NES_CM_LISTENER_PASSIVE_STATE = 1, - NES_CM_LISTENER_ACTIVE_STATE = 2, - NES_CM_LISTENER_EITHER_STATE = 3 -}; - -struct nes_cm_listener { - struct list_head list; - struct nes_cm_core *cm_core; - u8 loc_mac[ETH_ALEN]; - nes_addr_t loc_addr; - u16 loc_port; - struct iw_cm_id *cm_id; - enum nes_cm_conn_type conn_type; - atomic_t ref_count; - struct nes_vnic *nesvnic; - atomic_t pend_accepts_cnt; - int backlog; - enum nes_cm_listener_state listener_state; - u32 reused_node; - u8 tos; -}; - -/* per connection node and node state information */ -struct nes_cm_node { - nes_addr_t loc_addr, rem_addr; - u16 loc_port, rem_port; - - u8 loc_mac[ETH_ALEN]; - u8 rem_mac[ETH_ALEN]; - - enum nes_cm_node_state state; - struct nes_cm_tcp_context tcp_cntxt; - struct nes_cm_core *cm_core; - struct sk_buff_head resend_list; - atomic_t ref_count; - struct net_device *netdev; - - struct nes_cm_node *loopbackpartner; - - struct nes_timer_entry *send_entry; - struct nes_timer_entry *recv_entry; - spinlock_t retrans_list_lock; - enum send_rdma0 send_rdma0_op; - - union { - struct ietf_mpa_v1 mpa_frame; - struct ietf_mpa_v2 mpa_v2_frame; - u8 mpa_frame_buf[MAX_CM_BUFFER]; - }; - enum mpa_frame_version mpa_frame_rev; - u16 ird_size; - u16 ord_size; - u16 mpav2_ird_ord; - - u16 mpa_frame_size; - struct iw_cm_id *cm_id; - struct list_head list; - bool accelerated; - struct nes_cm_listener *listener; - enum nes_cm_conn_type conn_type; - struct nes_vnic *nesvnic; - int apbvt_set; - int accept_pend; - struct list_head timer_entry; - struct list_head reset_entry; - struct nes_qp *nesqp; - atomic_t passive_state; - u8 tos; -}; - -/* structure for client or CM to fill when making CM api calls. */ -/* - only need to set relevant data, based on op. 
*/ -struct nes_cm_info { - union { - struct iw_cm_id *cm_id; - struct net_device *netdev; - }; - - u16 loc_port; - u16 rem_port; - nes_addr_t loc_addr; - nes_addr_t rem_addr; - enum nes_cm_conn_type conn_type; - int backlog; -}; - -/* CM event codes */ -enum nes_cm_event_type { - NES_CM_EVENT_UNKNOWN, - NES_CM_EVENT_ESTABLISHED, - NES_CM_EVENT_MPA_REQ, - NES_CM_EVENT_MPA_CONNECT, - NES_CM_EVENT_MPA_ACCEPT, - NES_CM_EVENT_MPA_REJECT, - NES_CM_EVENT_MPA_ESTABLISHED, - NES_CM_EVENT_CONNECTED, - NES_CM_EVENT_CLOSED, - NES_CM_EVENT_RESET, - NES_CM_EVENT_DROPPED_PKT, - NES_CM_EVENT_CLOSE_IMMED, - NES_CM_EVENT_CLOSE_HARD, - NES_CM_EVENT_CLOSE_CLEAN, - NES_CM_EVENT_ABORTED, - NES_CM_EVENT_SEND_FIRST -}; - -/* event to post to CM event handler */ -struct nes_cm_event { - enum nes_cm_event_type type; - - struct nes_cm_info cm_info; - struct work_struct event_work; - struct nes_cm_node *cm_node; -}; - -struct nes_cm_core { - enum nes_cm_node_state state; - - atomic_t listen_node_cnt; - struct nes_cm_node listen_list; - spinlock_t listen_list_lock; - - u32 mtu; - u32 free_tx_pkt_max; - u32 rx_pkt_posted; - atomic_t ht_node_cnt; - struct list_head connected_nodes; - /* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */ - spinlock_t ht_lock; - - struct timer_list tcp_timer; - - const struct nes_cm_ops *api; - - int (*post_event)(struct nes_cm_event *event); - atomic_t events_posted; - struct workqueue_struct *event_wq; - struct workqueue_struct *disconn_wq; - - atomic_t node_cnt; - u64 aborted_connects; - u32 options; - - struct nes_cm_node *current_listen_node; -}; - - -#define NES_CM_SET_PKT_SIZE (1 << 1) -#define NES_CM_SET_FREE_PKT_Q_SIZE (1 << 2) - -/* CM ops/API for client interface */ -struct nes_cm_ops { - int (*accelerated)(struct nes_cm_core *, struct nes_cm_node *); - struct nes_cm_listener * (*listen)(struct nes_cm_core *, struct nes_vnic *, - struct nes_cm_info *); - int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *); - struct nes_cm_node * (*connect)(struct nes_cm_core *, - struct nes_vnic *, u16, void *, - struct nes_cm_info *); - int (*close)(struct nes_cm_core *, struct nes_cm_node *); - int (*accept)(struct nes_cm_core *, struct nes_cm_node *); - int (*reject)(struct nes_cm_core *, struct nes_cm_node *); - int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, - struct sk_buff *); - int (*destroy_cm_core)(struct nes_cm_core *); - int (*get)(struct nes_cm_core *); - int (*set)(struct nes_cm_core *, u32, u32); -}; - -int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *, - enum nes_timer_type, int, int); - -int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *); -int nes_reject(struct iw_cm_id *, const void *, u8); -int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *); -int nes_create_listen(struct iw_cm_id *, int); -int nes_destroy_listen(struct iw_cm_id *); - -int nes_cm_recv(struct sk_buff *, struct net_device *); -int nes_cm_start(void); -int nes_cm_stop(void); -int nes_add_ref_cm_node(struct nes_cm_node *cm_node); -int nes_rem_ref_cm_node(struct nes_cm_node *cm_node); - -#endif /* NES_CM_H */ diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h deleted file mode 100644 index a69eef16d72d..000000000000 --- a/drivers/infiniband/hw/nes/nes_context.h +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef NES_CONTEXT_H -#define NES_CONTEXT_H - -struct nes_qp_context { - __le32 misc; - __le32 cqs; - __le32 sq_addr_low; - __le32 sq_addr_high; - __le32 rq_addr_low; - __le32 rq_addr_high; - __le32 misc2; - __le16 tcpPorts[2]; - __le32 ip0; - __le32 ip1; - __le32 ip2; - __le32 ip3; - __le32 mss; - __le32 arp_index_vlan; - __le32 tcp_state_flow_label; - __le32 pd_index_wscale; - __le32 keepalive; - u32 ts_recent; - u32 ts_age; - __le32 snd_nxt; - __le32 snd_wnd; - __le32 rcv_nxt; - __le32 rcv_wnd; - __le32 snd_max; - __le32 snd_una; - u32 srtt; - __le32 rttvar; - __le32 ssthresh; - __le32 cwnd; - __le32 snd_wl1; - __le32 snd_wl2; - __le32 max_snd_wnd; - __le32 ts_val_delta; - u32 retransmit; - u32 probe_cnt; - u32 hte_index; - __le32 q2_addr_low; - __le32 q2_addr_high; - __le32 ird_index; - u32 Rsvd3; - __le32 ird_ord_sizes; - u32 mrkr_offset; - __le32 aeq_token_low; - __le32 aeq_token_high; -}; - -/* QP Context Misc Field */ - -#define NES_QPCONTEXT_MISC_IWARP_VER_MASK 0x00000003 -#define NES_QPCONTEXT_MISC_IWARP_VER_SHIFT 0 -#define NES_QPCONTEXT_MISC_EFB_SIZE_MASK 0x000000C0 -#define NES_QPCONTEXT_MISC_EFB_SIZE_SHIFT 6 -#define NES_QPCONTEXT_MISC_RQ_SIZE_MASK 0x00000300 -#define NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT 8 -#define NES_QPCONTEXT_MISC_SQ_SIZE_MASK 0x00000c00 -#define NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT 10 -#define NES_QPCONTEXT_MISC_PCI_FCN_MASK 0x00007000 -#define NES_QPCONTEXT_MISC_PCI_FCN_SHIFT 12 -#define NES_QPCONTEXT_MISC_DUP_ACKS_MASK 0x00070000 -#define NES_QPCONTEXT_MISC_DUP_ACKS_SHIFT 16 - -enum nes_qp_context_misc_bits { - NES_QPCONTEXT_MISC_RX_WQE_SIZE = 0x00000004, - NES_QPCONTEXT_MISC_IPV4 = 0x00000008, - NES_QPCONTEXT_MISC_DO_NOT_FRAG = 0x00000010, - NES_QPCONTEXT_MISC_INSERT_VLAN = 0x00000020, - NES_QPCONTEXT_MISC_DROS = 0x00008000, - NES_QPCONTEXT_MISC_WSCALE = 0x00080000, - NES_QPCONTEXT_MISC_KEEPALIVE = 0x00100000, - NES_QPCONTEXT_MISC_TIMESTAMP = 0x00200000, - NES_QPCONTEXT_MISC_SACK = 0x00400000, - NES_QPCONTEXT_MISC_RDMA_WRITE_EN = 0x00800000, - NES_QPCONTEXT_MISC_RDMA_READ_EN = 0x01000000, - NES_QPCONTEXT_MISC_WBIND_EN = 0x10000000, - NES_QPCONTEXT_MISC_FAST_REGISTER_EN = 0x20000000, - NES_QPCONTEXT_MISC_PRIV_EN = 0x40000000, - NES_QPCONTEXT_MISC_NO_NAGLE = 0x80000000 -}; - -enum nes_qp_acc_wq_sizes { - HCONTEXT_TSA_WQ_SIZE_4 = 0, 
- HCONTEXT_TSA_WQ_SIZE_32 = 1, - HCONTEXT_TSA_WQ_SIZE_128 = 2, - HCONTEXT_TSA_WQ_SIZE_512 = 3 -}; - -/* QP Context Misc2 Fields */ -#define NES_QPCONTEXT_MISC2_TTL_MASK 0x000000ff -#define NES_QPCONTEXT_MISC2_TTL_SHIFT 0 -#define NES_QPCONTEXT_MISC2_HOP_LIMIT_MASK 0x000000ff -#define NES_QPCONTEXT_MISC2_HOP_LIMIT_SHIFT 0 -#define NES_QPCONTEXT_MISC2_LIMIT_MASK 0x00000300 -#define NES_QPCONTEXT_MISC2_LIMIT_SHIFT 8 -#define NES_QPCONTEXT_MISC2_NIC_INDEX_MASK 0x0000fc00 -#define NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT 10 -#define NES_QPCONTEXT_MISC2_SRC_IP_MASK 0x001f0000 -#define NES_QPCONTEXT_MISC2_SRC_IP_SHIFT 16 -#define NES_QPCONTEXT_MISC2_TOS_MASK 0xff000000 -#define NES_QPCONTEXT_MISC2_TOS_SHIFT 24 -#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_MASK 0xff000000 -#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_SHIFT 24 - -/* QP Context Tcp State/Flow Label Fields */ -#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_MASK 0x000fffff -#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_SHIFT 0 -#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_MASK 0xf0000000 -#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT 28 - -enum nes_qp_tcp_state { - NES_QPCONTEXT_TCPSTATE_CLOSED = 1, - NES_QPCONTEXT_TCPSTATE_EST = 5, - NES_QPCONTEXT_TCPSTATE_TIME_WAIT = 11, -}; - -/* QP Context PD Index/wscale Fields */ -#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK 0x0000000f -#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT 0 -#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK 0x00000f00 -#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT 8 -#define NES_QPCONTEXT_PDWSCALE_PDINDEX_MASK 0xffff0000 -#define NES_QPCONTEXT_PDWSCALE_PDINDEX_SHIFT 16 - -/* QP Context Keepalive Fields */ -#define NES_QPCONTEXT_KEEPALIVE_DELTA_MASK 0x0000ffff -#define NES_QPCONTEXT_KEEPALIVE_DELTA_SHIFT 0 -#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_MASK 0x00ff0000 -#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_SHIFT 16 -#define NES_QPCONTEXT_KEEPALIVE_INTV_MASK 0xff000000 -#define NES_QPCONTEXT_KEEPALIVE_INTV_SHIFT 24 - -/* QP Context ORD/IRD Fields */ -#define NES_QPCONTEXT_ORDIRD_ORDSIZE_MASK 0x0000007f -#define NES_QPCONTEXT_ORDIRD_ORDSIZE_SHIFT 0 -#define NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK 0x00030000 -#define NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT 16 -#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_MASK 0x30000000 -#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT 28 - -enum nes_ord_ird_bits { - NES_QPCONTEXT_ORDIRD_WRPDU = 0x02000000, - NES_QPCONTEXT_ORDIRD_LSMM_PRESENT = 0x04000000, - NES_QPCONTEXT_ORDIRD_ALSMM = 0x08000000, - NES_QPCONTEXT_ORDIRD_AAH = 0x40000000, - NES_QPCONTEXT_ORDIRD_RNMC = 0x80000000 -}; - -enum nes_iwarp_qp_state { - NES_QPCONTEXT_IWARP_STATE_NONEXIST = 0, - NES_QPCONTEXT_IWARP_STATE_IDLE = 1, - NES_QPCONTEXT_IWARP_STATE_RTS = 2, - NES_QPCONTEXT_IWARP_STATE_CLOSING = 3, - NES_QPCONTEXT_IWARP_STATE_TERMINATE = 5, - NES_QPCONTEXT_IWARP_STATE_ERROR = 6 -}; - - -#endif /* NES_CONTEXT_H */ diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c deleted file mode 100644 index 5517e392bc01..000000000000 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ /dev/null @@ -1,3887 +0,0 @@ -/* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/if_vlan.h> -#include <linux/slab.h> - -#include "nes.h" - -static int wide_ppm_offset; -module_param(wide_ppm_offset, int, 0644); -MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm"); - -static u32 crit_err_count; -u32 int_mod_timer_init; -u32 int_mod_cq_depth_256; -u32 int_mod_cq_depth_128; -u32 int_mod_cq_depth_32; -u32 int_mod_cq_depth_24; -u32 int_mod_cq_depth_16; -u32 int_mod_cq_depth_4; -u32 int_mod_cq_depth_1; -static const u8 nes_max_critical_error_count = 100; -#include "nes_cm.h" - -static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq); -static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count); -static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, - struct nes_adapter *nesadapter, u8 OneG_Mode); -static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq); -static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq); -static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq); -static void nes_process_iwarp_aeqe(struct nes_device *nesdev, - struct nes_hw_aeqe *aeqe); -static void process_critical_error(struct nes_device *nesdev); -static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); -static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); -static void nes_terminate_start_timer(struct nes_qp *nesqp); - -static const char *const nes_iwarp_state_str[] = { - "Non-Existent", - "Idle", - "RTS", - "Closing", - "RSVD1", - "Terminate", - "Error", - "RSVD2", -}; - -static const char *const nes_tcp_state_str[] = { - "Non-Existent", - "Closed", - "Listen", - "SYN Sent", - "SYN Rcvd", - "Established", - "Close Wait", - "FIN Wait 1", - "Closing", - "Last Ack", - "FIN Wait 2", - "Time Wait", - "RSVD1", - "RSVD2", - "RSVD3", - "RSVD4", -}; - -static inline void print_ip(struct nes_cm_node *cm_node) -{ - unsigned char *rem_addr; - if (cm_node) { - rem_addr = (unsigned char *)&cm_node->rem_addr; - printk(KERN_ERR PFX "Remote IP addr: %pI4\n", rem_addr); - } -} - -/** - * 
nes_nic_init_timer_defaults - */ -void nes_nic_init_timer_defaults(struct nes_device *nesdev, u8 jumbomode) -{ - unsigned long flags; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; - - spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); - - shared_timer->timer_in_use_min = NES_NIC_FAST_TIMER_LOW; - shared_timer->timer_in_use_max = NES_NIC_FAST_TIMER_HIGH; - if (jumbomode) { - shared_timer->threshold_low = DEFAULT_JUMBO_NES_QL_LOW; - shared_timer->threshold_target = DEFAULT_JUMBO_NES_QL_TARGET; - shared_timer->threshold_high = DEFAULT_JUMBO_NES_QL_HIGH; - } else { - shared_timer->threshold_low = DEFAULT_NES_QL_LOW; - shared_timer->threshold_target = DEFAULT_NES_QL_TARGET; - shared_timer->threshold_high = DEFAULT_NES_QL_HIGH; - } - - /* todo use netdev->mtu to set thresholds */ - spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); -} - - -/** - * nes_nic_init_timer - */ -static void nes_nic_init_timer(struct nes_device *nesdev) -{ - unsigned long flags; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; - - spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); - - if (shared_timer->timer_in_use_old == 0) { - nesdev->deepcq_count = 0; - shared_timer->timer_direction_upward = 0; - shared_timer->timer_direction_downward = 0; - shared_timer->timer_in_use = NES_NIC_FAST_TIMER; - shared_timer->timer_in_use_old = 0; - - } - if (shared_timer->timer_in_use != shared_timer->timer_in_use_old) { - shared_timer->timer_in_use_old = shared_timer->timer_in_use; - nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, - 0x80000000 | ((u32)(shared_timer->timer_in_use*8))); - } - /* todo use netdev->mtu to set thresholds */ - spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); -} - - -/** - * nes_nic_tune_timer - */ -static void nes_nic_tune_timer(struct nes_device *nesdev) -{ - unsigned long flags; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; - u16 cq_count = nesdev->currcq_count; - - spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); - - if (shared_timer->cq_count_old <= cq_count) - shared_timer->cq_direction_downward = 0; - else - shared_timer->cq_direction_downward++; - shared_timer->cq_count_old = cq_count; - if (shared_timer->cq_direction_downward > NES_NIC_CQ_DOWNWARD_TREND) { - if (cq_count <= shared_timer->threshold_low && - shared_timer->threshold_low > 4) { - shared_timer->threshold_low = shared_timer->threshold_low/2; - shared_timer->cq_direction_downward=0; - nesdev->currcq_count = 0; - spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); - return; - } - } - - if (cq_count > 1) { - nesdev->deepcq_count += cq_count; - if (cq_count <= shared_timer->threshold_low) { /* increase timer gently */ - shared_timer->timer_direction_upward++; - shared_timer->timer_direction_downward = 0; - } else if (cq_count <= shared_timer->threshold_target) { /* balanced */ - shared_timer->timer_direction_upward = 0; - shared_timer->timer_direction_downward = 0; - } else if (cq_count <= shared_timer->threshold_high) { /* decrease timer gently */ - shared_timer->timer_direction_downward++; - shared_timer->timer_direction_upward = 0; - } else if (cq_count <= (shared_timer->threshold_high) * 2) { - shared_timer->timer_in_use -= 2; - shared_timer->timer_direction_upward = 0; - shared_timer->timer_direction_downward++; - } else { - 
shared_timer->timer_in_use -= 4; - shared_timer->timer_direction_upward = 0; - shared_timer->timer_direction_downward++; - } - - if (shared_timer->timer_direction_upward > 3 ) { /* using history */ - shared_timer->timer_in_use += 3; - shared_timer->timer_direction_upward = 0; - shared_timer->timer_direction_downward = 0; - } - if (shared_timer->timer_direction_downward > 5) { /* using history */ - shared_timer->timer_in_use -= 4 ; - shared_timer->timer_direction_downward = 0; - shared_timer->timer_direction_upward = 0; - } - } - - /* boundary checking */ - if (shared_timer->timer_in_use > shared_timer->threshold_high) - shared_timer->timer_in_use = shared_timer->threshold_high; - else if (shared_timer->timer_in_use < shared_timer->threshold_low) - shared_timer->timer_in_use = shared_timer->threshold_low; - - nesdev->currcq_count = 0; - - spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); -} - - -/** - * nes_init_adapter - initialize adapter - */ -struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { - struct nes_adapter *nesadapter = NULL; - unsigned long num_pds; - u32 u32temp; - u32 port_count; - u16 max_rq_wrs; - u16 max_sq_wrs; - u32 max_mr; - u32 max_256pbl; - u32 max_4kpbl; - u32 max_qp; - u32 max_irrq; - u32 max_cq; - u32 hte_index_mask; - u32 adapter_size; - u32 arp_table_size; - u16 vendor_id; - u16 device_id; - u8 OneG_Mode; - u8 func_index; - - /* search the list of existing adapters */ - list_for_each_entry(nesadapter, &nes_adapter_list, list) { - nes_debug(NES_DBG_INIT, "Searching Adapter list for PCI devfn = 0x%X," - " adapter PCI slot/bus = %u/%u, pci devices PCI slot/bus = %u/%u, .\n", - nesdev->pcidev->devfn, - PCI_SLOT(nesadapter->devfn), - nesadapter->bus_number, - PCI_SLOT(nesdev->pcidev->devfn), - nesdev->pcidev->bus->number ); - if ((PCI_SLOT(nesadapter->devfn) == PCI_SLOT(nesdev->pcidev->devfn)) && - (nesadapter->bus_number == nesdev->pcidev->bus->number)) { - nesadapter->ref_count++; - return nesadapter; - } - } - - /* no adapter found */ - num_pds = pci_resource_len(nesdev->pcidev, BAR_1) >> PAGE_SHIFT; - if ((hw_rev != NE020_REV) && (hw_rev != NE020_REV1)) { - nes_debug(NES_DBG_INIT, "NE020 driver detected unknown hardware revision 0x%x\n", - hw_rev); - return NULL; - } - - nes_debug(NES_DBG_INIT, "Determine Soft Reset, QP_control=0x%x, CPU0=0x%x, CPU1=0x%x, CPU2=0x%x\n", - nes_read_indexed(nesdev, NES_IDX_QP_CONTROL + PCI_FUNC(nesdev->pcidev->devfn) * 8), - nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS), - nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 4), - nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 8)); - - nes_debug(NES_DBG_INIT, "Reset and init NE020\n"); - - - if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0) - return NULL; - - max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE); - nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp); - - u32temp = nes_read_indexed(nesdev, NES_IDX_QUAD_HASH_TABLE_SIZE); - if (max_qp > ((u32)1 << (u32temp & 0x001f))) { - nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to hash table size = 0x%08X\n", - max_qp, u32temp); - max_qp = (u32)1 << (u32temp & 0x001f); - } - - hte_index_mask = ((u32)1 << ((u32temp & 0x001f)+1))-1; - nes_debug(NES_DBG_INIT, "Max QP = %u, hte_index_mask = 0x%08X.\n", - max_qp, hte_index_mask); - - u32temp = nes_read_indexed(nesdev, NES_IDX_IRRQ_COUNT); - - max_irrq = 1 << (u32temp & 0x001f); - - if (max_qp > max_irrq) { - max_qp = max_irrq; - nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to Available Q1s.\n", - max_qp); - } - 
- /* there should be no reason to allocate more pds than qps */ - if (num_pds > max_qp) - num_pds = max_qp; - - u32temp = nes_read_indexed(nesdev, NES_IDX_MRT_SIZE); - max_mr = (u32)8192 << (u32temp & 0x7); - - u32temp = nes_read_indexed(nesdev, NES_IDX_PBL_REGION_SIZE); - max_256pbl = (u32)1 << (u32temp & 0x0000001f); - max_4kpbl = (u32)1 << ((u32temp >> 16) & 0x0000001f); - max_cq = nes_read_indexed(nesdev, NES_IDX_CQ_CTX_SIZE); - - u32temp = nes_read_indexed(nesdev, NES_IDX_ARP_CACHE_SIZE); - arp_table_size = 1 << u32temp; - - adapter_size = (sizeof(struct nes_adapter) + - (sizeof(unsigned long)-1)) & (~(sizeof(unsigned long)-1)); - adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp); - adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr); - adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq); - adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds); - adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size); - adapter_size += sizeof(struct nes_qp **) * max_qp; - - /* allocate a new adapter struct */ - nesadapter = kzalloc(adapter_size, GFP_KERNEL); - if (!nesadapter) - return NULL; - - nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n", - nesadapter, (u32)sizeof(struct nes_adapter), adapter_size); - - if (nes_read_eeprom_values(nesdev, nesadapter)) { - printk(KERN_ERR PFX "Unable to read EEPROM data.\n"); - kfree(nesadapter); - return NULL; - } - - nesadapter->vendor_id = (((u32) nesadapter->mac_addr_high) << 8) | - (nesadapter->mac_addr_low >> 24); - - pci_bus_read_config_word(nesdev->pcidev->bus, nesdev->pcidev->devfn, - PCI_DEVICE_ID, &device_id); - nesadapter->vendor_part_id = device_id; - - if (nes_init_serdes(nesdev, hw_rev, port_count, nesadapter, - OneG_Mode)) { - kfree(nesadapter); - return NULL; - } - nes_init_csr_ne020(nesdev, hw_rev, port_count); - - memset(nesadapter->pft_mcast_map, 255, - sizeof nesadapter->pft_mcast_map); - - /* populate the new nesadapter */ - nesadapter->nesdev = nesdev; - nesadapter->devfn = nesdev->pcidev->devfn; - nesadapter->bus_number = nesdev->pcidev->bus->number; - nesadapter->ref_count = 1; - nesadapter->timer_int_req = 0xffff0000; - nesadapter->OneG_Mode = OneG_Mode; - nesadapter->doorbell_start = nesdev->doorbell_region; - - /* nesadapter->tick_delta = clk_divisor; */ - nesadapter->hw_rev = hw_rev; - nesadapter->port_count = port_count; - - nesadapter->max_qp = max_qp; - nesadapter->hte_index_mask = hte_index_mask; - nesadapter->max_irrq = max_irrq; - nesadapter->max_mr = max_mr; - nesadapter->max_256pbl = max_256pbl - 1; - nesadapter->max_4kpbl = max_4kpbl - 1; - nesadapter->max_cq = max_cq; - nesadapter->free_256pbl = max_256pbl - 1; - nesadapter->free_4kpbl = max_4kpbl - 1; - nesadapter->max_pd = num_pds; - nesadapter->arp_table_size = arp_table_size; - - nesadapter->et_pkt_rate_low = NES_TIMER_ENABLE_LIMIT; - if (nes_drv_opt & NES_DRV_OPT_DISABLE_INT_MOD) { - nesadapter->et_use_adaptive_rx_coalesce = 0; - nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; - nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; - } else { - nesadapter->et_use_adaptive_rx_coalesce = 1; - nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; - nesadapter->et_rx_coalesce_usecs_irq = 0; - printk(PFX "%s: Using Adaptive Interrupt Moderation\n", __func__); - } - /* Setup and enable the periodic timer */ - if (nesadapter->et_rx_coalesce_usecs_irq) - nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 | - ((u32)(nesadapter->et_rx_coalesce_usecs_irq * 8))); 
- else - nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x00000000); - - nesadapter->base_pd = 1; - - nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | - IB_DEVICE_MEM_WINDOW | - IB_DEVICE_MEM_MGT_EXTENSIONS; - - nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter) - [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]); - nesadapter->allocated_cqs = &nesadapter->allocated_qps[BITS_TO_LONGS(max_qp)]; - nesadapter->allocated_mrs = &nesadapter->allocated_cqs[BITS_TO_LONGS(max_cq)]; - nesadapter->allocated_pds = &nesadapter->allocated_mrs[BITS_TO_LONGS(max_mr)]; - nesadapter->allocated_arps = &nesadapter->allocated_pds[BITS_TO_LONGS(num_pds)]; - nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]); - - - /* mark the usual suspect QPs, MR and CQs as in use */ - for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) { - set_bit(u32temp, nesadapter->allocated_qps); - set_bit(u32temp, nesadapter->allocated_cqs); - } - set_bit(0, nesadapter->allocated_mrs); - - for (u32temp = 0; u32temp < 20; u32temp++) - set_bit(u32temp, nesadapter->allocated_pds); - u32temp = nes_read_indexed(nesdev, NES_IDX_QP_MAX_CFG_SIZES); - - max_rq_wrs = ((u32temp >> 8) & 3); - switch (max_rq_wrs) { - case 0: - max_rq_wrs = 4; - break; - case 1: - max_rq_wrs = 16; - break; - case 2: - max_rq_wrs = 32; - break; - case 3: - max_rq_wrs = 512; - break; - } - - max_sq_wrs = (u32temp & 3); - switch (max_sq_wrs) { - case 0: - max_sq_wrs = 4; - break; - case 1: - max_sq_wrs = 16; - break; - case 2: - max_sq_wrs = 32; - break; - case 3: - max_sq_wrs = 512; - break; - } - nesadapter->max_qp_wr = min(max_rq_wrs, max_sq_wrs); - nesadapter->max_irrq_wr = (u32temp >> 16) & 3; - - nesadapter->max_sge = 4; - nesadapter->max_cqe = 32766; - - if (nes_read_eeprom_values(nesdev, nesadapter)) { - printk(KERN_ERR PFX "Unable to read EEPROM data.\n"); - kfree(nesadapter); - return NULL; - } - - u32temp = nes_read_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG); - nes_write_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG, - (u32temp & 0xff000000) | (nesadapter->tcp_timer_core_clk_divisor & 0x00ffffff)); - - /* setup port configuration */ - if (nesadapter->port_count == 1) { - nesadapter->log_port = 0x00000000; - if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) - nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002); - else - nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); - } else { - if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { - nesadapter->log_port = 0x000000D8; - } else { - if (nesadapter->port_count == 2) - nesadapter->log_port = 0x00000044; - else - nesadapter->log_port = 0x000000e4; - } - nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); - } - - nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, - nesadapter->log_port); - nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n", - nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT)); - - spin_lock_init(&nesadapter->resource_lock); - spin_lock_init(&nesadapter->phy_lock); - spin_lock_init(&nesadapter->pbl_lock); - spin_lock_init(&nesadapter->periodic_timer_lock); - - INIT_LIST_HEAD(&nesadapter->nesvnic_list[0]); - INIT_LIST_HEAD(&nesadapter->nesvnic_list[1]); - INIT_LIST_HEAD(&nesadapter->nesvnic_list[2]); - INIT_LIST_HEAD(&nesadapter->nesvnic_list[3]); - - if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) { - u32 pcs_control_status0, pcs_control_status1; - u32 reset_value; - u32 i = 0; - u32 int_cnt = 0; - u32 ext_cnt = 0; - unsigned 
long flags; - u32 j = 0; - - pcs_control_status0 = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0); - pcs_control_status1 = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); - - for (i = 0; i < NES_MAX_LINK_CHECK; i++) { - pcs_control_status0 = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0); - pcs_control_status1 = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); - if ((0x0F000100 == (pcs_control_status0 & 0x0F000100)) - || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) - int_cnt++; - usleep_range(1000, 2000); - } - if (int_cnt > 1) { - spin_lock_irqsave(&nesadapter->phy_lock, flags); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8); - mh_detected++; - reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); - reset_value |= 0x0000003d; - nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); - - while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) - & 0x00000040) != 0x00000040) && (j++ < 5000)); - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); - - pcs_control_status0 = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0); - pcs_control_status1 = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); - - for (i = 0; i < NES_MAX_LINK_CHECK; i++) { - pcs_control_status0 = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0); - pcs_control_status1 = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); - if ((0x0F000100 == (pcs_control_status0 & 0x0F000100)) - || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) { - if (++ext_cnt > int_cnt) { - spin_lock_irqsave(&nesadapter->phy_lock, flags); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, - 0x0000F088); - mh_detected++; - reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); - reset_value |= 0x0000003d; - nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); - - while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) - & 0x00000040) != 0x00000040) && (j++ < 5000)); - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); - break; - } - } - usleep_range(1000, 2000); - } - } - } - - if (nesadapter->hw_rev == NE020_REV) { - timer_setup(&nesadapter->mh_timer, nes_mh_fix, 0); - nesadapter->mh_timer.expires = jiffies + (HZ/5); /* 1 second */ - add_timer(&nesadapter->mh_timer); - } else { - nes_write32(nesdev->regs+NES_INTF_INT_STAT, 0x0f000000); - } - - timer_setup(&nesadapter->lc_timer, nes_clc, 0); - nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */ - add_timer(&nesadapter->lc_timer); - - list_add_tail(&nesadapter->list, &nes_adapter_list); - - for (func_index = 0; func_index < 8; func_index++) { - pci_bus_read_config_word(nesdev->pcidev->bus, - PCI_DEVFN(PCI_SLOT(nesdev->pcidev->devfn), - func_index), 0, &vendor_id); - if (vendor_id == 0xffff) - break; - } - nes_debug(NES_DBG_INIT, "%s %d functions found for %s.\n", __func__, - func_index, pci_name(nesdev->pcidev)); - nesadapter->adapter_fcn_count = func_index; - - return nesadapter; -} - - -/** - * nes_reset_adapter_ne020 - */ -static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode) -{ - u32 port_count; - u32 u32temp; - u32 i; - - u32temp = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); - port_count = ((u32temp & 0x00000300) >> 8) + 1; - /* TODO: assuming that both SERDES are set the same for now */ - *OneG_Mode = (u32temp & 0x00003c00) ? 
0 : 1; - nes_debug(NES_DBG_INIT, "Initial Software Reset = 0x%08X, port_count=%u\n", - u32temp, port_count); - if (*OneG_Mode) - nes_debug(NES_DBG_INIT, "Running in 1G mode.\n"); - u32temp &= 0xff00ffc0; - switch (port_count) { - case 1: - u32temp |= 0x00ee0000; - break; - case 2: - u32temp |= 0x00cc0000; - break; - case 4: - u32temp |= 0x00000000; - break; - default: - return 0; - break; - } - - /* check and do full reset if needed */ - if (nes_read_indexed(nesdev, NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))) { - nes_debug(NES_DBG_INIT, "Issuing Full Soft reset = 0x%08X\n", u32temp | 0xd); - nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd); - - i = 0; - while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) - mdelay(1); - if (i > 10000) { - nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n"); - return 0; - } - - i = 0; - while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000) - mdelay(1); - if (i > 10000) { - printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n", - nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS)); - return 0; - } - } - - /* port reset */ - switch (port_count) { - case 1: - u32temp |= 0x00ee0010; - break; - case 2: - u32temp |= 0x00cc0030; - break; - case 4: - u32temp |= 0x00000030; - break; - } - - nes_debug(NES_DBG_INIT, "Issuing Port Soft reset = 0x%08X\n", u32temp | 0xd); - nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd); - - i = 0; - while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) - mdelay(1); - if (i > 10000) { - nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n"); - return 0; - } - - /* serdes 0 */ - i = 0; - while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) - & 0x0000000f)) != 0x0000000f) && i++ < 5000) - mdelay(1); - if (i > 5000) { - nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp); - return 0; - } - - /* serdes 1 */ - if (port_count > 1) { - i = 0; - while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) - & 0x0000000f)) != 0x0000000f) && i++ < 5000) - mdelay(1); - if (i > 5000) { - nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp); - return 0; - } - } - - return port_count; -} - - -/** - * nes_init_serdes - */ -static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, - struct nes_adapter *nesadapter, u8 OneG_Mode) -{ - int i; - u32 u32temp; - u32 sds; - - if (hw_rev != NE020_REV) { - /* init serdes 0 */ - switch (nesadapter->phy_type[0]) { - case NES_PHY_TYPE_CX4: - if (wide_ppm_offset) - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA); - else - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); - break; - case NES_PHY_TYPE_KR: - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000); - break; - case NES_PHY_TYPE_PUMA_1G: - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); - sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); - sds |= 0x00000100; - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds); - break; - default: - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); - break; - } - - if (!OneG_Mode) - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000); - - if (port_count < 2) - return 0; - - /* init serdes 1 */ - if (!(OneG_Mode && 
(nesadapter->phy_type[1] != NES_PHY_TYPE_PUMA_1G))) - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF); - - switch (nesadapter->phy_type[1]) { - case NES_PHY_TYPE_ARGUS: - case NES_PHY_TYPE_SFP_D: - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000); - break; - case NES_PHY_TYPE_CX4: - if (wide_ppm_offset) - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA); - break; - case NES_PHY_TYPE_KR: - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000); - break; - case NES_PHY_TYPE_PUMA_1G: - sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); - sds |= 0x000000100; - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds); - } - if (!OneG_Mode) { - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000); - sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); - sds &= 0xFFFFFFBF; - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds); - } - } else { - /* init serdes 0 */ - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008); - i = 0; - while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) - & 0x0000000f)) != 0x0000000f) && i++ < 5000) - mdelay(1); - if (i > 5000) { - nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp); - return 1; - } - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000); - if (OneG_Mode) - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222); - else - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222); - - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff); - if (port_count > 1) { - /* init serdes 1 */ - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x00000048); - i = 0; - while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) - & 0x0000000f)) != 0x0000000f) && (i++ < 5000)) - mdelay(1); - if (i > 5000) { - printk("%s: Init: serdes 1 not ready, status=%x\n", __func__, u32temp); - /* return 1; */ - } - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x000bdef7); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE1, 0x9ce73000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE1, 0x0ff00000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET1, 0x00000000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS1, 0x00000000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1, 0x00000000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL1, 0xf0002222); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000ff); - } - } - return 0; -} - - -/** - * nes_init_csr_ne020 - * Initialize registers for ne020 hardware - */ -static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count) -{ - u32 u32temp; - - nes_debug(NES_DBG_INIT, "port_count=%d\n", port_count); - - nes_write_indexed(nesdev, 0x000001E4, 0x00000007); - /* nes_write_indexed(nesdev, 0x000001E8, 0x000208C4); */ - nes_write_indexed(nesdev, 0x000001E8, 0x00020874); - nes_write_indexed(nesdev, 
0x000001D8, 0x00048002); - /* nes_write_indexed(nesdev, 0x000001D8, 0x0004B002); */ - nes_write_indexed(nesdev, 0x000001FC, 0x00050005); - nes_write_indexed(nesdev, 0x00000600, 0x55555555); - nes_write_indexed(nesdev, 0x00000604, 0x55555555); - - /* TODO: move these MAC register settings to NIC bringup */ - nes_write_indexed(nesdev, 0x00002000, 0x00000001); - nes_write_indexed(nesdev, 0x00002004, 0x00000001); - nes_write_indexed(nesdev, 0x00002008, 0x0000FFFF); - nes_write_indexed(nesdev, 0x0000200C, 0x00000001); - nes_write_indexed(nesdev, 0x00002010, 0x000003c1); - nes_write_indexed(nesdev, 0x0000201C, 0x75345678); - if (port_count > 1) { - nes_write_indexed(nesdev, 0x00002200, 0x00000001); - nes_write_indexed(nesdev, 0x00002204, 0x00000001); - nes_write_indexed(nesdev, 0x00002208, 0x0000FFFF); - nes_write_indexed(nesdev, 0x0000220C, 0x00000001); - nes_write_indexed(nesdev, 0x00002210, 0x000003c1); - nes_write_indexed(nesdev, 0x0000221C, 0x75345678); - nes_write_indexed(nesdev, 0x00000908, 0x20000001); - } - if (port_count > 2) { - nes_write_indexed(nesdev, 0x00002400, 0x00000001); - nes_write_indexed(nesdev, 0x00002404, 0x00000001); - nes_write_indexed(nesdev, 0x00002408, 0x0000FFFF); - nes_write_indexed(nesdev, 0x0000240C, 0x00000001); - nes_write_indexed(nesdev, 0x00002410, 0x000003c1); - nes_write_indexed(nesdev, 0x0000241C, 0x75345678); - nes_write_indexed(nesdev, 0x00000910, 0x20000001); - - nes_write_indexed(nesdev, 0x00002600, 0x00000001); - nes_write_indexed(nesdev, 0x00002604, 0x00000001); - nes_write_indexed(nesdev, 0x00002608, 0x0000FFFF); - nes_write_indexed(nesdev, 0x0000260C, 0x00000001); - nes_write_indexed(nesdev, 0x00002610, 0x000003c1); - nes_write_indexed(nesdev, 0x0000261C, 0x75345678); - nes_write_indexed(nesdev, 0x00000918, 0x20000001); - } - - nes_write_indexed(nesdev, 0x00005000, 0x00018000); - /* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */ - nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, (wqm_quanta << 1) | - 0x00000001); - nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F); - nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F); - nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F); - nes_write_indexed(nesdev, 0x00005020, 0x1F1F1F1F); - nes_write_indexed(nesdev, 0x00006090, 0xFFFFFFFF); - - /* TODO: move this to code, get from EEPROM */ - nes_write_indexed(nesdev, 0x00000900, 0x20000001); - nes_write_indexed(nesdev, 0x000060C0, 0x0000028e); - nes_write_indexed(nesdev, 0x000060C8, 0x00000020); - - nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0); - /* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */ - - if (hw_rev != NE020_REV) { - u32temp = nes_read_indexed(nesdev, 0x000008e8); - u32temp |= 0x80000000; - nes_write_indexed(nesdev, 0x000008e8, u32temp); - u32temp = nes_read_indexed(nesdev, 0x000021f8); - u32temp &= 0x7fffffff; - u32temp |= 0x7fff0010; - nes_write_indexed(nesdev, 0x000021f8, u32temp); - if (port_count > 1) { - u32temp = nes_read_indexed(nesdev, 0x000023f8); - u32temp &= 0x7fffffff; - u32temp |= 0x7fff0010; - nes_write_indexed(nesdev, 0x000023f8, u32temp); - } - } -} - - -/** - * nes_destroy_adapter - destroy the adapter structure - */ -void nes_destroy_adapter(struct nes_adapter *nesadapter) -{ - struct nes_adapter *tmp_adapter; - - list_for_each_entry(tmp_adapter, &nes_adapter_list, list) { - nes_debug(NES_DBG_SHUTDOWN, "Nes Adapter list entry = 0x%p.\n", - tmp_adapter); - } - - nesadapter->ref_count--; - if (!nesadapter->ref_count) { - if (nesadapter->hw_rev == NE020_REV) { - del_timer(&nesadapter->mh_timer); - } - 
del_timer(&nesadapter->lc_timer); - - list_del(&nesadapter->list); - kfree(nesadapter); - } -} - - -/** - * nes_init_cqp - */ -int nes_init_cqp(struct nes_device *nesdev) -{ - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_hw_cqp_qp_context *cqp_qp_context; - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_hw_ceq *ceq; - struct nes_hw_ceq *nic_ceq; - struct nes_hw_aeq *aeq; - void *vmem; - dma_addr_t pmem; - u32 count=0; - u32 cqp_head; - u64 u64temp; - u32 u32temp; - - /* allocate CQP memory */ - /* Need to add max_cq to the aeq size once cq overflow checking is added back */ - /* SQ is 512 byte aligned, others are 256 byte aligned */ - nesdev->cqp_mem_size = 512 + - (sizeof(struct nes_hw_cqp_wqe) * NES_CQP_SQ_SIZE) + - (sizeof(struct nes_hw_cqe) * NES_CCQ_SIZE) + - max(((u32)sizeof(struct nes_hw_ceqe) * NES_CCEQ_SIZE), (u32)256) + - max(((u32)sizeof(struct nes_hw_ceqe) * NES_NIC_CEQ_SIZE), (u32)256) + - (sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) + - sizeof(struct nes_hw_cqp_qp_context); - - nesdev->cqp_vbase = pci_zalloc_consistent(nesdev->pcidev, - nesdev->cqp_mem_size, - &nesdev->cqp_pbase); - if (!nesdev->cqp_vbase) { - nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n"); - return -ENOMEM; - } - - /* Allocate a twice the number of CQP requests as the SQ size */ - nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) * - 2 * NES_CQP_SQ_SIZE, GFP_KERNEL); - if (!nesdev->nes_cqp_requests) { - pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase, - nesdev->cqp.sq_pbase); - return -ENOMEM; - } - - nes_debug(NES_DBG_INIT, "Allocated CQP structures at %p (phys = %016lX), size = %u.\n", - nesdev->cqp_vbase, (unsigned long)nesdev->cqp_pbase, nesdev->cqp_mem_size); - - spin_lock_init(&nesdev->cqp.lock); - init_waitqueue_head(&nesdev->cqp.waitq); - - /* Setup Various Structures */ - vmem = (void *)(((unsigned long)nesdev->cqp_vbase + (512 - 1)) & - ~(unsigned long)(512 - 1)); - pmem = (dma_addr_t)(((unsigned long long)nesdev->cqp_pbase + (512 - 1)) & - ~(unsigned long long)(512 - 1)); - - nesdev->cqp.sq_vbase = vmem; - nesdev->cqp.sq_pbase = pmem; - nesdev->cqp.sq_size = NES_CQP_SQ_SIZE; - nesdev->cqp.sq_head = 0; - nesdev->cqp.sq_tail = 0; - nesdev->cqp.qp_id = PCI_FUNC(nesdev->pcidev->devfn); - - vmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size); - pmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size); - - nesdev->ccq.cq_vbase = vmem; - nesdev->ccq.cq_pbase = pmem; - nesdev->ccq.cq_size = NES_CCQ_SIZE; - nesdev->ccq.cq_head = 0; - nesdev->ccq.ce_handler = nes_cqp_ce_handler; - nesdev->ccq.cq_number = PCI_FUNC(nesdev->pcidev->devfn); - - vmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size); - pmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size); - - nesdev->ceq_index = PCI_FUNC(nesdev->pcidev->devfn); - ceq = &nesadapter->ceq[nesdev->ceq_index]; - ceq->ceq_vbase = vmem; - ceq->ceq_pbase = pmem; - ceq->ceq_size = NES_CCEQ_SIZE; - ceq->ceq_head = 0; - - vmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256); - pmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256); - - nesdev->nic_ceq_index = PCI_FUNC(nesdev->pcidev->devfn) + 8; - nic_ceq = &nesadapter->ceq[nesdev->nic_ceq_index]; - nic_ceq->ceq_vbase = vmem; - nic_ceq->ceq_pbase = pmem; - nic_ceq->ceq_size = NES_NIC_CEQ_SIZE; - nic_ceq->ceq_head = 0; - - vmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256); - pmem += max(((u32)sizeof(struct nes_hw_ceqe) * 
nic_ceq->ceq_size), (u32)256); - - aeq = &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)]; - aeq->aeq_vbase = vmem; - aeq->aeq_pbase = pmem; - aeq->aeq_size = nesadapter->max_qp; - aeq->aeq_head = 0; - - /* Setup QP Context */ - vmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size); - pmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size); - - cqp_qp_context = vmem; - cqp_qp_context->context_words[0] = - cpu_to_le32((PCI_FUNC(nesdev->pcidev->devfn) << 12) + (2 << 10)); - cqp_qp_context->context_words[1] = 0; - cqp_qp_context->context_words[2] = cpu_to_le32((u32)nesdev->cqp.sq_pbase); - cqp_qp_context->context_words[3] = cpu_to_le32(((u64)nesdev->cqp.sq_pbase) >> 32); - - - /* Write the address to Create CQP */ - if ((sizeof(dma_addr_t) > 4)) { - nes_write_indexed(nesdev, - NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8), - ((u64)pmem) >> 32); - } else { - nes_write_indexed(nesdev, - NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8), 0); - } - nes_write_indexed(nesdev, - NES_IDX_CREATE_CQP_LOW + (PCI_FUNC(nesdev->pcidev->devfn) * 8), - (u32)pmem); - - INIT_LIST_HEAD(&nesdev->cqp_avail_reqs); - INIT_LIST_HEAD(&nesdev->cqp_pending_reqs); - - for (count = 0; count < 2*NES_CQP_SQ_SIZE; count++) { - init_waitqueue_head(&nesdev->nes_cqp_requests[count].waitq); - list_add_tail(&nesdev->nes_cqp_requests[count].list, &nesdev->cqp_avail_reqs); - } - - /* Write Create CCQ WQE */ - cqp_head = nesdev->cqp.sq_head++; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, - (NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | - NES_CQP_CQ_CHK_OVERFLOW | ((u32)nesdev->ccq.cq_size << 16))); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, - (nesdev->ccq.cq_number | - ((u32)nesdev->ceq_index << 16))); - u64temp = (u64)nesdev->ccq.cq_pbase; - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; - u64temp = (unsigned long)&nesdev->ccq; - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = - cpu_to_le32((u32)(u64temp >> 1)); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = - cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; - - /* Write Create CEQ WQE */ - cqp_head = nesdev->cqp.sq_head++; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, - (NES_CQP_CREATE_CEQ + ((u32)nesdev->ceq_index << 8))); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, ceq->ceq_size); - u64temp = (u64)ceq->ceq_pbase; - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); - - /* Write Create AEQ WQE */ - cqp_head = nesdev->cqp.sq_head++; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, - (NES_CQP_CREATE_AEQ + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8))); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX, aeq->aeq_size); - u64temp = (u64)aeq->aeq_pbase; - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); - - /* Write Create NIC CEQ WQE */ - cqp_head = nesdev->cqp.sq_head++; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, - 
(NES_CQP_CREATE_CEQ + ((u32)nesdev->nic_ceq_index << 8))); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, nic_ceq->ceq_size); - u64temp = (u64)nic_ceq->ceq_pbase; - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); - - /* Poll until CCQP done */ - count = 0; - do { - if (count++ > 1000) { - printk(KERN_ERR PFX "Error creating CQP\n"); - pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, - nesdev->cqp_vbase, nesdev->cqp_pbase); - return -1; - } - udelay(10); - } while (!(nes_read_indexed(nesdev, - NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn) * 8)) & (1 << 8))); - - nes_debug(NES_DBG_INIT, "CQP Status = 0x%08X\n", nes_read_indexed(nesdev, - NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))); - - u32temp = 0x04800000; - nes_write32(nesdev->regs+NES_WQE_ALLOC, u32temp | nesdev->cqp.qp_id); - - /* wait for the CCQ, CEQ, and AEQ to get created */ - count = 0; - do { - if (count++ > 1000) { - printk(KERN_ERR PFX "Error creating CCQ, CEQ, and AEQ\n"); - pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, - nesdev->cqp_vbase, nesdev->cqp_pbase); - return -1; - } - udelay(10); - } while (((nes_read_indexed(nesdev, - NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != (15<<8))); - - /* dump the QP status value */ - nes_debug(NES_DBG_INIT, "QP Status = 0x%08X\n", nes_read_indexed(nesdev, - NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))); - - nesdev->cqp.sq_tail++; - - return 0; -} - - -/** - * nes_destroy_cqp - */ -int nes_destroy_cqp(struct nes_device *nesdev) -{ - struct nes_hw_cqp_wqe *cqp_wqe; - u32 count = 0; - u32 cqp_head; - unsigned long flags; - - do { - if (count++ > 1000) - break; - udelay(10); - } while (!(nesdev->cqp.sq_head == nesdev->cqp.sq_tail)); - - /* Reset CCQ */ - nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_RESET | - nesdev->ccq.cq_number); - - /* Disable device interrupts */ - nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff); - - spin_lock_irqsave(&nesdev->cqp.lock, flags); - - /* Destroy the AEQ */ - cqp_head = nesdev->cqp.sq_head++; - nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_AEQ | - ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8)); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0; - - /* Destroy the NIC CEQ */ - cqp_head = nesdev->cqp.sq_head++; - nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ | - ((u32)nesdev->nic_ceq_index << 8)); - - /* Destroy the CEQ */ - cqp_head = nesdev->cqp.sq_head++; - nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ | - (nesdev->ceq_index << 8)); - - /* Destroy the CCQ */ - cqp_head = nesdev->cqp.sq_head++; - nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CQ); - cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->ccq.cq_number | - ((u32)nesdev->ceq_index << 16)); - - /* Destroy CQP */ - cqp_head = nesdev->cqp.sq_head++; - nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_QP | - NES_CQP_QP_TYPE_CQP); - 
cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->cqp.qp_id); - - barrier(); - /* Ring doorbell (5 WQEs) */ - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id); - - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - - /* wait for the CCQ, CEQ, and AEQ to get destroyed */ - count = 0; - do { - if (count++ > 1000) { - printk(KERN_ERR PFX "Function%d: Error destroying CCQ, CEQ, and AEQ\n", - PCI_FUNC(nesdev->pcidev->devfn)); - break; - } - udelay(10); - } while (((nes_read_indexed(nesdev, - NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15 << 8)) != 0)); - - /* dump the QP status value */ - nes_debug(NES_DBG_SHUTDOWN, "Function%d: QP Status = 0x%08X\n", - PCI_FUNC(nesdev->pcidev->devfn), - nes_read_indexed(nesdev, - NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))); - - kfree(nesdev->nes_cqp_requests); - - /* Free the control structures */ - pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase, - nesdev->cqp.sq_pbase); - - return 0; -} - - -/** - * nes_init_1g_phy - */ -static int nes_init_1g_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index) -{ - u32 counter = 0; - u16 phy_data; - int ret = 0; - - nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data); - nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000); - - /* Reset the PHY */ - nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000); - udelay(100); - counter = 0; - do { - nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); - if (counter++ > 100) { - ret = -1; - break; - } - } while (phy_data & 0x8000); - - /* Setting no phy loopback */ - phy_data &= 0xbfff; - phy_data |= 0x1140; - nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data); - nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); - nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data); - nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data); - - /* Setting the interrupt mask */ - nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); - nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee); - nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); - - /* turning on flow control */ - nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); - nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00); - nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); - - /* Clear Half duplex */ - nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); - nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100)); - nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); - - nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); - nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300); - - return ret; -} - - -/** - * nes_init_2025_phy - */ -static int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index) -{ - u32 temp_phy_data = 0; - u32 temp_phy_data2 = 0; - u32 counter = 0; - u32 sds; - u32 mac_index = nesdev->mac_index; - int ret = 0; - unsigned int first_attempt = 1; - - /* Check firmware heartbeat */ - nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - udelay(1500); - nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); - temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - - if (temp_phy_data != temp_phy_data2) { - nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - if ((temp_phy_data & 0xff) > 0x20) - return 0; - printk(PFX "Reinitialize external PHY\n"); - } - - /* no heartbeat, 
configure the PHY */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); - - switch (phy_type) { - case NES_PHY_TYPE_ARGUS: - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); - - /* setup LEDs */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009); - break; - - case NES_PHY_TYPE_SFP_D: - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); - - /* setup LEDs */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009); - break; - - case NES_PHY_TYPE_KR: - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0010); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0080); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); - - /* setup LEDs */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x000B); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x0003); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0004); - - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0022, 0x406D); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0023, 0x0020); - break; - } - - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528); - - /* Bring PHY out of reset */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002); - - /* Check for heartbeat */ - counter = 0; - mdelay(690); - nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - do { - if (counter++ > 150) { - printk(PFX "No PHY heartbeat\n"); - break; - } - mdelay(1); - nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); - temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - } while (temp_phy_data2 == temp_phy_data); - - /* wait for tracking */ - counter = 0; - do { - nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - if (counter++ > 300) { - if (((temp_phy_data & 0xff) == 0x0) && first_attempt) { - first_attempt = 0; - counter = 0; - /* reset AMCC PHY and try again */ - 
nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0); - nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040); - continue; - } else { - ret = 1; - break; - } - } - mdelay(10); - } while ((temp_phy_data & 0xff) < 0x30); - - /* setup signal integrity */ - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032); - if (phy_type == NES_PHY_TYPE_KR) { - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x000C); - } else { - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002); - nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063); - } - - /* reset serdes */ - sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200); - sds |= 0x1; - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds); - sds &= 0xfffffffe; - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds); - - counter = 0; - while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) - && (counter++ < 5000)) - ; - - return ret; -} - - -/** - * nes_init_phy - */ -int nes_init_phy(struct nes_device *nesdev) -{ - struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 mac_index = nesdev->mac_index; - u32 tx_config = 0; - unsigned long flags; - u8 phy_type = nesadapter->phy_type[mac_index]; - u8 phy_index = nesadapter->phy_index[mac_index]; - int ret = 0; - - tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); - if (phy_type == NES_PHY_TYPE_1G) { - /* setup 1G MDIO operation */ - tx_config &= 0xFFFFFFE3; - tx_config |= 0x04; - } else { - /* setup 10G MDIO operation */ - tx_config &= 0xFFFFFFE3; - tx_config |= 0x1D; - } - nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); - - spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); - - switch (phy_type) { - case NES_PHY_TYPE_1G: - ret = nes_init_1g_phy(nesdev, phy_type, phy_index); - break; - case NES_PHY_TYPE_ARGUS: - case NES_PHY_TYPE_SFP_D: - case NES_PHY_TYPE_KR: - ret = nes_init_2025_phy(nesdev, phy_type, phy_index); - break; - } - - spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); - - return ret; -} - - -/** - * nes_replenish_nic_rq - */ -static void nes_replenish_nic_rq(struct nes_vnic *nesvnic) -{ - unsigned long flags; - dma_addr_t bus_address; - struct sk_buff *skb; - struct nes_hw_nic_rq_wqe *nic_rqe; - struct nes_hw_nic *nesnic; - struct nes_device *nesdev; - struct nes_rskb_cb *cb; - u32 rx_wqes_posted = 0; - - nesnic = &nesvnic->nic; - nesdev = nesvnic->nesdev; - spin_lock_irqsave(&nesnic->rq_lock, flags); - if (nesnic->replenishing_rq !=0) { - if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) && - (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) { - atomic_set(&nesvnic->rx_skb_timer_running, 1); - spin_unlock_irqrestore(&nesnic->rq_lock, flags); - nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */ - add_timer(&nesvnic->rq_wqes_timer); - } else - spin_unlock_irqrestore(&nesnic->rq_lock, flags); - return; - } - nesnic->replenishing_rq = 1; - spin_unlock_irqrestore(&nesnic->rq_lock, flags); - do { - skb = dev_alloc_skb(nesvnic->max_frame_size); - if (skb) { - skb->dev = nesvnic->netdev; - - bus_address = pci_map_single(nesdev->pcidev, - skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); - cb = (struct nes_rskb_cb *)&skb->cb[0]; - cb->busaddr = bus_address; - cb->maplen = nesvnic->max_frame_size; - - nic_rqe = 
&nesnic->rq_vbase[nesvnic->nic.rq_head]; - nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = - cpu_to_le32(nesvnic->max_frame_size); - nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; - nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = - cpu_to_le32((u32)bus_address); - nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = - cpu_to_le32((u32)((u64)bus_address >> 32)); - nesnic->rx_skb[nesnic->rq_head] = skb; - nesnic->rq_head++; - nesnic->rq_head &= nesnic->rq_size - 1; - atomic_dec(&nesvnic->rx_skbs_needed); - barrier(); - if (++rx_wqes_posted == 255) { - nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id); - rx_wqes_posted = 0; - } - } else { - spin_lock_irqsave(&nesnic->rq_lock, flags); - if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) && - (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) { - atomic_set(&nesvnic->rx_skb_timer_running, 1); - spin_unlock_irqrestore(&nesnic->rq_lock, flags); - nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */ - add_timer(&nesvnic->rq_wqes_timer); - } else - spin_unlock_irqrestore(&nesnic->rq_lock, flags); - break; - } - } while (atomic_read(&nesvnic->rx_skbs_needed)); - barrier(); - if (rx_wqes_posted) - nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id); - nesnic->replenishing_rq = 0; -} - - -/** - * nes_rq_wqes_timeout - */ -static void nes_rq_wqes_timeout(struct timer_list *t) -{ - struct nes_vnic *nesvnic = from_timer(nesvnic, t, rq_wqes_timer); - printk("%s: Timer fired.\n", __func__); - atomic_set(&nesvnic->rx_skb_timer_running, 0); - if (atomic_read(&nesvnic->rx_skbs_needed)) - nes_replenish_nic_rq(nesvnic); -} - - -/** - * nes_init_nic_qp - */ -int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) -{ - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_hw_nic_sq_wqe *nic_sqe; - struct nes_hw_nic_qp_context *nic_context; - struct sk_buff *skb; - struct nes_hw_nic_rq_wqe *nic_rqe; - struct nes_vnic *nesvnic = netdev_priv(netdev); - unsigned long flags; - void *vmem; - dma_addr_t pmem; - u64 u64temp; - int ret; - u32 cqp_head; - u32 counter; - u32 wqe_count; - struct nes_rskb_cb *cb; - u8 jumbomode=0; - - /* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */ - nesvnic->nic_mem_size = 256 + - (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag)) + - (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)) + - (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)) + - (NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) + - sizeof(struct nes_hw_nic_qp_context); - - nesvnic->nic_vbase = pci_zalloc_consistent(nesdev->pcidev, - nesvnic->nic_mem_size, - &nesvnic->nic_pbase); - if (!nesvnic->nic_vbase) { - nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n"); - return -ENOMEM; - } - nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n", - nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size); - - vmem = (void *)(((unsigned long)nesvnic->nic_vbase + (256 - 1)) & - ~(unsigned long)(256 - 1)); - pmem = (dma_addr_t)(((unsigned long long)nesvnic->nic_pbase + (256 - 1)) & - ~(unsigned long long)(256 - 1)); - - /* Setup the first Fragment buffers */ - nesvnic->nic.first_frag_vbase = vmem; - - for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) { - nesvnic->nic.frag_paddr[counter] = pmem; - pmem += sizeof(struct nes_first_frag); - } - - /* setup the SQ */ - vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag)); - - nesvnic->nic.sq_vbase = 
(void *)vmem; - nesvnic->nic.sq_pbase = pmem; - nesvnic->nic.sq_head = 0; - nesvnic->nic.sq_tail = 0; - nesvnic->nic.sq_size = NES_NIC_WQ_SIZE; - for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) { - nic_sqe = &nesvnic->nic.sq_vbase[counter]; - nic_sqe->wqe_words[NES_NIC_SQ_WQE_MISC_IDX] = - cpu_to_le32(NES_NIC_SQ_WQE_DISABLE_CHKSUM | - NES_NIC_SQ_WQE_COMPLETION); - nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX] = - cpu_to_le32((u32)NES_FIRST_FRAG_SIZE << 16); - nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX] = - cpu_to_le32((u32)nesvnic->nic.frag_paddr[counter]); - nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX] = - cpu_to_le32((u32)((u64)nesvnic->nic.frag_paddr[counter] >> 32)); - } - - nesvnic->get_cqp_request = nes_get_cqp_request; - nesvnic->post_cqp_request = nes_post_cqp_request; - nesvnic->mcrq_mcast_filter = NULL; - - spin_lock_init(&nesvnic->nic.rq_lock); - - /* setup the RQ */ - vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)); - pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)); - - - nesvnic->nic.rq_vbase = vmem; - nesvnic->nic.rq_pbase = pmem; - nesvnic->nic.rq_head = 0; - nesvnic->nic.rq_tail = 0; - nesvnic->nic.rq_size = NES_NIC_WQ_SIZE; - - /* setup the CQ */ - vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)); - pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)); - - if (nesdev->nesadapter->netdev_count > 2) - nesvnic->mcrq_qp_id = nesvnic->nic_index + 32; - else - nesvnic->mcrq_qp_id = nesvnic->nic.qp_id + 4; - - nesvnic->nic_cq.cq_vbase = vmem; - nesvnic->nic_cq.cq_pbase = pmem; - nesvnic->nic_cq.cq_head = 0; - nesvnic->nic_cq.cq_size = NES_NIC_WQ_SIZE * 2; - - nesvnic->nic_cq.ce_handler = nes_nic_napi_ce_handler; - - /* Send CreateCQ request to CQP */ - spin_lock_irqsave(&nesdev->cqp.lock, flags); - cqp_head = nesdev->cqp.sq_head; - - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( - NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | - ((u32)nesvnic->nic_cq.cq_size << 16)); - cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32( - nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16)); - u64temp = (u64)nesvnic->nic_cq.cq_pbase; - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; - u64temp = (unsigned long)&nesvnic->nic_cq; - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1)); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = - cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; - if (++cqp_head >= nesdev->cqp.sq_size) - cqp_head = 0; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - - /* Send CreateQP request to CQP */ - nic_context = (void *)(&nesvnic->nic_cq.cq_vbase[nesvnic->nic_cq.cq_size]); - nic_context->context_words[NES_NIC_CTX_MISC_IDX] = - cpu_to_le32((u32)NES_NIC_CTX_SIZE | - ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12)); - nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n", - nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE), - nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE)); - if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) { - nic_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE); - } - - u64temp = (u64)nesvnic->nic.sq_pbase; - 
nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp); - nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); - u64temp = (u64)nesvnic->nic.rq_pbase; - nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp); - nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); - - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP | - NES_CQP_QP_TYPE_NIC); - cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesvnic->nic.qp_id); - u64temp = (u64)nesvnic->nic_cq.cq_pbase + - (nesvnic->nic_cq.cq_size * sizeof(struct nes_hw_nic_cqe)); - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); - - if (++cqp_head >= nesdev->cqp.sq_size) - cqp_head = 0; - nesdev->cqp.sq_head = cqp_head; - - barrier(); - - /* Ring doorbell (2 WQEs) */ - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); - - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - nes_debug(NES_DBG_INIT, "Waiting for create NIC QP%u to complete.\n", - nesvnic->nic.qp_id); - - ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), - NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_INIT, "Create NIC QP%u completed, wait_event_timeout ret = %u.\n", - nesvnic->nic.qp_id, ret); - if (!ret) { - nes_debug(NES_DBG_INIT, "NIC QP%u create timeout expired\n", nesvnic->nic.qp_id); - pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase, - nesvnic->nic_pbase); - return -EIO; - } - - /* Populate the RQ */ - for (counter = 0; counter < (NES_NIC_WQ_SIZE - 1); counter++) { - skb = dev_alloc_skb(nesvnic->max_frame_size); - if (!skb) { - nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name); - - nes_destroy_nic_qp(nesvnic); - return -ENOMEM; - } - - skb->dev = netdev; - - pmem = pci_map_single(nesdev->pcidev, skb->data, - nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); - cb = (struct nes_rskb_cb *)&skb->cb[0]; - cb->busaddr = pmem; - cb->maplen = nesvnic->max_frame_size; - - nic_rqe = &nesvnic->nic.rq_vbase[counter]; - nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size); - nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; - nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem); - nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32)); - nesvnic->nic.rx_skb[counter] = skb; - } - - wqe_count = NES_NIC_WQ_SIZE - 1; - nesvnic->nic.rq_head = wqe_count; - barrier(); - do { - counter = min(wqe_count, ((u32)255)); - wqe_count -= counter; - nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id); - } while (wqe_count); - timer_setup(&nesvnic->rq_wqes_timer, nes_rq_wqes_timeout, 0); - nes_debug(NES_DBG_INIT, "NAPI support Enabled\n"); - if (nesdev->nesadapter->et_use_adaptive_rx_coalesce) - { - nes_nic_init_timer(nesdev); - if (netdev->mtu > 1500) - jumbomode = 1; - nes_nic_init_timer_defaults(nesdev, jumbomode); - } - if ((nesdev->nesadapter->allow_unaligned_fpdus) && - (nes_init_mgt_qp(nesdev, netdev, nesvnic))) { - nes_debug(NES_DBG_INIT, "%s: Out of memory for pau nic\n", - netdev->name); - nes_destroy_nic_qp(nesvnic); - return -ENOMEM; - } - - return 0; -} - - -/** - * nes_destroy_nic_qp - */ -void nes_destroy_nic_qp(struct nes_vnic *nesvnic) -{ - u64 u64temp; - dma_addr_t bus_address; - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_hw_nic_sq_wqe *nic_sqe; - __le16 
*wqe_fragment_length; - u16 wqe_fragment_index; - u32 cqp_head; - u32 wqm_cfg0; - unsigned long flags; - struct sk_buff *rx_skb; - struct nes_rskb_cb *cb; - int ret; - - if (nesdev->nesadapter->allow_unaligned_fpdus) - nes_destroy_mgt(nesvnic); - - /* clear wqe stall before destroying NIC QP */ - wqm_cfg0 = nes_read_indexed(nesdev, NES_IDX_WQM_CONFIG0); - nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0 & 0xFFFF7FFF); - - /* Free remaining NIC receive buffers */ - while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) { - rx_skb = nesvnic->nic.rx_skb[nesvnic->nic.rq_tail]; - cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; - pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, - PCI_DMA_FROMDEVICE); - - dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]); - nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1); - } - - /* Free remaining NIC transmit buffers */ - while (nesvnic->nic.sq_head != nesvnic->nic.sq_tail) { - nic_sqe = &nesvnic->nic.sq_vbase[nesvnic->nic.sq_tail]; - wqe_fragment_index = 1; - wqe_fragment_length = (__le16 *) - &nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; - /* bump past the vlan tag */ - wqe_fragment_length++; - if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) { - u64temp = (u64)le32_to_cpu( - nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+ - wqe_fragment_index*2]); - u64temp += ((u64)le32_to_cpu( - nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX - + wqe_fragment_index*2]))<<32; - bus_address = (dma_addr_t)u64temp; - if (test_and_clear_bit(nesvnic->nic.sq_tail, - nesvnic->nic.first_frag_overflow)) { - pci_unmap_single(nesdev->pcidev, - bus_address, - le16_to_cpu(wqe_fragment_length[ - wqe_fragment_index++]), - PCI_DMA_TODEVICE); - } - for (; wqe_fragment_index < 5; wqe_fragment_index++) { - if (wqe_fragment_length[wqe_fragment_index]) { - u64temp = le32_to_cpu( - nic_sqe->wqe_words[ - NES_NIC_SQ_WQE_FRAG0_LOW_IDX+ - wqe_fragment_index*2]); - u64temp += ((u64)le32_to_cpu( - nic_sqe->wqe_words[ - NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+ - wqe_fragment_index*2]))<<32; - bus_address = (dma_addr_t)u64temp; - pci_unmap_page(nesdev->pcidev, - bus_address, - le16_to_cpu( - wqe_fragment_length[ - wqe_fragment_index]), - PCI_DMA_TODEVICE); - } else - break; - } - } - if (nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]) - dev_kfree_skb( - nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]); - - nesvnic->nic.sq_tail = (nesvnic->nic.sq_tail + 1) - & (nesvnic->nic.sq_size - 1); - } - - spin_lock_irqsave(&nesdev->cqp.lock, flags); - - /* Destroy NIC QP */ - cqp_head = nesdev->cqp.sq_head; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, - (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC)); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, - nesvnic->nic.qp_id); - - if (++cqp_head >= nesdev->cqp.sq_size) - cqp_head = 0; - - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - - /* Destroy NIC CQ */ - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, - (NES_CQP_DESTROY_CQ | ((u32)nesvnic->nic_cq.cq_size << 16))); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, - (nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16))); - - if (++cqp_head >= nesdev->cqp.sq_size) - cqp_head = 0; - - nesdev->cqp.sq_head = cqp_head; - barrier(); - - /* Ring doorbell (2 WQEs) */ - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); - - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - 
nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u," - " cqp.sq_tail=%u, cqp.sq_size=%u\n", - cqp_head, nesdev->cqp.sq_head, - nesdev->cqp.sq_tail, nesdev->cqp.sq_size); - - ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), - NES_EVENT_TIMEOUT); - - nes_debug(NES_DBG_SHUTDOWN, "Destroy NIC QP returned, wait_event_timeout ret = %u, cqp_head=%u," - " cqp.sq_head=%u, cqp.sq_tail=%u\n", - ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail); - if (!ret) { - nes_debug(NES_DBG_SHUTDOWN, "NIC QP%u destroy timeout expired\n", - nesvnic->nic.qp_id); - } - - pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase, - nesvnic->nic_pbase); - - /* restore old wqm_cfg0 value */ - nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0); -} - -/** - * nes_napi_isr - */ -int nes_napi_isr(struct nes_device *nesdev) -{ - struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 int_stat; - - if (nesdev->napi_isr_ran) { - /* interrupt status has already been read in ISR */ - int_stat = nesdev->int_stat; - } else { - int_stat = nes_read32(nesdev->regs + NES_INT_STAT); - nesdev->int_stat = int_stat; - nesdev->napi_isr_ran = 1; - } - - int_stat &= nesdev->int_req; - /* iff NIC, process here, else wait for DPC */ - if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) { - nesdev->napi_isr_ran = 0; - nes_write32(nesdev->regs + NES_INT_STAT, - (int_stat & - ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3))); - - /* Process the CEQs */ - nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]); - - if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) && - (!nesadapter->et_use_adaptive_rx_coalesce)) || - ((nesadapter->et_use_adaptive_rx_coalesce) && - (nesdev->deepcq_count > nesadapter->et_pkt_rate_low))))) { - if ((nesdev->int_req & NES_INT_TIMER) == 0) { - /* Enable Periodic timer interrupts */ - nesdev->int_req |= NES_INT_TIMER; - /* ack any pending periodic timer interrupts so we don't get an immediate interrupt */ - /* TODO: need to also ack other unused periodic timer values, get from nesadapter */ - nes_write32(nesdev->regs+NES_TIMER_STAT, - nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req)); - nes_write32(nesdev->regs+NES_INTF_INT_MASK, - ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER)); - } - - if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) - { - nes_nic_init_timer(nesdev); - } - /* Enable interrupts, except CEQs */ - nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); - } else { - /* Enable interrupts, make sure timer is off */ - nesdev->int_req &= ~NES_INT_TIMER; - nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); - nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); - } - nesdev->deepcq_count = 0; - return 1; - } else { - return 0; - } -} - -static void process_critical_error(struct nes_device *nesdev) -{ - u32 debug_error; - u32 nes_idx_debug_error_masks0 = 0; - u16 error_module = 0; - - debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS); - printk(KERN_ERR PFX "Critical Error reported by device!!! 
0x%02X\n", - (u16)debug_error); - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS, - 0x01010000 | (debug_error & 0x0000ffff)); - if (crit_err_count++ > 10) - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17); - error_module = (u16) (debug_error & 0x1F00) >> 8; - if (++nesdev->nesadapter->crit_error_count[error_module-1] >= - nes_max_critical_error_count) { - printk(KERN_ERR PFX "Masking off critical error for module " - "0x%02X\n", (u16)error_module); - nes_idx_debug_error_masks0 = nes_read_indexed(nesdev, - NES_IDX_DEBUG_ERROR_MASKS0); - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, - nes_idx_debug_error_masks0 | (1 << error_module)); - } -} -/** - * nes_dpc - */ -void nes_dpc(unsigned long param) -{ - struct nes_device *nesdev = (struct nes_device *)param; - struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 counter; - u32 loop_counter = 0; - u32 int_status_bit; - u32 int_stat; - u32 timer_stat; - u32 temp_int_stat; - u32 intf_int_stat; - u32 processed_intf_int = 0; - u16 processed_timer_int = 0; - u16 completion_ints = 0; - u16 timer_ints = 0; - - /* nes_debug(NES_DBG_ISR, "\n"); */ - - do { - timer_stat = 0; - if (nesdev->napi_isr_ran) { - nesdev->napi_isr_ran = 0; - int_stat = nesdev->int_stat; - } else - int_stat = nes_read32(nesdev->regs+NES_INT_STAT); - if (processed_intf_int != 0) - int_stat &= nesdev->int_req & ~NES_INT_INTF; - else - int_stat &= nesdev->int_req; - if (processed_timer_int == 0) { - processed_timer_int = 1; - if (int_stat & NES_INT_TIMER) { - timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT); - if ((timer_stat & nesdev->timer_int_req) == 0) { - int_stat &= ~NES_INT_TIMER; - } - } - } else { - int_stat &= ~NES_INT_TIMER; - } - - if (int_stat) { - if (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0| - NES_INT_MAC1|NES_INT_MAC2 | NES_INT_MAC3)) { - /* Ack the interrupts */ - nes_write32(nesdev->regs+NES_INT_STAT, - (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0| - NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3))); - } - - temp_int_stat = int_stat; - for (counter = 0, int_status_bit = 1; counter < 16; counter++) { - if (int_stat & int_status_bit) { - nes_process_ceq(nesdev, &nesadapter->ceq[counter]); - temp_int_stat &= ~int_status_bit; - completion_ints = 1; - } - if (!(temp_int_stat & 0x0000ffff)) - break; - int_status_bit <<= 1; - } - - /* Process the AEQ for this pci function */ - int_status_bit = 1 << (16 + PCI_FUNC(nesdev->pcidev->devfn)); - if (int_stat & int_status_bit) { - nes_process_aeq(nesdev, &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)]); - } - - /* Process the MAC interrupt for this pci function */ - int_status_bit = 1 << (24 + nesdev->mac_index); - if (int_stat & int_status_bit) { - nes_process_mac_intr(nesdev, nesdev->mac_index); - } - - if (int_stat & NES_INT_TIMER) { - if (timer_stat & nesdev->timer_int_req) { - nes_write32(nesdev->regs + NES_TIMER_STAT, - (timer_stat & nesdev->timer_int_req) | - ~(nesdev->nesadapter->timer_int_req)); - timer_ints = 1; - } - } - - if (int_stat & NES_INT_INTF) { - processed_intf_int = 1; - intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT); - intf_int_stat &= nesdev->intf_int_req; - if (NES_INTF_INT_CRITERR & intf_int_stat) { - process_critical_error(nesdev); - } - if (NES_INTF_INT_PCIERR & intf_int_stat) { - printk(KERN_ERR PFX "PCI Error reported by device!!!\n"); - BUG(); - } - if (NES_INTF_INT_AEQ_OFLOW & intf_int_stat) { - printk(KERN_ERR PFX "AEQ Overflow reported by device!!!\n"); - BUG(); - } - 
nes_write32(nesdev->regs+NES_INTF_INT_STAT, intf_int_stat); - } - - if (int_stat & NES_INT_TSW) { - } - } - /* Don't use the interface interrupt bit stay in loop */ - int_stat &= ~NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | - NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3; - } while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS)); - - if (timer_ints == 1) { - if ((nesadapter->et_rx_coalesce_usecs_irq) || (nesadapter->et_use_adaptive_rx_coalesce)) { - if (completion_ints == 0) { - nesdev->timer_only_int_count++; - if (nesdev->timer_only_int_count>=nesadapter->timer_int_limit) { - nesdev->timer_only_int_count = 0; - nesdev->int_req &= ~NES_INT_TIMER; - nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); - nes_write32(nesdev->regs + NES_INT_MASK, ~nesdev->int_req); - } else { - nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); - } - } else { - if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) - { - nes_nic_init_timer(nesdev); - } - nesdev->timer_only_int_count = 0; - nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); - } - } else { - nesdev->timer_only_int_count = 0; - nesdev->int_req &= ~NES_INT_TIMER; - nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); - nes_write32(nesdev->regs+NES_TIMER_STAT, - nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req)); - nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); - } - } else { - if ( (completion_ints == 1) && - (((nesadapter->et_rx_coalesce_usecs_irq) && - (!nesadapter->et_use_adaptive_rx_coalesce)) || - ((nesdev->deepcq_count > nesadapter->et_pkt_rate_low) && - (nesadapter->et_use_adaptive_rx_coalesce) )) ) { - /* nes_debug(NES_DBG_ISR, "Enabling periodic timer interrupt.\n" ); */ - nesdev->timer_only_int_count = 0; - nesdev->int_req |= NES_INT_TIMER; - nes_write32(nesdev->regs+NES_TIMER_STAT, - nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req)); - nes_write32(nesdev->regs+NES_INTF_INT_MASK, - ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER)); - nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); - } else { - nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); - } - } - nesdev->deepcq_count = 0; -} - - -/** - * nes_process_ceq - */ -static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq) -{ - u64 u64temp; - struct nes_hw_cq *cq; - u32 head; - u32 ceq_size; - - /* nes_debug(NES_DBG_CQ, "\n"); */ - head = ceq->ceq_head; - ceq_size = ceq->ceq_size; - - do { - if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) & - NES_CEQE_VALID) { - u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]))) << 32) | - ((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX]))); - u64temp <<= 1; - cq = *((struct nes_hw_cq **)&u64temp); - /* nes_debug(NES_DBG_CQ, "pCQ = %p\n", cq); */ - barrier(); - ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX] = 0; - - /* call the event handler */ - cq->ce_handler(nesdev, cq); - - if (++head >= ceq_size) - head = 0; - } else { - break; - } - - } while (1); - - ceq->ceq_head = head; -} - - -/** - * nes_process_aeq - */ -static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq) -{ - /* u64 u64temp; */ - u32 head; - u32 aeq_size; - u32 aeqe_misc; - u32 aeqe_cq_id; - struct nes_hw_aeqe volatile *aeqe; - - head = aeq->aeq_head; - aeq_size = aeq->aeq_size; - - do { - aeqe = &aeq->aeq_vbase[head]; - if ((le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]) & 
NES_AEQE_VALID) == 0) - break; - aeqe_misc = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); - aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]); - if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) { - if (aeqe_cq_id >= NES_FIRST_QPN) { - /* dealing with an accelerated QP related AE */ - /* - * u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX]))) << 32) | - * ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]))); - */ - nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe); - } else { - /* TODO: dealing with a CQP related AE */ - nes_debug(NES_DBG_AEQ, "Processing CQP related AE, misc = 0x%04X\n", - (u16)(aeqe_misc >> 16)); - } - } - - aeqe->aeqe_words[NES_AEQE_MISC_IDX] = 0; - - if (++head >= aeq_size) - head = 0; - - nes_write32(nesdev->regs + NES_AEQ_ALLOC, 1 << 16); - } - while (1); - aeq->aeq_head = head; -} - -static void nes_reset_link(struct nes_device *nesdev, u32 mac_index) -{ - struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 reset_value; - u32 i=0; - u32 u32temp; - - if (nesadapter->hw_rev == NE020_REV) { - return; - } - mh_detected++; - - reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); - - if ((mac_index == 0) || ((mac_index == 1) && (nesadapter->OneG_Mode))) - reset_value |= 0x0000001d; - else - reset_value |= 0x0000002d; - - if (4 <= (nesadapter->link_interrupt_count[mac_index] / ((u16)NES_MAX_LINK_INTERRUPTS))) { - if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) { - nesadapter->link_interrupt_count[0] = 0; - nesadapter->link_interrupt_count[1] = 0; - u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); - if (0x00000040 & u32temp) - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088); - else - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8); - - reset_value |= 0x0000003d; - } - nesadapter->link_interrupt_count[mac_index] = 0; - } - - nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); - - while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) - & 0x00000040) != 0x00000040) && (i++ < 5000)); - - if (0x0000003d == (reset_value & 0x0000003d)) { - u32 pcs_control_status0, pcs_control_status1; - - for (i = 0; i < 10; i++) { - pcs_control_status0 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0); - pcs_control_status1 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); - if (((0x0F000000 == (pcs_control_status0 & 0x0F000000)) - && (pcs_control_status0 & 0x00100000)) - || ((0x0F000000 == (pcs_control_status1 & 0x0F000000)) - && (pcs_control_status1 & 0x00100000))) - continue; - else - break; - } - if (10 == i) { - u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); - if (0x00000040 & u32temp) - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088); - else - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8); - - nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); - - while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) - & 0x00000040) != 0x00000040) && (i++ < 5000)); - } - } -} - -/** - * nes_process_mac_intr - */ -static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) -{ - unsigned long flags; - u32 pcs_control_status; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_vnic *nesvnic; - u32 mac_status; - u32 mac_index = nesdev->mac_index; - u32 u32temp; - u16 phy_data; - u16 temp_phy_data; - u32 pcs_val = 0x0f0f0000; - u32 pcs_mask = 0x0f1f0000; - u32 cdr_ctrl; - - 
spin_lock_irqsave(&nesadapter->phy_lock, flags); - if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) { - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); - return; - } - nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_INTERRUPT; - - /* ack the MAC interrupt */ - mac_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200)); - /* Clear the interrupt */ - nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200), mac_status); - - nes_debug(NES_DBG_PHY, "MAC%u interrupt status = 0x%X.\n", mac_number, mac_status); - - if (mac_status & (NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT)) { - nesdev->link_status_interrupts++; - if (0 == (++nesadapter->link_interrupt_count[mac_index] % ((u16)NES_MAX_LINK_INTERRUPTS))) - nes_reset_link(nesdev, mac_index); - - /* read the PHY interrupt status register */ - if ((nesadapter->OneG_Mode) && - (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { - do { - nes_read_1G_phy_reg(nesdev, 0x1a, - nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1a = 0x%X.\n", - nesadapter->phy_index[mac_index], phy_data); - } while (phy_data&0x8000); - - temp_phy_data = 0; - do { - nes_read_1G_phy_reg(nesdev, 0x11, - nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy%d data from register 0x11 = 0x%X.\n", - nesadapter->phy_index[mac_index], phy_data); - if (temp_phy_data == phy_data) - break; - temp_phy_data = phy_data; - } while (1); - - nes_read_1G_phy_reg(nesdev, 0x1e, - nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1e = 0x%X.\n", - nesadapter->phy_index[mac_index], phy_data); - - nes_read_1G_phy_reg(nesdev, 1, - nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "1G phy%u data from register 1 = 0x%X\n", - nesadapter->phy_index[mac_index], phy_data); - - if (temp_phy_data & 0x1000) { - nes_debug(NES_DBG_PHY, "The Link is up according to the PHY\n"); - phy_data = 4; - } else { - nes_debug(NES_DBG_PHY, "The Link is down according to the PHY\n"); - } - } - nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n", - nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0), - nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200)); - - if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_PUMA_1G) { - switch (mac_index) { - case 1: - case 3: - pcs_control_status = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); - break; - default: - pcs_control_status = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0); - break; - } - } else { - pcs_control_status = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200)); - pcs_control_status = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200)); - } - - nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n", - mac_index, pcs_control_status); - if ((nesadapter->OneG_Mode) && - (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { - u32temp = 0x01010000; - if (nesadapter->port_count > 2) { - u32temp |= 0x02020000; - } - if ((pcs_control_status & u32temp)!= u32temp) { - phy_data = 0; - nes_debug(NES_DBG_PHY, "PCS says the link is down\n"); - } - } else { - switch (nesadapter->phy_type[mac_index]) { - case NES_PHY_TYPE_ARGUS: - case NES_PHY_TYPE_SFP_D: - case NES_PHY_TYPE_KR: - /* clear the alarms */ - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008); - nes_read_10G_phy_reg(nesdev, 
nesadapter->phy_index[mac_index], 4, 0xc001); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc002); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc005); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc006); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005); - /* check link status */ - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); - nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); - phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - - phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; - - nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", - __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP"); - break; - - case NES_PHY_TYPE_PUMA_1G: - if (mac_index < 2) - pcs_val = pcs_mask = 0x01010000; - else - pcs_val = pcs_mask = 0x02020000; - /* fall through */ - default: - phy_data = (pcs_val == (pcs_control_status & pcs_mask)) ? 0x4 : 0x0; - break; - } - } - - if (phy_data & 0x0004) { - if (wide_ppm_offset && - (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) && - (nesadapter->hw_rev != NE020_REV)) { - cdr_ctrl = nes_read_indexed(nesdev, - NES_IDX_ETH_SERDES_CDR_CONTROL0 + - mac_index * 0x200); - nes_write_indexed(nesdev, - NES_IDX_ETH_SERDES_CDR_CONTROL0 + - mac_index * 0x200, - cdr_ctrl | 0x000F0000); - } - nesadapter->mac_link_down[mac_index] = 0; - list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { - nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n", - nesvnic->linkup); - if (nesvnic->linkup == 0) { - printk(PFX "The Link is now up for port %s, netdev %p.\n", - nesvnic->netdev->name, nesvnic->netdev); - if (netif_queue_stopped(nesvnic->netdev)) - netif_start_queue(nesvnic->netdev); - nesvnic->linkup = 1; - netif_carrier_on(nesvnic->netdev); - - spin_lock(&nesvnic->port_ibevent_lock); - if (nesvnic->of_device_registered) { - if (nesdev->iw_status == 0) { - nesdev->iw_status = 1; - nes_port_ibevent(nesvnic); - } - } - spin_unlock(&nesvnic->port_ibevent_lock); - } - } - } else { - if (wide_ppm_offset && - (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) && - (nesadapter->hw_rev != NE020_REV)) { - cdr_ctrl = nes_read_indexed(nesdev, - NES_IDX_ETH_SERDES_CDR_CONTROL0 + - mac_index * 0x200); - nes_write_indexed(nesdev, - NES_IDX_ETH_SERDES_CDR_CONTROL0 + - mac_index * 0x200, - cdr_ctrl & 0xFFF0FFFF); - } - nesadapter->mac_link_down[mac_index] = 1; - list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { - nes_debug(NES_DBG_PHY, "The Link is Down!!. 
linkup was %d\n", - nesvnic->linkup); - if (nesvnic->linkup == 1) { - printk(PFX "The Link is now down for port %s, netdev %p.\n", - nesvnic->netdev->name, nesvnic->netdev); - if (!(netif_queue_stopped(nesvnic->netdev))) - netif_stop_queue(nesvnic->netdev); - nesvnic->linkup = 0; - netif_carrier_off(nesvnic->netdev); - - spin_lock(&nesvnic->port_ibevent_lock); - if (nesvnic->of_device_registered) { - if (nesdev->iw_status == 1) { - nesdev->iw_status = 0; - nes_port_ibevent(nesvnic); - } - } - spin_unlock(&nesvnic->port_ibevent_lock); - } - } - } - if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) { - nesdev->link_recheck = 1; - mod_delayed_work(system_wq, &nesdev->work, - NES_LINK_RECHECK_DELAY); - } - } - - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); - - nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_IDLE; -} - -void nes_recheck_link_status(struct work_struct *work) -{ - unsigned long flags; - struct nes_device *nesdev = container_of(work, struct nes_device, work.work); - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_vnic *nesvnic; - u32 mac_index = nesdev->mac_index; - u16 phy_data; - u16 temp_phy_data; - - spin_lock_irqsave(&nesadapter->phy_lock, flags); - - /* check link status */ - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); - nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); - phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - - phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; - - nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", - __func__, phy_data, - nesadapter->mac_link_down[mac_index] ? 
"DOWN" : "UP"); - - if (phy_data & 0x0004) { - nesadapter->mac_link_down[mac_index] = 0; - list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { - if (nesvnic->linkup == 0) { - printk(PFX "The Link is now up for port %s, netdev %p.\n", - nesvnic->netdev->name, nesvnic->netdev); - if (netif_queue_stopped(nesvnic->netdev)) - netif_start_queue(nesvnic->netdev); - nesvnic->linkup = 1; - netif_carrier_on(nesvnic->netdev); - - spin_lock(&nesvnic->port_ibevent_lock); - if (nesvnic->of_device_registered) { - if (nesdev->iw_status == 0) { - nesdev->iw_status = 1; - nes_port_ibevent(nesvnic); - } - } - spin_unlock(&nesvnic->port_ibevent_lock); - } - } - - } else { - nesadapter->mac_link_down[mac_index] = 1; - list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { - if (nesvnic->linkup == 1) { - printk(PFX "The Link is now down for port %s, netdev %p.\n", - nesvnic->netdev->name, nesvnic->netdev); - if (!(netif_queue_stopped(nesvnic->netdev))) - netif_stop_queue(nesvnic->netdev); - nesvnic->linkup = 0; - netif_carrier_off(nesvnic->netdev); - - spin_lock(&nesvnic->port_ibevent_lock); - if (nesvnic->of_device_registered) { - if (nesdev->iw_status == 1) { - nesdev->iw_status = 0; - nes_port_ibevent(nesvnic); - } - } - spin_unlock(&nesvnic->port_ibevent_lock); - } - } - } - if (nesdev->link_recheck++ < NES_LINK_RECHECK_MAX) - schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY); - else - nesdev->link_recheck = 0; - - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); -} - - -static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) -{ - struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq); - - napi_schedule(&nesvnic->napi); -} - - -/* The MAX_RQES_TO_PROCESS defines how many max read requests to complete before -* getting out of nic_ce_handler -*/ -#define MAX_RQES_TO_PROCESS 384 - -/** - * nes_nic_ce_handler - */ -void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) -{ - u64 u64temp; - dma_addr_t bus_address; - struct nes_hw_nic *nesnic; - struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq); - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_hw_nic_rq_wqe *nic_rqe; - struct nes_hw_nic_sq_wqe *nic_sqe; - struct sk_buff *skb; - struct sk_buff *rx_skb; - struct nes_rskb_cb *cb; - __le16 *wqe_fragment_length; - u32 head; - u32 cq_size; - u32 rx_pkt_size; - u32 cqe_count=0; - u32 cqe_errv; - u32 cqe_misc; - u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */ - u16 vlan_tag; - u16 pkt_type; - u16 rqes_processed = 0; - u8 sq_cqes = 0; - - head = cq->cq_head; - cq_size = cq->cq_size; - cq->cqes_pending = 1; - do { - if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) & - NES_NIC_CQE_VALID) { - nesnic = &nesvnic->nic; - cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]); - if (cqe_misc & NES_NIC_CQE_SQ) { - sq_cqes++; - wqe_fragment_index = 1; - nic_sqe = &nesnic->sq_vbase[nesnic->sq_tail]; - skb = nesnic->tx_skb[nesnic->sq_tail]; - wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; - /* bump past the vlan tag */ - wqe_fragment_length++; - if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) { - u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX + - wqe_fragment_index * 2]); - u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX + - wqe_fragment_index * 2])) << 32; - bus_address = (dma_addr_t)u64temp; 
- if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) { - pci_unmap_single(nesdev->pcidev, - bus_address, - le16_to_cpu(wqe_fragment_length[wqe_fragment_index++]), - PCI_DMA_TODEVICE); - } - for (; wqe_fragment_index < 5; wqe_fragment_index++) { - if (wqe_fragment_length[wqe_fragment_index]) { - u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX + - wqe_fragment_index * 2]); - u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX - + wqe_fragment_index * 2])) <<32; - bus_address = (dma_addr_t)u64temp; - pci_unmap_page(nesdev->pcidev, - bus_address, - le16_to_cpu(wqe_fragment_length[wqe_fragment_index]), - PCI_DMA_TODEVICE); - } else - break; - } - } - if (skb) - dev_kfree_skb_any(skb); - nesnic->sq_tail++; - nesnic->sq_tail &= nesnic->sq_size-1; - if (sq_cqes > 128) { - barrier(); - /* restart the queue if it had been stopped */ - if (netif_queue_stopped(nesvnic->netdev)) - netif_wake_queue(nesvnic->netdev); - sq_cqes = 0; - } - } else { - rqes_processed ++; - - cq->rx_cqes_completed++; - cq->rx_pkts_indicated++; - rx_pkt_size = cqe_misc & 0x0000ffff; - nic_rqe = &nesnic->rq_vbase[nesnic->rq_tail]; - /* Get the skb */ - rx_skb = nesnic->rx_skb[nesnic->rq_tail]; - nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_tail]; - bus_address = (dma_addr_t)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); - bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; - pci_unmap_single(nesdev->pcidev, bus_address, - nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); - cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; - cb->busaddr = 0; - /* rx_skb->tail = rx_skb->data + rx_pkt_size; */ - /* rx_skb->len = rx_pkt_size; */ - rx_skb->len = 0; /* TODO: see if this is necessary */ - skb_put(rx_skb, rx_pkt_size); - rx_skb->protocol = eth_type_trans(rx_skb, nesvnic->netdev); - nesnic->rq_tail++; - nesnic->rq_tail &= nesnic->rq_size - 1; - - atomic_inc(&nesvnic->rx_skbs_needed); - if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) { - nes_write32(nesdev->regs+NES_CQE_ALLOC, - cq->cq_number | (cqe_count << 16)); - /* nesadapter->tune_timer.cq_count += cqe_count; */ - nesdev->currcq_count += cqe_count; - cqe_count = 0; - nes_replenish_nic_rq(nesvnic); - } - pkt_type = (u16)(le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])); - cqe_errv = (cqe_misc & NES_NIC_CQE_ERRV_MASK) >> NES_NIC_CQE_ERRV_SHIFT; - rx_skb->ip_summed = CHECKSUM_NONE; - - if ((NES_PKT_TYPE_TCPV4_BITS == (pkt_type & NES_PKT_TYPE_TCPV4_MASK)) || - (NES_PKT_TYPE_UDPV4_BITS == (pkt_type & NES_PKT_TYPE_UDPV4_MASK))) { - if ((cqe_errv & - (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR | - NES_NIC_ERRV_BITS_IPH_ERR | NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) { - if (nesvnic->netdev->features & NETIF_F_RXCSUM) - rx_skb->ip_summed = CHECKSUM_UNNECESSARY; - } else - nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet." 
- " errv = 0x%X, pkt_type = 0x%X.\n", - nesvnic->netdev->name, cqe_errv, pkt_type); - - } else if ((pkt_type & NES_PKT_TYPE_IPV4_MASK) == NES_PKT_TYPE_IPV4_BITS) { - if ((cqe_errv & - (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_IPH_ERR | - NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) { - if (nesvnic->netdev->features & NETIF_F_RXCSUM) { - rx_skb->ip_summed = CHECKSUM_UNNECESSARY; - /* nes_debug(NES_DBG_CQ, "%s: Reporting successfully checksummed IPv4 packet.\n", - nesvnic->netdev->name); */ - } - } else - nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet." - " errv = 0x%X, pkt_type = 0x%X.\n", - nesvnic->netdev->name, cqe_errv, pkt_type); - } - /* nes_debug(NES_DBG_CQ, "pkt_type=%x, APBVT_MASK=%x\n", - pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */ - - if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) { - if (nes_cm_recv(rx_skb, nesvnic->netdev)) - rx_skb = NULL; - } - if (rx_skb == NULL) - goto skip_rx_indicate0; - - - if (cqe_misc & NES_NIC_CQE_TAG_VALID) { - vlan_tag = (u16)(le32_to_cpu( - cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX]) - >> 16); - nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n", - nesvnic->netdev->name, vlan_tag); - - __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag); - } - napi_gro_receive(&nesvnic->napi, rx_skb); - -skip_rx_indicate0: - ; - /* nesvnic->netstats.rx_packets++; */ - /* nesvnic->netstats.rx_bytes += rx_pkt_size; */ - } - - cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0; - /* Accounting... */ - cqe_count++; - if (++head >= cq_size) - head = 0; - if (cqe_count == 255) { - /* Replenish Nic CQ */ - nes_write32(nesdev->regs+NES_CQE_ALLOC, - cq->cq_number | (cqe_count << 16)); - /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */ - nesdev->currcq_count += cqe_count; - cqe_count = 0; - } - - if (cq->rx_cqes_completed >= nesvnic->budget) - break; - } else { - cq->cqes_pending = 0; - break; - } - - } while (1); - - if (sq_cqes) { - barrier(); - /* restart the queue if it had been stopped */ - if (netif_queue_stopped(nesvnic->netdev)) - netif_wake_queue(nesvnic->netdev); - } - cq->cq_head = head; - /* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n", - cq->cq_number, cqe_count, cq->cq_head); */ - cq->cqe_allocs_pending = cqe_count; - if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) - { - /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */ - nesdev->currcq_count += cqe_count; - nes_nic_tune_timer(nesdev); - } - if (atomic_read(&nesvnic->rx_skbs_needed)) - nes_replenish_nic_rq(nesvnic); -} - - - -/** - * nes_cqp_ce_handler - */ -static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) -{ - u64 u64temp; - unsigned long flags; - struct nes_hw_cqp *cqp = NULL; - struct nes_cqp_request *cqp_request; - struct nes_hw_cqp_wqe *cqp_wqe; - u32 head; - u32 cq_size; - u32 cqe_count=0; - u32 error_code; - u32 opcode; - u32 ctx_index; - /* u32 counter; */ - - head = cq->cq_head; - cq_size = cq->cq_size; - - do { - /* process the CQE */ - /* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head, - le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */ - - opcode = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]); - if (opcode & NES_CQE_VALID) { - cqp = &nesdev->cqp; - - error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]); - if (error_code) { - nes_debug(NES_DBG_CQP, "Bad Completion code for opcode 0x%02X from CQP," - " Major/Minor codes = 0x%04X:%04X.\n", - 
le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f, - (u16)(error_code >> 16), - (u16)error_code); - } - - u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head]. - cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) | - ((u64)(le32_to_cpu(cq->cq_vbase[head]. - cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]))); - - cqp_request = (struct nes_cqp_request *)(unsigned long)u64temp; - if (cqp_request) { - if (cqp_request->waiting) { - /* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */ - cqp_request->major_code = (u16)(error_code >> 16); - cqp_request->minor_code = (u16)error_code; - barrier(); - cqp_request->request_done = 1; - wake_up(&cqp_request->waitq); - nes_put_cqp_request(nesdev, cqp_request); - } else { - if (cqp_request->callback) - cqp_request->cqp_callback(nesdev, cqp_request); - nes_free_cqp_request(nesdev, cqp_request); - } - } else { - wake_up(&nesdev->cqp.waitq); - } - - cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; - nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (1 << 16)); - if (++cqp->sq_tail >= cqp->sq_size) - cqp->sq_tail = 0; - - /* Accounting... */ - cqe_count++; - if (++head >= cq_size) - head = 0; - } else { - break; - } - } while (1); - cq->cq_head = head; - - spin_lock_irqsave(&nesdev->cqp.lock, flags); - while ((!list_empty(&nesdev->cqp_pending_reqs)) && - ((((nesdev->cqp.sq_tail+nesdev->cqp.sq_size)-nesdev->cqp.sq_head) & - (nesdev->cqp.sq_size - 1)) != 1)) { - cqp_request = list_entry(nesdev->cqp_pending_reqs.next, - struct nes_cqp_request, list); - list_del_init(&cqp_request->list); - head = nesdev->cqp.sq_head++; - nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; - cqp_wqe = &nesdev->cqp.sq_vbase[head]; - memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); - barrier(); - - opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]); - if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT) - ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX; - else - ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX; - cqp_wqe->wqe_words[ctx_index] = - cpu_to_le32((u32)((unsigned long)cqp_request)); - cqp_wqe->wqe_words[ctx_index + 1] = - cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request))); - nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n", - cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head); - /* Ring doorbell (1 WQEs) */ - barrier(); - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); - } - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - - /* Arm the CCQ */ - nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | - cq->cq_number); - nes_read32(nesdev->regs+NES_CQE_ALLOC); -} - -static u8 *locate_mpa(u8 *pkt, u32 aeq_info) -{ - if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) { - /* skip over ethernet header */ - pkt += ETH_HLEN; - - /* Skip over IP and TCP headers */ - pkt += 4 * (pkt[0] & 0x0f); - pkt += 4 * ((pkt[12] >> 4) & 0x0f); - } - return pkt; -} - -/* Determine if incoming error pkt is rdma layer */ -static u32 iwarp_opcode(struct nes_qp *nesqp, u32 aeq_info) -{ - u8 *pkt; - u16 *mpa; - u32 opcode = 0xffffffff; - - if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) { - pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET; - mpa = (u16 *)locate_mpa(pkt, aeq_info); - opcode = be16_to_cpu(mpa[1]) & 0xf; - } - - return opcode; -} - -/* Build iWARP terminate header */ -static int nes_bld_terminate_hdr(struct nes_qp *nesqp, u16 async_event_id, u32 aeq_info) -{ - u8 *pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET; - u16 ddp_seg_len; - int copy_len = 0; - u8 
is_tagged = 0; - u8 flush_code = 0; - struct nes_terminate_hdr *termhdr; - - termhdr = (struct nes_terminate_hdr *)nesqp->hwqp.q2_vbase; - memset(termhdr, 0, 64); - - if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) { - - /* Use data from offending packet to fill in ddp & rdma hdrs */ - pkt = locate_mpa(pkt, aeq_info); - ddp_seg_len = be16_to_cpu(*(u16 *)pkt); - if (ddp_seg_len) { - copy_len = 2; - termhdr->hdrct = DDP_LEN_FLAG; - if (pkt[2] & 0x80) { - is_tagged = 1; - if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) { - copy_len += TERM_DDP_LEN_TAGGED; - termhdr->hdrct |= DDP_HDR_FLAG; - } - } else { - if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) { - copy_len += TERM_DDP_LEN_UNTAGGED; - termhdr->hdrct |= DDP_HDR_FLAG; - } - - if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) { - if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) { - copy_len += TERM_RDMA_LEN; - termhdr->hdrct |= RDMA_HDR_FLAG; - } - } - } - } - } - - switch (async_event_id) { - case NES_AEQE_AEID_AMP_UNALLOCATED_STAG: - switch (iwarp_opcode(nesqp, aeq_info)) { - case IWARP_OPCODE_WRITE: - flush_code = IB_WC_LOC_PROT_ERR; - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; - termhdr->error_code = DDP_TAGGED_INV_STAG; - break; - default: - flush_code = IB_WC_REM_ACCESS_ERR; - termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; - termhdr->error_code = RDMAP_INV_STAG; - } - break; - case NES_AEQE_AEID_AMP_INVALID_STAG: - flush_code = IB_WC_REM_ACCESS_ERR; - termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; - termhdr->error_code = RDMAP_INV_STAG; - break; - case NES_AEQE_AEID_AMP_BAD_QP: - flush_code = IB_WC_LOC_QP_OP_ERR; - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; - termhdr->error_code = DDP_UNTAGGED_INV_QN; - break; - case NES_AEQE_AEID_AMP_BAD_STAG_KEY: - case NES_AEQE_AEID_AMP_BAD_STAG_INDEX: - switch (iwarp_opcode(nesqp, aeq_info)) { - case IWARP_OPCODE_SEND_INV: - case IWARP_OPCODE_SEND_SE_INV: - flush_code = IB_WC_REM_OP_ERR; - termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; - termhdr->error_code = RDMAP_CANT_INV_STAG; - break; - default: - flush_code = IB_WC_REM_ACCESS_ERR; - termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; - termhdr->error_code = RDMAP_INV_STAG; - } - break; - case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION: - if (aeq_info & (NES_AEQE_Q2_DATA_ETHERNET | NES_AEQE_Q2_DATA_MPA)) { - flush_code = IB_WC_LOC_PROT_ERR; - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; - termhdr->error_code = DDP_TAGGED_BOUNDS; - } else { - flush_code = IB_WC_REM_ACCESS_ERR; - termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; - termhdr->error_code = RDMAP_INV_BOUNDS; - } - break; - case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION: - case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: - case NES_AEQE_AEID_PRIV_OPERATION_DENIED: - flush_code = IB_WC_REM_ACCESS_ERR; - termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; - termhdr->error_code = RDMAP_ACCESS; - break; - case NES_AEQE_AEID_AMP_TO_WRAP: - flush_code = IB_WC_REM_ACCESS_ERR; - termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; - termhdr->error_code = RDMAP_TO_WRAP; - break; - case NES_AEQE_AEID_AMP_BAD_PD: - switch (iwarp_opcode(nesqp, aeq_info)) { - case IWARP_OPCODE_WRITE: - flush_code = IB_WC_LOC_PROT_ERR; - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; - termhdr->error_code = DDP_TAGGED_UNASSOC_STAG; - break; - case IWARP_OPCODE_SEND_INV: - case IWARP_OPCODE_SEND_SE_INV: - flush_code = IB_WC_REM_ACCESS_ERR; - termhdr->layer_etype = 
(LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; - termhdr->error_code = RDMAP_CANT_INV_STAG; - break; - default: - flush_code = IB_WC_REM_ACCESS_ERR; - termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; - termhdr->error_code = RDMAP_UNASSOC_STAG; - } - break; - case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH: - flush_code = IB_WC_LOC_LEN_ERR; - termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP; - termhdr->error_code = MPA_MARKER; - break; - case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR: - flush_code = IB_WC_GENERAL_ERR; - termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP; - termhdr->error_code = MPA_CRC; - break; - case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE: - case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL: - flush_code = IB_WC_LOC_LEN_ERR; - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC; - termhdr->error_code = DDP_CATASTROPHIC_LOCAL; - break; - case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC: - case NES_AEQE_AEID_DDP_NO_L_BIT: - flush_code = IB_WC_FATAL_ERR; - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC; - termhdr->error_code = DDP_CATASTROPHIC_LOCAL; - break; - case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN: - case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID: - flush_code = IB_WC_GENERAL_ERR; - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; - termhdr->error_code = DDP_UNTAGGED_INV_MSN_RANGE; - break; - case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: - flush_code = IB_WC_LOC_LEN_ERR; - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; - termhdr->error_code = DDP_UNTAGGED_INV_TOO_LONG; - break; - case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION: - flush_code = IB_WC_GENERAL_ERR; - if (is_tagged) { - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; - termhdr->error_code = DDP_TAGGED_INV_DDP_VER; - } else { - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; - termhdr->error_code = DDP_UNTAGGED_INV_DDP_VER; - } - break; - case NES_AEQE_AEID_DDP_UBE_INVALID_MO: - flush_code = IB_WC_GENERAL_ERR; - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; - termhdr->error_code = DDP_UNTAGGED_INV_MO; - break; - case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: - flush_code = IB_WC_REM_OP_ERR; - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; - termhdr->error_code = DDP_UNTAGGED_INV_MSN_NO_BUF; - break; - case NES_AEQE_AEID_DDP_UBE_INVALID_QN: - flush_code = IB_WC_GENERAL_ERR; - termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; - termhdr->error_code = DDP_UNTAGGED_INV_QN; - break; - case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION: - flush_code = IB_WC_GENERAL_ERR; - termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; - termhdr->error_code = RDMAP_INV_RDMAP_VER; - break; - case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE: - flush_code = IB_WC_LOC_QP_OP_ERR; - termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; - termhdr->error_code = RDMAP_UNEXPECTED_OP; - break; - default: - flush_code = IB_WC_FATAL_ERR; - termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; - termhdr->error_code = RDMAP_UNSPECIFIED; - break; - } - - if (copy_len) - memcpy(termhdr + 1, pkt, copy_len); - - if ((flush_code) && ((NES_AEQE_INBOUND_RDMA & aeq_info) == 0)) { - if (aeq_info & NES_AEQE_SQ) - nesqp->term_sq_flush_code = flush_code; - else - nesqp->term_rq_flush_code = flush_code; - } - - return sizeof(struct nes_terminate_hdr) + copy_len; -} - -static void nes_terminate_connection(struct nes_device *nesdev, struct nes_qp *nesqp, - struct 
nes_hw_aeqe *aeqe, enum ib_event_type eventtype) -{ - u64 context; - unsigned long flags; - u32 aeq_info; - u16 async_event_id; - u8 tcp_state; - u8 iwarp_state; - u32 termlen = 0; - u32 mod_qp_flags = NES_CQP_QP_IWARP_STATE_TERMINATE | - NES_CQP_QP_TERM_DONT_SEND_FIN; - struct nes_adapter *nesadapter = nesdev->nesadapter; - - if (nesqp->term_flags & NES_TERM_SENT) - return; /* Sanity check */ - - aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); - tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; - iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; - async_event_id = (u16)aeq_info; - - context = (unsigned long)nesadapter->qp_table[le32_to_cpu( - aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN]; - if (!context) { - WARN_ON(!context); - return; - } - - nesqp = (struct nes_qp *)(unsigned long)context; - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; - nesqp->terminate_eventtype = eventtype; - spin_unlock_irqrestore(&nesqp->lock, flags); - - if (nesadapter->send_term_ok) - termlen = nes_bld_terminate_hdr(nesqp, async_event_id, aeq_info); - else - mod_qp_flags |= NES_CQP_QP_TERM_DONT_SEND_TERM_MSG; - - if (!nesdev->iw_status) { - nesqp->term_flags = NES_TERM_DONE; - nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_ERROR, 0, 0); - nes_cm_disconn(nesqp); - } else { - nes_terminate_start_timer(nesqp); - nesqp->term_flags |= NES_TERM_SENT; - nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0); - } -} - -static void nes_terminate_send_fin(struct nes_device *nesdev, - struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe) -{ - u32 aeq_info; - u16 async_event_id; - u8 tcp_state; - u8 iwarp_state; - unsigned long flags; - - aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); - tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; - iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; - async_event_id = (u16)aeq_info; - - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; - spin_unlock_irqrestore(&nesqp->lock, flags); - - /* Send the fin only */ - nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_TERMINATE | - NES_CQP_QP_TERM_DONT_SEND_TERM_MSG, 0, 0); -} - -/* Cleanup after a terminate sent or received */ -static void nes_terminate_done(struct nes_qp *nesqp, int timeout_occurred) -{ - u32 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; - unsigned long flags; - struct nes_vnic *nesvnic = to_nesvnic(nesqp->ibqp.device); - struct nes_device *nesdev = nesvnic->nesdev; - u8 first_time = 0; - - spin_lock_irqsave(&nesqp->lock, flags); - if (nesqp->hte_added) { - nesqp->hte_added = 0; - next_iwarp_state |= NES_CQP_QP_DEL_HTE; - } - - first_time = (nesqp->term_flags & NES_TERM_DONE) == 0; - nesqp->term_flags |= NES_TERM_DONE; - spin_unlock_irqrestore(&nesqp->lock, flags); - - /* Make sure we go through this only once */ - if (first_time) { - if (timeout_occurred == 0) - del_timer(&nesqp->terminate_timer); - else - next_iwarp_state |= NES_CQP_QP_RESET; - - nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); - nes_cm_disconn(nesqp); - } -} - -static void nes_terminate_received(struct nes_device *nesdev, - struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe) -{ - u32 aeq_info; - u8 *pkt; - u32 *mpa; - u8 ddp_ctl; - u8 rdma_ctl; - u16 aeq_id = 0; - - 
aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); - if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) { - /* Terminate is not a performance path so the silicon */ - /* did not validate the frame - do it now */ - pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET; - mpa = (u32 *)locate_mpa(pkt, aeq_info); - ddp_ctl = (be32_to_cpu(mpa[0]) >> 8) & 0xff; - rdma_ctl = be32_to_cpu(mpa[0]) & 0xff; - if ((ddp_ctl & 0xc0) != 0x40) - aeq_id = NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC; - else if ((ddp_ctl & 0x03) != 1) - aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION; - else if (be32_to_cpu(mpa[2]) != 2) - aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_QN; - else if (be32_to_cpu(mpa[3]) != 1) - aeq_id = NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN; - else if (be32_to_cpu(mpa[4]) != 0) - aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_MO; - else if ((rdma_ctl & 0xc0) != 0x40) - aeq_id = NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION; - - if (aeq_id) { - /* Bad terminate recvd - send back a terminate */ - aeq_info = (aeq_info & 0xffff0000) | aeq_id; - aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info); - nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); - return; - } - } - - nesqp->term_flags |= NES_TERM_RCVD; - nesqp->terminate_eventtype = IB_EVENT_QP_FATAL; - nes_terminate_start_timer(nesqp); - nes_terminate_send_fin(nesdev, nesqp, aeqe); -} - -/* Timeout routine in case terminate fails to complete */ -void nes_terminate_timeout(struct timer_list *t) -{ - struct nes_qp *nesqp = from_timer(nesqp, t, terminate_timer); - - nes_terminate_done(nesqp, 1); -} - -/* Set a timer in case hw cannot complete the terminate sequence */ -static void nes_terminate_start_timer(struct nes_qp *nesqp) -{ - mod_timer(&nesqp->terminate_timer, (jiffies + HZ)); -} - -/** - * nes_process_iwarp_aeqe - */ -static void nes_process_iwarp_aeqe(struct nes_device *nesdev, - struct nes_hw_aeqe *aeqe) -{ - u64 context; - unsigned long flags; - struct nes_qp *nesqp; - struct nes_hw_cq *hw_cq; - struct nes_cq *nescq; - int resource_allocated; - struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 aeq_info; - u32 next_iwarp_state = 0; - u32 aeqe_cq_id; - u16 async_event_id; - u8 tcp_state; - u8 iwarp_state; - struct ib_event ibevent; - - nes_debug(NES_DBG_AEQ, "\n"); - aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); - if ((NES_AEQE_INBOUND_RDMA & aeq_info) || (!(NES_AEQE_QP & aeq_info))) { - context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); - context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32; - } else { - context = (unsigned long)nesadapter->qp_table[le32_to_cpu( - aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN]; - BUG_ON(!context); - } - - /* context is nesqp unless async_event_id == CQ ERROR */ - nesqp = (struct nes_qp *)(unsigned long)context; - async_event_id = (u16)aeq_info; - tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; - iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; - nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p," - " Tcp state = %s, iWARP state = %s\n", - async_event_id, - le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe, - nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]); - - aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]); - if (aeq_info & NES_AEQE_QP) { - if (!nes_is_resource_allocated(nesadapter, - nesadapter->allocated_qps, - aeqe_cq_id)) - return; - } - - switch (async_event_id) { - case 
NES_AEQE_AEID_LLP_FIN_RECEIVED: - if (nesqp->term_flags) - return; /* Ignore it, wait for close complete */ - - if (atomic_inc_return(&nesqp->close_timer_started) == 1) { - if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) && - (nesqp->ibqp_state == IB_QPS_RTS)) { - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; - next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; - nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; - spin_unlock_irqrestore(&nesqp->lock, flags); - nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); - nes_cm_disconn(nesqp); - } - nesqp->cm_id->add_ref(nesqp->cm_id); - schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp, - NES_TIMER_TYPE_CLOSE, 1, 0); - nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d)," - " need ae to finish up, original_last_aeq = 0x%04X." - " last_aeq = 0x%04X, scheduling timer. TCP state = %d\n", - nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), - async_event_id, nesqp->last_aeq, tcp_state); - } - break; - case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; - spin_unlock_irqrestore(&nesqp->lock, flags); - nes_cm_disconn(nesqp); - break; - - case NES_AEQE_AEID_RESET_SENT: - tcp_state = NES_AEQE_TCP_STATE_CLOSED; - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; - nesqp->hte_added = 0; - spin_unlock_irqrestore(&nesqp->lock, flags); - next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE; - nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); - nes_cm_disconn(nesqp); - break; - - case NES_AEQE_AEID_LLP_CONNECTION_RESET: - if (atomic_read(&nesqp->close_timer_started)) - return; - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; - spin_unlock_irqrestore(&nesqp->lock, flags); - nes_cm_disconn(nesqp); - break; - - case NES_AEQE_AEID_TERMINATE_SENT: - nes_terminate_send_fin(nesdev, nesqp, aeqe); - break; - - case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED: - nes_terminate_received(nesdev, nesqp, aeqe); - break; - - case NES_AEQE_AEID_AMP_BAD_STAG_KEY: - case NES_AEQE_AEID_AMP_BAD_STAG_INDEX: - case NES_AEQE_AEID_AMP_UNALLOCATED_STAG: - case NES_AEQE_AEID_AMP_INVALID_STAG: - case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION: - case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: - case NES_AEQE_AEID_PRIV_OPERATION_DENIED: - case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: - case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION: - case NES_AEQE_AEID_AMP_TO_WRAP: - printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_ACCESS_ERR\n", - nesqp->hwqp.qp_id, async_event_id); - nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR); - break; - - case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE: - case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL: - case NES_AEQE_AEID_DDP_UBE_INVALID_MO: - case NES_AEQE_AEID_DDP_UBE_INVALID_QN: - if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) { - aeq_info &= 0xffff0000; - aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE; - aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info); - } - /* fall through */ - case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE: - case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES: - case 
NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: - case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR: - case NES_AEQE_AEID_AMP_BAD_QP: - case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH: - case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC: - case NES_AEQE_AEID_DDP_NO_L_BIT: - case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN: - case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID: - case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION: - case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION: - case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE: - case NES_AEQE_AEID_AMP_BAD_PD: - case NES_AEQE_AEID_AMP_FASTREG_SHARED: - case NES_AEQE_AEID_AMP_FASTREG_VALID_STAG: - case NES_AEQE_AEID_AMP_FASTREG_MW_STAG: - case NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS: - case NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW: - case NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH: - case NES_AEQE_AEID_AMP_INVALIDATE_SHARED: - case NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS: - case NES_AEQE_AEID_AMP_MWBIND_VALID_STAG: - case NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG: - case NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG: - case NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG: - case NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS: - case NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS: - case NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT: - case NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED: - case NES_AEQE_AEID_BAD_CLOSE: - case NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO: - case NES_AEQE_AEID_STAG_ZERO_INVALID: - case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST: - case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: - printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n", - nesqp->hwqp.qp_id, async_event_id); - print_ip(nesqp->cm_node); - if (!atomic_read(&nesqp->close_timer_started)) - nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); - break; - - case NES_AEQE_AEID_CQ_OPERATION_ERROR: - context <<= 1; - nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n", - le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), (void *)(unsigned long)context); - resource_allocated = nes_is_resource_allocated(nesadapter, nesadapter->allocated_cqs, - le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])); - if (resource_allocated) { - printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n", - __func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])); - hw_cq = (struct nes_hw_cq *)(unsigned long)context; - if (hw_cq) { - nescq = container_of(hw_cq, struct nes_cq, hw_cq); - if (nescq->ibcq.event_handler) { - ibevent.device = nescq->ibcq.device; - ibevent.event = IB_EVENT_CQ_ERR; - ibevent.element.cq = &nescq->ibcq; - nescq->ibcq.event_handler(&ibevent, nescq->ibcq.cq_context); - } - } - } - break; - - default: - nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n", - async_event_id); - break; - } - -} - -/** - * nes_iwarp_ce_handler - */ -void nes_iwarp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *hw_cq) -{ - struct nes_cq *nescq = container_of(hw_cq, struct nes_cq, hw_cq); - - /* nes_debug(NES_DBG_CQ, "Processing completion event for iWARP CQ%u.\n", - nescq->hw_cq.cq_number); */ - nes_write32(nesdev->regs+NES_CQ_ACK, nescq->hw_cq.cq_number); - - if (nescq->ibcq.comp_handler) - nescq->ibcq.comp_handler(&nescq->ibcq, nescq->ibcq.cq_context); - - return; -} - - -/** - * nes_manage_apbvt() - */ -int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, - u32 nic_index, u32 add_port) -{ - struct nes_device 
*nesdev = nesvnic->nesdev; - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_cqp_request *cqp_request; - int ret = 0; - u16 major_code; - - /* Send manage APBVT request to CQP */ - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); - return -ENOMEM; - } - cqp_request->waiting = 1; - cqp_wqe = &cqp_request->cqp_wqe; - - nes_debug(NES_DBG_QP, "%s APBV for local port=%u(0x%04x), nic_index=%u\n", - (add_port == NES_MANAGE_APBVT_ADD) ? "ADD" : "DEL", - accel_local_port, accel_local_port, nic_index); - - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, (NES_CQP_MANAGE_APBVT | - ((add_port == NES_MANAGE_APBVT_ADD) ? NES_CQP_APBVT_ADD : 0))); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, - ((nic_index << NES_CQP_APBVT_NIC_SHIFT) | accel_local_port)); - - nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n"); - - atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request); - - if (add_port == NES_MANAGE_APBVT_ADD) - ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), - NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n", - ret, cqp_request->major_code, cqp_request->minor_code); - major_code = cqp_request->major_code; - - nes_put_cqp_request(nesdev, cqp_request); - - if (!ret) - return -ETIME; - else if (major_code) - return -EIO; - else - return 0; -} - - -/** - * nes_manage_arp_cache - */ -void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, - u32 ip_addr, u32 action) -{ - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev; - struct nes_cqp_request *cqp_request; - int arp_index; - - nesdev = nesvnic->nesdev; - arp_index = nes_arp_table(nesdev, ip_addr, mac_addr, action); - if (arp_index == -1) { - return; - } - - /* update the ARP entry */ - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_NETDEV, "Failed to get a cqp_request.\n"); - return; - } - cqp_request->waiting = 0; - cqp_wqe = &cqp_request->cqp_wqe; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( - NES_CQP_MANAGE_ARP_CACHE | NES_CQP_ARP_PERM); - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_CQP_ARP_AEQ_INDEX_SHIFT); - cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(arp_index); - - if (action == NES_ARP_ADD) { - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_ARP_VALID); - cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32( - (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) | - (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]); - cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32( - (((u32)mac_addr[0]) << 8) | (u32)mac_addr[1]); - } else { - cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = 0; - cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = 0; - } - - nes_debug(NES_DBG_NETDEV, "Not waiting for CQP, cqp.sq_head=%u, cqp.sq_tail=%u\n", - nesdev->cqp.sq_head, nesdev->cqp.sq_tail); - - atomic_set(&cqp_request->refcount, 1); - nes_post_cqp_request(nesdev, cqp_request); -} - - -/** - * flush_wqes - */ -void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, - u32 which_wq, u32 wait_completion) -{ - struct nes_cqp_request *cqp_request; - struct nes_hw_cqp_wqe *cqp_wqe; - u32 
sq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH; - u32 rq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH; - int ret; - - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); - return; - } - if (wait_completion) { - cqp_request->waiting = 1; - atomic_set(&cqp_request->refcount, 2); - } else { - cqp_request->waiting = 0; - } - cqp_wqe = &cqp_request->cqp_wqe; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - - /* If wqe in error was identified, set code to be put into cqe */ - if ((nesqp->term_sq_flush_code) && (which_wq & NES_CQP_FLUSH_SQ)) { - which_wq |= NES_CQP_FLUSH_MAJ_MIN; - sq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_sq_flush_code; - nesqp->term_sq_flush_code = 0; - } - - if ((nesqp->term_rq_flush_code) && (which_wq & NES_CQP_FLUSH_RQ)) { - which_wq |= NES_CQP_FLUSH_MAJ_MIN; - rq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_rq_flush_code; - nesqp->term_rq_flush_code = 0; - } - - if (which_wq & NES_CQP_FLUSH_MAJ_MIN) { - cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_SQ_CODE] = cpu_to_le32(sq_code); - cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_RQ_CODE] = cpu_to_le32(rq_code); - } - - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = - cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq); - cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id); - - nes_post_cqp_request(nesdev, cqp_request); - - if (wait_completion) { - /* Wait for CQP */ - ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), - NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u," - " CQP Major:Minor codes = 0x%04X:0x%04X\n", - ret, cqp_request->major_code, cqp_request->minor_code); - nes_put_cqp_request(nesdev, cqp_request); - } -} diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h deleted file mode 100644 index 3c56470816a8..000000000000 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ /dev/null @@ -1,1380 +0,0 @@ -/* -* Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* OpenIB.org BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifndef __NES_HW_H -#define __NES_HW_H - -#define NES_PHY_TYPE_CX4 1 -#define NES_PHY_TYPE_1G 2 -#define NES_PHY_TYPE_ARGUS 4 -#define NES_PHY_TYPE_PUMA_1G 5 -#define NES_PHY_TYPE_PUMA_10G 6 -#define NES_PHY_TYPE_GLADIUS 7 -#define NES_PHY_TYPE_SFP_D 8 -#define NES_PHY_TYPE_KR 9 - -#define NES_MULTICAST_PF_MAX 8 -#define NES_A0 3 - -#define NES_ENABLE_PAU 0x07000001 -#define NES_DISABLE_PAU 0x07000000 -#define NES_PAU_COUNTER 10 -#define NES_CQP_OPCODE_MASK 0x3f - -enum pci_regs { - NES_INT_STAT = 0x0000, - NES_INT_MASK = 0x0004, - NES_INT_PENDING = 0x0008, - NES_INTF_INT_STAT = 0x000C, - NES_INTF_INT_MASK = 0x0010, - NES_TIMER_STAT = 0x0014, - NES_PERIODIC_CONTROL = 0x0018, - NES_ONE_SHOT_CONTROL = 0x001C, - NES_EEPROM_COMMAND = 0x0020, - NES_EEPROM_DATA = 0x0024, - NES_FLASH_COMMAND = 0x0028, - NES_FLASH_DATA = 0x002C, - NES_SOFTWARE_RESET = 0x0030, - NES_CQ_ACK = 0x0034, - NES_WQE_ALLOC = 0x0040, - NES_CQE_ALLOC = 0x0044, - NES_AEQ_ALLOC = 0x0048 -}; - -enum indexed_regs { - NES_IDX_CREATE_CQP_LOW = 0x0000, - NES_IDX_CREATE_CQP_HIGH = 0x0004, - NES_IDX_QP_CONTROL = 0x0040, - NES_IDX_FLM_CONTROL = 0x0080, - NES_IDX_INT_CPU_STATUS = 0x00a0, - NES_IDX_GPR_TRIGGER = 0x00bc, - NES_IDX_GPIO_CONTROL = 0x00f0, - NES_IDX_GPIO_DATA = 0x00f4, - NES_IDX_GPR2 = 0x010c, - NES_IDX_TCP_CONFIG0 = 0x01e4, - NES_IDX_TCP_TIMER_CONFIG = 0x01ec, - NES_IDX_TCP_NOW = 0x01f0, - NES_IDX_QP_MAX_CFG_SIZES = 0x0200, - NES_IDX_QP_CTX_SIZE = 0x0218, - NES_IDX_TCP_TIMER_SIZE0 = 0x0238, - NES_IDX_TCP_TIMER_SIZE1 = 0x0240, - NES_IDX_ARP_CACHE_SIZE = 0x0258, - NES_IDX_CQ_CTX_SIZE = 0x0260, - NES_IDX_MRT_SIZE = 0x0278, - NES_IDX_PBL_REGION_SIZE = 0x0280, - NES_IDX_IRRQ_COUNT = 0x02b0, - NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x02f0, - NES_IDX_RX_WINDOW_BUFFER_SIZE = 0x0300, - NES_IDX_DST_IP_ADDR = 0x0400, - NES_IDX_PCIX_DIAG = 0x08e8, - NES_IDX_MPP_DEBUG = 0x0a00, - NES_IDX_PORT_RX_DISCARDS = 0x0a30, - NES_IDX_PORT_TX_DISCARDS = 0x0a34, - NES_IDX_MPP_LB_DEBUG = 0x0b00, - NES_IDX_DENALI_CTL_22 = 0x1058, - NES_IDX_MAC_TX_CONTROL = 0x2000, - NES_IDX_MAC_TX_CONFIG = 0x2004, - NES_IDX_MAC_TX_PAUSE_QUANTA = 0x2008, - NES_IDX_MAC_RX_CONTROL = 0x200c, - NES_IDX_MAC_RX_CONFIG = 0x2010, - NES_IDX_MAC_EXACT_MATCH_BOTTOM = 0x201c, - NES_IDX_MAC_MDIO_CONTROL = 0x2084, - NES_IDX_MAC_TX_OCTETS_LOW = 0x2100, - NES_IDX_MAC_TX_OCTETS_HIGH = 0x2104, - NES_IDX_MAC_TX_FRAMES_LOW = 0x2108, - NES_IDX_MAC_TX_FRAMES_HIGH = 0x210c, - NES_IDX_MAC_TX_PAUSE_FRAMES = 0x2118, - NES_IDX_MAC_TX_ERRORS = 0x2138, - NES_IDX_MAC_RX_OCTETS_LOW = 0x213c, - NES_IDX_MAC_RX_OCTETS_HIGH = 0x2140, - NES_IDX_MAC_RX_FRAMES_LOW = 0x2144, - NES_IDX_MAC_RX_FRAMES_HIGH = 0x2148, - NES_IDX_MAC_RX_BC_FRAMES_LOW = 0x214c, - NES_IDX_MAC_RX_MC_FRAMES_HIGH = 0x2150, - NES_IDX_MAC_RX_PAUSE_FRAMES = 0x2154, - NES_IDX_MAC_RX_SHORT_FRAMES = 0x2174, - NES_IDX_MAC_RX_OVERSIZED_FRAMES = 0x2178, - NES_IDX_MAC_RX_JABBER_FRAMES = 0x217c, - NES_IDX_MAC_RX_CRC_ERR_FRAMES = 0x2180, - NES_IDX_MAC_RX_LENGTH_ERR_FRAMES = 0x2184, - NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES = 0x2188, - NES_IDX_MAC_INT_STATUS = 0x21f0, - NES_IDX_MAC_INT_MASK = 0x21f4, - NES_IDX_PHY_PCS_CONTROL_STATUS0 = 0x2800, - NES_IDX_PHY_PCS_CONTROL_STATUS1 = 0x2a00, - NES_IDX_ETH_SERDES_COMMON_CONTROL0 = 0x2808, - NES_IDX_ETH_SERDES_COMMON_CONTROL1 = 0x2a08, - NES_IDX_ETH_SERDES_COMMON_STATUS0 = 0x280c, - NES_IDX_ETH_SERDES_COMMON_STATUS1 = 0x2a0c, - NES_IDX_ETH_SERDES_TX_EMP0 = 0x2810, - NES_IDX_ETH_SERDES_TX_EMP1 = 0x2a10, - NES_IDX_ETH_SERDES_TX_DRIVE0 = 0x2814, - NES_IDX_ETH_SERDES_TX_DRIVE1 = 0x2a14, - 
NES_IDX_ETH_SERDES_RX_MODE0 = 0x2818, - NES_IDX_ETH_SERDES_RX_MODE1 = 0x2a18, - NES_IDX_ETH_SERDES_RX_SIGDET0 = 0x281c, - NES_IDX_ETH_SERDES_RX_SIGDET1 = 0x2a1c, - NES_IDX_ETH_SERDES_BYPASS0 = 0x2820, - NES_IDX_ETH_SERDES_BYPASS1 = 0x2a20, - NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0 = 0x2824, - NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1 = 0x2a24, - NES_IDX_ETH_SERDES_RX_EQ_CONTROL0 = 0x2828, - NES_IDX_ETH_SERDES_RX_EQ_CONTROL1 = 0x2a28, - NES_IDX_ETH_SERDES_RX_EQ_STATUS0 = 0x282c, - NES_IDX_ETH_SERDES_RX_EQ_STATUS1 = 0x2a2c, - NES_IDX_ETH_SERDES_CDR_RESET0 = 0x2830, - NES_IDX_ETH_SERDES_CDR_RESET1 = 0x2a30, - NES_IDX_ETH_SERDES_CDR_CONTROL0 = 0x2834, - NES_IDX_ETH_SERDES_CDR_CONTROL1 = 0x2a34, - NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0 = 0x2838, - NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1 = 0x2a38, - NES_IDX_ENDNODE0_NSTAT_RX_DISCARD = 0x3080, - NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO = 0x3000, - NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI = 0x3004, - NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO = 0x3008, - NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI = 0x300c, - NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO = 0x7000, - NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004, - NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008, - NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c, - NES_IDX_WQM_CONFIG0 = 0x5000, - NES_IDX_WQM_CONFIG1 = 0x5004, - NES_IDX_CM_CONFIG = 0x5100, - NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000, - NES_IDX_NIC_PHYPORT_TO_USW = 0x6008, - NES_IDX_NIC_ACTIVE = 0x6010, - NES_IDX_NIC_UNICAST_ALL = 0x6018, - NES_IDX_NIC_MULTICAST_ALL = 0x6020, - NES_IDX_NIC_MULTICAST_ENABLE = 0x6028, - NES_IDX_NIC_BROADCAST_ON = 0x6030, - NES_IDX_USED_CHUNKS_TX = 0x60b0, - NES_IDX_TX_POOL_SIZE = 0x60b8, - NES_IDX_QUAD_HASH_TABLE_SIZE = 0x6148, - NES_IDX_PERFECT_FILTER_LOW = 0x6200, - NES_IDX_PERFECT_FILTER_HIGH = 0x6204, - NES_IDX_IPV4_TCP_REXMITS = 0x7080, - NES_IDX_DEBUG_ERROR_CONTROL_STATUS = 0x913c, - NES_IDX_DEBUG_ERROR_MASKS0 = 0x9140, - NES_IDX_DEBUG_ERROR_MASKS1 = 0x9144, - NES_IDX_DEBUG_ERROR_MASKS2 = 0x9148, - NES_IDX_DEBUG_ERROR_MASKS3 = 0x914c, - NES_IDX_DEBUG_ERROR_MASKS4 = 0x9150, - NES_IDX_DEBUG_ERROR_MASKS5 = 0x9154, -}; - -#define NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE 1 -#define NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE (1 << 17) - -enum nes_cqp_opcodes { - NES_CQP_CREATE_QP = 0x00, - NES_CQP_MODIFY_QP = 0x01, - NES_CQP_DESTROY_QP = 0x02, - NES_CQP_CREATE_CQ = 0x03, - NES_CQP_MODIFY_CQ = 0x04, - NES_CQP_DESTROY_CQ = 0x05, - NES_CQP_ALLOCATE_STAG = 0x09, - NES_CQP_REGISTER_STAG = 0x0a, - NES_CQP_QUERY_STAG = 0x0b, - NES_CQP_REGISTER_SHARED_STAG = 0x0c, - NES_CQP_DEALLOCATE_STAG = 0x0d, - NES_CQP_MANAGE_ARP_CACHE = 0x0f, - NES_CQP_DOWNLOAD_SEGMENT = 0x10, - NES_CQP_SUSPEND_QPS = 0x11, - NES_CQP_UPLOAD_CONTEXT = 0x13, - NES_CQP_CREATE_CEQ = 0x16, - NES_CQP_DESTROY_CEQ = 0x18, - NES_CQP_CREATE_AEQ = 0x19, - NES_CQP_DESTROY_AEQ = 0x1b, - NES_CQP_LMI_ACCESS = 0x20, - NES_CQP_FLUSH_WQES = 0x22, - NES_CQP_MANAGE_APBVT = 0x23, - NES_CQP_MANAGE_QUAD_HASH = 0x25 -}; - -enum nes_cqp_wqe_word_idx { - NES_CQP_WQE_OPCODE_IDX = 0, - NES_CQP_WQE_ID_IDX = 1, - NES_CQP_WQE_COMP_CTX_LOW_IDX = 2, - NES_CQP_WQE_COMP_CTX_HIGH_IDX = 3, - NES_CQP_WQE_COMP_SCRATCH_LOW_IDX = 4, - NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5, -}; - -enum nes_cqp_wqe_word_download_idx { /* format differs from other cqp ops */ - NES_CQP_WQE_DL_OPCODE_IDX = 0, - NES_CQP_WQE_DL_COMP_CTX_LOW_IDX = 1, - NES_CQP_WQE_DL_COMP_CTX_HIGH_IDX = 2, - NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX = 3 - /* For index values 4-15 use NES_NIC_SQ_WQE_ values */ -}; - -enum nes_cqp_cq_wqeword_idx { - NES_CQP_CQ_WQE_PBL_LOW_IDX = 6, - 
NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7, - NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX = 8, - NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX = 9, - NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX = 10, -}; - -enum nes_cqp_stag_wqeword_idx { - NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX = 1, - NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX = 6, - NES_CQP_STAG_WQE_LEN_LOW_IDX = 7, - NES_CQP_STAG_WQE_STAG_IDX = 8, - NES_CQP_STAG_WQE_VA_LOW_IDX = 10, - NES_CQP_STAG_WQE_VA_HIGH_IDX = 11, - NES_CQP_STAG_WQE_PA_LOW_IDX = 12, - NES_CQP_STAG_WQE_PA_HIGH_IDX = 13, - NES_CQP_STAG_WQE_PBL_LEN_IDX = 14 -}; - -#define NES_CQP_OP_LOGICAL_PORT_SHIFT 26 -#define NES_CQP_OP_IWARP_STATE_SHIFT 28 -#define NES_CQP_OP_TERMLEN_SHIFT 28 - -enum nes_cqp_qp_bits { - NES_CQP_QP_ARP_VALID = (1<<8), - NES_CQP_QP_WINBUF_VALID = (1<<9), - NES_CQP_QP_CONTEXT_VALID = (1<<10), - NES_CQP_QP_ORD_VALID = (1<<11), - NES_CQP_QP_WINBUF_DATAIND_EN = (1<<12), - NES_CQP_QP_VIRT_WQS = (1<<13), - NES_CQP_QP_DEL_HTE = (1<<14), - NES_CQP_QP_CQS_VALID = (1<<15), - NES_CQP_QP_TYPE_TSA = 0, - NES_CQP_QP_TYPE_IWARP = (1<<16), - NES_CQP_QP_TYPE_CQP = (4<<16), - NES_CQP_QP_TYPE_NIC = (5<<16), - NES_CQP_QP_MSS_CHG = (1<<20), - NES_CQP_QP_STATIC_RESOURCES = (1<<21), - NES_CQP_QP_IGNORE_MW_BOUND = (1<<22), - NES_CQP_QP_VWQ_USE_LMI = (1<<23), - NES_CQP_QP_IWARP_STATE_IDLE = (1<<NES_CQP_OP_IWARP_STATE_SHIFT), - NES_CQP_QP_IWARP_STATE_RTS = (2<<NES_CQP_OP_IWARP_STATE_SHIFT), - NES_CQP_QP_IWARP_STATE_CLOSING = (3<<NES_CQP_OP_IWARP_STATE_SHIFT), - NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT), - NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT), - NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT), - NES_CQP_QP_TERM_DONT_SEND_FIN = (1<<24), - NES_CQP_QP_TERM_DONT_SEND_TERM_MSG = (1<<25), - NES_CQP_QP_RESET = (1<<31), -}; - -enum nes_cqp_qp_wqe_word_idx { - NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6, - NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7, - NES_CQP_QP_WQE_FLUSH_SQ_CODE = 8, - NES_CQP_QP_WQE_FLUSH_RQ_CODE = 9, - NES_CQP_QP_WQE_NEW_MSS_IDX = 15, -}; - -enum nes_nic_ctx_bits { - NES_NIC_CTX_RQ_SIZE_32 = (3<<8), - NES_NIC_CTX_RQ_SIZE_512 = (3<<8), - NES_NIC_CTX_SQ_SIZE_32 = (1<<10), - NES_NIC_CTX_SQ_SIZE_512 = (3<<10), -}; - -enum nes_nic_qp_ctx_word_idx { - NES_NIC_CTX_MISC_IDX = 0, - NES_NIC_CTX_SQ_LOW_IDX = 2, - NES_NIC_CTX_SQ_HIGH_IDX = 3, - NES_NIC_CTX_RQ_LOW_IDX = 4, - NES_NIC_CTX_RQ_HIGH_IDX = 5, -}; - -enum nes_cqp_cq_bits { - NES_CQP_CQ_CEQE_MASK = (1<<9), - NES_CQP_CQ_CEQ_VALID = (1<<10), - NES_CQP_CQ_RESIZE = (1<<11), - NES_CQP_CQ_CHK_OVERFLOW = (1<<12), - NES_CQP_CQ_4KB_CHUNK = (1<<14), - NES_CQP_CQ_VIRT = (1<<15), -}; - -enum nes_cqp_stag_bits { - NES_CQP_STAG_VA_TO = (1<<9), - NES_CQP_STAG_DEALLOC_PBLS = (1<<10), - NES_CQP_STAG_PBL_BLK_SIZE = (1<<11), - NES_CQP_STAG_MR = (1<<13), - NES_CQP_STAG_RIGHTS_LOCAL_READ = (1<<16), - NES_CQP_STAG_RIGHTS_LOCAL_WRITE = (1<<17), - NES_CQP_STAG_RIGHTS_REMOTE_READ = (1<<18), - NES_CQP_STAG_RIGHTS_REMOTE_WRITE = (1<<19), - NES_CQP_STAG_RIGHTS_WINDOW_BIND = (1<<20), - NES_CQP_STAG_REM_ACC_EN = (1<<21), - NES_CQP_STAG_LEAVE_PENDING = (1<<31), -}; - -enum nes_cqp_ceq_wqeword_idx { - NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX = 1, - NES_CQP_CEQ_WQE_PBL_LOW_IDX = 6, - NES_CQP_CEQ_WQE_PBL_HIGH_IDX = 7, -}; - -enum nes_cqp_ceq_bits { - NES_CQP_CEQ_4KB_CHUNK = (1<<14), - NES_CQP_CEQ_VIRT = (1<<15), -}; - -enum nes_cqp_aeq_wqeword_idx { - NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX = 1, - NES_CQP_AEQ_WQE_PBL_LOW_IDX = 6, - NES_CQP_AEQ_WQE_PBL_HIGH_IDX = 7, -}; - -enum nes_cqp_aeq_bits { - NES_CQP_AEQ_4KB_CHUNK = (1<<14), - NES_CQP_AEQ_VIRT = (1<<15), 
-}; - -enum nes_cqp_lmi_wqeword_idx { - NES_CQP_LMI_WQE_LMI_OFFSET_IDX = 1, - NES_CQP_LMI_WQE_FRAG_LOW_IDX = 8, - NES_CQP_LMI_WQE_FRAG_HIGH_IDX = 9, - NES_CQP_LMI_WQE_FRAG_LEN_IDX = 10, -}; - -enum nes_cqp_arp_wqeword_idx { - NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX = 6, - NES_CQP_ARP_WQE_MAC_HIGH_IDX = 7, - NES_CQP_ARP_WQE_REACHABILITY_MAX_IDX = 1, -}; - -enum nes_cqp_upload_wqeword_idx { - NES_CQP_UPLOAD_WQE_CTXT_LOW_IDX = 6, - NES_CQP_UPLOAD_WQE_CTXT_HIGH_IDX = 7, - NES_CQP_UPLOAD_WQE_HTE_IDX = 8, -}; - -enum nes_cqp_arp_bits { - NES_CQP_ARP_VALID = (1<<8), - NES_CQP_ARP_PERM = (1<<9), -}; - -enum nes_cqp_flush_bits { - NES_CQP_FLUSH_SQ = (1<<30), - NES_CQP_FLUSH_RQ = (1<<31), - NES_CQP_FLUSH_MAJ_MIN = (1<<28), -}; - -enum nes_cqe_opcode_bits { - NES_CQE_STAG_VALID = (1<<6), - NES_CQE_ERROR = (1<<7), - NES_CQE_SQ = (1<<8), - NES_CQE_SE = (1<<9), - NES_CQE_PSH = (1<<29), - NES_CQE_FIN = (1<<30), - NES_CQE_VALID = (1<<31), -}; - - -enum nes_cqe_word_idx { - NES_CQE_PAYLOAD_LENGTH_IDX = 0, - NES_CQE_COMP_COMP_CTX_LOW_IDX = 2, - NES_CQE_COMP_COMP_CTX_HIGH_IDX = 3, - NES_CQE_INV_STAG_IDX = 4, - NES_CQE_QP_ID_IDX = 5, - NES_CQE_ERROR_CODE_IDX = 6, - NES_CQE_OPCODE_IDX = 7, -}; - -enum nes_ceqe_word_idx { - NES_CEQE_CQ_CTX_LOW_IDX = 0, - NES_CEQE_CQ_CTX_HIGH_IDX = 1, -}; - -enum nes_ceqe_status_bit { - NES_CEQE_VALID = (1<<31), -}; - -enum nes_int_bits { - NES_INT_CEQ0 = (1<<0), - NES_INT_CEQ1 = (1<<1), - NES_INT_CEQ2 = (1<<2), - NES_INT_CEQ3 = (1<<3), - NES_INT_CEQ4 = (1<<4), - NES_INT_CEQ5 = (1<<5), - NES_INT_CEQ6 = (1<<6), - NES_INT_CEQ7 = (1<<7), - NES_INT_CEQ8 = (1<<8), - NES_INT_CEQ9 = (1<<9), - NES_INT_CEQ10 = (1<<10), - NES_INT_CEQ11 = (1<<11), - NES_INT_CEQ12 = (1<<12), - NES_INT_CEQ13 = (1<<13), - NES_INT_CEQ14 = (1<<14), - NES_INT_CEQ15 = (1<<15), - NES_INT_AEQ0 = (1<<16), - NES_INT_AEQ1 = (1<<17), - NES_INT_AEQ2 = (1<<18), - NES_INT_AEQ3 = (1<<19), - NES_INT_AEQ4 = (1<<20), - NES_INT_AEQ5 = (1<<21), - NES_INT_AEQ6 = (1<<22), - NES_INT_AEQ7 = (1<<23), - NES_INT_MAC0 = (1<<24), - NES_INT_MAC1 = (1<<25), - NES_INT_MAC2 = (1<<26), - NES_INT_MAC3 = (1<<27), - NES_INT_TSW = (1<<28), - NES_INT_TIMER = (1<<29), - NES_INT_INTF = (1<<30), -}; - -enum nes_intf_int_bits { - NES_INTF_INT_PCIERR = (1<<0), - NES_INTF_PERIODIC_TIMER = (1<<2), - NES_INTF_ONE_SHOT_TIMER = (1<<3), - NES_INTF_INT_CRITERR = (1<<14), - NES_INTF_INT_AEQ0_OFLOW = (1<<16), - NES_INTF_INT_AEQ1_OFLOW = (1<<17), - NES_INTF_INT_AEQ2_OFLOW = (1<<18), - NES_INTF_INT_AEQ3_OFLOW = (1<<19), - NES_INTF_INT_AEQ4_OFLOW = (1<<20), - NES_INTF_INT_AEQ5_OFLOW = (1<<21), - NES_INTF_INT_AEQ6_OFLOW = (1<<22), - NES_INTF_INT_AEQ7_OFLOW = (1<<23), - NES_INTF_INT_AEQ_OFLOW = (0xff<<16), -}; - -enum nes_mac_int_bits { - NES_MAC_INT_LINK_STAT_CHG = (1<<1), - NES_MAC_INT_XGMII_EXT = (1<<2), - NES_MAC_INT_TX_UNDERFLOW = (1<<6), - NES_MAC_INT_TX_ERROR = (1<<7), -}; - -enum nes_cqe_allocate_bits { - NES_CQE_ALLOC_INC_SELECT = (1<<28), - NES_CQE_ALLOC_NOTIFY_NEXT = (1<<29), - NES_CQE_ALLOC_NOTIFY_SE = (1<<30), - NES_CQE_ALLOC_RESET = (1<<31), -}; - -enum nes_nic_rq_wqe_word_idx { - NES_NIC_RQ_WQE_LENGTH_1_0_IDX = 0, - NES_NIC_RQ_WQE_LENGTH_3_2_IDX = 1, - NES_NIC_RQ_WQE_FRAG0_LOW_IDX = 2, - NES_NIC_RQ_WQE_FRAG0_HIGH_IDX = 3, - NES_NIC_RQ_WQE_FRAG1_LOW_IDX = 4, - NES_NIC_RQ_WQE_FRAG1_HIGH_IDX = 5, - NES_NIC_RQ_WQE_FRAG2_LOW_IDX = 6, - NES_NIC_RQ_WQE_FRAG2_HIGH_IDX = 7, - NES_NIC_RQ_WQE_FRAG3_LOW_IDX = 8, - NES_NIC_RQ_WQE_FRAG3_HIGH_IDX = 9, -}; - -enum nes_nic_sq_wqe_word_idx { - NES_NIC_SQ_WQE_MISC_IDX = 0, - NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX = 1, - 
NES_NIC_SQ_WQE_LSO_INFO_IDX = 2, - NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX = 3, - NES_NIC_SQ_WQE_LENGTH_2_1_IDX = 4, - NES_NIC_SQ_WQE_LENGTH_4_3_IDX = 5, - NES_NIC_SQ_WQE_FRAG0_LOW_IDX = 6, - NES_NIC_SQ_WQE_FRAG0_HIGH_IDX = 7, - NES_NIC_SQ_WQE_FRAG1_LOW_IDX = 8, - NES_NIC_SQ_WQE_FRAG1_HIGH_IDX = 9, - NES_NIC_SQ_WQE_FRAG2_LOW_IDX = 10, - NES_NIC_SQ_WQE_FRAG2_HIGH_IDX = 11, - NES_NIC_SQ_WQE_FRAG3_LOW_IDX = 12, - NES_NIC_SQ_WQE_FRAG3_HIGH_IDX = 13, - NES_NIC_SQ_WQE_FRAG4_LOW_IDX = 14, - NES_NIC_SQ_WQE_FRAG4_HIGH_IDX = 15, -}; - -enum nes_iwarp_sq_wqe_word_idx { - NES_IWARP_SQ_WQE_MISC_IDX = 0, - NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX = 1, - NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX = 2, - NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX = 3, - NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX = 4, - NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX = 5, - NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX = 7, - NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX = 8, - NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX = 9, - NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX = 10, - NES_IWARP_SQ_WQE_RDMA_STAG_IDX = 11, - NES_IWARP_SQ_WQE_IMM_DATA_START_IDX = 12, - NES_IWARP_SQ_WQE_FRAG0_LOW_IDX = 16, - NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX = 17, - NES_IWARP_SQ_WQE_LENGTH0_IDX = 18, - NES_IWARP_SQ_WQE_STAG0_IDX = 19, - NES_IWARP_SQ_WQE_FRAG1_LOW_IDX = 20, - NES_IWARP_SQ_WQE_FRAG1_HIGH_IDX = 21, - NES_IWARP_SQ_WQE_LENGTH1_IDX = 22, - NES_IWARP_SQ_WQE_STAG1_IDX = 23, - NES_IWARP_SQ_WQE_FRAG2_LOW_IDX = 24, - NES_IWARP_SQ_WQE_FRAG2_HIGH_IDX = 25, - NES_IWARP_SQ_WQE_LENGTH2_IDX = 26, - NES_IWARP_SQ_WQE_STAG2_IDX = 27, - NES_IWARP_SQ_WQE_FRAG3_LOW_IDX = 28, - NES_IWARP_SQ_WQE_FRAG3_HIGH_IDX = 29, - NES_IWARP_SQ_WQE_LENGTH3_IDX = 30, - NES_IWARP_SQ_WQE_STAG3_IDX = 31, -}; - -enum nes_iwarp_sq_bind_wqe_word_idx { - NES_IWARP_SQ_BIND_WQE_MR_IDX = 6, - NES_IWARP_SQ_BIND_WQE_MW_IDX = 7, - NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX = 8, - NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX = 9, - NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX = 10, - NES_IWARP_SQ_BIND_WQE_VA_FBO_HIGH_IDX = 11, -}; - -enum nes_iwarp_sq_fmr_wqe_word_idx { - NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX = 7, - NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX = 8, - NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX = 9, - NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX = 10, - NES_IWARP_SQ_FMR_WQE_VA_FBO_HIGH_IDX = 11, - NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX = 12, - NES_IWARP_SQ_FMR_WQE_PBL_ADDR_HIGH_IDX = 13, - NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14, -}; - -enum nes_iwarp_sq_fmr_opcodes { - NES_IWARP_SQ_FMR_WQE_ZERO_BASED = (1<<6), - NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K = (0<<7), - NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M = (1<<7), - NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ = (1<<16), - NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE = (1<<17), - NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ = (1<<18), - NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE = (1<<19), - NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND = (1<<20), -}; - -#define NES_IWARP_SQ_FMR_WQE_MR_LENGTH_HIGH_MASK 0xFF; - -enum nes_iwarp_sq_locinv_wqe_word_idx { - NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6, -}; - -enum nes_iwarp_rq_wqe_word_idx { - NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1, - NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2, - NES_IWARP_RQ_WQE_COMP_CTX_HIGH_IDX = 3, - NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX = 4, - NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX = 5, - NES_IWARP_RQ_WQE_FRAG0_LOW_IDX = 8, - NES_IWARP_RQ_WQE_FRAG0_HIGH_IDX = 9, - NES_IWARP_RQ_WQE_LENGTH0_IDX = 10, - NES_IWARP_RQ_WQE_STAG0_IDX = 11, - NES_IWARP_RQ_WQE_FRAG1_LOW_IDX = 12, - NES_IWARP_RQ_WQE_FRAG1_HIGH_IDX = 13, - NES_IWARP_RQ_WQE_LENGTH1_IDX = 14, - NES_IWARP_RQ_WQE_STAG1_IDX = 15, - 
NES_IWARP_RQ_WQE_FRAG2_LOW_IDX = 16, - NES_IWARP_RQ_WQE_FRAG2_HIGH_IDX = 17, - NES_IWARP_RQ_WQE_LENGTH2_IDX = 18, - NES_IWARP_RQ_WQE_STAG2_IDX = 19, - NES_IWARP_RQ_WQE_FRAG3_LOW_IDX = 20, - NES_IWARP_RQ_WQE_FRAG3_HIGH_IDX = 21, - NES_IWARP_RQ_WQE_LENGTH3_IDX = 22, - NES_IWARP_RQ_WQE_STAG3_IDX = 23, -}; - -enum nes_nic_sq_wqe_bits { - NES_NIC_SQ_WQE_PHDR_CS_READY = (1<<21), - NES_NIC_SQ_WQE_LSO_ENABLE = (1<<22), - NES_NIC_SQ_WQE_TAGVALUE_ENABLE = (1<<23), - NES_NIC_SQ_WQE_DISABLE_CHKSUM = (1<<30), - NES_NIC_SQ_WQE_COMPLETION = (1<<31), -}; - -enum nes_nic_cqe_word_idx { - NES_NIC_CQE_ACCQP_ID_IDX = 0, - NES_NIC_CQE_HASH_RCVNXT = 1, - NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2, - NES_NIC_CQE_MISC_IDX = 3, -}; - -#define NES_PKT_TYPE_APBVT_BITS 0xC112 -#define NES_PKT_TYPE_APBVT_MASK 0xff3e - -#define NES_PKT_TYPE_PVALID_BITS 0x10000000 -#define NES_PKT_TYPE_PVALID_MASK 0x30000000 - -#define NES_PKT_TYPE_TCPV4_BITS 0x0110 -#define NES_PKT_TYPE_TCPV4_MASK 0x3f30 - -#define NES_PKT_TYPE_UDPV4_BITS 0x0210 -#define NES_PKT_TYPE_UDPV4_MASK 0x3f30 - -#define NES_PKT_TYPE_IPV4_BITS 0x0010 -#define NES_PKT_TYPE_IPV4_MASK 0x3f30 - -#define NES_PKT_TYPE_OTHER_BITS 0x0000 -#define NES_PKT_TYPE_OTHER_MASK 0x0030 - -#define NES_NIC_CQE_ERRV_SHIFT 16 -enum nes_nic_ev_bits { - NES_NIC_ERRV_BITS_MODE = (1<<0), - NES_NIC_ERRV_BITS_IPV4_CSUM_ERR = (1<<1), - NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR = (1<<2), - NES_NIC_ERRV_BITS_WQE_OVERRUN = (1<<3), - NES_NIC_ERRV_BITS_IPH_ERR = (1<<4), -}; - -enum nes_nic_cqe_bits { - NES_NIC_CQE_ERRV_MASK = (0xff<<NES_NIC_CQE_ERRV_SHIFT), - NES_NIC_CQE_SQ = (1<<24), - NES_NIC_CQE_ACCQP_PORT = (1<<28), - NES_NIC_CQE_ACCQP_VALID = (1<<29), - NES_NIC_CQE_TAG_VALID = (1<<30), - NES_NIC_CQE_VALID = (1<<31), -}; - -enum nes_aeqe_word_idx { - NES_AEQE_COMP_CTXT_LOW_IDX = 0, - NES_AEQE_COMP_CTXT_HIGH_IDX = 1, - NES_AEQE_COMP_QP_CQ_ID_IDX = 2, - NES_AEQE_MISC_IDX = 3, -}; - -enum nes_aeqe_bits { - NES_AEQE_QP = (1<<16), - NES_AEQE_CQ = (1<<17), - NES_AEQE_SQ = (1<<18), - NES_AEQE_INBOUND_RDMA = (1<<19), - NES_AEQE_IWARP_STATE_MASK = (7<<20), - NES_AEQE_TCP_STATE_MASK = (0xf<<24), - NES_AEQE_Q2_DATA_WRITTEN = (0x3<<28), - NES_AEQE_VALID = (1<<31), -}; - -#define NES_AEQE_IWARP_STATE_SHIFT 20 -#define NES_AEQE_TCP_STATE_SHIFT 24 -#define NES_AEQE_Q2_DATA_ETHERNET (1<<28) -#define NES_AEQE_Q2_DATA_MPA (1<<29) - -enum nes_aeqe_iwarp_state { - NES_AEQE_IWARP_STATE_NON_EXISTANT = 0, - NES_AEQE_IWARP_STATE_IDLE = 1, - NES_AEQE_IWARP_STATE_RTS = 2, - NES_AEQE_IWARP_STATE_CLOSING = 3, - NES_AEQE_IWARP_STATE_TERMINATE = 5, - NES_AEQE_IWARP_STATE_ERROR = 6 -}; - -enum nes_aeqe_tcp_state { - NES_AEQE_TCP_STATE_NON_EXISTANT = 0, - NES_AEQE_TCP_STATE_CLOSED = 1, - NES_AEQE_TCP_STATE_LISTEN = 2, - NES_AEQE_TCP_STATE_SYN_SENT = 3, - NES_AEQE_TCP_STATE_SYN_RCVD = 4, - NES_AEQE_TCP_STATE_ESTABLISHED = 5, - NES_AEQE_TCP_STATE_CLOSE_WAIT = 6, - NES_AEQE_TCP_STATE_FIN_WAIT_1 = 7, - NES_AEQE_TCP_STATE_CLOSING = 8, - NES_AEQE_TCP_STATE_LAST_ACK = 9, - NES_AEQE_TCP_STATE_FIN_WAIT_2 = 10, - NES_AEQE_TCP_STATE_TIME_WAIT = 11 -}; - -enum nes_aeqe_aeid { - NES_AEQE_AEID_AMP_UNALLOCATED_STAG = 0x0102, - NES_AEQE_AEID_AMP_INVALID_STAG = 0x0103, - NES_AEQE_AEID_AMP_BAD_QP = 0x0104, - NES_AEQE_AEID_AMP_BAD_PD = 0x0105, - NES_AEQE_AEID_AMP_BAD_STAG_KEY = 0x0106, - NES_AEQE_AEID_AMP_BAD_STAG_INDEX = 0x0107, - NES_AEQE_AEID_AMP_BOUNDS_VIOLATION = 0x0108, - NES_AEQE_AEID_AMP_RIGHTS_VIOLATION = 0x0109, - NES_AEQE_AEID_AMP_TO_WRAP = 0x010a, - NES_AEQE_AEID_AMP_FASTREG_SHARED = 0x010b, - NES_AEQE_AEID_AMP_FASTREG_VALID_STAG = 0x010c, - 
NES_AEQE_AEID_AMP_FASTREG_MW_STAG = 0x010d, - NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS = 0x010e, - NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW = 0x010f, - NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH = 0x0110, - NES_AEQE_AEID_AMP_INVALIDATE_SHARED = 0x0111, - NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS = 0x0112, - NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS = 0x0113, - NES_AEQE_AEID_AMP_MWBIND_VALID_STAG = 0x0114, - NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG = 0x0115, - NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG = 0x0116, - NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG = 0x0117, - NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS = 0x0118, - NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS = 0x0119, - NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT = 0x011a, - NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED = 0x011b, - NES_AEQE_AEID_BAD_CLOSE = 0x0201, - NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE = 0x0202, - NES_AEQE_AEID_CQ_OPERATION_ERROR = 0x0203, - NES_AEQE_AEID_PRIV_OPERATION_DENIED = 0x0204, - NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO = 0x0205, - NES_AEQE_AEID_STAG_ZERO_INVALID = 0x0206, - NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN = 0x0301, - NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID = 0x0302, - NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER = 0x0303, - NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION = 0x0304, - NES_AEQE_AEID_DDP_UBE_INVALID_MO = 0x0305, - NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE = 0x0306, - NES_AEQE_AEID_DDP_UBE_INVALID_QN = 0x0307, - NES_AEQE_AEID_DDP_NO_L_BIT = 0x0308, - NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION = 0x0311, - NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE = 0x0312, - NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST = 0x0313, - NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP = 0x0314, - NES_AEQE_AEID_INVALID_ARP_ENTRY = 0x0401, - NES_AEQE_AEID_INVALID_TCP_OPTION_RCVD = 0x0402, - NES_AEQE_AEID_STALE_ARP_ENTRY = 0x0403, - NES_AEQE_AEID_LLP_CLOSE_COMPLETE = 0x0501, - NES_AEQE_AEID_LLP_CONNECTION_RESET = 0x0502, - NES_AEQE_AEID_LLP_FIN_RECEIVED = 0x0503, - NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH = 0x0504, - NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR = 0x0505, - NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE = 0x0506, - NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL = 0x0507, - NES_AEQE_AEID_LLP_SYN_RECEIVED = 0x0508, - NES_AEQE_AEID_LLP_TERMINATE_RECEIVED = 0x0509, - NES_AEQE_AEID_LLP_TOO_MANY_RETRIES = 0x050a, - NES_AEQE_AEID_LLP_TOO_MANY_KEEPALIVE_RETRIES = 0x050b, - NES_AEQE_AEID_RESET_SENT = 0x0601, - NES_AEQE_AEID_TERMINATE_SENT = 0x0602, - NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC = 0x0700 -}; - -enum nes_iwarp_sq_opcodes { - NES_IWARP_SQ_WQE_WRPDU = (1<<15), - NES_IWARP_SQ_WQE_PSH = (1<<21), - NES_IWARP_SQ_WQE_STREAMING = (1<<23), - NES_IWARP_SQ_WQE_IMM_DATA = (1<<28), - NES_IWARP_SQ_WQE_READ_FENCE = (1<<29), - NES_IWARP_SQ_WQE_LOCAL_FENCE = (1<<30), - NES_IWARP_SQ_WQE_SIGNALED_COMPL = (1<<31), -}; - -enum nes_iwarp_sq_wqe_bits { - NES_IWARP_SQ_OP_RDMAW = 0, - NES_IWARP_SQ_OP_RDMAR = 1, - NES_IWARP_SQ_OP_SEND = 3, - NES_IWARP_SQ_OP_SENDINV = 4, - NES_IWARP_SQ_OP_SENDSE = 5, - NES_IWARP_SQ_OP_SENDSEINV = 6, - NES_IWARP_SQ_OP_BIND = 8, - NES_IWARP_SQ_OP_FAST_REG = 9, - NES_IWARP_SQ_OP_LOCINV = 10, - NES_IWARP_SQ_OP_RDMAR_LOCINV = 11, - NES_IWARP_SQ_OP_NOP = 12, -}; - -enum nes_iwarp_cqe_major_code { - NES_IWARP_CQE_MAJOR_FLUSH = 1, - NES_IWARP_CQE_MAJOR_DRV = 0x8000 -}; - -enum nes_iwarp_cqe_minor_code { - NES_IWARP_CQE_MINOR_FLUSH = 1 -}; - -#define NES_EEPROM_READ_REQUEST (1<<16) -#define NES_MAC_ADDR_VALID (1<<20) - -/* - * NES index registers init values. 
- */ -struct nes_init_values { - u32 index; - u32 data; - u8 wrt; -}; - -/* - * NES registers in BAR0. - */ -struct nes_pci_regs { - u32 int_status; - u32 int_mask; - u32 int_pending; - u32 intf_int_status; - u32 intf_int_mask; - u32 other_regs[59]; /* pad out to 256 bytes for now */ -}; - -#define NES_CQP_SQ_SIZE 128 -#define NES_CCQ_SIZE 128 -#define NES_NIC_WQ_SIZE 512 -#define NES_NIC_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_512) | (NES_NIC_CTX_SQ_SIZE_512)) -#define NES_NIC_BACK_STORE 0x00038000 - -struct nes_device; - -struct nes_hw_nic_qp_context { - __le32 context_words[6]; -}; - -struct nes_hw_nic_sq_wqe { - __le32 wqe_words[16]; -}; - -struct nes_hw_nic_rq_wqe { - __le32 wqe_words[16]; -}; - -struct nes_hw_nic_cqe { - __le32 cqe_words[4]; -}; - -struct nes_hw_cqp_qp_context { - __le32 context_words[4]; -}; - -struct nes_hw_cqp_wqe { - __le32 wqe_words[16]; -}; - -struct nes_hw_qp_wqe { - __le32 wqe_words[32]; -}; - -struct nes_hw_cqe { - __le32 cqe_words[8]; -}; - -struct nes_hw_ceqe { - __le32 ceqe_words[2]; -}; - -struct nes_hw_aeqe { - __le32 aeqe_words[4]; -}; - -struct nes_cqp_request { - union { - u64 cqp_callback_context; - void *cqp_callback_pointer; - }; - wait_queue_head_t waitq; - struct nes_hw_cqp_wqe cqp_wqe; - struct list_head list; - atomic_t refcount; - void (*cqp_callback)(struct nes_device *nesdev, struct nes_cqp_request *cqp_request); - u16 major_code; - u16 minor_code; - u8 waiting; - u8 request_done; - u8 dynamic; - u8 callback; -}; - -struct nes_hw_cqp { - struct nes_hw_cqp_wqe *sq_vbase; - dma_addr_t sq_pbase; - spinlock_t lock; - wait_queue_head_t waitq; - u16 qp_id; - u16 sq_head; - u16 sq_tail; - u16 sq_size; -}; - -#define NES_FIRST_FRAG_SIZE 128 -struct nes_first_frag { - u8 buffer[NES_FIRST_FRAG_SIZE]; -}; - -struct nes_hw_nic { - struct nes_first_frag *first_frag_vbase; /* virtual address of first frags */ - struct nes_hw_nic_sq_wqe *sq_vbase; /* virtual address of sq */ - struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */ - struct sk_buff *tx_skb[NES_NIC_WQ_SIZE]; - struct sk_buff *rx_skb[NES_NIC_WQ_SIZE]; - dma_addr_t frag_paddr[NES_NIC_WQ_SIZE]; - unsigned long first_frag_overflow[BITS_TO_LONGS(NES_NIC_WQ_SIZE)]; - dma_addr_t sq_pbase; /* PCI memory for host rings */ - dma_addr_t rq_pbase; /* PCI memory for host rings */ - - u16 qp_id; - u16 sq_head; - u16 sq_tail; - u16 sq_size; - u16 rq_head; - u16 rq_tail; - u16 rq_size; - u8 replenishing_rq; - u8 reserved; - - spinlock_t rq_lock; -}; - -struct nes_hw_nic_cq { - struct nes_hw_nic_cqe volatile *cq_vbase; /* PCI memory for host rings */ - void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_nic_cq *cq); - dma_addr_t cq_pbase; /* PCI memory for host rings */ - int rx_cqes_completed; - int cqe_allocs_pending; - int rx_pkts_indicated; - u16 cq_head; - u16 cq_size; - u16 cq_number; - u8 cqes_pending; -}; - -struct nes_hw_qp { - struct nes_hw_qp_wqe *sq_vbase; /* PCI memory for host rings */ - struct nes_hw_qp_wqe *rq_vbase; /* PCI memory for host rings */ - void *q2_vbase; /* PCI memory for host rings */ - dma_addr_t sq_pbase; /* PCI memory for host rings */ - dma_addr_t rq_pbase; /* PCI memory for host rings */ - dma_addr_t q2_pbase; /* PCI memory for host rings */ - u32 qp_id; - u16 sq_head; - u16 sq_tail; - u16 sq_size; - u16 rq_head; - u16 rq_tail; - u16 rq_size; - u8 rq_encoded_size; - u8 sq_encoded_size; -}; - -struct nes_hw_cq { - struct nes_hw_cqe *cq_vbase; /* PCI memory for host rings */ - void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_cq *cq); - dma_addr_t cq_pbase; 
/* PCI memory for host rings */ - u16 cq_head; - u16 cq_size; - u16 cq_number; -}; - -struct nes_hw_ceq { - struct nes_hw_ceqe volatile *ceq_vbase; /* PCI memory for host rings */ - dma_addr_t ceq_pbase; /* PCI memory for host rings */ - u16 ceq_head; - u16 ceq_size; -}; - -struct nes_hw_aeq { - struct nes_hw_aeqe volatile *aeq_vbase; /* PCI memory for host rings */ - dma_addr_t aeq_pbase; /* PCI memory for host rings */ - u16 aeq_head; - u16 aeq_size; -}; - -struct nic_qp_map { - u8 qpid; - u8 nic_index; - u8 logical_port; - u8 is_hnic; -}; - -#define NES_CQP_ARP_AEQ_INDEX_MASK 0x000f0000 -#define NES_CQP_ARP_AEQ_INDEX_SHIFT 16 - -#define NES_CQP_APBVT_ADD 0x00008000 -#define NES_CQP_APBVT_NIC_SHIFT 16 - -#define NES_ARP_ADD 1 -#define NES_ARP_DELETE 2 -#define NES_ARP_RESOLVE 3 - -#define NES_MAC_SW_IDLE 0 -#define NES_MAC_SW_INTERRUPT 1 -#define NES_MAC_SW_MH 2 - -struct nes_arp_entry { - u32 ip_addr; - u8 mac_addr[ETH_ALEN]; -}; - -#define NES_NIC_FAST_TIMER 96 -#define NES_NIC_FAST_TIMER_LOW 40 -#define NES_NIC_FAST_TIMER_HIGH 1000 -#define DEFAULT_NES_QL_HIGH 256 -#define DEFAULT_NES_QL_LOW 16 -#define DEFAULT_NES_QL_TARGET 64 -#define DEFAULT_JUMBO_NES_QL_LOW 12 -#define DEFAULT_JUMBO_NES_QL_TARGET 40 -#define DEFAULT_JUMBO_NES_QL_HIGH 128 -#define NES_NIC_CQ_DOWNWARD_TREND 16 -#define NES_PFT_SIZE 48 - -#define NES_MGT_WQ_COUNT 32 -#define NES_MGT_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_32) | (NES_NIC_CTX_SQ_SIZE_32)) -#define NES_MGT_QP_OFFSET 36 -#define NES_MGT_QP_COUNT 4 - -struct nes_hw_tune_timer { - /* u16 cq_count; */ - u16 threshold_low; - u16 threshold_target; - u16 threshold_high; - u16 timer_in_use; - u16 timer_in_use_old; - u16 timer_in_use_min; - u16 timer_in_use_max; - u8 timer_direction_upward; - u8 timer_direction_downward; - u16 cq_count_old; - u8 cq_direction_downward; -}; - -#define NES_TIMER_INT_LIMIT 2 -#define NES_TIMER_INT_LIMIT_DYNAMIC 10 -#define NES_TIMER_ENABLE_LIMIT 4 -#define NES_MAX_LINK_INTERRUPTS 128 -#define NES_MAX_LINK_CHECK 200 - -struct nes_adapter { - u64 fw_ver; - unsigned long *allocated_qps; - unsigned long *allocated_cqs; - unsigned long *allocated_mrs; - unsigned long *allocated_pds; - unsigned long *allocated_arps; - struct nes_qp **qp_table; - struct workqueue_struct *work_q; - - struct list_head list; - struct list_head active_listeners; - /* list of the netdev's associated with each logical port */ - struct list_head nesvnic_list[4]; - - struct timer_list mh_timer; - struct timer_list lc_timer; - struct work_struct work; - spinlock_t resource_lock; - spinlock_t phy_lock; - spinlock_t pbl_lock; - spinlock_t periodic_timer_lock; - - struct nes_arp_entry arp_table[NES_MAX_ARP_TABLE_SIZE]; - - /* Adapter CEQ and AEQs */ - struct nes_hw_ceq ceq[16]; - struct nes_hw_aeq aeq[8]; - - struct nes_hw_tune_timer tune_timer; - - unsigned long doorbell_start; - - u32 hw_rev; - u32 vendor_id; - u32 vendor_part_id; - u32 device_cap_flags; - u32 tick_delta; - u32 timer_int_req; - u32 arp_table_size; - u32 next_arp_index; - - u32 max_mr; - u32 max_256pbl; - u32 max_4kpbl; - u32 free_256pbl; - u32 free_4kpbl; - u32 max_mr_size; - u32 max_qp; - u32 next_qp; - u32 max_irrq; - u32 max_qp_wr; - u32 max_sge; - u32 max_cq; - u32 next_cq; - u32 max_cqe; - u32 max_pd; - u32 base_pd; - u32 next_pd; - u32 hte_index_mask; - - /* EEPROM information */ - u32 rx_pool_size; - u32 tx_pool_size; - u32 rx_threshold; - u32 tcp_timer_core_clk_divisor; - u32 iwarp_config; - u32 cm_config; - u32 sws_timer_config; - u32 tcp_config1; - u32 wqm_wat; - u32 core_clock; - u32 
firmware_version; - u32 eeprom_version; - - u32 nic_rx_eth_route_err; - - u32 et_rx_coalesce_usecs; - u32 et_rx_max_coalesced_frames; - u32 et_rx_coalesce_usecs_irq; - u32 et_rx_max_coalesced_frames_irq; - u32 et_pkt_rate_low; - u32 et_rx_coalesce_usecs_low; - u32 et_rx_max_coalesced_frames_low; - u32 et_pkt_rate_high; - u32 et_rx_coalesce_usecs_high; - u32 et_rx_max_coalesced_frames_high; - u32 et_rate_sample_interval; - u32 timer_int_limit; - u32 wqm_quanta; - u8 allow_unaligned_fpdus; - - /* Adapter base MAC address */ - u32 mac_addr_low; - u16 mac_addr_high; - - u16 firmware_eeprom_offset; - u16 software_eeprom_offset; - - u16 max_irrq_wr; - - /* pd config for each port */ - u16 pd_config_size[4]; - u16 pd_config_base[4]; - - u16 link_interrupt_count[4]; - u8 crit_error_count[32]; - - /* the phy index for each port */ - u8 phy_index[4]; - u8 mac_sw_state[4]; - u8 mac_link_down[4]; - u8 phy_type[4]; - u8 log_port; - - /* PCI information */ - struct nes_device *nesdev; - unsigned int devfn; - unsigned char bus_number; - unsigned char OneG_Mode; - - unsigned char ref_count; - u8 netdev_count; - u8 netdev_max; /* from host nic address count in EEPROM */ - u8 port_count; - u8 virtwq; - u8 send_term_ok; - u8 et_use_adaptive_rx_coalesce; - u8 adapter_fcn_count; - u8 pft_mcast_map[NES_PFT_SIZE]; -}; - -struct nes_pbl { - u64 *pbl_vbase; - dma_addr_t pbl_pbase; - struct page *page; - unsigned long user_base; - u32 pbl_size; - struct list_head list; - /* TODO: need to add list for two level tables */ -}; - -#define NES_4K_PBL_CHUNK_SIZE 4096 - -struct nes_fast_mr_wqe_pbl { - u64 *kva; - dma_addr_t paddr; -}; - -struct nes_listener { - struct work_struct work; - struct workqueue_struct *wq; - struct nes_vnic *nesvnic; - struct iw_cm_id *cm_id; - struct list_head list; - unsigned long socket; - u8 accept_failed; -}; - -struct nes_ib_device; - -#define NES_EVENT_DELAY msecs_to_jiffies(100) - -struct nes_vnic { - struct nes_ib_device *nesibdev; - u64 sq_full; - u64 tso_requests; - u64 segmented_tso_requests; - u64 linearized_skbs; - u64 tx_sw_dropped; - u64 endnode_nstat_rx_discard; - u64 endnode_nstat_rx_octets; - u64 endnode_nstat_rx_frames; - u64 endnode_nstat_tx_octets; - u64 endnode_nstat_tx_frames; - u64 endnode_ipv4_tcp_retransmits; - /* void *mem; */ - struct nes_device *nesdev; - struct net_device *netdev; - atomic_t rx_skbs_needed; - atomic_t rx_skb_timer_running; - int budget; - u32 msg_enable; - /* u32 tx_avail; */ - __be32 local_ipaddr; - struct napi_struct napi; - spinlock_t tx_lock; /* could use netdev tx lock? 
*/ - struct timer_list rq_wqes_timer; - u32 nic_mem_size; - void *nic_vbase; - dma_addr_t nic_pbase; - struct nes_hw_nic nic; - struct nes_hw_nic_cq nic_cq; - u32 mcrq_qp_id; - struct nes_ucontext *mcrq_ucontext; - struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev); - void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *); - int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr ); - struct net_device_stats netstats; - /* used to put the netdev on the adapters logical port list */ - struct list_head list; - u16 max_frame_size; - u8 netdev_open; - u8 linkup; - u8 logical_port; - u8 netdev_index; /* might not be needed, indexes nesdev->netdev */ - u8 perfect_filter_index; - u8 nic_index; - u8 qp_nic_index[4]; - u8 next_qp_nic_index; - u8 of_device_registered; - u8 rdma_enabled; - struct timer_list event_timer; - enum ib_event_type delayed_event; - enum ib_event_type last_dispatched_event; - spinlock_t port_ibevent_lock; - u32 mgt_mem_size; - void *mgt_vbase; - dma_addr_t mgt_pbase; - struct nes_vnic_mgt *mgtvnic[NES_MGT_QP_COUNT]; - struct task_struct *mgt_thread; - wait_queue_head_t mgt_wait_queue; - struct sk_buff_head mgt_skb_list; - -}; - -struct nes_ib_device { - struct ib_device ibdev; - struct nes_vnic *nesvnic; - - /* Virtual RNIC Limits */ - u32 max_mr; - u32 max_qp; - u32 max_cq; - u32 max_pd; - u32 num_mr; - u32 num_qp; - u32 num_cq; - u32 num_pd; -}; - -enum nes_hdrct_flags { - DDP_LEN_FLAG = 0x80, - DDP_HDR_FLAG = 0x40, - RDMA_HDR_FLAG = 0x20 -}; - -enum nes_term_layers { - LAYER_RDMA = 0, - LAYER_DDP = 1, - LAYER_MPA = 2 -}; - -enum nes_term_error_types { - RDMAP_CATASTROPHIC = 0, - RDMAP_REMOTE_PROT = 1, - RDMAP_REMOTE_OP = 2, - DDP_CATASTROPHIC = 0, - DDP_TAGGED_BUFFER = 1, - DDP_UNTAGGED_BUFFER = 2, - DDP_LLP = 3 -}; - -enum nes_term_rdma_errors { - RDMAP_INV_STAG = 0x00, - RDMAP_INV_BOUNDS = 0x01, - RDMAP_ACCESS = 0x02, - RDMAP_UNASSOC_STAG = 0x03, - RDMAP_TO_WRAP = 0x04, - RDMAP_INV_RDMAP_VER = 0x05, - RDMAP_UNEXPECTED_OP = 0x06, - RDMAP_CATASTROPHIC_LOCAL = 0x07, - RDMAP_CATASTROPHIC_GLOBAL = 0x08, - RDMAP_CANT_INV_STAG = 0x09, - RDMAP_UNSPECIFIED = 0xff -}; - -enum nes_term_ddp_errors { - DDP_CATASTROPHIC_LOCAL = 0x00, - DDP_TAGGED_INV_STAG = 0x00, - DDP_TAGGED_BOUNDS = 0x01, - DDP_TAGGED_UNASSOC_STAG = 0x02, - DDP_TAGGED_TO_WRAP = 0x03, - DDP_TAGGED_INV_DDP_VER = 0x04, - DDP_UNTAGGED_INV_QN = 0x01, - DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02, - DDP_UNTAGGED_INV_MSN_RANGE = 0x03, - DDP_UNTAGGED_INV_MO = 0x04, - DDP_UNTAGGED_INV_TOO_LONG = 0x05, - DDP_UNTAGGED_INV_DDP_VER = 0x06 -}; - -enum nes_term_mpa_errors { - MPA_CLOSED = 0x01, - MPA_CRC = 0x02, - MPA_MARKER = 0x03, - MPA_REQ_RSP = 0x04, -}; - -struct nes_terminate_hdr { - u8 layer_etype; - u8 error_code; - u8 hdrct; - u8 rsvd; -}; - -/* Used to determine how to fill in terminate error codes */ -#define IWARP_OPCODE_WRITE 0 -#define IWARP_OPCODE_READREQ 1 -#define IWARP_OPCODE_READRSP 2 -#define IWARP_OPCODE_SEND 3 -#define IWARP_OPCODE_SEND_INV 4 -#define IWARP_OPCODE_SEND_SE 5 -#define IWARP_OPCODE_SEND_SE_INV 6 -#define IWARP_OPCODE_TERM 7 - -/* These values are used only during terminate processing */ -#define TERM_DDP_LEN_TAGGED 14 -#define TERM_DDP_LEN_UNTAGGED 18 -#define TERM_RDMA_LEN 28 -#define RDMA_OPCODE_MASK 0x0f -#define RDMA_READ_REQ_OPCODE 1 -#define BAD_FRAME_OFFSET 64 -#define CQE_MAJOR_DRV 0x8000 - -/* Used for link status recheck after interrupt processing */ -#define NES_LINK_RECHECK_DELAY msecs_to_jiffies(50) -#define NES_LINK_RECHECK_MAX 60 - -#endif /* 
__NES_HW_H */ diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c deleted file mode 100644 index cc4dce5c3e5f..000000000000 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ /dev/null @@ -1,1155 +0,0 @@ -/* - * Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/skbuff.h> -#include <linux/etherdevice.h> -#include <linux/kthread.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <net/tcp.h> -#include "nes.h" -#include "nes_mgt.h" - -atomic_t pau_qps_created; -atomic_t pau_qps_destroyed; - -static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic) -{ - unsigned long flags; - dma_addr_t bus_address; - struct sk_buff *skb; - struct nes_hw_nic_rq_wqe *nic_rqe; - struct nes_hw_mgt *nesmgt; - struct nes_device *nesdev; - struct nes_rskb_cb *cb; - u32 rx_wqes_posted = 0; - - nesmgt = &mgtvnic->mgt; - nesdev = mgtvnic->nesvnic->nesdev; - spin_lock_irqsave(&nesmgt->rq_lock, flags); - if (nesmgt->replenishing_rq != 0) { - if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) && - (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) { - atomic_set(&mgtvnic->rx_skb_timer_running, 1); - spin_unlock_irqrestore(&nesmgt->rq_lock, flags); - mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */ - add_timer(&mgtvnic->rq_wqes_timer); - } else { - spin_unlock_irqrestore(&nesmgt->rq_lock, flags); - } - return; - } - nesmgt->replenishing_rq = 1; - spin_unlock_irqrestore(&nesmgt->rq_lock, flags); - do { - skb = dev_alloc_skb(mgtvnic->nesvnic->max_frame_size); - if (skb) { - skb->dev = mgtvnic->nesvnic->netdev; - - bus_address = pci_map_single(nesdev->pcidev, - skb->data, mgtvnic->nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); - cb = (struct nes_rskb_cb *)&skb->cb[0]; - cb->busaddr = bus_address; - cb->maplen = mgtvnic->nesvnic->max_frame_size; - - nic_rqe = &nesmgt->rq_vbase[mgtvnic->mgt.rq_head]; - nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = - cpu_to_le32(mgtvnic->nesvnic->max_frame_size); - nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; - nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = - cpu_to_le32((u32)bus_address); - nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = - 
cpu_to_le32((u32)((u64)bus_address >> 32)); - nesmgt->rx_skb[nesmgt->rq_head] = skb; - nesmgt->rq_head++; - nesmgt->rq_head &= nesmgt->rq_size - 1; - atomic_dec(&mgtvnic->rx_skbs_needed); - barrier(); - if (++rx_wqes_posted == 255) { - nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id); - rx_wqes_posted = 0; - } - } else { - spin_lock_irqsave(&nesmgt->rq_lock, flags); - if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) && - (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) { - atomic_set(&mgtvnic->rx_skb_timer_running, 1); - spin_unlock_irqrestore(&nesmgt->rq_lock, flags); - mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */ - add_timer(&mgtvnic->rq_wqes_timer); - } else { - spin_unlock_irqrestore(&nesmgt->rq_lock, flags); - } - break; - } - } while (atomic_read(&mgtvnic->rx_skbs_needed)); - barrier(); - if (rx_wqes_posted) - nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id); - nesmgt->replenishing_rq = 0; -} - -/** - * nes_mgt_rq_wqes_timeout - */ -static void nes_mgt_rq_wqes_timeout(struct timer_list *t) -{ - struct nes_vnic_mgt *mgtvnic = from_timer(mgtvnic, t, - rq_wqes_timer); - - atomic_set(&mgtvnic->rx_skb_timer_running, 0); - if (atomic_read(&mgtvnic->rx_skbs_needed)) - nes_replenish_mgt_rq(mgtvnic); -} - -/** - * nes_mgt_free_skb - unmap and free skb - */ -static void nes_mgt_free_skb(struct nes_device *nesdev, struct sk_buff *skb, u32 dir) -{ - struct nes_rskb_cb *cb; - - cb = (struct nes_rskb_cb *)&skb->cb[0]; - pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, dir); - cb->busaddr = 0; - dev_kfree_skb_any(skb); -} - -/** - * nes_download_callback - handle download completions - */ -static void nes_download_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request) -{ - struct pau_fpdu_info *fpdu_info = cqp_request->cqp_callback_pointer; - struct nes_qp *nesqp = fpdu_info->nesqp; - struct sk_buff *skb; - int i; - - for (i = 0; i < fpdu_info->frag_cnt; i++) { - skb = fpdu_info->frags[i].skb; - if (fpdu_info->frags[i].cmplt) { - nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE); - nes_rem_ref_cm_node(nesqp->cm_node); - } - } - - if (fpdu_info->hdr_vbase) - pci_free_consistent(nesdev->pcidev, fpdu_info->hdr_len, - fpdu_info->hdr_vbase, fpdu_info->hdr_pbase); - kfree(fpdu_info); -} - -/** - * nes_get_seq - Get the seq, ack_seq and window from the packet - */ -static u32 nes_get_seq(struct sk_buff *skb, u32 *ack, u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd) -{ - struct nes_rskb_cb *cb = (struct nes_rskb_cb *)&skb->cb[0]; - struct iphdr *iph = (struct iphdr *)(cb->data_start + ETH_HLEN); - struct tcphdr *tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); - - *ack = be32_to_cpu(tcph->ack_seq); - *wnd = be16_to_cpu(tcph->window); - *fin_rcvd = tcph->fin; - *rst_rcvd = tcph->rst; - return be32_to_cpu(tcph->seq); -} - -/** - * nes_get_next_skb - Get the next skb based on where current skb is in the queue - */ -static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp *nesqp, - struct sk_buff *skb, u32 nextseq, u32 *ack, - u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd) -{ - u32 seq; - bool processacks; - struct sk_buff *old_skb; - - if (skb) { - /* Continue processing fpdu */ - skb = skb_peek_next(skb, &nesqp->pau_list); - if (!skb) - goto out; - processacks = false; - } else { - /* Starting a new one */ - if (skb_queue_empty(&nesqp->pau_list)) - goto out; - skb = skb_peek(&nesqp->pau_list); - processacks = true; - } - - while (1) { - if 
(skb_queue_empty(&nesqp->pau_list)) - goto out; - - seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd); - if (seq == nextseq) { - if (skb->len || processacks) - break; - } else if (after(seq, nextseq)) { - goto out; - } - - old_skb = skb; - skb = skb_peek_next(skb, &nesqp->pau_list); - skb_unlink(old_skb, &nesqp->pau_list); - nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE); - nes_rem_ref_cm_node(nesqp->cm_node); - if (!skb) - goto out; - } - return skb; - -out: - return NULL; -} - -/** - * get_fpdu_info - Find the next complete fpdu and return its fragments. - */ -static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, - struct pau_fpdu_info **pau_fpdu_info) -{ - struct sk_buff *skb; - struct iphdr *iph; - struct tcphdr *tcph; - struct nes_rskb_cb *cb; - struct pau_fpdu_info *fpdu_info = NULL; - struct pau_fpdu_frag frags[MAX_FPDU_FRAGS]; - u32 fpdu_len = 0; - u32 tmp_len; - int frag_cnt = 0; - u32 tot_len; - u32 frag_tot; - u32 ack; - u32 fin_rcvd; - u32 rst_rcvd; - u16 wnd; - int i; - int rc = 0; - - *pau_fpdu_info = NULL; - - skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd); - if (!skb) - goto out; - - cb = (struct nes_rskb_cb *)&skb->cb[0]; - if (skb->len) { - fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING; - fpdu_len = (fpdu_len + 3) & 0xfffffffc; - tmp_len = fpdu_len; - - /* See if we have all of the fpdu */ - frag_tot = 0; - memset(&frags, 0, sizeof frags); - for (i = 0; i < MAX_FPDU_FRAGS; i++) { - frags[i].physaddr = cb->busaddr; - frags[i].physaddr += skb->data - cb->data_start; - frags[i].frag_len = min(tmp_len, skb->len); - frags[i].skb = skb; - frags[i].cmplt = (skb->len == frags[i].frag_len); - frag_tot += frags[i].frag_len; - frag_cnt++; - - tmp_len -= frags[i].frag_len; - if (tmp_len == 0) - break; - - skb = nes_get_next_skb(nesdev, nesqp, skb, - nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd); - if (!skb) - goto out; - if (rst_rcvd) { - /* rst received in the middle of fpdu */ - for (; i >= 0; i--) { - skb_unlink(frags[i].skb, &nesqp->pau_list); - nes_mgt_free_skb(nesdev, frags[i].skb, PCI_DMA_TODEVICE); - } - cb = (struct nes_rskb_cb *)&skb->cb[0]; - frags[0].physaddr = cb->busaddr; - frags[0].physaddr += skb->data - cb->data_start; - frags[0].frag_len = skb->len; - frags[0].skb = skb; - frags[0].cmplt = true; - frag_cnt = 1; - break; - } - - cb = (struct nes_rskb_cb *)&skb->cb[0]; - } - } else { - /* no data */ - frags[0].physaddr = cb->busaddr; - frags[0].frag_len = 0; - frags[0].skb = skb; - frags[0].cmplt = true; - frag_cnt = 1; - } - - /* Found one */ - fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC); - if (!fpdu_info) { - rc = -ENOMEM; - goto out; - } - - fpdu_info->cqp_request = nes_get_cqp_request(nesdev); - if (fpdu_info->cqp_request == NULL) { - nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n"); - rc = -ENOMEM; - goto out; - } - - cb = (struct nes_rskb_cb *)&frags[0].skb->cb[0]; - iph = (struct iphdr *)(cb->data_start + ETH_HLEN); - tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); - fpdu_info->hdr_len = (((unsigned char *)tcph) + 4 * (tcph->doff)) - cb->data_start; - fpdu_info->data_len = fpdu_len; - tot_len = fpdu_info->hdr_len + fpdu_len - ETH_HLEN; - - if (frags[0].cmplt) { - fpdu_info->hdr_pbase = cb->busaddr; - fpdu_info->hdr_vbase = NULL; - } else { - fpdu_info->hdr_vbase = pci_alloc_consistent(nesdev->pcidev, - fpdu_info->hdr_len, &fpdu_info->hdr_pbase); - if (!fpdu_info->hdr_vbase) { - nes_debug(NES_DBG_PAU, "Unable to allocate 
memory for pau first frag\n"); - rc = -ENOMEM; - goto out; - } - - /* Copy hdrs, adjusting len and seqnum */ - memcpy(fpdu_info->hdr_vbase, cb->data_start, fpdu_info->hdr_len); - iph = (struct iphdr *)(fpdu_info->hdr_vbase + ETH_HLEN); - tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); - } - - iph->tot_len = cpu_to_be16(tot_len); - iph->saddr = cpu_to_be32(0x7f000001); - - tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt); - tcph->ack_seq = cpu_to_be32(ack); - tcph->window = cpu_to_be16(wnd); - - nesqp->pau_rcv_nxt += fpdu_len + fin_rcvd; - - memcpy(fpdu_info->frags, frags, sizeof(fpdu_info->frags)); - fpdu_info->frag_cnt = frag_cnt; - fpdu_info->nesqp = nesqp; - *pau_fpdu_info = fpdu_info; - - /* Update skb's for next pass */ - for (i = 0; i < frag_cnt; i++) { - cb = (struct nes_rskb_cb *)&frags[i].skb->cb[0]; - skb_pull(frags[i].skb, frags[i].frag_len); - - if (frags[i].skb->len == 0) { - /* Pull skb off the list - it will be freed in the callback */ - if (!skb_queue_empty(&nesqp->pau_list)) - skb_unlink(frags[i].skb, &nesqp->pau_list); - } else { - /* Last skb still has data so update the seq */ - iph = (struct iphdr *)(cb->data_start + ETH_HLEN); - tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); - tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt); - } - } - -out: - if (rc) { - if (fpdu_info) { - if (fpdu_info->cqp_request) - nes_put_cqp_request(nesdev, fpdu_info->cqp_request); - kfree(fpdu_info); - } - } - return rc; -} - -/** - * forward_fpdu - send complete fpdus, one at a time - */ -static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) -{ - struct nes_device *nesdev = nesvnic->nesdev; - struct pau_fpdu_info *fpdu_info; - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_cqp_request *cqp_request; - unsigned long flags; - u64 u64tmp; - u32 u32tmp; - int rc; - - while (1) { - spin_lock_irqsave(&nesqp->pau_lock, flags); - rc = get_fpdu_info(nesdev, nesqp, &fpdu_info); - if (rc || (fpdu_info == NULL)) { - spin_unlock_irqrestore(&nesqp->pau_lock, flags); - return rc; - } - - cqp_request = fpdu_info->cqp_request; - cqp_wqe = &cqp_request->cqp_wqe; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_OPCODE_IDX, - NES_CQP_DOWNLOAD_SEGMENT | - (((u32)nesvnic->logical_port) << NES_CQP_OP_LOGICAL_PORT_SHIFT)); - - u32tmp = fpdu_info->hdr_len << 16; - u32tmp |= fpdu_info->hdr_len + (u32)fpdu_info->data_len; - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX, - u32tmp); - - u32tmp = (fpdu_info->frags[1].frag_len << 16) | fpdu_info->frags[0].frag_len; - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_2_1_IDX, - u32tmp); - - u32tmp = (fpdu_info->frags[3].frag_len << 16) | fpdu_info->frags[2].frag_len; - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_4_3_IDX, - u32tmp); - - u64tmp = (u64)fpdu_info->hdr_pbase; - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX, - lower_32_bits(u64tmp)); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX, - upper_32_bits(u64tmp)); - - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, - lower_32_bits(fpdu_info->frags[0].physaddr)); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_HIGH_IDX, - upper_32_bits(fpdu_info->frags[0].physaddr)); - - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_LOW_IDX, - lower_32_bits(fpdu_info->frags[1].physaddr)); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_HIGH_IDX, - upper_32_bits(fpdu_info->frags[1].physaddr)); 
- - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_LOW_IDX, - lower_32_bits(fpdu_info->frags[2].physaddr)); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_HIGH_IDX, - upper_32_bits(fpdu_info->frags[2].physaddr)); - - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_LOW_IDX, - lower_32_bits(fpdu_info->frags[3].physaddr)); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_HIGH_IDX, - upper_32_bits(fpdu_info->frags[3].physaddr)); - - cqp_request->cqp_callback_pointer = fpdu_info; - cqp_request->callback = 1; - cqp_request->cqp_callback = nes_download_callback; - - atomic_set(&cqp_request->refcount, 1); - nes_post_cqp_request(nesdev, cqp_request); - spin_unlock_irqrestore(&nesqp->pau_lock, flags); - } - - return 0; -} - -static void process_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) -{ - int again = 1; - unsigned long flags; - - do { - /* Ignore rc - if it failed, tcp retries will cause it to try again */ - forward_fpdus(nesvnic, nesqp); - - spin_lock_irqsave(&nesqp->pau_lock, flags); - if (nesqp->pau_pending) { - nesqp->pau_pending = 0; - } else { - nesqp->pau_busy = 0; - again = 0; - } - - spin_unlock_irqrestore(&nesqp->pau_lock, flags); - } while (again); -} - -/** - * queue_fpdus - Handle fpdu's that hw passed up to sw - */ -static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp) -{ - struct sk_buff *tmpskb; - struct nes_rskb_cb *cb; - struct iphdr *iph; - struct tcphdr *tcph; - unsigned char *tcph_end; - u32 rcv_nxt; - u32 rcv_wnd; - u32 seqnum; - u32 len; - bool process_it = false; - unsigned long flags; - - /* Move data ptr to after tcp header */ - iph = (struct iphdr *)skb->data; - tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); - seqnum = be32_to_cpu(tcph->seq); - tcph_end = (((char *)tcph) + (4 * tcph->doff)); - - len = be16_to_cpu(iph->tot_len); - if (skb->len > len) - skb_trim(skb, len); - skb_pull(skb, tcph_end - skb->data); - - /* Initialize tracking values */ - cb = (struct nes_rskb_cb *)&skb->cb[0]; - cb->seqnum = seqnum; - - /* Make sure data is in the receive window */ - rcv_nxt = nesqp->pau_rcv_nxt; - rcv_wnd = le32_to_cpu(nesqp->nesqp_context->rcv_wnd); - if (!between(seqnum, rcv_nxt, (rcv_nxt + rcv_wnd))) { - nes_mgt_free_skb(nesvnic->nesdev, skb, PCI_DMA_TODEVICE); - nes_rem_ref_cm_node(nesqp->cm_node); - return; - } - - spin_lock_irqsave(&nesqp->pau_lock, flags); - - if (nesqp->pau_busy) - nesqp->pau_pending = 1; - else - nesqp->pau_busy = 1; - - /* Queue skb by sequence number */ - if (skb_queue_len(&nesqp->pau_list) == 0) { - __skb_queue_head(&nesqp->pau_list, skb); - } else { - skb_queue_walk(&nesqp->pau_list, tmpskb) { - cb = (struct nes_rskb_cb *)&tmpskb->cb[0]; - if (before(seqnum, cb->seqnum)) - break; - } - __skb_insert(skb, tmpskb->prev, tmpskb, &nesqp->pau_list); - } - if (nesqp->pau_state == PAU_READY) - process_it = true; - spin_unlock_irqrestore(&nesqp->pau_lock, flags); - - if (process_it) - process_fpdus(nesvnic, nesqp); - - return; -} - -/** - * mgt_thread - Handle mgt skbs in a safe context - */ -static int mgt_thread(void *context) -{ - struct nes_vnic *nesvnic = context; - struct sk_buff *skb; - struct nes_rskb_cb *cb; - - while (!kthread_should_stop()) { - wait_event_interruptible(nesvnic->mgt_wait_queue, - skb_queue_len(&nesvnic->mgt_skb_list) || kthread_should_stop()); - while ((skb_queue_len(&nesvnic->mgt_skb_list)) && !kthread_should_stop()) { - skb = skb_dequeue(&nesvnic->mgt_skb_list); - cb = (struct nes_rskb_cb *)&skb->cb[0]; - 
cb->data_start = skb->data - ETH_HLEN; - cb->busaddr = pci_map_single(nesvnic->nesdev->pcidev, cb->data_start, - nesvnic->max_frame_size, PCI_DMA_TODEVICE); - queue_fpdus(skb, nesvnic, cb->nesqp); - } - } - - /* Closing down so delete any entries on the queue */ - while (skb_queue_len(&nesvnic->mgt_skb_list)) { - skb = skb_dequeue(&nesvnic->mgt_skb_list); - cb = (struct nes_rskb_cb *)&skb->cb[0]; - nes_rem_ref_cm_node(cb->nesqp->cm_node); - dev_kfree_skb_any(skb); - } - return 0; -} - -/** - * nes_queue_skbs - Queue skb so it can be handled in a thread context - */ -void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp) -{ - struct nes_rskb_cb *cb; - - cb = (struct nes_rskb_cb *)&skb->cb[0]; - cb->nesqp = nesqp; - skb_queue_tail(&nesvnic->mgt_skb_list, skb); - wake_up_interruptible(&nesvnic->mgt_wait_queue); -} - -void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp) -{ - struct sk_buff *skb; - unsigned long flags; - atomic_inc(&pau_qps_destroyed); - - /* Free packets that have not yet been forwarded */ - /* Lock is acquired by skb_dequeue when removing the skb */ - spin_lock_irqsave(&nesqp->pau_lock, flags); - while (skb_queue_len(&nesqp->pau_list)) { - skb = skb_dequeue(&nesqp->pau_list); - nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE); - nes_rem_ref_cm_node(nesqp->cm_node); - } - spin_unlock_irqrestore(&nesqp->pau_lock, flags); -} - -static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request *cqp_request) -{ - struct pau_qh_chg *qh_chg = cqp_request->cqp_callback_pointer; - struct nes_cqp_request *new_request; - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_adapter *nesadapter; - struct nes_qp *nesqp; - struct nes_v4_quad nes_quad; - u32 crc_value; - u64 u64temp; - - nesadapter = nesdev->nesadapter; - nesqp = qh_chg->nesqp; - - /* Should we handle the bad completion */ - if (cqp_request->major_code) - WARN(1, PFX "Invalid cqp_request major_code=0x%x\n", - cqp_request->major_code); - - switch (nesqp->pau_state) { - case PAU_DEL_QH: - /* Old hash code deleted, now set the new one */ - nesqp->pau_state = PAU_ADD_LB_QH; - new_request = nes_get_cqp_request(nesdev); - if (new_request == NULL) { - nes_debug(NES_DBG_PAU, "Failed to get a new_request.\n"); - WARN_ON(1); - return; - } - - memset(&nes_quad, 0, sizeof(nes_quad)); - nes_quad.DstIpAdrIndex = - cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = cpu_to_be32(0x7f000001); - nes_quad.TcpPorts[0] = swab16(nesqp->nesqp_context->tcpPorts[1]); - nes_quad.TcpPorts[1] = swab16(nesqp->nesqp_context->tcpPorts[0]); - - /* Produce hash key */ - crc_value = get_crc_value(&nes_quad); - nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); - nes_debug(NES_DBG_PAU, "new HTE Index = 0x%08X, CRC = 0x%08X\n", - nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); - - nesqp->hte_index &= nesadapter->hte_index_mask; - nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); - nesqp->nesqp_context->ip0 = cpu_to_le32(0x7f000001); - nesqp->nesqp_context->rcv_nxt = cpu_to_le32(nesqp->pau_rcv_nxt); - - cqp_wqe = &new_request->cqp_wqe; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, - NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | - NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); - u64temp = (u64)nesqp->nesqp_context_pbase; - set_wqe_64bit_value(cqp_wqe->wqe_words, 
NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); - - nes_debug(NES_DBG_PAU, "Waiting for CQP completion for adding the quad hash.\n"); - - new_request->cqp_callback_pointer = qh_chg; - new_request->callback = 1; - new_request->cqp_callback = nes_chg_qh_handler; - atomic_set(&new_request->refcount, 1); - nes_post_cqp_request(nesdev, new_request); - break; - - case PAU_ADD_LB_QH: - /* Start processing the queued fpdu's */ - nesqp->pau_state = PAU_READY; - process_fpdus(qh_chg->nesvnic, qh_chg->nesqp); - kfree(qh_chg); - break; - } -} - -/** - * nes_change_quad_hash - */ -static int nes_change_quad_hash(struct nes_device *nesdev, - struct nes_vnic *nesvnic, struct nes_qp *nesqp) -{ - struct nes_cqp_request *cqp_request = NULL; - struct pau_qh_chg *qh_chg = NULL; - u64 u64temp; - struct nes_hw_cqp_wqe *cqp_wqe; - int ret = 0; - - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n"); - ret = -ENOMEM; - goto chg_qh_err; - } - - qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC); - if (!qh_chg) { - ret = -ENOMEM; - goto chg_qh_err; - } - qh_chg->nesdev = nesdev; - qh_chg->nesvnic = nesvnic; - qh_chg->nesqp = nesqp; - nesqp->pau_state = PAU_DEL_QH; - - cqp_wqe = &cqp_request->cqp_wqe; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, - NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | NES_CQP_QP_DEL_HTE | - NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); - u64temp = (u64)nesqp->nesqp_context_pbase; - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); - - nes_debug(NES_DBG_PAU, "Waiting for CQP completion for deleting the quad hash.\n"); - - cqp_request->cqp_callback_pointer = qh_chg; - cqp_request->callback = 1; - cqp_request->cqp_callback = nes_chg_qh_handler; - atomic_set(&cqp_request->refcount, 1); - nes_post_cqp_request(nesdev, cqp_request); - - return ret; - -chg_qh_err: - kfree(qh_chg); - if (cqp_request) - nes_put_cqp_request(nesdev, cqp_request); - return ret; -} - -/** - * nes_mgt_ce_handler - * This management code deals with any packed and unaligned (pau) fpdu's - * that the hardware cannot handle. 
- */ -static void nes_mgt_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) -{ - struct nes_vnic_mgt *mgtvnic = container_of(cq, struct nes_vnic_mgt, mgt_cq); - struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 head; - u32 cq_size; - u32 cqe_count = 0; - u32 cqe_misc; - u32 qp_id = 0; - u32 skbs_needed; - unsigned long context; - struct nes_qp *nesqp; - struct sk_buff *rx_skb; - struct nes_rskb_cb *cb; - - head = cq->cq_head; - cq_size = cq->cq_size; - - while (1) { - cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]); - if (!(cqe_misc & NES_NIC_CQE_VALID)) - break; - - nesqp = NULL; - if (cqe_misc & NES_NIC_CQE_ACCQP_VALID) { - qp_id = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_ACCQP_ID_IDX]); - qp_id &= 0x001fffff; - if (qp_id < nesadapter->max_qp) { - context = (unsigned long)nesadapter->qp_table[qp_id - NES_FIRST_QPN]; - nesqp = (struct nes_qp *)context; - } - } - - if (nesqp) { - if (nesqp->pau_mode == false) { - nesqp->pau_mode = true; /* First time for this qp */ - nesqp->pau_rcv_nxt = le32_to_cpu( - cq->cq_vbase[head].cqe_words[NES_NIC_CQE_HASH_RCVNXT]); - skb_queue_head_init(&nesqp->pau_list); - spin_lock_init(&nesqp->pau_lock); - atomic_inc(&pau_qps_created); - nes_change_quad_hash(nesdev, mgtvnic->nesvnic, nesqp); - } - - rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail]; - rx_skb->len = 0; - skb_put(rx_skb, cqe_misc & 0x0000ffff); - rx_skb->protocol = eth_type_trans(rx_skb, mgtvnic->nesvnic->netdev); - cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; - pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, PCI_DMA_FROMDEVICE); - cb->busaddr = 0; - mgtvnic->mgt.rq_tail++; - mgtvnic->mgt.rq_tail &= mgtvnic->mgt.rq_size - 1; - - nes_add_ref_cm_node(nesqp->cm_node); - nes_queue_mgt_skbs(rx_skb, mgtvnic->nesvnic, nesqp); - } else { - printk(KERN_ERR PFX "Invalid QP %d for packed/unaligned handling\n", qp_id); - } - - cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0; - cqe_count++; - if (++head >= cq_size) - head = 0; - - if (cqe_count == 255) { - /* Replenish mgt CQ */ - nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16)); - nesdev->currcq_count += cqe_count; - cqe_count = 0; - } - - skbs_needed = atomic_inc_return(&mgtvnic->rx_skbs_needed); - if (skbs_needed > (mgtvnic->mgt.rq_size >> 1)) - nes_replenish_mgt_rq(mgtvnic); - } - - cq->cq_head = head; - nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | - cq->cq_number | (cqe_count << 16)); - nes_read32(nesdev->regs + NES_CQE_ALLOC); - nesdev->currcq_count += cqe_count; -} - -/** - * nes_init_mgt_qp - */ -int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic) -{ - struct nes_vnic_mgt *mgtvnic; - u32 counter; - void *vmem; - dma_addr_t pmem; - struct nes_hw_cqp_wqe *cqp_wqe; - u32 cqp_head; - unsigned long flags; - struct nes_hw_nic_qp_context *mgt_context; - u64 u64temp; - struct nes_hw_nic_rq_wqe *mgt_rqe; - struct sk_buff *skb; - u32 wqe_count; - struct nes_rskb_cb *cb; - u32 mgt_mem_size; - void *mgt_vbase; - dma_addr_t mgt_pbase; - int i; - int ret; - - /* Allocate space the all mgt QPs once */ - mgtvnic = kcalloc(NES_MGT_QP_COUNT, sizeof(struct nes_vnic_mgt), - GFP_KERNEL); - if (!mgtvnic) - return -ENOMEM; - - /* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */ - /* We are not sending from this NIC so sq is not allocated */ - mgt_mem_size = 256 + - (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)) + - (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_cqe)) + - 
sizeof(struct nes_hw_nic_qp_context); - mgt_mem_size = (mgt_mem_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); - mgt_vbase = pci_alloc_consistent(nesdev->pcidev, NES_MGT_QP_COUNT * mgt_mem_size, &mgt_pbase); - if (!mgt_vbase) { - kfree(mgtvnic); - nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt host descriptor rings\n"); - return -ENOMEM; - } - - nesvnic->mgt_mem_size = NES_MGT_QP_COUNT * mgt_mem_size; - nesvnic->mgt_vbase = mgt_vbase; - nesvnic->mgt_pbase = mgt_pbase; - - skb_queue_head_init(&nesvnic->mgt_skb_list); - init_waitqueue_head(&nesvnic->mgt_wait_queue); - nesvnic->mgt_thread = kthread_run(mgt_thread, nesvnic, "nes_mgt_thread"); - - for (i = 0; i < NES_MGT_QP_COUNT; i++) { - mgtvnic->nesvnic = nesvnic; - mgtvnic->mgt.qp_id = nesdev->mac_index + NES_MGT_QP_OFFSET + i; - memset(mgt_vbase, 0, mgt_mem_size); - nes_debug(NES_DBG_INIT, "Allocated mgt QP structures at %p (phys = %016lX), size = %u.\n", - mgt_vbase, (unsigned long)mgt_pbase, mgt_mem_size); - - vmem = (void *)(((unsigned long)mgt_vbase + (256 - 1)) & - ~(unsigned long)(256 - 1)); - pmem = (dma_addr_t)(((unsigned long long)mgt_pbase + (256 - 1)) & - ~(unsigned long long)(256 - 1)); - - spin_lock_init(&mgtvnic->mgt.rq_lock); - - /* setup the RQ */ - mgtvnic->mgt.rq_vbase = vmem; - mgtvnic->mgt.rq_pbase = pmem; - mgtvnic->mgt.rq_head = 0; - mgtvnic->mgt.rq_tail = 0; - mgtvnic->mgt.rq_size = NES_MGT_WQ_COUNT; - - /* setup the CQ */ - vmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)); - pmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)); - - mgtvnic->mgt_cq.cq_number = mgtvnic->mgt.qp_id; - mgtvnic->mgt_cq.cq_vbase = vmem; - mgtvnic->mgt_cq.cq_pbase = pmem; - mgtvnic->mgt_cq.cq_head = 0; - mgtvnic->mgt_cq.cq_size = NES_MGT_WQ_COUNT; - - mgtvnic->mgt_cq.ce_handler = nes_mgt_ce_handler; - - /* Send CreateCQ request to CQP */ - spin_lock_irqsave(&nesdev->cqp.lock, flags); - cqp_head = nesdev->cqp.sq_head; - - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( - NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | - ((u32)mgtvnic->mgt_cq.cq_size << 16)); - cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32( - mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16)); - u64temp = (u64)mgtvnic->mgt_cq.cq_pbase; - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; - u64temp = (unsigned long)&mgtvnic->mgt_cq; - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1)); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = - cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; - - if (++cqp_head >= nesdev->cqp.sq_size) - cqp_head = 0; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - - /* Send CreateQP request to CQP */ - mgt_context = (void *)(&mgtvnic->mgt_cq.cq_vbase[mgtvnic->mgt_cq.cq_size]); - mgt_context->context_words[NES_NIC_CTX_MISC_IDX] = - cpu_to_le32((u32)NES_MGT_CTX_SIZE | - ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12)); - nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n", - nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE), - nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE)); - if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) - mgt_context->context_words[NES_NIC_CTX_MISC_IDX] |= 
cpu_to_le32(NES_NIC_BACK_STORE); - - u64temp = (u64)mgtvnic->mgt.rq_pbase; - mgt_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp); - mgt_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); - u64temp = (u64)mgtvnic->mgt.rq_pbase; - mgt_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp); - mgt_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); - - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP | - NES_CQP_QP_TYPE_NIC); - cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(mgtvnic->mgt.qp_id); - u64temp = (u64)mgtvnic->mgt_cq.cq_pbase + - (mgtvnic->mgt_cq.cq_size * sizeof(struct nes_hw_nic_cqe)); - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); - - if (++cqp_head >= nesdev->cqp.sq_size) - cqp_head = 0; - nesdev->cqp.sq_head = cqp_head; - - barrier(); - - /* Ring doorbell (2 WQEs) */ - nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); - - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - nes_debug(NES_DBG_INIT, "Waiting for create MGT QP%u to complete.\n", - mgtvnic->mgt.qp_id); - - ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), - NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_INIT, "Create MGT QP%u completed, wait_event_timeout ret = %u.\n", - mgtvnic->mgt.qp_id, ret); - if (!ret) { - nes_debug(NES_DBG_INIT, "MGT QP%u create timeout expired\n", mgtvnic->mgt.qp_id); - if (i == 0) { - pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase, - nesvnic->mgt_pbase); - kfree(mgtvnic); - } else { - nes_destroy_mgt(nesvnic); - } - return -EIO; - } - - /* Populate the RQ */ - for (counter = 0; counter < (NES_MGT_WQ_COUNT - 1); counter++) { - skb = dev_alloc_skb(nesvnic->max_frame_size); - if (!skb) { - nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name); - return -ENOMEM; - } - - skb->dev = netdev; - - pmem = pci_map_single(nesdev->pcidev, skb->data, - nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); - cb = (struct nes_rskb_cb *)&skb->cb[0]; - cb->busaddr = pmem; - cb->maplen = nesvnic->max_frame_size; - - mgt_rqe = &mgtvnic->mgt.rq_vbase[counter]; - mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32((u32)nesvnic->max_frame_size); - mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; - mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem); - mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32)); - mgtvnic->mgt.rx_skb[counter] = skb; - } - - timer_setup(&mgtvnic->rq_wqes_timer, nes_mgt_rq_wqes_timeout, - 0); - - wqe_count = NES_MGT_WQ_COUNT - 1; - mgtvnic->mgt.rq_head = wqe_count; - barrier(); - do { - counter = min(wqe_count, ((u32)255)); - wqe_count -= counter; - nes_write32(nesdev->regs + NES_WQE_ALLOC, (counter << 24) | mgtvnic->mgt.qp_id); - } while (wqe_count); - - nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | - mgtvnic->mgt_cq.cq_number); - nes_read32(nesdev->regs + NES_CQE_ALLOC); - - mgt_vbase += mgt_mem_size; - mgt_pbase += mgt_mem_size; - nesvnic->mgtvnic[i] = mgtvnic++; - } - return 0; -} - - -void nes_destroy_mgt(struct nes_vnic *nesvnic) -{ - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_vnic_mgt *mgtvnic; - struct nes_vnic_mgt *first_mgtvnic; - unsigned long flags; - struct nes_hw_cqp_wqe *cqp_wqe; - u32 cqp_head; - struct sk_buff *rx_skb; - int i; - int ret; - - kthread_stop(nesvnic->mgt_thread); - - /* Free 
remaining NIC receive buffers */ - first_mgtvnic = nesvnic->mgtvnic[0]; - for (i = 0; i < NES_MGT_QP_COUNT; i++) { - mgtvnic = nesvnic->mgtvnic[i]; - if (mgtvnic == NULL) - continue; - - while (mgtvnic->mgt.rq_head != mgtvnic->mgt.rq_tail) { - rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail]; - nes_mgt_free_skb(nesdev, rx_skb, PCI_DMA_FROMDEVICE); - mgtvnic->mgt.rq_tail++; - mgtvnic->mgt.rq_tail &= (mgtvnic->mgt.rq_size - 1); - } - - spin_lock_irqsave(&nesdev->cqp.lock, flags); - - /* Destroy NIC QP */ - cqp_head = nesdev->cqp.sq_head; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, - (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC)); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, - mgtvnic->mgt.qp_id); - - if (++cqp_head >= nesdev->cqp.sq_size) - cqp_head = 0; - - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - - /* Destroy NIC CQ */ - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, - (NES_CQP_DESTROY_CQ | ((u32)mgtvnic->mgt_cq.cq_size << 16))); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, - (mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16))); - - if (++cqp_head >= nesdev->cqp.sq_size) - cqp_head = 0; - - nesdev->cqp.sq_head = cqp_head; - barrier(); - - /* Ring doorbell (2 WQEs) */ - nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); - - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u," - " cqp.sq_tail=%u, cqp.sq_size=%u\n", - cqp_head, nesdev->cqp.sq_head, - nesdev->cqp.sq_tail, nesdev->cqp.sq_size); - - ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), - NES_EVENT_TIMEOUT); - - nes_debug(NES_DBG_SHUTDOWN, "Destroy MGT QP returned, wait_event_timeout ret = %u, cqp_head=%u," - " cqp.sq_head=%u, cqp.sq_tail=%u\n", - ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail); - if (!ret) - nes_debug(NES_DBG_SHUTDOWN, "MGT QP%u destroy timeout expired\n", - mgtvnic->mgt.qp_id); - - nesvnic->mgtvnic[i] = NULL; - } - - if (nesvnic->mgt_vbase) { - pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase, - nesvnic->mgt_pbase); - nesvnic->mgt_vbase = NULL; - nesvnic->mgt_pbase = 0; - } - - kfree(first_mgtvnic); -} diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h deleted file mode 100644 index 4f7f701c4a81..000000000000 --- a/drivers/infiniband/hw/nes/nes_mgt.h +++ /dev/null @@ -1,97 +0,0 @@ -/* -* Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* OpenIB.org BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifndef __NES_MGT_H -#define __NES_MGT_H - -#define MPA_FRAMING 6 /* length is 2 bytes, crc is 4 bytes */ - -int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic); -void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp); -void nes_destroy_mgt(struct nes_vnic *nesvnic); -void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp); - -struct nes_hw_mgt { - struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */ - dma_addr_t rq_pbase; /* PCI memory for host rings */ - struct sk_buff *rx_skb[NES_NIC_WQ_SIZE]; - u16 qp_id; - u16 sq_head; - u16 rq_head; - u16 rq_tail; - u16 rq_size; - u8 replenishing_rq; - u8 reserved; - spinlock_t rq_lock; -}; - -struct nes_vnic_mgt { - struct nes_vnic *nesvnic; - struct nes_hw_mgt mgt; - struct nes_hw_nic_cq mgt_cq; - atomic_t rx_skbs_needed; - struct timer_list rq_wqes_timer; - atomic_t rx_skb_timer_running; -}; - -#define MAX_FPDU_FRAGS 4 -struct pau_fpdu_frag { - struct sk_buff *skb; - u64 physaddr; - u32 frag_len; - bool cmplt; -}; - -struct pau_fpdu_info { - struct nes_qp *nesqp; - struct nes_cqp_request *cqp_request; - void *hdr_vbase; - dma_addr_t hdr_pbase; - int hdr_len; - u16 data_len; - u16 frag_cnt; - struct pau_fpdu_frag frags[MAX_FPDU_FRAGS]; -}; - -enum pau_qh_state { - PAU_DEL_QH, - PAU_ADD_LB_QH, - PAU_READY -}; - -struct pau_qh_chg { - struct nes_device *nesdev; - struct nes_vnic *nesvnic; - struct nes_qp *nesqp; -}; - -#endif /* __NES_MGT_H */ diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c deleted file mode 100644 index 16f33454c198..000000000000 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ /dev/null @@ -1,1870 +0,0 @@ -/* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <linux/if_arp.h> -#include <linux/if_vlan.h> -#include <linux/ethtool.h> -#include <linux/slab.h> -#include <net/tcp.h> - -#include <net/inet_common.h> -#include <linux/inet.h> - -#include "nes.h" - -static struct nic_qp_map nic_qp_mapping_0[] = { - {16,0,0,1},{24,4,0,0},{28,8,0,0},{32,12,0,0}, - {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0}, - {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0}, - {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0} -}; - -static struct nic_qp_map nic_qp_mapping_1[] = { - {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0}, - {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0} -}; - -static struct nic_qp_map nic_qp_mapping_2[] = { - {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0} -}; - -static struct nic_qp_map nic_qp_mapping_3[] = { - {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0} -}; - -static struct nic_qp_map nic_qp_mapping_4[] = { - {28,8,0,0},{32,12,0,0} -}; - -static struct nic_qp_map nic_qp_mapping_5[] = { - {29,9,1,0},{33,13,1,0} -}; - -static struct nic_qp_map nic_qp_mapping_6[] = { - {30,10,2,0},{34,14,2,0} -}; - -static struct nic_qp_map nic_qp_mapping_7[] = { - {31,11,3,0},{35,15,3,0} -}; - -static struct nic_qp_map *nic_qp_mapping_per_function[] = { - nic_qp_mapping_0, nic_qp_mapping_1, nic_qp_mapping_2, nic_qp_mapping_3, - nic_qp_mapping_4, nic_qp_mapping_5, nic_qp_mapping_6, nic_qp_mapping_7 -}; - -static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK - | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN; -static int debug = -1; -static int nics_per_function = 1; - -/** - * nes_netdev_poll - */ -static int nes_netdev_poll(struct napi_struct *napi, int budget) -{ - struct nes_vnic *nesvnic = container_of(napi, struct nes_vnic, napi); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_hw_nic_cq *nescq = &nesvnic->nic_cq; - - nesvnic->budget = budget; - nescq->cqes_pending = 0; - nescq->rx_cqes_completed = 0; - nescq->cqe_allocs_pending = 0; - nescq->rx_pkts_indicated = 0; - - nes_nic_ce_handler(nesdev, nescq); - - if (nescq->cqes_pending == 0) { - napi_complete(napi); - /* clear out completed cqes and arm */ - nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | - nescq->cq_number | (nescq->cqe_allocs_pending << 16)); - nes_read32(nesdev->regs+NES_CQE_ALLOC); - } else { - /* clear out completed cqes but don't arm */ - nes_write32(nesdev->regs+NES_CQE_ALLOC, - nescq->cq_number | (nescq->cqe_allocs_pending << 16)); - nes_debug(NES_DBG_NETDEV, "%s: exiting with work pending\n", - nesvnic->netdev->name); - } - return nescq->rx_pkts_indicated; -} - - -/** - * nes_netdev_open - Activate the network interface; ifconfig - * ethx up. 
- */ -static int nes_netdev_open(struct net_device *netdev) -{ - u32 macaddr_low; - u16 macaddr_high; - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - int ret; - int i; - struct nes_vnic *first_nesvnic = NULL; - u32 nic_active_bit; - u32 nic_active; - struct list_head *list_pos, *list_temp; - unsigned long flags; - - if (nesvnic->netdev_open == 1) - return 0; - - if (netif_msg_ifup(nesvnic)) - printk(KERN_INFO PFX "%s: enabling interface\n", netdev->name); - - ret = nes_init_nic_qp(nesdev, netdev); - if (ret) { - return ret; - } - - netif_carrier_off(netdev); - netif_stop_queue(netdev); - - if ((!nesvnic->of_device_registered) && (nesvnic->rdma_enabled)) { - nesvnic->nesibdev = nes_init_ofa_device(netdev); - if (nesvnic->nesibdev == NULL) { - printk(KERN_ERR PFX "%s: nesvnic->nesibdev alloc failed", netdev->name); - } else { - nesvnic->nesibdev->nesvnic = nesvnic; - ret = nes_register_ofa_device(nesvnic->nesibdev); - if (ret) { - printk(KERN_ERR PFX "%s: Unable to register RDMA device, ret = %d\n", - netdev->name, ret); - } - } - } - /* Set packet filters */ - nic_active_bit = 1 << nesvnic->nic_index; - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE); - nic_active |= nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE); - nic_active |= nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON); - nic_active |= nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); - - macaddr_high = ((u16)netdev->dev_addr[0]) << 8; - macaddr_high += (u16)netdev->dev_addr[1]; - - macaddr_low = ((u32)netdev->dev_addr[2]) << 24; - macaddr_low += ((u32)netdev->dev_addr[3]) << 16; - macaddr_low += ((u32)netdev->dev_addr[4]) << 8; - macaddr_low += (u32)netdev->dev_addr[5]; - - /* Program the various MAC regs */ - for (i = 0; i < NES_MAX_PORT_COUNT; i++) { - if (nesvnic->qp_nic_index[i] == 0xf) { - break; - } - nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW" - " (Addr:%08X) = %08X, HIGH = %08X.\n", - i, nesvnic->qp_nic_index[i], - NES_IDX_PERFECT_FILTER_LOW+ - (nesvnic->qp_nic_index[i] * 8), - macaddr_low, - (u32)macaddr_high | NES_MAC_ADDR_VALID | - ((((u32)nesvnic->nic_index) << 16))); - nes_write_indexed(nesdev, - NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8), - macaddr_low); - nes_write_indexed(nesdev, - NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8), - (u32)macaddr_high | NES_MAC_ADDR_VALID | - ((((u32)nesvnic->nic_index) << 16))); - } - - - nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | - nesvnic->nic_cq.cq_number); - nes_read32(nesdev->regs+NES_CQE_ALLOC); - list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) { - first_nesvnic = container_of(list_pos, struct nes_vnic, list); - if (first_nesvnic->netdev_open == 1) - break; - } - if (first_nesvnic->netdev_open == 0) { - nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n"); - nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index), - ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | - NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); - first_nesvnic = nesvnic; - } - - if (first_nesvnic->linkup) { - /* Enable network packets */ - nesvnic->linkup = 1; - netif_start_queue(netdev); - netif_carrier_on(netdev); - } - - 
spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); - if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) { - nesdev->link_recheck = 1; - mod_delayed_work(system_wq, &nesdev->work, - NES_LINK_RECHECK_DELAY); - } - spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); - - spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags); - if (nesvnic->of_device_registered) { - nesdev->nesadapter->send_term_ok = 1; - if (nesvnic->linkup == 1) { - if (nesdev->iw_status == 0) { - nesdev->iw_status = 1; - nes_port_ibevent(nesvnic); - } - } else { - nesdev->iw_status = 0; - } - } - spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags); - - napi_enable(&nesvnic->napi); - nesvnic->netdev_open = 1; - - return 0; -} - - -/** - * nes_netdev_stop - */ -static int nes_netdev_stop(struct net_device *netdev) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - u32 nic_active_mask; - u32 nic_active; - struct nes_vnic *first_nesvnic = NULL; - struct list_head *list_pos, *list_temp; - unsigned long flags; - - nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n", - nesvnic, nesdev, netdev, netdev->name); - if (nesvnic->netdev_open == 0) - return 0; - - if (netif_msg_ifdown(nesvnic)) - printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name); - netif_carrier_off(netdev); - - /* Disable network packets */ - napi_disable(&nesvnic->napi); - netif_stop_queue(netdev); - list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) { - first_nesvnic = container_of(list_pos, struct nes_vnic, list); - if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic)) - break; - } - - if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic) && - (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != - PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) { - nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+ - (0x200*nesdev->mac_index), 0xffffffff); - nes_write_indexed(first_nesvnic->nesdev, - NES_IDX_MAC_INT_MASK+ - (0x200*first_nesvnic->nesdev->mac_index), - ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | - NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); - } else { - nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff); - } - - nic_active_mask = ~((u32)(1 << nesvnic->nic_index)); - nes_write_indexed(nesdev, NES_IDX_PERFECT_FILTER_HIGH+ - (nesvnic->perfect_filter_index*8), 0); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE); - nic_active &= nic_active_mask; - nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); - nic_active &= nic_active_mask; - nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE); - nic_active &= nic_active_mask; - nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); - nic_active &= nic_active_mask; - nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON); - nic_active &= nic_active_mask; - nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); - - spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags); - if (nesvnic->of_device_registered) { - nesdev->nesadapter->send_term_ok = 0; - nesdev->iw_status = 0; - if (nesvnic->linkup == 1) - nes_port_ibevent(nesvnic); - } - 
del_timer_sync(&nesvnic->event_timer); - nesvnic->event_timer.function = NULL; - spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags); - - nes_destroy_nic_qp(nesvnic); - - nesvnic->netdev_open = 0; - - return 0; -} - - -/** - * nes_nic_send - */ -static bool nes_nic_send(struct sk_buff *skb, struct net_device *netdev) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_hw_nic *nesnic = &nesvnic->nic; - struct nes_hw_nic_sq_wqe *nic_sqe; - struct tcphdr *tcph; - __le16 *wqe_fragment_length; - u32 wqe_misc; - u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */ - u16 skb_fragment_index; - dma_addr_t bus_address; - - nic_sqe = &nesnic->sq_vbase[nesnic->sq_head]; - wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; - - /* setup the VLAN tag if present */ - if (skb_vlan_tag_present(skb)) { - nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n", - netdev->name, skb_vlan_tag_get(skb)); - wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE; - wqe_fragment_length[0] = (__force __le16) skb_vlan_tag_get(skb); - } else - wqe_misc = 0; - - /* bump past the vlan tag */ - wqe_fragment_length++; - /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */ - wqe_misc |= NES_NIC_SQ_WQE_COMPLETION; - - if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (skb_is_gso(skb)) { - tcph = tcp_hdr(skb); - /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... is_gso = %u seg size = %u\n", - netdev->name, skb_is_gso(skb), skb_shinfo(skb)->gso_size); */ - wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | (u16)skb_shinfo(skb)->gso_size; - set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX, - ((u32)tcph->doff) | - (((u32)(((unsigned char *)tcph) - skb->data)) << 4)); - } - } else { /* CHECKSUM_HW */ - wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM; - } - - set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX, - skb->len); - memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer, - skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), skb_headlen(skb))); - wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE), - skb_headlen(skb))); - wqe_fragment_length[1] = 0; - if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) { - if ((skb_shinfo(skb)->nr_frags + 1) > 4) { - nes_debug(NES_DBG_NIC_TX, "%s: Packet with %u fragments not sent, skb_headlen=%u\n", - netdev->name, skb_shinfo(skb)->nr_frags + 2, skb_headlen(skb)); - kfree_skb(skb); - nesvnic->tx_sw_dropped++; - return false; - } - set_bit(nesnic->sq_head, nesnic->first_frag_overflow); - bus_address = pci_map_single(nesdev->pcidev, skb->data + NES_FIRST_FRAG_SIZE, - skb_headlen(skb) - NES_FIRST_FRAG_SIZE, PCI_DMA_TODEVICE); - wqe_fragment_length[wqe_fragment_index++] = - cpu_to_le16(skb_headlen(skb) - NES_FIRST_FRAG_SIZE); - wqe_fragment_length[wqe_fragment_index] = 0; - set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, - ((u64)(bus_address))); - nesnic->tx_skb[nesnic->sq_head] = skb; - } - - if (skb_headlen(skb) == skb->len) { - if (skb_headlen(skb) <= NES_FIRST_FRAG_SIZE) { - nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_2_1_IDX] = 0; - nesnic->tx_skb[nesnic->sq_head] = skb; - } - } else { - /* Deal with Fragments */ - nesnic->tx_skb[nesnic->sq_head] = skb; - for (skb_fragment_index = 0; skb_fragment_index < skb_shinfo(skb)->nr_frags; - skb_fragment_index++) { - skb_frag_t *frag = - &skb_shinfo(skb)->frags[skb_fragment_index]; - bus_address = 
skb_frag_dma_map(&nesdev->pcidev->dev, - frag, 0, skb_frag_size(frag), - DMA_TO_DEVICE); - wqe_fragment_length[wqe_fragment_index] = - cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[skb_fragment_index])); - set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), - bus_address); - wqe_fragment_index++; - if (wqe_fragment_index < 5) - wqe_fragment_length[wqe_fragment_index] = 0; - } - } - - set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX, wqe_misc); - nesnic->sq_head++; - nesnic->sq_head &= nesnic->sq_size - 1; - return true; -} - - -/** - * nes_netdev_start_xmit - */ -static netdev_tx_t nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_hw_nic *nesnic = &nesvnic->nic; - struct nes_hw_nic_sq_wqe *nic_sqe; - struct tcphdr *tcph; - /* struct udphdr *udph; */ -#define NES_MAX_TSO_FRAGS MAX_SKB_FRAGS - /* 64K segment plus overflow on each side */ - dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS]; - dma_addr_t bus_address; - u32 tso_frag_index; - u32 tso_frag_count; - u32 tso_wqe_length; - u32 curr_tcp_seq; - u32 wqe_count=1; - struct iphdr *iph; - __le16 *wqe_fragment_length; - u32 nr_frags; - u32 original_first_length; - /* u64 *wqe_fragment_address; */ - /* first fragment (0) is used by copy buffer */ - u16 wqe_fragment_index=1; - u16 hoffset; - u16 nhoffset; - u16 wqes_needed; - u16 wqes_available; - u32 wqe_misc; - - /* - * nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," - * " (%u frags), tso_size=%u\n", - * netdev->name, skb->len, skb_headlen(skb), - * skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); - */ - - if (netif_queue_stopped(netdev)) - return NETDEV_TX_BUSY; - - /* Check if SQ is full */ - if ((((nesnic->sq_tail+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) == 1) { - if (!netif_queue_stopped(netdev)) { - netif_stop_queue(netdev); - barrier(); - if ((((((volatile u16)nesnic->sq_tail)+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) != 1) { - netif_start_queue(netdev); - goto sq_no_longer_full; - } - } - nesvnic->sq_full++; - return NETDEV_TX_BUSY; - } - -sq_no_longer_full: - nr_frags = skb_shinfo(skb)->nr_frags; - if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) { - nr_frags++; - } - /* Check if too many fragments */ - if (unlikely((nr_frags > 4))) { - if (skb_is_gso(skb)) { - nesvnic->segmented_tso_requests++; - nesvnic->tso_requests++; - /* Basically 4 fragments available per WQE with extended fragments */ - wqes_needed = nr_frags >> 2; - wqes_needed += (nr_frags&3)?1:0; - wqes_available = (((nesnic->sq_tail+nesnic->sq_size)-nesnic->sq_head) - 1) & - (nesnic->sq_size - 1); - - if (unlikely(wqes_needed > wqes_available)) { - if (!netif_queue_stopped(netdev)) { - netif_stop_queue(netdev); - barrier(); - wqes_available = (((((volatile u16)nesnic->sq_tail)+nesnic->sq_size)-nesnic->sq_head) - 1) & - (nesnic->sq_size - 1); - if (wqes_needed <= wqes_available) { - netif_start_queue(netdev); - goto tso_sq_no_longer_full; - } - } - nesvnic->sq_full++; - nes_debug(NES_DBG_NIC_TX, "%s: HNIC SQ full- TSO request has too many frags!\n", - netdev->name); - return NETDEV_TX_BUSY; - } -tso_sq_no_longer_full: - /* Map all the buffers */ - for (tso_frag_count=0; tso_frag_count < skb_shinfo(skb)->nr_frags; - tso_frag_count++) { - skb_frag_t *frag = - &skb_shinfo(skb)->frags[tso_frag_count]; - tso_bus_address[tso_frag_count] = - skb_frag_dma_map(&nesdev->pcidev->dev, 
- frag, 0, skb_frag_size(frag), - DMA_TO_DEVICE); - } - - tso_frag_index = 0; - curr_tcp_seq = ntohl(tcp_hdr(skb)->seq); - hoffset = skb_transport_header(skb) - skb->data; - nhoffset = skb_network_header(skb) - skb->data; - original_first_length = hoffset + ((((struct tcphdr *)skb_transport_header(skb))->doff)<<2); - - for (wqe_count=0; wqe_count<((u32)wqes_needed); wqe_count++) { - tso_wqe_length = 0; - nic_sqe = &nesnic->sq_vbase[nesnic->sq_head]; - wqe_fragment_length = - (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; - /* setup the VLAN tag if present */ - if (skb_vlan_tag_present(skb)) { - nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n", - netdev->name, - skb_vlan_tag_get(skb)); - wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE; - wqe_fragment_length[0] = (__force __le16) skb_vlan_tag_get(skb); - } else - wqe_misc = 0; - - /* bump past the vlan tag */ - wqe_fragment_length++; - - /* Assumes header totally fits in allocated buffer and is in first fragment */ - if (original_first_length > NES_FIRST_FRAG_SIZE) { - nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n", - original_first_length, NES_FIRST_FRAG_SIZE); - nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," - " (%u frags), is_gso = %u tso_size=%u\n", - netdev->name, - skb->len, skb_headlen(skb), - skb_shinfo(skb)->nr_frags, skb_is_gso(skb), skb_shinfo(skb)->gso_size); - } - memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer, - skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), - original_first_length)); - iph = (struct iphdr *) - (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[nhoffset]); - tcph = (struct tcphdr *) - (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[hoffset]); - if ((wqe_count+1)!=(u32)wqes_needed) { - tcph->fin = 0; - tcph->psh = 0; - tcph->rst = 0; - tcph->urg = 0; - } - if (wqe_count) { - tcph->syn = 0; - } - tcph->seq = htonl(curr_tcp_seq); - wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE), - original_first_length)); - - wqe_fragment_index = 1; - if ((wqe_count==0) && (skb_headlen(skb) > original_first_length)) { - set_bit(nesnic->sq_head, nesnic->first_frag_overflow); - bus_address = pci_map_single(nesdev->pcidev, skb->data + original_first_length, - skb_headlen(skb) - original_first_length, PCI_DMA_TODEVICE); - wqe_fragment_length[wqe_fragment_index++] = - cpu_to_le16(skb_headlen(skb) - original_first_length); - wqe_fragment_length[wqe_fragment_index] = 0; - set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, - bus_address); - tso_wqe_length += skb_headlen(skb) - - original_first_length; - } - while (wqe_fragment_index < 5) { - wqe_fragment_length[wqe_fragment_index] = - cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index])); - set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), - (u64)tso_bus_address[tso_frag_index]); - wqe_fragment_index++; - tso_wqe_length += skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index++]); - if (wqe_fragment_index < 5) - wqe_fragment_length[wqe_fragment_index] = 0; - if (tso_frag_index == tso_frag_count) - break; - } - if ((wqe_count+1) == (u32)wqes_needed) { - nesnic->tx_skb[nesnic->sq_head] = skb; - } else { - nesnic->tx_skb[nesnic->sq_head] = NULL; - } - wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_shinfo(skb)->gso_size; - if ((tso_wqe_length + original_first_length) > skb_shinfo(skb)->gso_size) { - wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE; - } else { - iph->tot_len = 
htons(tso_wqe_length + original_first_length - nhoffset); - } - - set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX, - wqe_misc); - set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX, - ((u32)tcph->doff) | (((u32)hoffset) << 4)); - - set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX, - tso_wqe_length + original_first_length); - curr_tcp_seq += tso_wqe_length; - nesnic->sq_head++; - nesnic->sq_head &= nesnic->sq_size-1; - } - } else { - hoffset = skb_transport_header(skb) - skb->data; - nhoffset = skb_network_header(skb) - skb->data; - if (skb_linearize(skb)) { - nesvnic->tx_sw_dropped++; - kfree_skb(skb); - return NETDEV_TX_OK; - } - nesvnic->linearized_skbs++; - skb_set_transport_header(skb, hoffset); - skb_set_network_header(skb, nhoffset); - if (!nes_nic_send(skb, netdev)) - return NETDEV_TX_OK; - } - } else { - if (!nes_nic_send(skb, netdev)) - return NETDEV_TX_OK; - } - - barrier(); - - if (wqe_count) - nes_write32(nesdev->regs+NES_WQE_ALLOC, - (wqe_count << 24) | (1 << 23) | nesvnic->nic.qp_id); - - netif_trans_update(netdev); - - return NETDEV_TX_OK; -} - - -/** - * nes_netdev_get_stats - */ -static struct net_device_stats *nes_netdev_get_stats(struct net_device *netdev) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - u64 u64temp; - u32 u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_RX_DISCARD + (nesvnic->nic_index*0x200)); - nesvnic->netstats.rx_dropped += u32temp; - nesvnic->endnode_nstat_rx_discard += u32temp; - - u64temp = (u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO + (nesvnic->nic_index*0x200)); - u64temp += ((u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32; - - nesvnic->endnode_nstat_rx_octets += u64temp; - nesvnic->netstats.rx_bytes += u64temp; - - u64temp = (u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO + (nesvnic->nic_index*0x200)); - u64temp += ((u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32; - - nesvnic->endnode_nstat_rx_frames += u64temp; - nesvnic->netstats.rx_packets += u64temp; - - u64temp = (u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO + (nesvnic->nic_index*0x200)); - u64temp += ((u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32; - - nesvnic->endnode_nstat_tx_octets += u64temp; - nesvnic->netstats.tx_bytes += u64temp; - - u64temp = (u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO + (nesvnic->nic_index*0x200)); - u64temp += ((u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32; - - nesvnic->endnode_nstat_tx_frames += u64temp; - nesvnic->netstats.tx_packets += u64temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->netstats.rx_dropped += u32temp; - nesvnic->nesdev->mac_rx_errors += u32temp; - nesvnic->nesdev->mac_rx_short_frames += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->netstats.rx_dropped += u32temp; - nesvnic->nesdev->mac_rx_errors += u32temp; - nesvnic->nesdev->mac_rx_oversized_frames += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->netstats.rx_dropped += u32temp; - 
nesvnic->nesdev->mac_rx_errors += u32temp; - nesvnic->nesdev->mac_rx_jabber_frames += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->netstats.rx_dropped += u32temp; - nesvnic->nesdev->mac_rx_errors += u32temp; - nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->netstats.rx_length_errors += u32temp; - nesvnic->nesdev->mac_rx_errors += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->nesdev->mac_rx_errors += u32temp; - nesvnic->nesdev->mac_rx_crc_errors += u32temp; - nesvnic->netstats.rx_crc_errors += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->nesdev->mac_tx_errors += u32temp; - nesvnic->netstats.tx_errors += u32temp; - - return &nesvnic->netstats; -} - - -/** - * nes_netdev_tx_timeout - */ -static void nes_netdev_tx_timeout(struct net_device *netdev) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - - if (netif_msg_timer(nesvnic)) - nes_debug(NES_DBG_NIC_TX, "%s: tx timeout\n", netdev->name); -} - - -/** - * nes_netdev_set_mac_address - */ -static int nes_netdev_set_mac_address(struct net_device *netdev, void *p) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct sockaddr *mac_addr = p; - int i; - u32 macaddr_low; - u16 macaddr_high; - - if (!is_valid_ether_addr(mac_addr->sa_data)) - return -EADDRNOTAVAIL; - - memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len); - printk(PFX "%s: Address length = %d, Address = %pM\n", - __func__, netdev->addr_len, mac_addr->sa_data); - macaddr_high = ((u16)netdev->dev_addr[0]) << 8; - macaddr_high += (u16)netdev->dev_addr[1]; - macaddr_low = ((u32)netdev->dev_addr[2]) << 24; - macaddr_low += ((u32)netdev->dev_addr[3]) << 16; - macaddr_low += ((u32)netdev->dev_addr[4]) << 8; - macaddr_low += (u32)netdev->dev_addr[5]; - - for (i = 0; i < NES_MAX_PORT_COUNT; i++) { - if (nesvnic->qp_nic_index[i] == 0xf) { - break; - } - nes_write_indexed(nesdev, - NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8), - macaddr_low); - nes_write_indexed(nesdev, - NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8), - (u32)macaddr_high | NES_MAC_ADDR_VALID | - ((((u32)nesvnic->nic_index) << 16))); - } - return 0; -} - - -static void set_allmulti(struct nes_device *nesdev, u32 nic_active_bit) -{ - u32 nic_active; - - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); - nic_active |= nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); - nic_active &= ~nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); -} - -#define get_addr(addrs, index) ((addrs) + (index) * ETH_ALEN) - -/** - * nes_netdev_set_multicast_list - */ -static void nes_netdev_set_multicast_list(struct net_device *netdev) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; - u32 nic_active_bit; - u32 nic_active; - u32 perfect_filter_register_address; - u32 macaddr_low; - u16 macaddr_high; - u8 mc_all_on = 0; - u8 mc_index; - int mc_nic_index = -1; - u8 pft_entries_preallocated = max(nesadapter->adapter_fcn_count * - 
nics_per_function, 4); - u8 max_pft_entries_avaiable = NES_PFT_SIZE - pft_entries_preallocated; - unsigned long flags; - int mc_count = netdev_mc_count(netdev); - - spin_lock_irqsave(&nesadapter->resource_lock, flags); - nic_active_bit = 1 << nesvnic->nic_index; - - if (netdev->flags & IFF_PROMISC) { - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); - nic_active |= nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); - nic_active |= nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); - mc_all_on = 1; - } else if ((netdev->flags & IFF_ALLMULTI) || - (nesvnic->nic_index > 3)) { - set_allmulti(nesdev, nic_active_bit); - mc_all_on = 1; - } else { - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); - nic_active &= ~nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); - nic_active &= ~nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); - } - - nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscuous = %d, All Multicast = %d.\n", - mc_count, !!(netdev->flags & IFF_PROMISC), - !!(netdev->flags & IFF_ALLMULTI)); - if (!mc_all_on) { - char *addrs; - int i; - struct netdev_hw_addr *ha; - - addrs = kmalloc_array(mc_count, ETH_ALEN, GFP_ATOMIC); - if (!addrs) { - set_allmulti(nesdev, nic_active_bit); - goto unlock; - } - i = 0; - netdev_for_each_mc_addr(ha, netdev) - memcpy(get_addr(addrs, i++), ha->addr, ETH_ALEN); - - perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + - pft_entries_preallocated * 0x8; - for (i = 0, mc_index = 0; mc_index < max_pft_entries_avaiable; - mc_index++) { - while (i < mc_count && nesvnic->mcrq_mcast_filter && - ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, - get_addr(addrs, i++))) == 0)); - if (mc_nic_index < 0) - mc_nic_index = nesvnic->nic_index; - while (nesadapter->pft_mcast_map[mc_index] < 16 && - nesadapter->pft_mcast_map[mc_index] != - nesvnic->nic_index && - mc_index < max_pft_entries_avaiable) { - nes_debug(NES_DBG_NIC_RX, - "mc_index=%d skipping nic_index=%d, used for=%d\n", - mc_index, nesvnic->nic_index, - nesadapter->pft_mcast_map[mc_index]); - mc_index++; - } - if (mc_index >= max_pft_entries_avaiable) - break; - if (i < mc_count) { - char *addr = get_addr(addrs, i++); - - nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %pM to register 0x%04X nic_idx=%d\n", - addr, - perfect_filter_register_address+(mc_index * 8), - mc_nic_index); - macaddr_high = ((u8) addr[0]) << 8; - macaddr_high += (u8) addr[1]; - macaddr_low = ((u8) addr[2]) << 24; - macaddr_low += ((u8) addr[3]) << 16; - macaddr_low += ((u8) addr[4]) << 8; - macaddr_low += (u8) addr[5]; - - nes_write_indexed(nesdev, - perfect_filter_register_address+(mc_index * 8), - macaddr_low); - nes_write_indexed(nesdev, - perfect_filter_register_address+4+(mc_index * 8), - (u32)macaddr_high | NES_MAC_ADDR_VALID | - ((((u32)(1<<mc_nic_index)) << 16))); - nesadapter->pft_mcast_map[mc_index] = - nesvnic->nic_index; - } else { - nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n", - perfect_filter_register_address+(mc_index * 8)); - nes_write_indexed(nesdev, - perfect_filter_register_address+4+(mc_index * 8), - 0); - nesadapter->pft_mcast_map[mc_index] = 255; - } - } - kfree(addrs); - /* PFT is not large enough */ - if (i < mc_count) - set_allmulti(nesdev, nic_active_bit); - } - -unlock: - 
spin_unlock_irqrestore(&nesadapter->resource_lock, flags); -} - - -/** - * nes_netdev_change_mtu - */ -static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - u8 jumbomode = 0; - u32 nic_active; - u32 nic_active_bit; - u32 uc_all_active; - u32 mc_all_active; - - netdev->mtu = new_mtu; - nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN; - - if (netdev->mtu > ETH_DATA_LEN) { - jumbomode=1; - } - nes_nic_init_timer_defaults(nesdev, jumbomode); - - if (netif_running(netdev)) { - nic_active_bit = 1 << nesvnic->nic_index; - mc_all_active = nes_read_indexed(nesdev, - NES_IDX_NIC_MULTICAST_ALL) & nic_active_bit; - uc_all_active = nes_read_indexed(nesdev, - NES_IDX_NIC_UNICAST_ALL) & nic_active_bit; - - nes_netdev_stop(netdev); - nes_netdev_open(netdev); - - nic_active = nes_read_indexed(nesdev, - NES_IDX_NIC_MULTICAST_ALL); - nic_active |= mc_all_active; - nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, - nic_active); - - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); - nic_active |= uc_all_active; - nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); - } - - return 0; -} - - -static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { - "Link Change Interrupts", - "Linearized SKBs", - "T/GSO Requests", - "Pause Frames Sent", - "Pause Frames Received", - "Internal Routing Errors", - "SQ SW Dropped SKBs", - "SQ Full", - "Segmented TSO Requests", - "Rx Symbol Errors", - "Rx Jabber Errors", - "Rx Oversized Frames", - "Rx Short Frames", - "Rx Length Errors", - "Rx CRC Errors", - "Rx Port Discard", - "Endnode Rx Discards", - "Endnode Rx Octets", - "Endnode Rx Frames", - "Endnode Tx Octets", - "Endnode Tx Frames", - "Tx Errors", - "mh detected", - "mh pauses", - "Retransmission Count", - "CM Connects", - "CM Accepts", - "Disconnects", - "Connected Events", - "Connect Requests", - "CM Rejects", - "ModifyQP Timeouts", - "CreateQPs", - "SW DestroyQPs", - "DestroyQPs", - "CM Closes", - "CM Packets Sent", - "CM Packets Bounced", - "CM Packets Created", - "CM Packets Rcvd", - "CM Packets Dropped", - "CM Packets Retrans", - "CM Listens Created", - "CM Listens Destroyed", - "CM Backlog Drops", - "CM Loopbacks", - "CM Nodes Created", - "CM Nodes Destroyed", - "CM Accel Drops", - "CM Resets Received", - "Free 4Kpbls", - "Free 256pbls", - "Timer Inits", - "PAU CreateQPs", - "PAU DestroyQPs", -}; -#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset) - - -/** - * nes_netdev_get_sset_count - */ -static int nes_netdev_get_sset_count(struct net_device *netdev, int stringset) -{ - if (stringset == ETH_SS_STATS) - return NES_ETHTOOL_STAT_COUNT; - else - return -EINVAL; -} - - -/** - * nes_netdev_get_strings - */ -static void nes_netdev_get_strings(struct net_device *netdev, u32 stringset, - u8 *ethtool_strings) -{ - if (stringset == ETH_SS_STATS) - memcpy(ethtool_strings, - &nes_ethtool_stringset, - sizeof(nes_ethtool_stringset)); -} - - -/** - * nes_netdev_get_ethtool_stats - */ - -static void nes_netdev_get_ethtool_stats(struct net_device *netdev, - struct ethtool_stats *target_ethtool_stats, u64 *target_stat_values) -{ - u64 u64temp; - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 nic_count; - u32 u32temp; - u32 index = 0; - - target_ethtool_stats->n_stats = NES_ETHTOOL_STAT_COUNT; - target_stat_values[index] = 
nesvnic->nesdev->link_status_interrupts; - target_stat_values[++index] = nesvnic->linearized_skbs; - target_stat_values[++index] = nesvnic->tso_requests; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_TX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->nesdev->mac_pause_frames_sent += u32temp; - target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_sent; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->nesdev->mac_pause_frames_received += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_PORT_RX_DISCARDS + (nesvnic->nesdev->mac_index*0x40)); - nesvnic->nesdev->port_rx_discards += u32temp; - nesvnic->netstats.rx_dropped += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_PORT_TX_DISCARDS + (nesvnic->nesdev->mac_index*0x40)); - nesvnic->nesdev->port_tx_discards += u32temp; - nesvnic->netstats.tx_dropped += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->netstats.rx_dropped += u32temp; - nesvnic->nesdev->mac_rx_errors += u32temp; - nesvnic->nesdev->mac_rx_short_frames += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->netstats.rx_dropped += u32temp; - nesvnic->nesdev->mac_rx_errors += u32temp; - nesvnic->nesdev->mac_rx_oversized_frames += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->netstats.rx_dropped += u32temp; - nesvnic->nesdev->mac_rx_errors += u32temp; - nesvnic->nesdev->mac_rx_jabber_frames += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->netstats.rx_dropped += u32temp; - nesvnic->nesdev->mac_rx_errors += u32temp; - nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->netstats.rx_length_errors += u32temp; - nesvnic->nesdev->mac_rx_errors += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->nesdev->mac_rx_errors += u32temp; - nesvnic->nesdev->mac_rx_crc_errors += u32temp; - nesvnic->netstats.rx_crc_errors += u32temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200)); - nesvnic->nesdev->mac_tx_errors += u32temp; - nesvnic->netstats.tx_errors += u32temp; - - for (nic_count = 0; nic_count < NES_MAX_PORT_COUNT; nic_count++) { - if (nesvnic->qp_nic_index[nic_count] == 0xf) - break; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_RX_DISCARD + - (nesvnic->qp_nic_index[nic_count]*0x200)); - nesvnic->netstats.rx_dropped += u32temp; - nesvnic->endnode_nstat_rx_discard += u32temp; - - u64temp = (u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO + - (nesvnic->qp_nic_index[nic_count]*0x200)); - u64temp += ((u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI + - (nesvnic->qp_nic_index[nic_count]*0x200))) << 32; - - nesvnic->endnode_nstat_rx_octets += u64temp; - nesvnic->netstats.rx_bytes += u64temp; - - u64temp = (u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO + - (nesvnic->qp_nic_index[nic_count]*0x200)); - u64temp += ((u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI + - (nesvnic->qp_nic_index[nic_count]*0x200))) << 
32; - - nesvnic->endnode_nstat_rx_frames += u64temp; - nesvnic->netstats.rx_packets += u64temp; - - u64temp = (u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO + - (nesvnic->qp_nic_index[nic_count]*0x200)); - u64temp += ((u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI + - (nesvnic->qp_nic_index[nic_count]*0x200))) << 32; - - nesvnic->endnode_nstat_tx_octets += u64temp; - nesvnic->netstats.tx_bytes += u64temp; - - u64temp = (u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO + - (nesvnic->qp_nic_index[nic_count]*0x200)); - u64temp += ((u64)nes_read_indexed(nesdev, - NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI + - (nesvnic->qp_nic_index[nic_count]*0x200))) << 32; - - nesvnic->endnode_nstat_tx_frames += u64temp; - nesvnic->netstats.tx_packets += u64temp; - - u32temp = nes_read_indexed(nesdev, - NES_IDX_IPV4_TCP_REXMITS + (nesvnic->qp_nic_index[nic_count]*0x200)); - nesvnic->endnode_ipv4_tcp_retransmits += u32temp; - } - - target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_received; - target_stat_values[++index] = nesdev->nesadapter->nic_rx_eth_route_err; - target_stat_values[++index] = nesvnic->tx_sw_dropped; - target_stat_values[++index] = nesvnic->sq_full; - target_stat_values[++index] = nesvnic->segmented_tso_requests; - target_stat_values[++index] = nesvnic->nesdev->mac_rx_symbol_err_frames; - target_stat_values[++index] = nesvnic->nesdev->mac_rx_jabber_frames; - target_stat_values[++index] = nesvnic->nesdev->mac_rx_oversized_frames; - target_stat_values[++index] = nesvnic->nesdev->mac_rx_short_frames; - target_stat_values[++index] = nesvnic->netstats.rx_length_errors; - target_stat_values[++index] = nesvnic->nesdev->mac_rx_crc_errors; - target_stat_values[++index] = nesvnic->nesdev->port_rx_discards; - target_stat_values[++index] = nesvnic->endnode_nstat_rx_discard; - target_stat_values[++index] = nesvnic->endnode_nstat_rx_octets; - target_stat_values[++index] = nesvnic->endnode_nstat_rx_frames; - target_stat_values[++index] = nesvnic->endnode_nstat_tx_octets; - target_stat_values[++index] = nesvnic->endnode_nstat_tx_frames; - target_stat_values[++index] = nesvnic->nesdev->mac_tx_errors; - target_stat_values[++index] = mh_detected; - target_stat_values[++index] = mh_pauses_sent; - target_stat_values[++index] = nesvnic->endnode_ipv4_tcp_retransmits; - target_stat_values[++index] = atomic_read(&cm_connects); - target_stat_values[++index] = atomic_read(&cm_accepts); - target_stat_values[++index] = atomic_read(&cm_disconnects); - target_stat_values[++index] = atomic_read(&cm_connecteds); - target_stat_values[++index] = atomic_read(&cm_connect_reqs); - target_stat_values[++index] = atomic_read(&cm_rejects); - target_stat_values[++index] = atomic_read(&mod_qp_timouts); - target_stat_values[++index] = atomic_read(&qps_created); - target_stat_values[++index] = atomic_read(&sw_qps_destroyed); - target_stat_values[++index] = atomic_read(&qps_destroyed); - target_stat_values[++index] = atomic_read(&cm_closes); - target_stat_values[++index] = cm_packets_sent; - target_stat_values[++index] = cm_packets_bounced; - target_stat_values[++index] = cm_packets_created; - target_stat_values[++index] = cm_packets_received; - target_stat_values[++index] = cm_packets_dropped; - target_stat_values[++index] = cm_packets_retrans; - target_stat_values[++index] = atomic_read(&cm_listens_created); - target_stat_values[++index] = atomic_read(&cm_listens_destroyed); - target_stat_values[++index] = cm_backlog_drops; - target_stat_values[++index] = 
atomic_read(&cm_loopbacks); - target_stat_values[++index] = atomic_read(&cm_nodes_created); - target_stat_values[++index] = atomic_read(&cm_nodes_destroyed); - target_stat_values[++index] = atomic_read(&cm_accel_dropped_pkts); - target_stat_values[++index] = atomic_read(&cm_resets_recvd); - target_stat_values[++index] = nesadapter->free_4kpbl; - target_stat_values[++index] = nesadapter->free_256pbl; - target_stat_values[++index] = int_mod_timer_init; - target_stat_values[++index] = atomic_read(&pau_qps_created); - target_stat_values[++index] = atomic_read(&pau_qps_destroyed); -} - -/** - * nes_netdev_get_drvinfo - */ -static void nes_netdev_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *drvinfo) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; - - strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); - strlcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev), - sizeof(drvinfo->bus_info)); - snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%u.%u", nesadapter->firmware_version >> 16, - nesadapter->firmware_version & 0x000000ff); - strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); -} - - -/** - * nes_netdev_set_coalesce - */ -static int nes_netdev_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *et_coalesce) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; - unsigned long flags; - - spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); - if (et_coalesce->rx_max_coalesced_frames_low) { - shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low; - } - if (et_coalesce->rx_max_coalesced_frames_irq) { - shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq; - } - if (et_coalesce->rx_max_coalesced_frames_high) { - shared_timer->threshold_high = et_coalesce->rx_max_coalesced_frames_high; - } - if (et_coalesce->rx_coalesce_usecs_low) { - shared_timer->timer_in_use_min = et_coalesce->rx_coalesce_usecs_low; - } - if (et_coalesce->rx_coalesce_usecs_high) { - shared_timer->timer_in_use_max = et_coalesce->rx_coalesce_usecs_high; - } - spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); - - /* using this to drive total interrupt moderation */ - nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq; - if (et_coalesce->use_adaptive_rx_coalesce) { - nesadapter->et_use_adaptive_rx_coalesce = 1; - nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; - nesadapter->et_rx_coalesce_usecs_irq = 0; - if (et_coalesce->pkt_rate_low) { - nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low; - } - } else { - nesadapter->et_use_adaptive_rx_coalesce = 0; - nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; - if (nesadapter->et_rx_coalesce_usecs_irq) { - nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, - 0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8))); - } - } - return 0; -} - - -/** - * nes_netdev_get_coalesce - */ -static int nes_netdev_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *et_coalesce) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct ethtool_coalesce temp_et_coalesce; - struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; - unsigned long flags; - - 
memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce)); - temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq; - temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce; - temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval; - temp_et_coalesce.pkt_rate_low = nesadapter->et_pkt_rate_low; - spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); - temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low; - temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target; - temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high; - temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min; - temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max; - if (nesadapter->et_use_adaptive_rx_coalesce) { - temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use; - } - spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); - memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce)); - return 0; -} - - -/** - * nes_netdev_get_pauseparam - */ -static void nes_netdev_get_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *et_pauseparam) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - - et_pauseparam->autoneg = 0; - et_pauseparam->rx_pause = (nesvnic->nesdev->disable_rx_flow_control == 0) ? 1:0; - et_pauseparam->tx_pause = (nesvnic->nesdev->disable_tx_flow_control == 0) ? 1:0; -} - - -/** - * nes_netdev_set_pauseparam - */ -static int nes_netdev_set_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *et_pauseparam) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - u32 u32temp; - - if (et_pauseparam->autoneg) { - /* TODO: should return unsupported */ - return 0; - } - if ((et_pauseparam->tx_pause == 1) && (nesdev->disable_tx_flow_control == 1)) { - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); - u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; - nes_write_indexed(nesdev, - NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp); - nesdev->disable_tx_flow_control = 0; - } else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) { - u32temp = nes_read_indexed(nesdev, - NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); - u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; - nes_write_indexed(nesdev, - NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp); - nesdev->disable_tx_flow_control = 1; - } - if ((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) { - u32temp = nes_read_indexed(nesdev, - NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40)); - u32temp &= ~NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE; - nes_write_indexed(nesdev, - NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp); - nesdev->disable_rx_flow_control = 0; - } else if ((et_pauseparam->rx_pause == 0) && (nesdev->disable_rx_flow_control == 0)) { - u32temp = nes_read_indexed(nesdev, - NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40)); - u32temp |= NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE; - nes_write_indexed(nesdev, - NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp); - nesdev->disable_rx_flow_control = 1; - } - - return 0; -} - - -/** - * nes_netdev_get_settings - */ -static int nes_netdev_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct 
nes_adapter *nesadapter = nesdev->nesadapter; - u32 mac_index = nesdev->mac_index; - u8 phy_type = nesadapter->phy_type[mac_index]; - u8 phy_index = nesadapter->phy_index[mac_index]; - u16 phy_data; - u32 supported, advertising; - - cmd->base.duplex = DUPLEX_FULL; - cmd->base.port = PORT_MII; - - if (nesadapter->OneG_Mode) { - cmd->base.speed = SPEED_1000; - if (phy_type == NES_PHY_TYPE_PUMA_1G) { - supported = SUPPORTED_1000baseT_Full; - advertising = ADVERTISED_1000baseT_Full; - cmd->base.autoneg = AUTONEG_DISABLE; - cmd->base.phy_address = mac_index; - } else { - unsigned long flags; - - supported = SUPPORTED_1000baseT_Full - | SUPPORTED_Autoneg; - advertising = ADVERTISED_1000baseT_Full - | ADVERTISED_Autoneg; - spin_lock_irqsave(&nesadapter->phy_lock, flags); - nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); - if (phy_data & 0x1000) - cmd->base.autoneg = AUTONEG_ENABLE; - else - cmd->base.autoneg = AUTONEG_DISABLE; - cmd->base.phy_address = phy_index; - } - ethtool_convert_legacy_u32_to_link_mode( - cmd->link_modes.supported, supported); - ethtool_convert_legacy_u32_to_link_mode( - cmd->link_modes.advertising, advertising); - return 0; - } - if ((phy_type == NES_PHY_TYPE_ARGUS) || - (phy_type == NES_PHY_TYPE_SFP_D) || - (phy_type == NES_PHY_TYPE_KR)) { - cmd->base.port = PORT_FIBRE; - supported = SUPPORTED_FIBRE; - advertising = ADVERTISED_FIBRE; - cmd->base.phy_address = phy_index; - } else { - supported = SUPPORTED_10000baseT_Full; - advertising = ADVERTISED_10000baseT_Full; - cmd->base.phy_address = mac_index; - } - cmd->base.speed = SPEED_10000; - cmd->base.autoneg = AUTONEG_DISABLE; - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); - - return 0; -} - - -/** - * nes_netdev_set_settings - */ -static int -nes_netdev_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *cmd) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - - if ((nesadapter->OneG_Mode) && - (nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) { - unsigned long flags; - u16 phy_data; - u8 phy_index = nesadapter->phy_index[nesdev->mac_index]; - - spin_lock_irqsave(&nesadapter->phy_lock, flags); - nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); - if (cmd->base.autoneg) { - /* Turn on Full duplex, Autoneg, and restart autonegotiation */ - phy_data |= 0x1300; - } else { - /* Turn off autoneg */ - phy_data &= ~0x1000; - } - nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data); - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); - } - - return 0; -} - - -static const struct ethtool_ops nes_ethtool_ops = { - .get_link = ethtool_op_get_link, - .get_strings = nes_netdev_get_strings, - .get_sset_count = nes_netdev_get_sset_count, - .get_ethtool_stats = nes_netdev_get_ethtool_stats, - .get_drvinfo = nes_netdev_get_drvinfo, - .get_coalesce = nes_netdev_get_coalesce, - .set_coalesce = nes_netdev_set_coalesce, - .get_pauseparam = nes_netdev_get_pauseparam, - .set_pauseparam = nes_netdev_set_pauseparam, - .get_link_ksettings = nes_netdev_get_link_ksettings, - .set_link_ksettings = nes_netdev_set_link_ksettings, -}; - -static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features) -{ - struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 
u32temp; - unsigned long flags; - - spin_lock_irqsave(&nesadapter->phy_lock, flags); - - nes_debug(NES_DBG_NETDEV, "%s: %s\n", __func__, netdev->name); - - /* Enable/Disable VLAN Stripping */ - u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG); - if (features & NETIF_F_HW_VLAN_CTAG_RX) - u32temp &= 0xfdffffff; - else - u32temp |= 0x02000000; - - nes_write_indexed(nesdev, NES_IDX_PCIX_DIAG, u32temp); - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); -} - -static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_features_t features) -{ - /* - * Since there is no support for separate rx/tx vlan accel - * enable/disable make sure tx flag is always in same state as rx. - */ - if (features & NETIF_F_HW_VLAN_CTAG_RX) - features |= NETIF_F_HW_VLAN_CTAG_TX; - else - features &= ~NETIF_F_HW_VLAN_CTAG_TX; - - return features; -} - -static int nes_set_features(struct net_device *netdev, netdev_features_t features) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - u32 changed = netdev->features ^ features; - - if (changed & NETIF_F_HW_VLAN_CTAG_RX) - nes_vlan_mode(netdev, nesdev, features); - - return 0; -} - -static const struct net_device_ops nes_netdev_ops = { - .ndo_open = nes_netdev_open, - .ndo_stop = nes_netdev_stop, - .ndo_start_xmit = nes_netdev_start_xmit, - .ndo_get_stats = nes_netdev_get_stats, - .ndo_tx_timeout = nes_netdev_tx_timeout, - .ndo_set_mac_address = nes_netdev_set_mac_address, - .ndo_set_rx_mode = nes_netdev_set_multicast_list, - .ndo_change_mtu = nes_netdev_change_mtu, - .ndo_validate_addr = eth_validate_addr, - .ndo_fix_features = nes_fix_features, - .ndo_set_features = nes_set_features, -}; - -/** - * nes_netdev_init - initialize network device - */ -struct net_device *nes_netdev_init(struct nes_device *nesdev, - void __iomem *mmio_addr) -{ - u64 u64temp; - struct nes_vnic *nesvnic; - struct net_device *netdev; - struct nic_qp_map *curr_qp_map; - u8 phy_type = nesdev->nesadapter->phy_type[nesdev->mac_index]; - - netdev = alloc_etherdev(sizeof(struct nes_vnic)); - if (!netdev) { - printk(KERN_ERR PFX "nesvnic etherdev alloc failed"); - return NULL; - } - nesvnic = netdev_priv(netdev); - - nes_debug(NES_DBG_INIT, "netdev = %p, %s\n", netdev, netdev->name); - - SET_NETDEV_DEV(netdev, &nesdev->pcidev->dev); - - netdev->watchdog_timeo = NES_TX_TIMEOUT; - netdev->irq = nesdev->pcidev->irq; - netdev->max_mtu = NES_MAX_MTU; - netdev->hard_header_len = ETH_HLEN; - netdev->addr_len = ETH_ALEN; - netdev->type = ARPHRD_ETHER; - netdev->netdev_ops = &nes_netdev_ops; - netdev->ethtool_ops = &nes_ethtool_ops; - netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128); - nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n"); - - /* Fill in the port structure */ - nesvnic->netdev = netdev; - nesvnic->nesdev = nesdev; - nesvnic->msg_enable = netif_msg_init(debug, default_msg); - nesvnic->netdev_index = nesdev->netdev_count; - nesvnic->perfect_filter_index = nesdev->nesadapter->netdev_count; - nesvnic->max_frame_size = netdev->mtu + netdev->hard_header_len + VLAN_HLEN; - - curr_qp_map = nic_qp_mapping_per_function[PCI_FUNC(nesdev->pcidev->devfn)]; - nesvnic->nic.qp_id = curr_qp_map[nesdev->netdev_count].qpid; - nesvnic->nic_index = curr_qp_map[nesdev->netdev_count].nic_index; - nesvnic->logical_port = curr_qp_map[nesdev->netdev_count].logical_port; - - /* Setup the burned in MAC address */ - u64temp = (u64)nesdev->nesadapter->mac_addr_low; - u64temp += ((u64)nesdev->nesadapter->mac_addr_high) << 32; - 
u64temp += nesvnic->nic_index; - netdev->dev_addr[0] = (u8)(u64temp>>40); - netdev->dev_addr[1] = (u8)(u64temp>>32); - netdev->dev_addr[2] = (u8)(u64temp>>24); - netdev->dev_addr[3] = (u8)(u64temp>>16); - netdev->dev_addr[4] = (u8)(u64temp>>8); - netdev->dev_addr[5] = (u8)u64temp; - - netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX; - if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) - netdev->hw_features |= NETIF_F_TSO; - - netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX; - - nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," - " nic_index = %d, logical_port = %d, mac_index = %d.\n", - nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id, - nesvnic->nic_index, nesvnic->logical_port, nesdev->mac_index); - - if (nesvnic->nesdev->nesadapter->port_count == 1 && - nesvnic->nesdev->nesadapter->adapter_fcn_count == 1) { - - nesvnic->qp_nic_index[0] = nesvnic->nic_index; - nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1; - if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) { - nesvnic->qp_nic_index[2] = 0xf; - nesvnic->qp_nic_index[3] = 0xf; - } else { - nesvnic->qp_nic_index[2] = nesvnic->nic_index + 2; - nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3; - } - } else { - if (nesvnic->nesdev->nesadapter->port_count == 2 || - (nesvnic->nesdev->nesadapter->port_count == 1 && - nesvnic->nesdev->nesadapter->adapter_fcn_count == 2)) { - nesvnic->qp_nic_index[0] = nesvnic->nic_index; - nesvnic->qp_nic_index[1] = nesvnic->nic_index - + 2; - nesvnic->qp_nic_index[2] = 0xf; - nesvnic->qp_nic_index[3] = 0xf; - } else { - nesvnic->qp_nic_index[0] = nesvnic->nic_index; - nesvnic->qp_nic_index[1] = 0xf; - nesvnic->qp_nic_index[2] = 0xf; - nesvnic->qp_nic_index[3] = 0xf; - } - } - nesvnic->next_qp_nic_index = 0; - - if (nesdev->netdev_count == 0) { - nesvnic->rdma_enabled = 1; - } else { - nesvnic->rdma_enabled = 0; - } - nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id; - timer_setup(&nesvnic->event_timer, NULL, 0); - spin_lock_init(&nesvnic->tx_lock); - spin_lock_init(&nesvnic->port_ibevent_lock); - nesdev->netdev[nesdev->netdev_count] = netdev; - - nes_debug(NES_DBG_INIT, "Adding nesvnic (%p) to the adapters nesvnic_list for MAC%d.\n", - nesvnic, nesdev->mac_index); - list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]); - - if ((nesdev->netdev_count == 0) && - ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) || - ((phy_type == NES_PHY_TYPE_PUMA_1G) && - (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) || - ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) { - u32 u32temp; - u32 link_mask = 0; - u32 link_val = 0; - u16 temp_phy_data; - u16 phy_data = 0; - unsigned long flags; - - u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200 * (nesdev->mac_index & 1))); - if (phy_type != NES_PHY_TYPE_PUMA_1G) { - u32temp |= 0x00200000; - nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200 * (nesdev->mac_index & 1)), u32temp); - } - - /* Check and set linkup here. 
This is for back to back */ - /* configuration where second port won't get link interrupt */ - switch (phy_type) { - case NES_PHY_TYPE_PUMA_1G: - if (nesdev->mac_index < 2) { - link_mask = 0x01010000; - link_val = 0x01010000; - } else { - link_mask = 0x02020000; - link_val = 0x02020000; - } - break; - case NES_PHY_TYPE_SFP_D: - spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); - nes_read_10G_phy_reg(nesdev, - nesdev->nesadapter->phy_index[nesdev->mac_index], - 1, 0x9003); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - nes_read_10G_phy_reg(nesdev, - nesdev->nesadapter->phy_index[nesdev->mac_index], - 3, 0x0021); - nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - nes_read_10G_phy_reg(nesdev, - nesdev->nesadapter->phy_index[nesdev->mac_index], - 3, 0x0021); - phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); - phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; - break; - default: - link_mask = 0x0f1f0000; - link_val = 0x0f0f0000; - break; - } - - u32temp = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200 * (nesdev->mac_index & 1))); - - if (phy_type == NES_PHY_TYPE_SFP_D) { - if (phy_data & 0x0004) - nesvnic->linkup = 1; - } else { - if ((u32temp & link_mask) == link_val) - nesvnic->linkup = 1; - } - - /* clear the MAC interrupt status, assumes direct logical to physical mapping */ - u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); - nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp); - nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp); - - nes_init_phy(nesdev); - } - - nes_vlan_mode(netdev, nesdev, netdev->features); - - return netdev; -} - - -/** - * nes_netdev_destroy - destroy network device structure - */ -void nes_netdev_destroy(struct net_device *netdev) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - - /* make sure 'stop' method is called by Linux stack */ - /* nes_netdev_stop(netdev); */ - - list_del(&nesvnic->list); - - if (nesvnic->of_device_registered) { - nes_destroy_ofa_device(nesvnic->nesibdev); - } - - free_netdev(netdev); -} - - -/** - * nes_nic_cm_xmit -- CM calls this to send out pkts - */ -int nes_nic_cm_xmit(struct sk_buff *skb, struct net_device *netdev) -{ - int ret; - - skb->dev = netdev; - ret = dev_queue_xmit(skb); - if (ret) { - nes_debug(NES_DBG_CM, "Bad return code from dev_queue_xmit %d\n", ret); - } - - return ret; -} diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c deleted file mode 100644 index 21b4a8373acf..000000000000 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ /dev/null @@ -1,916 +0,0 @@ -/* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/ethtool.h> -#include <linux/mii.h> -#include <linux/if_vlan.h> -#include <linux/slab.h> -#include <linux/crc32.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <linux/init.h> -#include <linux/kernel.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/byteorder.h> - -#include "nes.h" - -static u16 nes_read16_eeprom(void __iomem *addr, u16 offset); - -u32 mh_detected; -u32 mh_pauses_sent; - -static u32 nes_set_pau(struct nes_device *nesdev) -{ - u32 ret = 0; - u32 counter; - - nes_write_indexed(nesdev, NES_IDX_GPR2, NES_ENABLE_PAU); - nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1); - - for (counter = 0; counter < NES_PAU_COUNTER; counter++) { - udelay(30); - if (!nes_read_indexed(nesdev, NES_IDX_GPR2)) { - printk(KERN_INFO PFX "PAU is supported.\n"); - break; - } - nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1); - } - if (counter == NES_PAU_COUNTER) { - printk(KERN_INFO PFX "PAU is not supported.\n"); - return -EPERM; - } - return ret; -} - -/** - * nes_read_eeprom_values - - */ -int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesadapter) -{ - u32 mac_addr_low; - u16 mac_addr_high; - u16 eeprom_data; - u16 eeprom_offset; - u16 next_section_address; - u16 sw_section_ver; - u8 major_ver = 0; - u8 minor_ver = 0; - - /* TODO: deal with EEPROM endian issues */ - if (nesadapter->firmware_eeprom_offset == 0) { - /* Read the EEPROM Parameters */ - eeprom_data = nes_read16_eeprom(nesdev->regs, 0); - nes_debug(NES_DBG_HW, "EEPROM Offset 0 = 0x%04X\n", eeprom_data); - eeprom_offset = 2 + (((eeprom_data & 0x007f) << 3) << - ((eeprom_data & 0x0080) >> 7)); - nes_debug(NES_DBG_HW, "Firmware Offset = 0x%04X\n", eeprom_offset); - nesadapter->firmware_eeprom_offset = eeprom_offset; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4); - if (eeprom_data != 0x5746) { - nes_debug(NES_DBG_HW, "Not a valid Firmware Image = 0x%04X\n", eeprom_data); - return -1; - } - - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); - nes_debug(NES_DBG_HW, "EEPROM Offset %u = 0x%04X\n", - eeprom_offset + 2, eeprom_data); - eeprom_offset += ((eeprom_data & 0x00ff) << 3) << ((eeprom_data & 0x0100) >> 8); - nes_debug(NES_DBG_HW, "Software Offset = 0x%04X\n", eeprom_offset); - nesadapter->software_eeprom_offset = eeprom_offset; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4); - if (eeprom_data != 0x5753) { - printk("Not a valid Software Image = 0x%04X\n", eeprom_data); - return -1; - } - sw_section_ver = nes_read16_eeprom(nesdev->regs, nesadapter->software_eeprom_offset + 6); - nes_debug(NES_DBG_HW, "Software section version number = 0x%04X\n", - 
sw_section_ver); - - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); - nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", - eeprom_offset + 2, eeprom_data); - next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) << - ((eeprom_data & 0x0100) >> 8)); - eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); - if (eeprom_data != 0x414d) { - nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n", - eeprom_data); - goto no_fw_rev; - } - eeprom_offset = next_section_address; - - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); - nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", - eeprom_offset + 2, eeprom_data); - next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) << - ((eeprom_data & 0x0100) >> 8)); - eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); - if (eeprom_data != 0x4f52) { - nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x4f52 but was 0x%04X\n", - eeprom_data); - goto no_fw_rev; - } - eeprom_offset = next_section_address; - - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); - nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", - eeprom_offset + 2, eeprom_data); - next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); - eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); - if (eeprom_data != 0x5746) { - nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5746 but was 0x%04X\n", - eeprom_data); - goto no_fw_rev; - } - eeprom_offset = next_section_address; - - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); - nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", - eeprom_offset + 2, eeprom_data); - next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); - eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); - if (eeprom_data != 0x5753) { - nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5753 but was 0x%04X\n", - eeprom_data); - goto no_fw_rev; - } - eeprom_offset = next_section_address; - - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); - nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", - eeprom_offset + 2, eeprom_data); - next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); - eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); - if (eeprom_data != 0x414d) { - nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n", - eeprom_data); - goto no_fw_rev; - } - eeprom_offset = next_section_address; - - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); - nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", - eeprom_offset + 2, eeprom_data); - next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3); - eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4); - if (eeprom_data != 0x464e) { - nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x464e but was 0x%04X\n", - eeprom_data); - goto no_fw_rev; - } - eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 8); - printk(PFX "Firmware version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data); - major_ver = (u8)(eeprom_data >> 8); - minor_ver = (u8)(eeprom_data); - - if (nes_drv_opt & NES_DRV_OPT_DISABLE_VIRT_WQ) { - nes_debug(NES_DBG_HW, "Virtual WQs have been disabled\n"); - } else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && 
(major_ver != 255))) { - nesadapter->virtwq = 1; - } - if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3)) - nesadapter->send_term_ok = 1; - - if (nes_drv_opt & NES_DRV_OPT_ENABLE_PAU) { - if (!nes_set_pau(nesdev)) - nesadapter->allow_unaligned_fpdus = 1; - } - - nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) + - (u32)((u8)eeprom_data); - - eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 10); - printk(PFX "EEPROM version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data); - nesadapter->eeprom_version = (((u32)(u8)(eeprom_data>>8)) << 16) + - (u32)((u8)eeprom_data); - -no_fw_rev: - /* eeprom is valid */ - eeprom_offset = nesadapter->software_eeprom_offset; - eeprom_offset += 8; - nesadapter->netdev_max = (u8)nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - mac_addr_high = nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - mac_addr_low = (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - mac_addr_low <<= 16; - mac_addr_low += (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "Base MAC Address = 0x%04X%08X\n", - mac_addr_high, mac_addr_low); - nes_debug(NES_DBG_HW, "MAC Address count = %u\n", nesadapter->netdev_max); - - nesadapter->mac_addr_low = mac_addr_low; - nesadapter->mac_addr_high = mac_addr_high; - - /* Read the Phy Type array */ - eeprom_offset += 10; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - nesadapter->phy_type[0] = (u8)(eeprom_data >> 8); - nesadapter->phy_type[1] = (u8)eeprom_data; - - /* Read the port array */ - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - nesadapter->phy_type[2] = (u8)(eeprom_data >> 8); - nesadapter->phy_type[3] = (u8)eeprom_data; - /* port_count is set by soft reset reg */ - nes_debug(NES_DBG_HW, "port_count = %u, port 0 -> %u, port 1 -> %u," - " port 2 -> %u, port 3 -> %u\n", - nesadapter->port_count, - nesadapter->phy_type[0], nesadapter->phy_type[1], - nesadapter->phy_type[2], nesadapter->phy_type[3]); - - /* Read PD config array */ - eeprom_offset += 10; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - nesadapter->pd_config_size[0] = eeprom_data; - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - nesadapter->pd_config_base[0] = eeprom_data; - nes_debug(NES_DBG_HW, "PD0 config, size=0x%04x, base=0x%04x\n", - nesadapter->pd_config_size[0], nesadapter->pd_config_base[0]); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - nesadapter->pd_config_size[1] = eeprom_data; - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - nesadapter->pd_config_base[1] = eeprom_data; - nes_debug(NES_DBG_HW, "PD1 config, size=0x%04x, base=0x%04x\n", - nesadapter->pd_config_size[1], nesadapter->pd_config_base[1]); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - nesadapter->pd_config_size[2] = eeprom_data; - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - nesadapter->pd_config_base[2] = eeprom_data; - nes_debug(NES_DBG_HW, "PD2 config, size=0x%04x, base=0x%04x\n", - nesadapter->pd_config_size[2], nesadapter->pd_config_base[2]); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - nesadapter->pd_config_size[3] = eeprom_data; - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - 
nesadapter->pd_config_base[3] = eeprom_data; - nes_debug(NES_DBG_HW, "PD3 config, size=0x%04x, base=0x%04x\n", - nesadapter->pd_config_size[3], nesadapter->pd_config_base[3]); - - /* Read Rx Pool Size */ - eeprom_offset += 22; /* 46 */ - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - nesadapter->rx_pool_size = (((u32)eeprom_data) << 16) + - nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "rx_pool_size = 0x%08X\n", nesadapter->rx_pool_size); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - nesadapter->tx_pool_size = (((u32)eeprom_data) << 16) + - nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "tx_pool_size = 0x%08X\n", nesadapter->tx_pool_size); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - nesadapter->rx_threshold = (((u32)eeprom_data) << 16) + - nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "rx_threshold = 0x%08X\n", nesadapter->rx_threshold); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - nesadapter->tcp_timer_core_clk_divisor = (((u32)eeprom_data) << 16) + - nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "tcp_timer_core_clk_divisor = 0x%08X\n", - nesadapter->tcp_timer_core_clk_divisor); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - nesadapter->iwarp_config = (((u32)eeprom_data) << 16) + - nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "iwarp_config = 0x%08X\n", nesadapter->iwarp_config); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - nesadapter->cm_config = (((u32)eeprom_data) << 16) + - nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "cm_config = 0x%08X\n", nesadapter->cm_config); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - nesadapter->sws_timer_config = (((u32)eeprom_data) << 16) + - nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "sws_timer_config = 0x%08X\n", nesadapter->sws_timer_config); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - nesadapter->tcp_config1 = (((u32)eeprom_data) << 16) + - nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "tcp_config1 = 0x%08X\n", nesadapter->tcp_config1); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - nesadapter->wqm_wat = (((u32)eeprom_data) << 16) + - nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "wqm_wat = 0x%08X\n", nesadapter->wqm_wat); - - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - eeprom_offset += 2; - nesadapter->core_clock = (((u32)eeprom_data) << 16) + - nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "core_clock = 0x%08X\n", nesadapter->core_clock); - - if ((sw_section_ver) && (nesadapter->hw_rev != NE020_REV)) { - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - nesadapter->phy_index[0] = (eeprom_data & 0xff00)>>8; - nesadapter->phy_index[1] = eeprom_data & 0x00ff; - eeprom_offset += 2; - eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - 
nesadapter->phy_index[2] = (eeprom_data & 0xff00)>>8; - nesadapter->phy_index[3] = eeprom_data & 0x00ff; - } else { - nesadapter->phy_index[0] = 4; - nesadapter->phy_index[1] = 5; - nesadapter->phy_index[2] = 6; - nesadapter->phy_index[3] = 7; - } - nes_debug(NES_DBG_HW, "Phy address map = 0 > %u, 1 > %u, 2 > %u, 3 > %u\n", - nesadapter->phy_index[0],nesadapter->phy_index[1], - nesadapter->phy_index[2],nesadapter->phy_index[3]); - } - - return 0; -} - - -/** - * nes_read16_eeprom - */ -static u16 nes_read16_eeprom(void __iomem *addr, u16 offset) -{ - writel(NES_EEPROM_READ_REQUEST + (offset >> 1), - (void __iomem *)addr + NES_EEPROM_COMMAND); - - do { - } while (readl((void __iomem *)addr + NES_EEPROM_COMMAND) & - NES_EEPROM_READ_REQUEST); - - return readw((void __iomem *)addr + NES_EEPROM_DATA); -} - - -/** - * nes_write_1G_phy_reg - */ -void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 data) -{ - u32 u32temp; - u32 counter; - - nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, - 0x50020000 | data | ((u32)phy_reg << 18) | ((u32)phy_addr << 23)); - for (counter = 0; counter < 100 ; counter++) { - udelay(30); - u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); - if (u32temp & 1) { - /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */ - nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); - break; - } - } - if (!(u32temp & 1)) - nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", - u32temp); -} - - -/** - * nes_read_1G_phy_reg - * This routine only issues the read, the data must be read - * separately. - */ -void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 *data) -{ - u32 u32temp; - u32 counter; - - /* nes_debug(NES_DBG_PHY, "phy addr = %d, mac_index = %d\n", - phy_addr, nesdev->mac_index); */ - - nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, - 0x60020000 | ((u32)phy_reg << 18) | ((u32)phy_addr << 23)); - for (counter = 0; counter < 100 ; counter++) { - udelay(30); - u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); - if (u32temp & 1) { - /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */ - nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); - break; - } - } - if (!(u32temp & 1)) { - nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", - u32temp); - *data = 0xffff; - } else { - *data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - } -} - - -/** - * nes_write_10G_phy_reg - */ -void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_addr, u8 dev_addr, u16 phy_reg, - u16 data) -{ - u32 port_addr; - u32 u32temp; - u32 counter; - - port_addr = phy_addr; - - /* set address */ - nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, - 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); - for (counter = 0; counter < 100 ; counter++) { - udelay(30); - u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); - if (u32temp & 1) { - nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); - break; - } - } - if (!(u32temp & 1)) - nes_debug(NES_DBG_PHY, "Phy is not responding. 
interrupt status = 0x%X.\n", - u32temp); - - /* set data */ - nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, - 0x10020000 | (u32)data | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); - for (counter = 0; counter < 100 ; counter++) { - udelay(30); - u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); - if (u32temp & 1) { - nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); - break; - } - } - if (!(u32temp & 1)) - nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", - u32temp); -} - - -/** - * nes_read_10G_phy_reg - * This routine only issues the read, the data must be read - * separately. - */ -void nes_read_10G_phy_reg(struct nes_device *nesdev, u8 phy_addr, u8 dev_addr, u16 phy_reg) -{ - u32 port_addr; - u32 u32temp; - u32 counter; - - port_addr = phy_addr; - - /* set address */ - nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, - 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); - for (counter = 0; counter < 100 ; counter++) { - udelay(30); - u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); - if (u32temp & 1) { - nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); - break; - } - } - if (!(u32temp & 1)) - nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", - u32temp); - - /* issue read */ - nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, - 0x30020000 | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); - for (counter = 0; counter < 100 ; counter++) { - udelay(30); - u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); - if (u32temp & 1) { - nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1); - break; - } - } - if (!(u32temp & 1)) - nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n", - u32temp); -} - - -/** - * nes_get_cqp_request - */ -struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev) -{ - unsigned long flags; - struct nes_cqp_request *cqp_request = NULL; - - if (!list_empty(&nesdev->cqp_avail_reqs)) { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - if (!list_empty(&nesdev->cqp_avail_reqs)) { - cqp_request = list_entry(nesdev->cqp_avail_reqs.next, - struct nes_cqp_request, list); - list_del_init(&cqp_request->list); - } - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - if (cqp_request == NULL) { - cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_ATOMIC); - if (cqp_request) { - cqp_request->dynamic = 1; - INIT_LIST_HEAD(&cqp_request->list); - } - } - - if (cqp_request) { - init_waitqueue_head(&cqp_request->waitq); - cqp_request->waiting = 0; - cqp_request->request_done = 0; - cqp_request->callback = 0; - init_waitqueue_head(&cqp_request->waitq); - nes_debug(NES_DBG_CQP, "Got cqp request %p from the available list \n", - cqp_request); - } else - printk(KERN_ERR PFX "%s: Could not allocated a CQP request.\n", - __func__); - - return cqp_request; -} - -void nes_free_cqp_request(struct nes_device *nesdev, - struct nes_cqp_request *cqp_request) -{ - unsigned long flags; - - nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", - cqp_request, - le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f); - - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } -} - -void nes_put_cqp_request(struct nes_device *nesdev, - struct nes_cqp_request *cqp_request) -{ - if (atomic_dec_and_test(&cqp_request->refcount)) 
- nes_free_cqp_request(nesdev, cqp_request); -} - - -/** - * nes_post_cqp_request - */ -void nes_post_cqp_request(struct nes_device *nesdev, - struct nes_cqp_request *cqp_request) -{ - struct nes_hw_cqp_wqe *cqp_wqe; - unsigned long flags; - u32 cqp_head; - u64 u64temp; - u32 opcode; - int ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX; - - spin_lock_irqsave(&nesdev->cqp.lock, flags); - - if (((((nesdev->cqp.sq_tail+(nesdev->cqp.sq_size*2))-nesdev->cqp.sq_head) & - (nesdev->cqp.sq_size - 1)) != 1) - && (list_empty(&nesdev->cqp_pending_reqs))) { - cqp_head = nesdev->cqp.sq_head++; - nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); - opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]); - if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT) - ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX; - barrier(); - u64temp = (unsigned long)cqp_request; - set_wqe_64bit_value(cqp_wqe->wqe_words, ctx_index, u64temp); - nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ," - " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u," - " waiting = %d, refcount = %d.\n", - opcode & NES_CQP_OPCODE_MASK, - le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request, - nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, - cqp_request->waiting, atomic_read(&cqp_request->refcount)); - - barrier(); - - /* Ring doorbell (1 WQEs) */ - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); - - barrier(); - } else { - nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X), line 1 = 0x%08X" - " put on the pending queue.\n", - cqp_request, - le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, - le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_ID_IDX])); - list_add_tail(&cqp_request->list, &nesdev->cqp_pending_reqs); - } - - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - - return; -} - -/** - * nes_arp_table - */ -int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 action) -{ - struct nes_adapter *nesadapter = nesdev->nesadapter; - int arp_index; - int err = 0; - __be32 tmp_addr; - - for (arp_index = 0; (u32) arp_index < nesadapter->arp_table_size; arp_index++) { - if (nesadapter->arp_table[arp_index].ip_addr == ip_addr) - break; - } - - if (action == NES_ARP_ADD) { - if (arp_index != nesadapter->arp_table_size) { - return -1; - } - - arp_index = 0; - err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps, - nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index, NES_RESOURCE_ARP); - if (err) { - nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err); - return err; - } - nes_debug(NES_DBG_NETDEV, "ADD, arp_index=%d\n", arp_index); - - nesadapter->arp_table[arp_index].ip_addr = ip_addr; - memcpy(nesadapter->arp_table[arp_index].mac_addr, mac_addr, ETH_ALEN); - return arp_index; - } - - /* DELETE or RESOLVE */ - if (arp_index == nesadapter->arp_table_size) { - tmp_addr = cpu_to_be32(ip_addr); - nes_debug(NES_DBG_NETDEV, "MAC for %pI4 not in ARP table - cannot %s\n", - &tmp_addr, action == NES_ARP_RESOLVE ? 
"resolve" : "delete"); - return -1; - } - - if (action == NES_ARP_RESOLVE) { - nes_debug(NES_DBG_NETDEV, "RESOLVE, arp_index=%d\n", arp_index); - return arp_index; - } - - if (action == NES_ARP_DELETE) { - nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index); - nesadapter->arp_table[arp_index].ip_addr = 0; - eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr); - nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index); - return arp_index; - } - - return -1; -} - - -/** - * nes_mh_fix - */ -void nes_mh_fix(struct timer_list *t) -{ - struct nes_adapter *nesadapter = from_timer(nesadapter, t, mh_timer); - struct nes_device *nesdev = nesadapter->nesdev; - unsigned long flags; - struct nes_vnic *nesvnic; - u32 used_chunks_tx; - u32 temp_used_chunks_tx; - u32 temp_last_used_chunks_tx; - u32 used_chunks_mask; - u32 mac_tx_frames_low; - u32 mac_tx_frames_high; - u32 mac_tx_pauses; - u32 reset_value; - u32 tx_control; - u32 tx_config; - u32 tx_pause_quanta; - u32 rx_control; - u32 rx_config; - u32 mac_exact_match; - u32 mpp_debug; - u32 i=0; - u32 chunks_tx_progress = 0; - - spin_lock_irqsave(&nesadapter->phy_lock, flags); - if ((nesadapter->mac_sw_state[0] != NES_MAC_SW_IDLE) || (nesadapter->mac_link_down[0])) { - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); - goto no_mh_work; - } - nesadapter->mac_sw_state[0] = NES_MAC_SW_MH; - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); - do { - mac_tx_frames_low = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_LOW); - mac_tx_frames_high = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_HIGH); - mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES); - used_chunks_tx = nes_read_indexed(nesdev, NES_IDX_USED_CHUNKS_TX); - nesdev->mac_pause_frames_sent += mac_tx_pauses; - used_chunks_mask = 0; - temp_used_chunks_tx = used_chunks_tx; - temp_last_used_chunks_tx = nesdev->last_used_chunks_tx; - - if (nesdev->netdev[0]) { - nesvnic = netdev_priv(nesdev->netdev[0]); - } else { - break; - } - - for (i=0; i<4; i++) { - used_chunks_mask <<= 8; - if (nesvnic->qp_nic_index[i] != 0xff) { - used_chunks_mask |= 0xff; - if ((temp_used_chunks_tx&0xff)<(temp_last_used_chunks_tx&0xff)) { - chunks_tx_progress = 1; - } - } - temp_used_chunks_tx >>= 8; - temp_last_used_chunks_tx >>= 8; - } - if ((mac_tx_frames_low) || (mac_tx_frames_high) || - (!(used_chunks_tx&used_chunks_mask)) || - (!(nesdev->last_used_chunks_tx&used_chunks_mask)) || - (chunks_tx_progress) ) { - nesdev->last_used_chunks_tx = used_chunks_tx; - break; - } - nesdev->last_used_chunks_tx = used_chunks_tx; - barrier(); - - nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000005); - mh_pauses_sent++; - mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES); - if (mac_tx_pauses) { - nesdev->mac_pause_frames_sent += mac_tx_pauses; - break; - } - - tx_control = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONTROL); - tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); - tx_pause_quanta = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA); - rx_control = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONTROL); - rx_config = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONFIG); - mac_exact_match = nes_read_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM); - mpp_debug = nes_read_indexed(nesdev, NES_IDX_MPP_DEBUG); - - /* one last ditch effort to avoid a false positive */ - mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES); - if (mac_tx_pauses) { - nesdev->last_mac_tx_pauses = nesdev->mac_pause_frames_sent; - nes_debug(NES_DBG_HW, 
"failsafe caught slow outbound pause\n"); - break; - } - mh_detected++; - - nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000000); - nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, 0x00000000); - reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); - - nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value | 0x0000001d); - - while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) - & 0x00000040) != 0x00000040) && (i++ < 5000)) { - /* mdelay(1); */ - } - - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008); - nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0); - - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000); - if (nesadapter->OneG_Mode) { - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222); - } else { - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222); - } - nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_STATUS0); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff); - - nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, tx_control); - nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); - nes_write_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA, tx_pause_quanta); - nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONTROL, rx_control); - nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONFIG, rx_config); - nes_write_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM, mac_exact_match); - nes_write_indexed(nesdev, NES_IDX_MPP_DEBUG, mpp_debug); - - } while (0); - - nesadapter->mac_sw_state[0] = NES_MAC_SW_IDLE; -no_mh_work: - nesdev->nesadapter->mh_timer.expires = jiffies + (HZ/5); - add_timer(&nesdev->nesadapter->mh_timer); -} - -/** - * nes_clc - */ -void nes_clc(struct timer_list *t) -{ - struct nes_adapter *nesadapter = from_timer(nesadapter, t, lc_timer); - unsigned long flags; - - spin_lock_irqsave(&nesadapter->phy_lock, flags); - nesadapter->link_interrupt_count[0] = 0; - nesadapter->link_interrupt_count[1] = 0; - nesadapter->link_interrupt_count[2] = 0; - nesadapter->link_interrupt_count[3] = 0; - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); - - nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */ - add_timer(&nesadapter->lc_timer); -} - - -/** - * nes_dump_mem - */ -void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length) -{ - if (!(nes_debug_level & dump_debug_level)) { - return; - } - - if (length > 0x100) { - nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100); - length = 0x100; - } - nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", addr, length, length); - - print_hex_dump(KERN_ERR, PFX, DUMP_PREFIX_NONE, 16, 1, addr, length, true); -} diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c deleted file mode 100644 index 49024326a518..000000000000 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ /dev/null @@ -1,3759 +0,0 @@ -/* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/random.h> -#include <linux/highmem.h> -#include <linux/slab.h> -#include <asm/byteorder.h> - -#include <rdma/ib_verbs.h> -#include <rdma/iw_cm.h> -#include <rdma/ib_user_verbs.h> -#include <rdma/uverbs_ioctl.h> - -#include "nes.h" - -#include <rdma/ib_umem.h> - -atomic_t mod_qp_timouts; -atomic_t qps_created; -atomic_t sw_qps_destroyed; - -static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); -static int nes_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); - -/** - * nes_alloc_mw - */ -static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type, - struct ib_udata *udata) -{ - struct nes_pd *nespd = to_nespd(ibpd); - struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_cqp_request *cqp_request; - struct nes_mr *nesmr; - struct ib_mw *ibmw; - struct nes_hw_cqp_wqe *cqp_wqe; - int ret; - u32 stag; - u32 stag_index = 0; - u32 next_stag_index = 0; - u32 driver_key = 0; - u8 stag_key = 0; - - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); - - get_random_bytes(&next_stag_index, sizeof(next_stag_index)); - stag_key = (u8)next_stag_index; - - driver_key = 0; - - next_stag_index >>= 8; - next_stag_index %= nesadapter->max_mr; - - ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, - nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_MW); - if (ret) { - return ERR_PTR(ret); - } - - nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); - if (!nesmr) { - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - return ERR_PTR(-ENOMEM); - } - - stag = stag_index << 8; - stag |= driver_key; - stag += (u32)stag_key; - - nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n", - stag, stag_index); - - /* Register the region with the adapter */ - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - kfree(nesmr); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - return ERR_PTR(-ENOMEM); - } - - cqp_request->waiting = 1; - cqp_wqe = &cqp_request->cqp_wqe; - - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = - cpu_to_le32( NES_CQP_ALLOCATE_STAG | 
NES_CQP_STAG_RIGHTS_REMOTE_READ | - NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_VA_TO | - NES_CQP_STAG_REM_ACC_EN); - - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff)); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); - - atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request); - - /* Wait for CQP */ - ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), - NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u," - " CQP Major:Minor codes = 0x%04X:0x%04X.\n", - stag, ret, cqp_request->major_code, cqp_request->minor_code); - if ((!ret) || (cqp_request->major_code)) { - nes_put_cqp_request(nesdev, cqp_request); - kfree(nesmr); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - if (!ret) { - return ERR_PTR(-ETIME); - } else { - return ERR_PTR(-ENOMEM); - } - } - nes_put_cqp_request(nesdev, cqp_request); - - nesmr->ibmw.rkey = stag; - nesmr->mode = IWNES_MEMREG_TYPE_MW; - ibmw = &nesmr->ibmw; - nesmr->pbl_4k = 0; - nesmr->pbls_used = 0; - - return ibmw; -} - - -/** - * nes_dealloc_mw - */ -static int nes_dealloc_mw(struct ib_mw *ibmw) -{ - struct nes_mr *nesmr = to_nesmw(ibmw); - struct nes_vnic *nesvnic = to_nesvnic(ibmw->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_cqp_request *cqp_request; - int err = 0; - int ret; - - /* Deallocate the window with the adapter */ - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); - return -ENOMEM; - } - cqp_request->waiting = 1; - cqp_wqe = &cqp_request->cqp_wqe; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, NES_CQP_DEALLOCATE_STAG); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey); - - atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request); - - /* Wait for CQP */ - nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n", - ibmw->rkey); - ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), - NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u," - " CQP Major:Minor codes = 0x%04X:0x%04X.\n", - ret, cqp_request->major_code, cqp_request->minor_code); - if (!ret) - err = -ETIME; - else if (cqp_request->major_code) - err = -EIO; - - nes_put_cqp_request(nesdev, cqp_request); - - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - (ibmw->rkey & 0x0fffff00) >> 8); - kfree(nesmr); - - return err; -} - - -/* - * nes_alloc_fast_mr - */ -static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, - u32 stag, u32 page_count) -{ - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_cqp_request *cqp_request; - unsigned long flags; - int ret; - struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 opcode = 0; - u16 major_code; - u64 region_length = page_count * PAGE_SIZE; - - - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); - return -ENOMEM; - } - nes_debug(NES_DBG_MR, "alloc_fast_reg_mr: page_count = %d, " - "region_length = %llu\n", - page_count, region_length); - cqp_request->waiting = 1; - cqp_wqe = 
&cqp_request->cqp_wqe; - - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - if (nesadapter->free_4kpbl > 0) { - nesadapter->free_4kpbl--; - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } else { - /* No 4kpbl's available: */ - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - nes_debug(NES_DBG_MR, "Out of Pbls\n"); - nes_free_cqp_request(nesdev, cqp_request); - return -ENOMEM; - } - - opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_MR | - NES_CQP_STAG_PBL_BLK_SIZE | NES_CQP_STAG_VA_TO | - NES_CQP_STAG_REM_ACC_EN; - /* - * The current OFED API does not support the zero based TO option. - * If added then need to changed the NES_CQP_STAG_VA* option. Also, - * the API does not support that ability to have the MR set for local - * access only when created and not allow the SQ op to override. Given - * this the remote enable must be set here. - */ - - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, 1); - - cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] = - cpu_to_le32((u32)(region_length >> 8) & 0xff000000); - cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |= - cpu_to_le32(nespd->pd_id & 0x00007fff); - - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, 0); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, 0); - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, 0); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (page_count * 8)); - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE); - barrier(); - - atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request); - - /* Wait for CQP */ - ret = wait_event_timeout(cqp_request->waitq, - (0 != cqp_request->request_done), - NES_EVENT_TIMEOUT); - - nes_debug(NES_DBG_MR, "Allocate STag 0x%08X completed, " - "wait_event_timeout ret = %u, CQP Major:Minor codes = " - "0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code, - cqp_request->minor_code); - major_code = cqp_request->major_code; - nes_put_cqp_request(nesdev, cqp_request); - - if (!ret || major_code) { - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - nesadapter->free_4kpbl++; - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } - - if (!ret) - return -ETIME; - else if (major_code) - return -EIO; - return 0; -} - -/* - * nes_alloc_mr - */ -static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) -{ - struct nes_pd *nespd = to_nespd(ibpd); - struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - - u32 next_stag_index; - u8 stag_key = 0; - u32 driver_key = 0; - int err = 0; - u32 stag_index = 0; - struct nes_mr *nesmr; - u32 stag; - int ret; - struct ib_mr *ibmr; - - if (mr_type != IB_MR_TYPE_MEM_REG) - return ERR_PTR(-EINVAL); - - if (max_num_sg > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) - return ERR_PTR(-E2BIG); - -/* - * Note: Set to always use a fixed length single page entry PBL. This is to allow - * for the fast_reg_mr operation to always know the size of the PBL. 
- */ - if (max_num_sg > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) - return ERR_PTR(-E2BIG); - - get_random_bytes(&next_stag_index, sizeof(next_stag_index)); - stag_key = (u8)next_stag_index; - next_stag_index >>= 8; - next_stag_index %= nesadapter->max_mr; - - err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, - nesadapter->max_mr, &stag_index, - &next_stag_index, NES_RESOURCE_FAST_MR); - if (err) - return ERR_PTR(err); - - nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); - if (!nesmr) { - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - return ERR_PTR(-ENOMEM); - } - - stag = stag_index << 8; - stag |= driver_key; - stag += (u32)stag_key; - - nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n", - stag, stag_index); - - ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_num_sg); - - if (ret == 0) { - nesmr->ibmr.rkey = stag; - nesmr->ibmr.lkey = stag; - nesmr->mode = IWNES_MEMREG_TYPE_FMEM; - ibmr = &nesmr->ibmr; - } else { - kfree(nesmr); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - return ERR_PTR(-ENOMEM); - } - - nesmr->pages = pci_alloc_consistent(nesdev->pcidev, - max_num_sg * sizeof(u64), - &nesmr->paddr); - if (!nesmr->paddr) - goto err; - - nesmr->max_pages = max_num_sg; - - return ibmr; - -err: - nes_dereg_mr(ibmr, udata); - - return ERR_PTR(-ENOMEM); -} - -static int nes_set_page(struct ib_mr *ibmr, u64 addr) -{ - struct nes_mr *nesmr = to_nesmr(ibmr); - - if (unlikely(nesmr->npages == nesmr->max_pages)) - return -ENOMEM; - - nesmr->pages[nesmr->npages++] = cpu_to_le64(addr); - - return 0; -} - -static int nes_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, - int sg_nents, unsigned int *sg_offset) -{ - struct nes_mr *nesmr = to_nesmr(ibmr); - - nesmr->npages = 0; - - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, nes_set_page); -} - -/** - * nes_query_device - */ -static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw) -{ - struct nes_vnic *nesvnic = to_nesvnic(ibdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_ib_device *nesibdev = nesvnic->nesibdev; - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - - memset(props, 0, sizeof(*props)); - memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6); - - props->fw_ver = nesdev->nesadapter->firmware_version; - props->device_cap_flags = nesdev->nesadapter->device_cap_flags; - props->vendor_id = nesdev->nesadapter->vendor_id; - props->vendor_part_id = nesdev->nesadapter->vendor_part_id; - props->hw_ver = nesdev->nesadapter->hw_rev; - props->max_mr_size = 0x80000000; - props->max_qp = nesibdev->max_qp; - props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2; - props->max_send_sge = nesdev->nesadapter->max_sge; - props->max_recv_sge = nesdev->nesadapter->max_sge; - props->max_cq = nesibdev->max_cq; - props->max_cqe = nesdev->nesadapter->max_cqe; - props->max_mr = nesibdev->max_mr; - props->max_mw = nesibdev->max_mr; - props->max_pd = nesibdev->max_pd; - props->max_sge_rd = 1; - switch (nesdev->nesadapter->max_irrq_wr) { - case 0: - props->max_qp_rd_atom = 2; - break; - case 1: - props->max_qp_rd_atom = 8; - break; - case 2: - props->max_qp_rd_atom = 32; - break; - case 3: - props->max_qp_rd_atom = 64; - break; - default: - props->max_qp_rd_atom = 0; - } - props->max_qp_init_rd_atom = props->max_qp_rd_atom; - props->atomic_cap = IB_ATOMIC_NONE; - props->max_map_per_fmr = 1; - - return 0; -} - - -/** - * nes_query_port - */ -static int nes_query_port(struct ib_device *ibdev, u8 port, 
struct ib_port_attr *props) -{ - struct nes_vnic *nesvnic = to_nesvnic(ibdev); - struct net_device *netdev = nesvnic->netdev; - - /* props being zeroed by the caller, avoid zeroing it here */ - - props->max_mtu = IB_MTU_4096; - props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); - - props->lid = 1; - if (netif_queue_stopped(netdev)) - props->state = IB_PORT_DOWN; - else if (nesvnic->linkup) - props->state = IB_PORT_ACTIVE; - else - props->state = IB_PORT_DOWN; - props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | - IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; - props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; - props->active_width = IB_WIDTH_4X; - props->active_speed = IB_SPEED_SDR; - props->max_msg_sz = 0x80000000; - - return 0; -} - -/** - * nes_query_pkey - */ -static int nes_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) -{ - *pkey = 0; - return 0; -} - - -/** - * nes_query_gid - */ -static int nes_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - struct nes_vnic *nesvnic = to_nesvnic(ibdev); - - memset(&(gid->raw[0]), 0, sizeof(gid->raw)); - memcpy(&(gid->raw[0]), nesvnic->netdev->dev_addr, 6); - - return 0; -} - - -/** - * nes_alloc_ucontext - Allocate the user context data structure. This keeps track - * of all objects associated with a particular user-mode client. - */ -static int nes_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) -{ - struct ib_device *ibdev = uctx->device; - struct nes_vnic *nesvnic = to_nesvnic(ibdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_alloc_ucontext_req req; - struct nes_alloc_ucontext_resp uresp = {}; - struct nes_ucontext *nes_ucontext = to_nesucontext(uctx); - struct nes_ib_device *nesibdev = nesvnic->nesibdev; - - - if (ib_copy_from_udata(&req, udata, sizeof(struct nes_alloc_ucontext_req))) { - printk(KERN_ERR PFX "Invalid structure size on allocate user context.\n"); - return -EINVAL; - } - - if (req.userspace_ver != NES_ABI_USERSPACE_VER) { - printk(KERN_ERR PFX "Invalid userspace driver version detected. 
Detected version %d, should be %d\n", - req.userspace_ver, NES_ABI_USERSPACE_VER); - return -EINVAL; - } - - - uresp.max_qps = nesibdev->max_qp; - uresp.max_pds = nesibdev->max_pd; - uresp.wq_size = nesdev->nesadapter->max_qp_wr * 2; - uresp.virtwq = nesadapter->virtwq; - uresp.kernel_ver = NES_ABI_KERNEL_VER; - - nes_ucontext->nesdev = nesdev; - nes_ucontext->mmap_wq_offset = uresp.max_pds; - nes_ucontext->mmap_cq_offset = nes_ucontext->mmap_wq_offset + - ((sizeof(struct nes_hw_qp_wqe) * uresp.max_qps * 2) + PAGE_SIZE-1) / - PAGE_SIZE; - - - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) - return -EFAULT; - - INIT_LIST_HEAD(&nes_ucontext->cq_reg_mem_list); - INIT_LIST_HEAD(&nes_ucontext->qp_reg_mem_list); - return 0; -} - -/** - * nes_dealloc_ucontext - */ -static void nes_dealloc_ucontext(struct ib_ucontext *context) -{ - return; -} - -/** - * nes_mmap - */ -static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - unsigned long index; - struct nes_vnic *nesvnic = to_nesvnic(context->device); - struct nes_device *nesdev = nesvnic->nesdev; - /* struct nes_adapter *nesadapter = nesdev->nesadapter; */ - struct nes_ucontext *nes_ucontext; - struct nes_qp *nesqp; - - nes_ucontext = to_nesucontext(context); - - - if (vma->vm_pgoff >= nes_ucontext->mmap_wq_offset) { - index = (vma->vm_pgoff - nes_ucontext->mmap_wq_offset) * PAGE_SIZE; - index /= ((sizeof(struct nes_hw_qp_wqe) * nesdev->nesadapter->max_qp_wr * 2) + - PAGE_SIZE-1) & (~(PAGE_SIZE-1)); - if (!test_bit(index, nes_ucontext->allocated_wqs)) { - nes_debug(NES_DBG_MMAP, "wq %lu not allocated\n", index); - return -EFAULT; - } - nesqp = nes_ucontext->mmap_nesqp[index]; - if (nesqp == NULL) { - nes_debug(NES_DBG_MMAP, "wq %lu has a NULL QP base.\n", index); - return -EFAULT; - } - if (remap_pfn_range(vma, vma->vm_start, - virt_to_phys(nesqp->hwqp.sq_vbase) >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) { - nes_debug(NES_DBG_MMAP, "remap_pfn_range failed.\n"); - return -EAGAIN; - } - vma->vm_private_data = nesqp; - return 0; - } else { - index = vma->vm_pgoff; - if (!test_bit(index, nes_ucontext->allocated_doorbells)) - return -EFAULT; - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (io_remap_pfn_range(vma, vma->vm_start, - (nesdev->doorbell_start + - ((nes_ucontext->mmap_db_index[index] - nesdev->base_doorbell_index) * 4096)) - >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - vma->vm_private_data = nes_ucontext; - return 0; - } - - return -ENOSYS; -} - - -/** - * nes_alloc_pd - */ -static int nes_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) -{ - struct ib_device *ibdev = pd->device; - struct nes_pd *nespd = to_nespd(pd); - struct nes_vnic *nesvnic = to_nesvnic(ibdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_alloc_pd_resp uresp; - u32 pd_num = 0; - int err; - struct nes_ucontext *nesucontext = rdma_udata_to_drv_context( - udata, struct nes_ucontext, ibucontext); - - nes_debug( - NES_DBG_PD, - "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n", - nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, - &nesucontext->ibucontext, netdev_refcnt_read(nesvnic->netdev)); - - err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, - nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD); - if (err) - return err; - - nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n", - nespd, dev_name(&nesvnic->nesibdev->ibdev.dev)); - - 
nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd; - - if (udata) { - nespd->mmap_db_index = find_next_zero_bit(nesucontext->allocated_doorbells, - NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db); - nes_debug(NES_DBG_PD, "find_first_zero_biton doorbells returned %u, mapping pd_id %u.\n", - nespd->mmap_db_index, nespd->pd_id); - if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) { - nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n"); - nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); - return -ENOMEM; - } - - uresp.pd_id = nespd->pd_id; - uresp.mmap_db_index = nespd->mmap_db_index; - if (ib_copy_to_udata(udata, &uresp, sizeof (struct nes_alloc_pd_resp))) { - nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); - return -EFAULT; - } - - set_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells); - nesucontext->mmap_db_index[nespd->mmap_db_index] = nespd->pd_id; - nesucontext->first_free_db = nespd->mmap_db_index + 1; - } - - nes_debug(NES_DBG_PD, "PD%u structure located @%p.\n", nespd->pd_id, nespd); - return 0; -} - - -/** - * nes_dealloc_pd - */ -static void nes_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) -{ - struct nes_ucontext *nesucontext; - struct nes_pd *nespd = to_nespd(ibpd); - struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - - if (udata) { - nesucontext = - rdma_udata_to_drv_context( - udata, - struct nes_ucontext, - ibucontext); - nes_debug(NES_DBG_PD, "Clearing bit %u from allocated doorbells\n", - nespd->mmap_db_index); - clear_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells); - nesucontext->mmap_db_index[nespd->mmap_db_index] = 0; - if (nesucontext->first_free_db > nespd->mmap_db_index) { - nesucontext->first_free_db = nespd->mmap_db_index; - } - } - - nes_debug(NES_DBG_PD, "Deallocating PD%u structure located @%p.\n", - nespd->pd_id, nespd); - nes_free_resource(nesadapter, nesadapter->allocated_pds, - (nespd->pd_id-nesadapter->base_pd)>>(PAGE_SHIFT-12)); -} - - -/** - * nes_get_encoded_size - */ -static inline u8 nes_get_encoded_size(int *size) -{ - u8 encoded_size = 0; - if (*size <= 32) { - *size = 32; - encoded_size = 1; - } else if (*size <= 128) { - *size = 128; - encoded_size = 2; - } else if (*size <= 512) { - *size = 512; - encoded_size = 3; - } - return (encoded_size); -} - - - -/** - * nes_setup_virt_qp - */ -static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl, - struct nes_vnic *nesvnic, int sq_size, int rq_size) -{ - unsigned long flags; - void *mem; - __le64 *pbl = NULL; - __le64 *tpbl; - __le64 *pblbuffer; - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 pbl_entries; - u8 rq_pbl_entries; - u8 sq_pbl_entries; - - pbl_entries = nespbl->pbl_size >> 3; - nes_debug(NES_DBG_QP, "Userspace PBL, pbl_size=%u, pbl_entries = %d pbl_vbase=%p, pbl_pbase=%lx\n", - nespbl->pbl_size, pbl_entries, - (void *)nespbl->pbl_vbase, - (unsigned long) nespbl->pbl_pbase); - pbl = (__le64 *) nespbl->pbl_vbase; /* points to first pbl entry */ - /* now lets set the sq_vbase as well as rq_vbase addrs we will assign */ - /* the first pbl to be fro the rq_vbase... 
*/ - rq_pbl_entries = (rq_size * sizeof(struct nes_hw_qp_wqe)) >> 12; - sq_pbl_entries = (sq_size * sizeof(struct nes_hw_qp_wqe)) >> 12; - nesqp->hwqp.sq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32); - if (!nespbl->page) { - nes_debug(NES_DBG_QP, "QP nespbl->page is NULL \n"); - kfree(nespbl); - return -ENOMEM; - } - - nesqp->hwqp.sq_vbase = kmap(nespbl->page); - nesqp->page = nespbl->page; - if (!nesqp->hwqp.sq_vbase) { - nes_debug(NES_DBG_QP, "QP sq_vbase kmap failed\n"); - kfree(nespbl); - return -ENOMEM; - } - - /* Now to get to sq.. we need to calculate how many */ - /* PBL entries were used by the rq.. */ - pbl += sq_pbl_entries; - nesqp->hwqp.rq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32); - /* nesqp->hwqp.rq_vbase = bus_to_virt(*pbl); */ - /*nesqp->hwqp.rq_vbase = phys_to_virt(*pbl); */ - - nes_debug(NES_DBG_QP, "QP sq_vbase= %p sq_pbase=%lx rq_vbase=%p rq_pbase=%lx\n", - nesqp->hwqp.sq_vbase, (unsigned long) nesqp->hwqp.sq_pbase, - nesqp->hwqp.rq_vbase, (unsigned long) nesqp->hwqp.rq_pbase); - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - if (!nesadapter->free_256pbl) { - pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, - nespbl->pbl_pbase); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kunmap(nesqp->page); - kfree(nespbl); - return -ENOMEM; - } - nesadapter->free_256pbl--; - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - - nesqp->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 256, &nesqp->pbl_pbase); - pblbuffer = nesqp->pbl_vbase; - if (!nesqp->pbl_vbase) { - /* memory allocated during nes_reg_user_mr() */ - pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, - nespbl->pbl_pbase); - kfree(nespbl); - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - nesadapter->free_256pbl++; - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kunmap(nesqp->page); - return -ENOMEM; - } - memset(nesqp->pbl_vbase, 0, 256); - /* fill in the page address in the pbl buffer.. 
*/ - tpbl = pblbuffer + 16; - pbl = (__le64 *)nespbl->pbl_vbase; - while (sq_pbl_entries--) - *tpbl++ = *pbl++; - tpbl = pblbuffer; - while (rq_pbl_entries--) - *tpbl++ = *pbl++; - - /* done with memory allocated during nes_reg_user_mr() */ - pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, - nespbl->pbl_pbase); - kfree(nespbl); - - nesqp->qp_mem_size = - max((u32)sizeof(struct nes_qp_context), ((u32)256)) + 256; /* this is Q2 */ - /* Round up to a multiple of a page */ - nesqp->qp_mem_size += PAGE_SIZE - 1; - nesqp->qp_mem_size &= ~(PAGE_SIZE - 1); - - mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size, - &nesqp->hwqp.q2_pbase); - - if (!mem) { - pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase); - nesqp->pbl_vbase = NULL; - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - nesadapter->free_256pbl++; - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kunmap(nesqp->page); - return -ENOMEM; - } - nesqp->sq_kmapped = 1; - nesqp->hwqp.q2_vbase = mem; - mem += 256; - memset(nesqp->hwqp.q2_vbase, 0, 256); - nesqp->nesqp_context = mem; - memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context)); - nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256; - - return 0; -} - - -/** - * nes_setup_mmap_qp - */ -static int nes_setup_mmap_qp(struct nes_qp *nesqp, struct nes_vnic *nesvnic, - int sq_size, int rq_size) -{ - void *mem; - struct nes_device *nesdev = nesvnic->nesdev; - - nesqp->qp_mem_size = (sizeof(struct nes_hw_qp_wqe) * sq_size) + - (sizeof(struct nes_hw_qp_wqe) * rq_size) + - max((u32)sizeof(struct nes_qp_context), ((u32)256)) + - 256; /* this is Q2 */ - /* Round up to a multiple of a page */ - nesqp->qp_mem_size += PAGE_SIZE - 1; - nesqp->qp_mem_size &= ~(PAGE_SIZE - 1); - - mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size, - &nesqp->hwqp.sq_pbase); - if (!mem) - return -ENOMEM; - nes_debug(NES_DBG_QP, "PCI consistent memory for " - "host descriptor rings located @ %p (pa = 0x%08lX.) size = %u.\n", - mem, (unsigned long)nesqp->hwqp.sq_pbase, nesqp->qp_mem_size); - - memset(mem, 0, nesqp->qp_mem_size); - - nesqp->hwqp.sq_vbase = mem; - mem += sizeof(struct nes_hw_qp_wqe) * sq_size; - - nesqp->hwqp.rq_vbase = mem; - nesqp->hwqp.rq_pbase = nesqp->hwqp.sq_pbase + - sizeof(struct nes_hw_qp_wqe) * sq_size; - mem += sizeof(struct nes_hw_qp_wqe) * rq_size; - - nesqp->hwqp.q2_vbase = mem; - nesqp->hwqp.q2_pbase = nesqp->hwqp.rq_pbase + - sizeof(struct nes_hw_qp_wqe) * rq_size; - mem += 256; - memset(nesqp->hwqp.q2_vbase, 0, 256); - - nesqp->nesqp_context = mem; - nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256; - memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context)); - return 0; -} - - -/** - * nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory. 
- */ -static void nes_free_qp_mem(struct nes_device *nesdev, - struct nes_qp *nesqp, int virt_wqs) -{ - unsigned long flags; - struct nes_adapter *nesadapter = nesdev->nesadapter; - if (!virt_wqs) { - pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, - nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase); - }else { - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - nesadapter->free_256pbl++; - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); - pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase ); - nesqp->pbl_vbase = NULL; - if (nesqp->sq_kmapped) { - nesqp->sq_kmapped = 0; - kunmap(nesqp->page); - } - } -} - - -/** - * nes_create_qp - */ -static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, struct ib_udata *udata) -{ - u64 u64temp= 0; - u64 u64nesqp = 0; - struct nes_pd *nespd = to_nespd(ibpd); - struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_qp *nesqp; - struct nes_cq *nescq; - struct nes_ucontext *nes_ucontext = rdma_udata_to_drv_context( - udata, struct nes_ucontext, ibucontext); - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_cqp_request *cqp_request; - struct nes_create_qp_req req; - struct nes_create_qp_resp uresp; - struct nes_pbl *nespbl = NULL; - u32 qp_num = 0; - u32 opcode = 0; - /* u32 counter = 0; */ - void *mem; - unsigned long flags; - int ret; - int err; - int virt_wqs = 0; - int sq_size; - int rq_size; - u8 sq_encoded_size; - u8 rq_encoded_size; - /* int counter; */ - - if (init_attr->create_flags) - return ERR_PTR(-EINVAL); - - atomic_inc(&qps_created); - switch (init_attr->qp_type) { - case IB_QPT_RC: - if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) { - init_attr->cap.max_inline_data = 0; - } else { - init_attr->cap.max_inline_data = 64; - } - sq_size = init_attr->cap.max_send_wr; - rq_size = init_attr->cap.max_recv_wr; - - /* check if the encoded sizes are OK or not... */ - sq_encoded_size = nes_get_encoded_size(&sq_size); - rq_encoded_size = nes_get_encoded_size(&rq_size); - - if ((!sq_encoded_size) || (!rq_encoded_size)) { - nes_debug(NES_DBG_QP, "ERROR bad rq (%u) or sq (%u) size\n", - rq_size, sq_size); - return ERR_PTR(-EINVAL); - } - - init_attr->cap.max_send_wr = sq_size -2; - init_attr->cap.max_recv_wr = rq_size -1; - nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size); - - ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps, - nesadapter->max_qp, &qp_num, &nesadapter->next_qp, NES_RESOURCE_QP); - if (ret) { - return ERR_PTR(ret); - } - - /* Need 512 (actually now 1024) byte alignment on this structure */ - mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL); - if (!mem) { - nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - return ERR_PTR(-ENOMEM); - } - u64nesqp = (unsigned long)mem; - u64nesqp += ((u64)NES_SW_CONTEXT_ALIGN) - 1; - u64temp = ((u64)NES_SW_CONTEXT_ALIGN) - 1; - u64nesqp &= ~u64temp; - nesqp = (struct nes_qp *)(unsigned long)u64nesqp; - /* nes_debug(NES_DBG_QP, "nesqp=%p, allocated buffer=%p. 
Rounded to closest %u\n", - nesqp, mem, NES_SW_CONTEXT_ALIGN); */ - nesqp->allocated_buffer = mem; - - if (udata) { - if (ib_copy_from_udata(&req, udata, sizeof(struct nes_create_qp_req))) { - nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - kfree(nesqp->allocated_buffer); - nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n"); - return ERR_PTR(-EFAULT); - } - if (req.user_wqe_buffers) { - virt_wqs = 1; - } - if (req.user_qp_buffer) - nesqp->nesuqp_addr = req.user_qp_buffer; - - nesqp->user_mode = 1; - if (virt_wqs) { - err = 1; - list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) { - if (nespbl->user_base == (unsigned long )req.user_wqe_buffers) { - list_del(&nespbl->list); - err = 0; - nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. user_base=0x%lx\n", - nespbl, nespbl->user_base); - break; - } - } - if (err) { - nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. address = %llx.\n", - (long long unsigned int)req.user_wqe_buffers); - nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - kfree(nesqp->allocated_buffer); - return ERR_PTR(-EFAULT); - } - } - - nesqp->mmap_sq_db_index = - find_next_zero_bit(nes_ucontext->allocated_wqs, - NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); - /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n", - nespd->mmap_db_index); */ - if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) { - nes_debug(NES_DBG_QP, - "db index > max user regions, failing create QP\n"); - nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - if (virt_wqs) { - pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, - nespbl->pbl_pbase); - kfree(nespbl); - } - kfree(nesqp->allocated_buffer); - return ERR_PTR(-ENOMEM); - } - set_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); - nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp; - nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index + 1; - } - err = (!virt_wqs) ? 
nes_setup_mmap_qp(nesqp, nesvnic, sq_size, rq_size) : - nes_setup_virt_qp(nesqp, nespbl, nesvnic, sq_size, rq_size); - if (err) { - nes_debug(NES_DBG_QP, - "error geting qp mem code = %d\n", err); - nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - kfree(nesqp->allocated_buffer); - return ERR_PTR(-ENOMEM); - } - - nesqp->hwqp.sq_size = sq_size; - nesqp->hwqp.sq_encoded_size = sq_encoded_size; - nesqp->hwqp.sq_head = 1; - nesqp->hwqp.rq_size = rq_size; - nesqp->hwqp.rq_encoded_size = rq_encoded_size; - /* nes_debug(NES_DBG_QP, "nesqp->nesqp_context_pbase = %p\n", - (void *)nesqp->nesqp_context_pbase); - */ - nesqp->hwqp.qp_id = qp_num; - nesqp->ibqp.qp_num = nesqp->hwqp.qp_id; - nesqp->nespd = nespd; - - nescq = to_nescq(init_attr->send_cq); - nesqp->nesscq = nescq; - nescq = to_nescq(init_attr->recv_cq); - nesqp->nesrcq = nescq; - - nesqp->nesqp_context->misc |= cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << - NES_QPCONTEXT_MISC_PCI_FCN_SHIFT); - nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.rq_encoded_size << - NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT); - nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.sq_encoded_size << - NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT); - if (!udata) { - nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_PRIV_EN); - nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_FAST_REGISTER_EN); - } - nesqp->nesqp_context->cqs = cpu_to_le32(nesqp->nesscq->hw_cq.cq_number + - ((u32)nesqp->nesrcq->hw_cq.cq_number << 16)); - u64temp = (u64)nesqp->hwqp.sq_pbase; - nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp); - nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); - - - if (!virt_wqs) { - u64temp = (u64)nesqp->hwqp.sq_pbase; - nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp); - nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); - u64temp = (u64)nesqp->hwqp.rq_pbase; - nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp); - nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); - } else { - u64temp = (u64)nesqp->pbl_pbase; - nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp); - nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); - } - - /* nes_debug(NES_DBG_QP, "next_qp_nic_index=%u, using nic_index=%d\n", - nesvnic->next_qp_nic_index, - nesvnic->qp_nic_index[nesvnic->next_qp_nic_index]); */ - spin_lock_irqsave(&nesdev->cqp.lock, flags); - nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] << - NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT); - nesvnic->next_qp_nic_index++; - if ((nesvnic->next_qp_nic_index > 3) || - (nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] == 0xf)) { - nesvnic->next_qp_nic_index = 0; - } - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - - nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32((u32)nesqp->nespd->pd_id << 16); - u64temp = (u64)nesqp->hwqp.q2_pbase; - nesqp->nesqp_context->q2_addr_low = cpu_to_le32((u32)u64temp); - nesqp->nesqp_context->q2_addr_high = cpu_to_le32((u32)(u64temp >> 32)); - nesqp->nesqp_context->aeq_token_low = cpu_to_le32((u32)((unsigned long)(nesqp))); - nesqp->nesqp_context->aeq_token_high = cpu_to_le32((u32)(upper_32_bits((unsigned long)(nesqp)))); - nesqp->nesqp_context->ird_ord_sizes = cpu_to_le32(NES_QPCONTEXT_ORDIRD_ALSMM | - NES_QPCONTEXT_ORDIRD_AAH | - ((((u32)nesadapter->max_irrq_wr) << - NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT) & NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK)); - if (disable_mpa_crc) { - 
nes_debug(NES_DBG_QP, "Disabling MPA crc checking due to module option.\n"); - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(NES_QPCONTEXT_ORDIRD_RNMC); - } - - - /* Create the QP */ - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_QP, "Failed to get a cqp_request\n"); - nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - nes_free_qp_mem(nesdev, nesqp,virt_wqs); - kfree(nesqp->allocated_buffer); - return ERR_PTR(-ENOMEM); - } - cqp_request->waiting = 1; - cqp_wqe = &cqp_request->cqp_wqe; - - if (!virt_wqs) { - opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | - NES_CQP_QP_IWARP_STATE_IDLE; - } else { - opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_VIRT_WQS | - NES_CQP_QP_IWARP_STATE_IDLE; - } - opcode |= NES_CQP_QP_CQS_VALID; - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); - - u64temp = (u64)nesqp->nesqp_context_pbase; - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); - - atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request); - - /* Wait for CQP */ - nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n", - nesqp->hwqp.qp_id); - ret = wait_event_timeout(cqp_request->waitq, - (cqp_request->request_done != 0), NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_QP, "Create iwarp QP%u completed, wait_event_timeout ret=%u," - " nesdev->cqp_head = %u, nesdev->cqp.sq_tail = %u," - " CQP Major:Minor codes = 0x%04X:0x%04X.\n", - nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail, - cqp_request->major_code, cqp_request->minor_code); - if ((!ret) || (cqp_request->major_code)) { - nes_put_cqp_request(nesdev, cqp_request); - nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - nes_free_qp_mem(nesdev, nesqp,virt_wqs); - kfree(nesqp->allocated_buffer); - if (!ret) { - return ERR_PTR(-ETIME); - } else { - return ERR_PTR(-EIO); - } - } - - nes_put_cqp_request(nesdev, cqp_request); - - if (udata) { - uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; - uresp.mmap_rq_db_index = 0; - uresp.actual_sq_size = sq_size; - uresp.actual_rq_size = rq_size; - uresp.qp_id = nesqp->hwqp.qp_id; - uresp.nes_drv_opt = nes_drv_opt; - if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { - nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - nes_free_qp_mem(nesdev, nesqp,virt_wqs); - kfree(nesqp->allocated_buffer); - return ERR_PTR(-EFAULT); - } - } - - nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n", - nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp)); - spin_lock_init(&nesqp->lock); - nes_add_ref(&nesqp->ibqp); - break; - default: - nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type); - return ERR_PTR(-EINVAL); - } - init_completion(&nesqp->sq_drained); - init_completion(&nesqp->rq_drained); - - nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR); - timer_setup(&nesqp->terminate_timer, nes_terminate_timeout, 0); - - /* update the QP table */ - nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp; - nes_debug(NES_DBG_QP, "netdev refcnt=%u\n", - netdev_refcnt_read(nesvnic->netdev)); - - return &nesqp->ibqp; -} - -/** - * nes_clean_cq - */ -static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq) -{ - u32 cq_head; - u32 lo; - u32 hi; - u64 u64temp; - unsigned long flags = 0; - - spin_lock_irqsave(&nescq->lock, flags); - 
- cq_head = nescq->hw_cq.cq_head; - while (le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) { - rmb(); - lo = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); - hi = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]); - u64temp = (((u64)hi) << 32) | ((u64)lo); - u64temp &= ~(NES_SW_CONTEXT_ALIGN-1); - if (u64temp == (u64)(unsigned long)nesqp) { - /* Zero the context value so cqe will be ignored */ - nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = 0; - nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX] = 0; - } - - if (++cq_head >= nescq->hw_cq.cq_size) - cq_head = 0; - } - - spin_unlock_irqrestore(&nescq->lock, flags); -} - - -/** - * nes_destroy_qp - */ -static int nes_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) -{ - struct nes_qp *nesqp = to_nesqp(ibqp); - struct nes_ucontext *nes_ucontext; - struct ib_qp_attr attr; - struct iw_cm_id *cm_id; - struct iw_cm_event cm_event; - int ret = 0; - - atomic_inc(&sw_qps_destroyed); - nesqp->destroyed = 1; - - /* Blow away the connection if it exists. */ - if (nesqp->ibqp_state >= IB_QPS_INIT && nesqp->ibqp_state <= IB_QPS_RTS) { - /* if (nesqp->ibqp_state == IB_QPS_RTS) { */ - attr.qp_state = IB_QPS_ERR; - nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); - } - - if (((nesqp->ibqp_state == IB_QPS_INIT) || - (nesqp->ibqp_state == IB_QPS_RTR)) && (nesqp->cm_id)) { - cm_id = nesqp->cm_id; - cm_event.event = IW_CM_EVENT_CONNECT_REPLY; - cm_event.status = -ETIMEDOUT; - cm_event.local_addr = cm_id->local_addr; - cm_event.remote_addr = cm_id->remote_addr; - cm_event.private_data = NULL; - cm_event.private_data_len = 0; - - nes_debug(NES_DBG_QP, "Generating a CM Timeout Event for " - "QP%u. cm_id = %p, refcount = %u. 
\n", - nesqp->hwqp.qp_id, cm_id, atomic_read(&nesqp->refcount)); - - cm_id->rem_ref(cm_id); - ret = cm_id->event_handler(cm_id, &cm_event); - if (ret) - nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret); - } - - if (nesqp->user_mode) { - if (udata) { - nes_ucontext = - rdma_udata_to_drv_context( - udata, - struct nes_ucontext, - ibucontext); - clear_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); - nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = NULL; - if (nes_ucontext->first_free_wq > nesqp->mmap_sq_db_index) { - nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index; - } - } - if (nesqp->pbl_pbase && nesqp->sq_kmapped) { - nesqp->sq_kmapped = 0; - kunmap(nesqp->page); - } - } else { - /* Clean any pending completions from the cq(s) */ - if (nesqp->nesscq) - nes_clean_cq(nesqp, nesqp->nesscq); - - if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq)) - nes_clean_cq(nesqp, nesqp->nesrcq); - } - nes_rem_ref(&nesqp->ibqp); - return 0; -} - - -/** - * nes_create_cq - */ -static struct ib_cq *nes_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) -{ - int entries = attr->cqe; - u64 u64temp; - struct nes_vnic *nesvnic = to_nesvnic(ibdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_cq *nescq; - struct nes_ucontext *nes_ucontext = NULL; - struct nes_cqp_request *cqp_request; - void *mem = NULL; - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_pbl *nespbl = NULL; - struct nes_create_cq_req req; - struct nes_create_cq_resp resp; - u32 cq_num = 0; - u32 opcode = 0; - u32 pbl_entries = 1; - int err; - unsigned long flags; - int ret; - - if (attr->flags) - return ERR_PTR(-EINVAL); - - if (entries > nesadapter->max_cqe) - return ERR_PTR(-EINVAL); - - err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs, - nesadapter->max_cq, &cq_num, &nesadapter->next_cq, NES_RESOURCE_CQ); - if (err) { - return ERR_PTR(err); - } - - nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL); - if (!nescq) { - nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - return ERR_PTR(-ENOMEM); - } - - nescq->hw_cq.cq_size = max(entries + 1, 5); - nescq->hw_cq.cq_number = cq_num; - nescq->ibcq.cqe = nescq->hw_cq.cq_size - 1; - - if (udata) { - struct nes_ucontext *nes_ucontext = rdma_udata_to_drv_context( - udata, struct nes_ucontext, ibucontext); - - if (ib_copy_from_udata(&req, udata, sizeof (struct nes_create_cq_req))) { - nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - kfree(nescq); - return ERR_PTR(-EFAULT); - } - nesvnic->mcrq_ucontext = nes_ucontext; - nes_ucontext->mcrqf = req.mcrqf; - if (nes_ucontext->mcrqf) { - if (nes_ucontext->mcrqf & 0x80000000) - nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 28 + 2 * ((nes_ucontext->mcrqf & 0xf) - 1); - else if (nes_ucontext->mcrqf & 0x40000000) - nescq->hw_cq.cq_number = nes_ucontext->mcrqf & 0xffff; - else - nescq->hw_cq.cq_number = nesvnic->mcrq_qp_id + nes_ucontext->mcrqf-1; - nescq->mcrqf = nes_ucontext->mcrqf; - nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - } - nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n", - (unsigned long)req.user_cq_buffer, entries); - err = 1; - list_for_each_entry(nespbl, &nes_ucontext->cq_reg_mem_list, list) { - if (nespbl->user_base == (unsigned long )req.user_cq_buffer) { - list_del(&nespbl->list); - err = 0; - nes_debug(NES_DBG_CQ, "Found PBL for virtual CQ. 
nespbl=%p.\n", - nespbl); - break; - } - } - if (err) { - nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - kfree(nescq); - return ERR_PTR(-EFAULT); - } - - pbl_entries = nespbl->pbl_size >> 3; - nescq->cq_mem_size = 0; - } else { - nescq->cq_mem_size = nescq->hw_cq.cq_size * sizeof(struct nes_hw_cqe); - nes_debug(NES_DBG_CQ, "Attempting to allocate pci memory (%u entries, %u bytes) for CQ%u.\n", - entries, nescq->cq_mem_size, nescq->hw_cq.cq_number); - - /* allocate the physical buffer space */ - mem = pci_zalloc_consistent(nesdev->pcidev, nescq->cq_mem_size, - &nescq->hw_cq.cq_pbase); - if (!mem) { - printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n"); - nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - kfree(nescq); - return ERR_PTR(-ENOMEM); - } - - nescq->hw_cq.cq_vbase = mem; - nescq->hw_cq.cq_head = 0; - nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n", - nescq->hw_cq.cq_number, nescq->hw_cq.cq_vbase, - (u32)nescq->hw_cq.cq_pbase); - } - - nescq->hw_cq.ce_handler = nes_iwarp_ce_handler; - spin_lock_init(&nescq->lock); - - /* send CreateCQ request to CQP */ - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n"); - if (!udata) - pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, - nescq->hw_cq.cq_pbase); - else { - pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, - nespbl->pbl_vbase, nespbl->pbl_pbase); - kfree(nespbl); - } - - nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - kfree(nescq); - return ERR_PTR(-ENOMEM); - } - cqp_request->waiting = 1; - cqp_wqe = &cqp_request->cqp_wqe; - - opcode = NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | - NES_CQP_CQ_CHK_OVERFLOW | - NES_CQP_CQ_CEQE_MASK | ((u32)nescq->hw_cq.cq_size << 16); - - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - - if (pbl_entries != 1) { - if (pbl_entries > 32) { - /* use 4k pbl */ - nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries); - if (nesadapter->free_4kpbl == 0) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - nes_free_cqp_request(nesdev, cqp_request); - if (!udata) - pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, - nescq->hw_cq.cq_pbase); - else { - pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, - nespbl->pbl_vbase, nespbl->pbl_pbase); - kfree(nespbl); - } - nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - kfree(nescq); - return ERR_PTR(-ENOMEM); - } else { - opcode |= (NES_CQP_CQ_VIRT | NES_CQP_CQ_4KB_CHUNK); - nescq->virtual_cq = 2; - nesadapter->free_4kpbl--; - } - } else { - /* use 256 byte pbl */ - nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries); - if (nesadapter->free_256pbl == 0) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - nes_free_cqp_request(nesdev, cqp_request); - if (!udata) - pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, - nescq->hw_cq.cq_pbase); - else { - pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, - nespbl->pbl_vbase, nespbl->pbl_pbase); - kfree(nespbl); - } - nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - kfree(nescq); - return ERR_PTR(-ENOMEM); - } else { - opcode |= NES_CQP_CQ_VIRT; - nescq->virtual_cq = 1; - nesadapter->free_256pbl--; - } - } - } - - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); - set_wqe_32bit_value(cqp_wqe->wqe_words, 
NES_CQP_WQE_ID_IDX, - (nescq->hw_cq.cq_number | ((u32)nesdev->ceq_index << 16))); - - if (udata) { - if (pbl_entries != 1) - u64temp = (u64)nespbl->pbl_pbase; - else - u64temp = le64_to_cpu(nespbl->pbl_vbase[0]); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX, - nes_ucontext->mmap_db_index[0]); - } else { - u64temp = (u64)nescq->hw_cq.cq_pbase; - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; - } - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; - u64temp = (u64)(unsigned long)&nescq->hw_cq; - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = - cpu_to_le32((u32)(u64temp >> 1)); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = - cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); - - atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request); - - /* Wait for CQP */ - nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n", - nescq->hw_cq.cq_number); - ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), - NES_EVENT_TIMEOUT * 2); - nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n", - nescq->hw_cq.cq_number, ret); - if ((!ret) || (cqp_request->major_code)) { - nes_put_cqp_request(nesdev, cqp_request); - if (!udata) - pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, - nescq->hw_cq.cq_pbase); - else { - pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, - nespbl->pbl_vbase, nespbl->pbl_pbase); - kfree(nespbl); - } - nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - kfree(nescq); - return ERR_PTR(-EIO); - } - nes_put_cqp_request(nesdev, cqp_request); - - if (udata) { - /* free the nespbl */ - pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, - nespbl->pbl_pbase); - kfree(nespbl); - resp.cq_id = nescq->hw_cq.cq_number; - resp.cq_size = nescq->hw_cq.cq_size; - resp.mmap_db_index = 0; - if (ib_copy_to_udata(udata, &resp, sizeof resp - sizeof resp.reserved)) { - nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - kfree(nescq); - return ERR_PTR(-EFAULT); - } - } - - return &nescq->ibcq; -} - - -/** - * nes_destroy_cq - */ -static int nes_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) -{ - struct nes_cq *nescq; - struct nes_device *nesdev; - struct nes_vnic *nesvnic; - struct nes_adapter *nesadapter; - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_cqp_request *cqp_request; - unsigned long flags; - u32 opcode = 0; - int ret; - - if (ib_cq == NULL) - return 0; - - nescq = to_nescq(ib_cq); - nesvnic = to_nesvnic(ib_cq->device); - nesdev = nesvnic->nesdev; - nesadapter = nesdev->nesadapter; - - nes_debug(NES_DBG_CQ, "Destroy CQ%u\n", nescq->hw_cq.cq_number); - - /* Send DestroyCQ request to CQP */ - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n"); - return -ENOMEM; - } - cqp_request->waiting = 1; - cqp_wqe = &cqp_request->cqp_wqe; - opcode = NES_CQP_DESTROY_CQ | (nescq->hw_cq.cq_size << 16); - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - if (nescq->virtual_cq == 1) { - nesadapter->free_256pbl++; - if (nesadapter->free_256pbl > nesadapter->max_256pbl) { - printk(KERN_ERR PFX "%s: free 256B PBLs(%u) has exceeded the max(%u)\n", - __func__, nesadapter->free_256pbl, nesadapter->max_256pbl); - } - } else if (nescq->virtual_cq == 2) { - nesadapter->free_4kpbl++; - if (nesadapter->free_4kpbl > 
nesadapter->max_4kpbl) { - printk(KERN_ERR PFX "%s: free 4K PBLs(%u) has exceeded the max(%u)\n", - __func__, nesadapter->free_4kpbl, nesadapter->max_4kpbl); - } - opcode |= NES_CQP_CQ_4KB_CHUNK; - } - - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, - (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16))); - if (!nescq->mcrqf) - nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number); - - atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request); - - /* Wait for CQP */ - nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n", - nescq->hw_cq.cq_number); - ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), - NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_CQ, "Destroy iWARP CQ%u completed, wait_event_timeout ret = %u," - " CQP Major:Minor codes = 0x%04X:0x%04X.\n", - nescq->hw_cq.cq_number, ret, cqp_request->major_code, - cqp_request->minor_code); - if (!ret) { - nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n", - nescq->hw_cq.cq_number); - ret = -ETIME; - } else if (cqp_request->major_code) { - nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n", - nescq->hw_cq.cq_number); - ret = -EIO; - } else { - ret = 0; - } - nes_put_cqp_request(nesdev, cqp_request); - - if (nescq->cq_mem_size) - pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, - nescq->hw_cq.cq_vbase, nescq->hw_cq.cq_pbase); - kfree(nescq); - - return ret; -} - -/** - * root_256 - */ -static u32 root_256(struct nes_device *nesdev, - struct nes_root_vpbl *root_vpbl, - struct nes_root_vpbl *new_root, - u16 pbl_count_4k) -{ - u64 leaf_pbl; - int i, j, k; - - if (pbl_count_4k == 1) { - new_root->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, - 512, &new_root->pbl_pbase); - - if (new_root->pbl_vbase == NULL) - return 0; - - leaf_pbl = (u64)root_vpbl->pbl_pbase; - for (i = 0; i < 16; i++) { - new_root->pbl_vbase[i].pa_low = - cpu_to_le32((u32)leaf_pbl); - new_root->pbl_vbase[i].pa_high = - cpu_to_le32((u32)((((u64)leaf_pbl) >> 32))); - leaf_pbl += 256; - } - } else { - for (i = 3; i >= 0; i--) { - j = i * 16; - root_vpbl->pbl_vbase[j] = root_vpbl->pbl_vbase[i]; - leaf_pbl = le32_to_cpu(root_vpbl->pbl_vbase[j].pa_low) + - (((u64)le32_to_cpu(root_vpbl->pbl_vbase[j].pa_high)) - << 32); - for (k = 1; k < 16; k++) { - leaf_pbl += 256; - root_vpbl->pbl_vbase[j + k].pa_low = - cpu_to_le32((u32)leaf_pbl); - root_vpbl->pbl_vbase[j + k].pa_high = - cpu_to_le32((u32)((((u64)leaf_pbl) >> 32))); - } - } - } - - return 1; -} - - -/** - * nes_reg_mr - */ -static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, - u32 stag, u64 region_length, struct nes_root_vpbl *root_vpbl, - dma_addr_t single_buffer, u16 pbl_count_4k, - u16 residual_page_count_4k, int acc, u64 *iova_start, - u16 *actual_pbl_cnt, u8 *used_4k_pbls) -{ - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_cqp_request *cqp_request; - unsigned long flags; - int ret; - struct nes_adapter *nesadapter = nesdev->nesadapter; - uint pg_cnt = 0; - u16 pbl_count_256 = 0; - u16 pbl_count = 0; - u8 use_256_pbls = 0; - u8 use_4k_pbls = 0; - u16 use_two_level = (pbl_count_4k > 1) ? 
1 : 0; - struct nes_root_vpbl new_root = { 0, NULL, NULL }; - u32 opcode = 0; - u16 major_code; - - /* Register the region with the adapter */ - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); - return -ENOMEM; - } - cqp_request->waiting = 1; - cqp_wqe = &cqp_request->cqp_wqe; - - if (pbl_count_4k) { - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - - pg_cnt = ((pbl_count_4k - 1) * 512) + residual_page_count_4k; - pbl_count_256 = (pg_cnt + 31) / 32; - if (pg_cnt <= 32) { - if (pbl_count_256 <= nesadapter->free_256pbl) - use_256_pbls = 1; - else if (pbl_count_4k <= nesadapter->free_4kpbl) - use_4k_pbls = 1; - } else if (pg_cnt <= 2048) { - if (((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) && - (nesadapter->free_4kpbl > (nesadapter->max_4kpbl >> 1))) { - use_4k_pbls = 1; - } else if ((pbl_count_256 + 1) <= nesadapter->free_256pbl) { - use_256_pbls = 1; - use_two_level = 1; - } else if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) { - use_4k_pbls = 1; - } - } else { - if ((pbl_count_4k + 1) <= nesadapter->free_4kpbl) - use_4k_pbls = 1; - } - - if (use_256_pbls) { - pbl_count = pbl_count_256; - nesadapter->free_256pbl -= pbl_count + use_two_level; - } else if (use_4k_pbls) { - pbl_count = pbl_count_4k; - nesadapter->free_4kpbl -= pbl_count + use_two_level; - } else { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - nes_debug(NES_DBG_MR, "Out of Pbls\n"); - nes_free_cqp_request(nesdev, cqp_request); - return -ENOMEM; - } - - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } - - if (use_256_pbls && use_two_level) { - if (root_256(nesdev, root_vpbl, &new_root, pbl_count_4k) == 1) { - if (new_root.pbl_pbase != 0) - root_vpbl = &new_root; - } else { - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - nesadapter->free_256pbl += pbl_count_256 + use_two_level; - use_256_pbls = 0; - - if (pbl_count_4k == 1) - use_two_level = 0; - pbl_count = pbl_count_4k; - - if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) { - nesadapter->free_4kpbl -= pbl_count + use_two_level; - use_4k_pbls = 1; - } - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - - if (use_4k_pbls == 0) - return -ENOMEM; - } - } - - opcode = NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ | - NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR; - if (acc & IB_ACCESS_LOCAL_WRITE) - opcode |= NES_CQP_STAG_RIGHTS_LOCAL_WRITE; - if (acc & IB_ACCESS_REMOTE_WRITE) - opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_REM_ACC_EN; - if (acc & IB_ACCESS_REMOTE_READ) - opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_REM_ACC_EN; - if (acc & IB_ACCESS_MW_BIND) - opcode |= NES_CQP_STAG_RIGHTS_WINDOW_BIND | NES_CQP_STAG_REM_ACC_EN; - - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, *iova_start); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, region_length); - - cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] = - cpu_to_le32((u32)(region_length >> 8) & 0xff000000); - cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |= - cpu_to_le32(nespd->pd_id & 0x00007fff); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); - - if (pbl_count == 0) { - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, single_buffer); - } else { - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, 
root_vpbl->pbl_pbase); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, pbl_count); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (pg_cnt * 8)); - - if (use_4k_pbls) - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE); - } - barrier(); - - atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request); - - /* Wait for CQP */ - ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), - NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u," - " CQP Major:Minor codes = 0x%04X:0x%04X.\n", - stag, ret, cqp_request->major_code, cqp_request->minor_code); - major_code = cqp_request->major_code; - nes_put_cqp_request(nesdev, cqp_request); - - if ((!ret || major_code) && pbl_count != 0) { - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - if (use_256_pbls) - nesadapter->free_256pbl += pbl_count + use_two_level; - else if (use_4k_pbls) - nesadapter->free_4kpbl += pbl_count + use_two_level; - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } - if (new_root.pbl_pbase) - pci_free_consistent(nesdev->pcidev, 512, new_root.pbl_vbase, - new_root.pbl_pbase); - - if (!ret) - return -ETIME; - else if (major_code) - return -EIO; - - *actual_pbl_cnt = pbl_count + use_two_level; - *used_4k_pbls = use_4k_pbls; - return 0; -} - - -/** - * nes_reg_phys_mr - */ -struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size, - int acc, u64 *iova_start) -{ - u64 region_length; - struct nes_pd *nespd = to_nespd(ib_pd); - struct nes_vnic *nesvnic = to_nesvnic(ib_pd->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_mr *nesmr; - struct ib_mr *ibmr; - struct nes_vpbl vpbl; - struct nes_root_vpbl root_vpbl; - u32 stag; - unsigned long mask; - u32 stag_index = 0; - u32 next_stag_index = 0; - u32 driver_key = 0; - int err = 0; - int ret = 0; - u16 pbl_count = 0; - u8 single_page = 1; - u8 stag_key = 0; - - region_length = 0; - vpbl.pbl_vbase = NULL; - root_vpbl.pbl_vbase = NULL; - root_vpbl.pbl_pbase = 0; - - get_random_bytes(&next_stag_index, sizeof(next_stag_index)); - stag_key = (u8)next_stag_index; - - driver_key = 0; - - next_stag_index >>= 8; - next_stag_index %= nesadapter->max_mr; - - if ((addr ^ *iova_start) & ~PAGE_MASK) - return ERR_PTR(-EINVAL); - - err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr, - &stag_index, &next_stag_index, NES_RESOURCE_PHYS_MR); - if (err) { - return ERR_PTR(err); - } - - nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); - if (!nesmr) { - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - return ERR_PTR(-ENOMEM); - } - - /* Allocate a 4K buffer for the PBL */ - vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, - &vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n", - vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase); - if (!vpbl.pbl_vbase) { - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - ibmr = ERR_PTR(-ENOMEM); - kfree(nesmr); - goto reg_phys_err; - } - - - mask = !size; - - if (mask & ~PAGE_MASK) { - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n"); - ibmr = ERR_PTR(-EINVAL); - kfree(nesmr); - goto reg_phys_err; - } - - region_length += size; - vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK); - 
vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32))); - - stag = stag_index << 8; - stag |= driver_key; - stag += (u32)stag_key; - - nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%016lX," - " length = 0x%016lX, index = 0x%08X\n", - stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index); - - /* Make the leaf PBL the root if only one PBL */ - root_vpbl.pbl_pbase = vpbl.pbl_pbase; - - if (single_page) { - pbl_count = 0; - } else { - pbl_count = 1; - } - ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl, - addr, pbl_count, 1, acc, iova_start, - &nesmr->pbls_used, &nesmr->pbl_4k); - - if (ret == 0) { - nesmr->ibmr.rkey = stag; - nesmr->ibmr.lkey = stag; - nesmr->mode = IWNES_MEMREG_TYPE_MEM; - ibmr = &nesmr->ibmr; - } else { - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - } - -reg_phys_err: - /* single PBL case */ - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase); - return ibmr; -} - - -/** - * nes_get_dma_mr - */ -static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc) -{ - u64 kva = 0; - - nes_debug(NES_DBG_MR, "\n"); - - return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva); -} - -/** - * nes_reg_user_mr - */ -static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int acc, struct ib_udata *udata) -{ - u64 iova_start; - __le64 *pbl; - u64 region_length; - dma_addr_t last_dma_addr = 0; - dma_addr_t first_dma_addr = 0; - struct nes_pd *nespd = to_nespd(pd); - struct nes_vnic *nesvnic = to_nesvnic(pd->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct ib_mr *ibmr = ERR_PTR(-EINVAL); - struct sg_dma_page_iter dma_iter; - struct nes_ucontext *nes_ucontext = rdma_udata_to_drv_context( - udata, struct nes_ucontext, ibucontext); - struct nes_pbl *nespbl; - struct nes_mr *nesmr; - struct ib_umem *region; - struct nes_mem_reg_req req; - struct nes_vpbl vpbl; - struct nes_root_vpbl root_vpbl; - int page_index; - int page_count = 0; - int err, pbl_depth = 0; - int ret; - u32 stag; - u32 stag_index = 0; - u32 next_stag_index; - u32 driver_key; - u32 root_pbl_index = 0; - u32 cur_pbl_index = 0; - u32 skip_pages; - u16 pbl_count; - u8 single_page = 1; - u8 stag_key; - - region = ib_umem_get(udata, start, length, acc, 0); - if (IS_ERR(region)) { - return (struct ib_mr *)region; - } - - nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u," - " offset = %u, page size = %lu.\n", - (unsigned long int)start, (unsigned long int)virt, (u32)length, - ib_umem_offset(region), BIT(region->page_shift)); - - skip_pages = ((u32)ib_umem_offset(region)) >> 12; - - if (ib_copy_from_udata(&req, udata, sizeof(req))) { - ib_umem_release(region); - return ERR_PTR(-EFAULT); - } - nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type); - - switch (req.reg_type) { - case IWNES_MEMREG_TYPE_MEM: - pbl_depth = 0; - region_length = 0; - vpbl.pbl_vbase = NULL; - root_vpbl.pbl_vbase = NULL; - root_vpbl.pbl_pbase = 0; - - get_random_bytes(&next_stag_index, sizeof(next_stag_index)); - stag_key = (u8)next_stag_index; - - driver_key = next_stag_index & 0x70000000; - - next_stag_index >>= 8; - next_stag_index %= nesadapter->max_mr; - - err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, - nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_USER_MR); - if (err) { - ib_umem_release(region); - return ERR_PTR(err); - } - - nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); - if (!nesmr) { - 
ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - return ERR_PTR(-ENOMEM); - } - nesmr->region = region; - - for_each_sg_dma_page (region->sg_head.sgl, &dma_iter, region->nmap, 0) { - - region_length += PAGE_SIZE; - region_length -= skip_pages << 12; - skip_pages = 0; - if ((page_count != 0) && (page_count << 12) - (ib_umem_offset(region) & (4096 - 1)) >= region->length) - goto enough_pages; - if ((page_count & 0x01FF) == 0) { - if (page_count >= 1024 * 512) { - ib_umem_release(region); - nes_free_resource(nesadapter, - nesadapter->allocated_mrs, stag_index); - kfree(nesmr); - ibmr = ERR_PTR(-E2BIG); - goto reg_user_mr_err; - } - if (root_pbl_index == 1) { - root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, - 8192, &root_vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", - root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); - if (!root_vpbl.pbl_vbase) { - ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - goto reg_user_mr_err; - } - root_vpbl.leaf_vpbl = kcalloc(1024, - sizeof(*root_vpbl.leaf_vpbl), - GFP_KERNEL); - if (!root_vpbl.leaf_vpbl) { - ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, - root_vpbl.pbl_pbase); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - goto reg_user_mr_err; - } - root_vpbl.pbl_vbase[0].pa_low = - cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[0].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); - root_vpbl.leaf_vpbl[0] = vpbl; - } - vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, - &vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", - vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); - if (!vpbl.pbl_vbase) { - ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - ibmr = ERR_PTR(-ENOMEM); - kfree(nesmr); - goto reg_user_mr_err; - } - if (1 <= root_pbl_index) { - root_vpbl.pbl_vbase[root_pbl_index].pa_low = - cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[root_pbl_index].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); - root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; - } - root_pbl_index++; - cur_pbl_index = 0; - } - if (single_page) { - if (page_count != 0) { - if ((last_dma_addr + 4096) != sg_page_iter_dma_address(&dma_iter)) - single_page = 0; - last_dma_addr = sg_page_iter_dma_address(&dma_iter); - } else { - first_dma_addr = sg_page_iter_dma_address(&dma_iter); - last_dma_addr = first_dma_addr; - } - } - - vpbl.pbl_vbase[cur_pbl_index].pa_low = - cpu_to_le32((u32)(sg_page_iter_dma_address(&dma_iter))); - vpbl.pbl_vbase[cur_pbl_index].pa_high = - cpu_to_le32((u32)((u64)(sg_page_iter_dma_address(&dma_iter)))); - cur_pbl_index++; - page_count++; - } - -enough_pages: - nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x," - " stag_key=0x%08x\n", - stag_index, driver_key, stag_key); - stag = stag_index << 8; - stag |= driver_key; - stag += (u32)stag_key; - - iova_start = virt; - /* Make the leaf PBL the root if only one PBL */ - if (root_pbl_index == 1) { - root_vpbl.pbl_pbase = vpbl.pbl_pbase; - } - - if (single_page) { - pbl_count = 0; - } else { - 
pbl_count = root_pbl_index; - first_dma_addr = 0; - } - nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%08X, length = 0x%08X," - " index = 0x%08X, region->length=0x%08llx, pbl_count = %u\n", - stag, (unsigned int)iova_start, - (unsigned int)region_length, stag_index, - (unsigned long long)region->length, pbl_count); - ret = nes_reg_mr(nesdev, nespd, stag, region->length, &root_vpbl, - first_dma_addr, pbl_count, (u16)cur_pbl_index, acc, - &iova_start, &nesmr->pbls_used, &nesmr->pbl_4k); - - nes_debug(NES_DBG_MR, "ret=%d\n", ret); - - if (ret == 0) { - nesmr->ibmr.rkey = stag; - nesmr->ibmr.lkey = stag; - nesmr->mode = IWNES_MEMREG_TYPE_MEM; - ibmr = &nesmr->ibmr; - } else { - ib_umem_release(region); - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - } - -reg_user_mr_err: - /* free the resources */ - if (root_pbl_index == 1) { - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - } else { - for (page_index=0; page_index<root_pbl_index; page_index++) { - pci_free_consistent(nesdev->pcidev, 4096, - root_vpbl.leaf_vpbl[page_index].pbl_vbase, - root_vpbl.leaf_vpbl[page_index].pbl_pbase); - } - kfree(root_vpbl.leaf_vpbl); - pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, - root_vpbl.pbl_pbase); - } - - nes_debug(NES_DBG_MR, "Leaving, ibmr=%p", ibmr); - - return ibmr; - case IWNES_MEMREG_TYPE_QP: - case IWNES_MEMREG_TYPE_CQ: - if (!region->length) { - nes_debug(NES_DBG_MR, "Unable to register zero length region for CQ\n"); - ib_umem_release(region); - return ERR_PTR(-EINVAL); - } - nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL); - if (!nespbl) { - ib_umem_release(region); - return ERR_PTR(-ENOMEM); - } - nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); - if (!nesmr) { - ib_umem_release(region); - kfree(nespbl); - return ERR_PTR(-ENOMEM); - } - nesmr->region = region; - pbl_depth = region->length >> 12; - pbl_depth += (region->length & (4096-1)) ? 
1 : 0; - nespbl->pbl_size = pbl_depth*sizeof(u64); - if (req.reg_type == IWNES_MEMREG_TYPE_QP) { - nes_debug(NES_DBG_MR, "Attempting to allocate QP PBL memory"); - } else { - nes_debug(NES_DBG_MR, "Attempting to allocate CP PBL memory"); - } - - nes_debug(NES_DBG_MR, " %u bytes, %u entries.\n", - nespbl->pbl_size, pbl_depth); - pbl = pci_alloc_consistent(nesdev->pcidev, nespbl->pbl_size, - &nespbl->pbl_pbase); - if (!pbl) { - ib_umem_release(region); - kfree(nesmr); - kfree(nespbl); - nes_debug(NES_DBG_MR, "Unable to allocate PBL memory\n"); - return ERR_PTR(-ENOMEM); - } - - nespbl->pbl_vbase = (u64 *)pbl; - nespbl->user_base = start; - nes_debug(NES_DBG_MR, "Allocated PBL memory, %u bytes, pbl_pbase=%lx," - " pbl_vbase=%p user_base=0x%lx\n", - nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase, - (void *) nespbl->pbl_vbase, nespbl->user_base); - - nespbl->page = sg_page(region->sg_head.sgl); - for_each_sg_dma_page(region->sg_head.sgl, &dma_iter, region->nmap, 0) { - ((__le32 *)pbl)[0] = cpu_to_le32((u32)(sg_page_iter_dma_address(&dma_iter))); - ((__le32 *)pbl)[1] = cpu_to_le32(((u64)(sg_page_iter_dma_address(&dma_iter)))>>32); - nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, - (unsigned long long)*pbl, - le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0])); - pbl++; - } - - if (req.reg_type == IWNES_MEMREG_TYPE_QP) { - list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list); - } else { - list_add_tail(&nespbl->list, &nes_ucontext->cq_reg_mem_list); - } - nesmr->ibmr.rkey = -1; - nesmr->ibmr.lkey = -1; - nesmr->mode = req.reg_type; - return &nesmr->ibmr; - } - - ib_umem_release(region); - return ERR_PTR(-ENOSYS); -} - - -/** - * nes_dereg_mr - */ -static int nes_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) -{ - struct nes_mr *nesmr = to_nesmr(ib_mr); - struct nes_vnic *nesvnic = to_nesvnic(ib_mr->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_cqp_request *cqp_request; - unsigned long flags; - int ret; - u16 major_code; - u16 minor_code; - - - if (nesmr->pages) - pci_free_consistent(nesdev->pcidev, - nesmr->max_pages * sizeof(u64), - nesmr->pages, - nesmr->paddr); - - if (nesmr->region) { - ib_umem_release(nesmr->region); - } - if (nesmr->mode != IWNES_MEMREG_TYPE_MEM) { - kfree(nesmr); - return 0; - } - - /* Deallocate the region with the adapter */ - - cqp_request = nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); - return -ENOMEM; - } - cqp_request->waiting = 1; - cqp_wqe = &cqp_request->cqp_wqe; - - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, - NES_CQP_DEALLOCATE_STAG | NES_CQP_STAG_VA_TO | - NES_CQP_STAG_DEALLOC_PBLS | NES_CQP_STAG_MR); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey); - - atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request); - - /* Wait for CQP */ - nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey); - ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), - NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_MR, "Deallocate STag 0x%08X completed, wait_event_timeout ret = %u," - " CQP Major:Minor codes = 0x%04X:0x%04X\n", - ib_mr->rkey, ret, cqp_request->major_code, cqp_request->minor_code); - - major_code = cqp_request->major_code; - minor_code = cqp_request->minor_code; - - 
nes_put_cqp_request(nesdev, cqp_request); - - if (!ret) { - nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag," - " ib_mr=%p, rkey = 0x%08X\n", - ib_mr, ib_mr->rkey); - return -ETIME; - } else if (major_code) { - nes_debug(NES_DBG_MR, "Error (0x%04X:0x%04X) while attempting" - " to destroy STag, ib_mr=%p, rkey = 0x%08X\n", - major_code, minor_code, ib_mr, ib_mr->rkey); - return -EIO; - } - - if (nesmr->pbls_used != 0) { - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - if (nesmr->pbl_4k) { - nesadapter->free_4kpbl += nesmr->pbls_used; - if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) - printk(KERN_ERR PFX "free 4KB PBLs(%u) has " - "exceeded the max(%u)\n", - nesadapter->free_4kpbl, - nesadapter->max_4kpbl); - } else { - nesadapter->free_256pbl += nesmr->pbls_used; - if (nesadapter->free_256pbl > nesadapter->max_256pbl) - printk(KERN_ERR PFX "free 256B PBLs(%u) has " - "exceeded the max(%u)\n", - nesadapter->free_256pbl, - nesadapter->max_256pbl); - } - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - (ib_mr->rkey & 0x0fffff00) >> 8); - - kfree(nesmr); - - return 0; -} - - -/** - * show_rev - */ -static ssize_t hw_rev_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct nes_ib_device *nesibdev = - rdma_device_to_drv_device(dev, struct nes_ib_device, ibdev); - struct nes_vnic *nesvnic = nesibdev->nesvnic; - - nes_debug(NES_DBG_INIT, "\n"); - return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev); -} -static DEVICE_ATTR_RO(hw_rev); - -/** - * show_hca - */ -static ssize_t hca_type_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - nes_debug(NES_DBG_INIT, "\n"); - return sprintf(buf, "NES020\n"); -} -static DEVICE_ATTR_RO(hca_type); - -/** - * show_board - */ -static ssize_t board_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - nes_debug(NES_DBG_INIT, "\n"); - return sprintf(buf, "%.*s\n", 32, "NES020 Board ID"); -} -static DEVICE_ATTR_RO(board_id); - -static struct attribute *nes_dev_attributes[] = { - &dev_attr_hw_rev.attr, - &dev_attr_hca_type.attr, - &dev_attr_board_id.attr, - NULL -}; - -static const struct attribute_group nes_attr_group = { - .attrs = nes_dev_attributes, -}; - -/** - * nes_query_qp - */ -static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_qp_init_attr *init_attr) -{ - struct nes_qp *nesqp = to_nesqp(ibqp); - - nes_debug(NES_DBG_QP, "\n"); - - attr->qp_access_flags = 0; - attr->cap.max_send_wr = nesqp->hwqp.sq_size; - attr->cap.max_recv_wr = nesqp->hwqp.rq_size; - attr->cap.max_recv_sge = 1; - if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) - attr->cap.max_inline_data = 0; - else - attr->cap.max_inline_data = 64; - - init_attr->event_handler = nesqp->ibqp.event_handler; - init_attr->qp_context = nesqp->ibqp.qp_context; - init_attr->send_cq = nesqp->ibqp.send_cq; - init_attr->recv_cq = nesqp->ibqp.recv_cq; - init_attr->srq = nesqp->ibqp.srq; - init_attr->cap = attr->cap; - - return 0; -} - - -/** - * nes_hw_modify_qp - */ -int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, - u32 next_iwarp_state, u32 termlen, u32 wait_completion) -{ - struct nes_hw_cqp_wqe *cqp_wqe; - /* struct iw_cm_id *cm_id = nesqp->cm_id; */ - /* struct iw_cm_event cm_event; */ - struct nes_cqp_request *cqp_request; - int ret; - u16 major_code; - - nes_debug(NES_DBG_MOD_QP, "QP%u, refcount=%d\n", - nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); - - cqp_request = 
nes_get_cqp_request(nesdev); - if (cqp_request == NULL) { - nes_debug(NES_DBG_MOD_QP, "Failed to get a cqp_request.\n"); - return -ENOMEM; - } - if (wait_completion) { - cqp_request->waiting = 1; - } else { - cqp_request->waiting = 0; - } - cqp_wqe = &cqp_request->cqp_wqe; - - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, - NES_CQP_MODIFY_QP | NES_CQP_QP_TYPE_IWARP | next_iwarp_state); - nes_debug(NES_DBG_MOD_QP, "using next_iwarp_state=%08x, wqe_words=%08x\n", - next_iwarp_state, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])); - nes_fill_init_cqp_wqe(cqp_wqe, nesdev); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase); - - /* If sending a terminate message, fill in the length (in words) */ - if (((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == NES_CQP_QP_IWARP_STATE_TERMINATE) && - !(next_iwarp_state & NES_CQP_QP_TERM_DONT_SEND_TERM_MSG)) { - termlen = ((termlen + 3) >> 2) << NES_CQP_OP_TERMLEN_SHIFT; - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_NEW_MSS_IDX, termlen); - } - - atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request); - - /* Wait for CQP */ - if (wait_completion) { - /* nes_debug(NES_DBG_MOD_QP, "Waiting for modify iWARP QP%u to complete.\n", - nesqp->hwqp.qp_id); */ - ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), - NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u completed, wait_event_timeout ret=%u, " - "CQP Major:Minor codes = 0x%04X:0x%04X.\n", - nesqp->hwqp.qp_id, ret, cqp_request->major_code, cqp_request->minor_code); - major_code = cqp_request->major_code; - if (major_code) { - nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u failed" - "CQP Major:Minor codes = 0x%04X:0x%04X, intended next state = 0x%08X.\n", - nesqp->hwqp.qp_id, cqp_request->major_code, - cqp_request->minor_code, next_iwarp_state); - } - - nes_put_cqp_request(nesdev, cqp_request); - - if (!ret) - return -ETIME; - else if (major_code) - return -EIO; - else - return 0; - } else { - return 0; - } -} - - -/** - * nes_modify_qp - */ -int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) -{ - struct nes_qp *nesqp = to_nesqp(ibqp); - struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); - struct nes_device *nesdev = nesvnic->nesdev; - /* u32 cqp_head; */ - /* u32 counter; */ - u32 next_iwarp_state = 0; - int err; - unsigned long qplockflags; - int ret; - u16 original_last_aeq; - u8 issue_modify_qp = 0; - u8 dont_wait = 0; - - nes_debug(NES_DBG_MOD_QP, "QP%u: QP State=%u, cur QP State=%u," - " iwarp_state=0x%X, refcount=%d\n", - nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state, - nesqp->iwarp_state, atomic_read(&nesqp->refcount)); - - spin_lock_irqsave(&nesqp->lock, qplockflags); - - nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X," - " QP Access Flags=0x%X, attr_mask = 0x%0x\n", - nesqp->hwqp.qp_id, nesqp->hw_iwarp_state, - nesqp->hw_tcp_state, attr->qp_access_flags, attr_mask); - - if (attr_mask & IB_QP_STATE) { - switch (attr->qp_state) { - case IB_QPS_INIT: - nes_debug(NES_DBG_MOD_QP, "QP%u: new state = init\n", - nesqp->hwqp.qp_id); - if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - return -EINVAL; - } - next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; - issue_modify_qp = 1; - break; - case IB_QPS_RTR: - 
nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rtr\n", - nesqp->hwqp.qp_id); - if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - return -EINVAL; - } - next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; - issue_modify_qp = 1; - break; - case IB_QPS_RTS: - nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rts\n", - nesqp->hwqp.qp_id); - if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - return -EINVAL; - } - if (nesqp->cm_id == NULL) { - nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n", - nesqp->hwqp.qp_id ); - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - return -EINVAL; - } - next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS; - if (nesqp->iwarp_state != NES_CQP_QP_IWARP_STATE_RTS) - next_iwarp_state |= NES_CQP_QP_CONTEXT_VALID | - NES_CQP_QP_ARP_VALID | NES_CQP_QP_ORD_VALID; - issue_modify_qp = 1; - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_ESTABLISHED; - nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_RTS; - nesqp->hte_added = 1; - break; - case IB_QPS_SQD: - issue_modify_qp = 1; - nes_debug(NES_DBG_MOD_QP, "QP%u: new state=closing. SQ head=%u, SQ tail=%u\n", - nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail); - if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - return 0; - } else { - if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { - nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing" - " ignored due to current iWARP state\n", - nesqp->hwqp.qp_id); - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - return -EINVAL; - } - if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) { - nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing" - " already done based on hw state.\n", - nesqp->hwqp.qp_id); - issue_modify_qp = 0; - } - switch (nesqp->hw_iwarp_state) { - case NES_AEQE_IWARP_STATE_CLOSING: - next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; - break; - case NES_AEQE_IWARP_STATE_TERMINATE: - next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; - break; - case NES_AEQE_IWARP_STATE_ERROR: - next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; - break; - default: - next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; - nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; - break; - } - } - break; - case IB_QPS_SQE: - nes_debug(NES_DBG_MOD_QP, "QP%u: new state = terminate\n", - nesqp->hwqp.qp_id); - if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - return -EINVAL; - } - /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ - next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; - nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE; - issue_modify_qp = 1; - break; - case IB_QPS_ERR: - case IB_QPS_RESET: - if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - return -EINVAL; - } - nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n", - nesqp->hwqp.qp_id); - if (nesqp->term_flags) - del_timer(&nesqp->terminate_timer); - - next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; - /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ - if (nesqp->hte_added) { - nes_debug(NES_DBG_MOD_QP, "set CQP_QP_DEL_HTE\n"); - next_iwarp_state |= NES_CQP_QP_DEL_HTE; - nesqp->hte_added = 0; - } - if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) && - (nesdev->iw_status) && - 
(nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) { - next_iwarp_state |= NES_CQP_QP_RESET; - } else { - nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n", - nesqp->hwqp.qp_id, nesqp->hw_tcp_state); - dont_wait = 1; - } - issue_modify_qp = 1; - nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR; - break; - default: - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - return -EINVAL; - break; - } - - nesqp->ibqp_state = attr->qp_state; - nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; - nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", - nesqp->iwarp_state); - } - - if (attr_mask & IB_QP_ACCESS_FLAGS) { - if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE) { - nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN | - NES_QPCONTEXT_MISC_RDMA_READ_EN); - issue_modify_qp = 1; - } - if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) { - nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN); - issue_modify_qp = 1; - } - if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) { - nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_READ_EN); - issue_modify_qp = 1; - } - if (attr->qp_access_flags & IB_ACCESS_MW_BIND) { - nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WBIND_EN); - issue_modify_qp = 1; - } - - if (nesqp->user_mode) { - nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN | - NES_QPCONTEXT_MISC_RDMA_READ_EN); - issue_modify_qp = 1; - } - } - - original_last_aeq = nesqp->last_aeq; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - - nes_debug(NES_DBG_MOD_QP, "issue_modify_qp=%u\n", issue_modify_qp); - - ret = 0; - - - if (issue_modify_qp) { - nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n"); - ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 1); - if (ret) - nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)" - " failed for QP%u.\n", - next_iwarp_state, nesqp->hwqp.qp_id); - - } - - if ((issue_modify_qp) && (nesqp->ibqp_state > IB_QPS_RTS)) { - nes_debug(NES_DBG_MOD_QP, "QP%u Issued ModifyQP refcount (%d)," - " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", - nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), - original_last_aeq, nesqp->last_aeq); - if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { - if (dont_wait) { - if (nesqp->cm_id && nesqp->hw_tcp_state != 0) { - nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d)," - " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", - nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), - original_last_aeq, nesqp->last_aeq); - /* this one is for the cm_disconnect thread */ - spin_lock_irqsave(&nesqp->lock, qplockflags); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_cm_disconn(nesqp); - } else { - nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n", - nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); - } - } else { - spin_lock_irqsave(&nesqp->lock, qplockflags); - if (nesqp->cm_id) { - /* These two are for the timer thread */ - if (atomic_inc_return(&nesqp->close_timer_started) == 1) { - nesqp->cm_id->add_ref(nesqp->cm_id); - nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d)," - " need ae to finish up, original_last_aeq = 0x%04X." 
- " last_aeq = 0x%04X, scheduling timer.\n", - nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), - original_last_aeq, nesqp->last_aeq); - schedule_nes_timer(nesqp->cm_node, (struct sk_buff *) nesqp, NES_TIMER_TYPE_CLOSE, 1, 0); - } - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - } else { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d)," - " need ae to finish up, original_last_aeq = 0x%04X." - " last_aeq = 0x%04X.\n", - nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), - original_last_aeq, nesqp->last_aeq); - } - } - } else { - nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up," - " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", - nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), - original_last_aeq, nesqp->last_aeq); - } - } else { - nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up," - " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", - nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), - original_last_aeq, nesqp->last_aeq); - } - - err = 0; - - nes_debug(NES_DBG_MOD_QP, "QP%u Leaving, refcount=%d\n", - nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); - - return err; -} - -static inline void -fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, const struct ib_send_wr *ib_wr, - u32 uselkey) -{ - int sge_index; - int total_payload_length = 0; - for (sge_index = 0; sge_index < ib_wr->num_sge; sge_index++) { - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX+(sge_index*4), - ib_wr->sg_list[sge_index].addr); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_LENGTH0_IDX + (sge_index*4), - ib_wr->sg_list[sge_index].length); - if (uselkey) - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4), - (ib_wr->sg_list[sge_index].lkey)); - else - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4), 0); - - total_payload_length += ib_wr->sg_list[sge_index].length; - } - nes_debug(NES_DBG_IW_TX, "UC UC UC, sending total_payload_length=%u \n", - total_payload_length); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, - total_payload_length); -} - -/** - * nes_post_send - */ -static int nes_post_send(struct ib_qp *ibqp, const struct ib_send_wr *ib_wr, - const struct ib_send_wr **bad_wr) -{ - u64 u64temp; - unsigned long flags = 0; - struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_qp *nesqp = to_nesqp(ibqp); - struct nes_hw_qp_wqe *wqe; - int err = 0; - u32 qsize = nesqp->hwqp.sq_size; - u32 head; - u32 wqe_misc = 0; - u32 wqe_count = 0; - u32 counter; - - if (nesqp->ibqp_state > IB_QPS_RTS) { - err = -EINVAL; - goto out; - } - - spin_lock_irqsave(&nesqp->lock, flags); - - head = nesqp->hwqp.sq_head; - - while (ib_wr) { - /* Check for QP error */ - if (nesqp->term_flags) { - err = -EINVAL; - break; - } - - /* Check for SQ overflow */ - if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { - err = -ENOMEM; - break; - } - - wqe = &nesqp->hwqp.sq_vbase[head]; - /* nes_debug(NES_DBG_IW_TX, "processing sq wqe for QP%u at %p, head = %u.\n", - nesqp->hwqp.qp_id, wqe, head); */ - nes_fill_init_qp_wqe(wqe, nesqp, head); - u64temp = (u64)(ib_wr->wr_id); - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, - u64temp); - switch (ib_wr->opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_INV: - if (IB_WR_SEND == ib_wr->opcode) { - if (ib_wr->send_flags & IB_SEND_SOLICITED) 
- wqe_misc = NES_IWARP_SQ_OP_SENDSE; - else - wqe_misc = NES_IWARP_SQ_OP_SEND; - } else { - if (ib_wr->send_flags & IB_SEND_SOLICITED) - wqe_misc = NES_IWARP_SQ_OP_SENDSEINV; - else - wqe_misc = NES_IWARP_SQ_OP_SENDINV; - - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX, - ib_wr->ex.invalidate_rkey); - } - - if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { - err = -EINVAL; - break; - } - - if (ib_wr->send_flags & IB_SEND_FENCE) - wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; - - if ((ib_wr->send_flags & IB_SEND_INLINE) && - ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && - (ib_wr->sg_list[0].length <= 64)) { - memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], - (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, - ib_wr->sg_list[0].length); - wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA; - } else { - fill_wqe_sg_send(wqe, ib_wr, 1); - } - - break; - case IB_WR_RDMA_WRITE: - wqe_misc = NES_IWARP_SQ_OP_RDMAW; - if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { - nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n", - ib_wr->num_sge, nesdev->nesadapter->max_sge); - err = -EINVAL; - break; - } - - if (ib_wr->send_flags & IB_SEND_FENCE) - wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; - - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, - rdma_wr(ib_wr)->rkey); - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, - rdma_wr(ib_wr)->remote_addr); - - if ((ib_wr->send_flags & IB_SEND_INLINE) && - ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && - (ib_wr->sg_list[0].length <= 64)) { - memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], - (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, - ib_wr->sg_list[0].length); - wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA; - } else { - fill_wqe_sg_send(wqe, ib_wr, 1); - } - - wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]; - break; - case IB_WR_RDMA_READ: - case IB_WR_RDMA_READ_WITH_INV: - /* iWARP only supports 1 sge for RDMA reads */ - if (ib_wr->num_sge > 1) { - nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n", - ib_wr->num_sge); - err = -EINVAL; - break; - } - if (ib_wr->opcode == IB_WR_RDMA_READ) { - wqe_misc = NES_IWARP_SQ_OP_RDMAR; - } else { - wqe_misc = NES_IWARP_SQ_OP_RDMAR_LOCINV; - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX, - ib_wr->ex.invalidate_rkey); - } - - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, - rdma_wr(ib_wr)->remote_addr); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, - rdma_wr(ib_wr)->rkey); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX, - ib_wr->sg_list->length); - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, - ib_wr->sg_list->addr); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX, - ib_wr->sg_list->lkey); - break; - case IB_WR_LOCAL_INV: - wqe_misc = NES_IWARP_SQ_OP_LOCINV; - set_wqe_32bit_value(wqe->wqe_words, - NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX, - ib_wr->ex.invalidate_rkey); - break; - case IB_WR_REG_MR: - { - struct nes_mr *mr = to_nesmr(reg_wr(ib_wr)->mr); - int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size); - int flags = reg_wr(ib_wr)->access; - - if (mr->npages > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) { - nes_debug(NES_DBG_IW_TX, "SQ_FMR: 
bad page_list_len\n"); - err = -EINVAL; - break; - } - wqe_misc = NES_IWARP_SQ_OP_FAST_REG; - set_wqe_64bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX, - mr->ibmr.iova); - set_wqe_32bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, - lower_32_bits(mr->ibmr.length)); - set_wqe_32bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); - set_wqe_32bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, - reg_wr(ib_wr)->key); - - if (page_shift == 12) { - wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K; - } else if (page_shift == 21) { - wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M; - } else { - nes_debug(NES_DBG_IW_TX, "Invalid page shift," - " ib_wr=%u, max=1\n", ib_wr->num_sge); - err = -EINVAL; - break; - } - - /* Set access_flags */ - wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ; - if (flags & IB_ACCESS_LOCAL_WRITE) - wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE; - - if (flags & IB_ACCESS_REMOTE_WRITE) - wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE; - - if (flags & IB_ACCESS_REMOTE_READ) - wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ; - - if (flags & IB_ACCESS_MW_BIND) - wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND; - - /* Fill in PBL info: */ - set_wqe_64bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX, - mr->paddr); - - set_wqe_32bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX, - mr->npages * 8); - - nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, " - "length: %lld, rkey: %0x, pgl_paddr: %llx, " - "page_list_len: %u, wqe_misc: %x\n", - (unsigned long long) mr->ibmr.iova, - mr->ibmr.length, - reg_wr(ib_wr)->key, - (unsigned long long) mr->paddr, - mr->npages, - wqe_misc); - break; - } - default: - /* error */ - err = -EINVAL; - break; - } - - if (err) - break; - - if ((ib_wr->send_flags & IB_SEND_SIGNALED) || nesqp->sig_all) - wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL; - - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc); - - ib_wr = ib_wr->next; - head++; - wqe_count++; - if (head >= qsize) - head = 0; - - } - - nesqp->hwqp.sq_head = head; - barrier(); - while (wqe_count) { - counter = min(wqe_count, ((u32)255)); - wqe_count -= counter; - nes_write32(nesdev->regs + NES_WQE_ALLOC, - (counter << 24) | 0x00800000 | nesqp->hwqp.qp_id); - } - - spin_unlock_irqrestore(&nesqp->lock, flags); - -out: - if (err) - *bad_wr = ib_wr; - return err; -} - - -/** - * nes_post_recv - */ -static int nes_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr, - const struct ib_recv_wr **bad_wr) -{ - u64 u64temp; - unsigned long flags = 0; - struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_qp *nesqp = to_nesqp(ibqp); - struct nes_hw_qp_wqe *wqe; - int err = 0; - int sge_index; - u32 qsize = nesqp->hwqp.rq_size; - u32 head; - u32 wqe_count = 0; - u32 counter; - u32 total_payload_length; - - if (nesqp->ibqp_state > IB_QPS_RTS) { - err = -EINVAL; - goto out; - } - - spin_lock_irqsave(&nesqp->lock, flags); - - head = nesqp->hwqp.rq_head; - - while (ib_wr) { - /* Check for QP error */ - if (nesqp->term_flags) { - err = -EINVAL; - break; - } - - if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { - err = -EINVAL; - break; - } - /* Check for RQ overflow */ - if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) { - err = -ENOMEM; - break; - } - - nes_debug(NES_DBG_IW_RX, "ibwr sge count = %u.\n", ib_wr->num_sge); - wqe = &nesqp->hwqp.rq_vbase[head]; 
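[Note on the removed nes_post_send()/nes_post_recv() code above: both paths reject new work with the same one-slot-open ring test, ((head + (2 * qsize) - tail) % qsize) == (qsize - 1). As a standalone sketch, with invented names rather than code from the driver:

	/* Treat the ring as full while one slot is still free, so "full"
	 * and "empty" stay distinguishable.  head and tail are indices
	 * already reduced modulo qsize; adding 2 * qsize keeps the
	 * unsigned subtraction non-negative. */
	static inline bool wq_ring_full(u32 head, u32 tail, u32 qsize)
	{
		u32 used = (head + 2 * qsize - tail) % qsize;

		return used == qsize - 1;
	}

The same computation appears verbatim in both the SQ and RQ post paths being deleted here.]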
- - /* nes_debug(NES_DBG_IW_RX, "QP%u:processing rq wqe at %p, head = %u.\n", - nesqp->hwqp.qp_id, wqe, head); */ - nes_fill_init_qp_wqe(wqe, nesqp, head); - u64temp = (u64)(ib_wr->wr_id); - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, - u64temp); - total_payload_length = 0; - for (sge_index=0; sge_index < ib_wr->num_sge; sge_index++) { - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_FRAG0_LOW_IDX+(sge_index*4), - ib_wr->sg_list[sge_index].addr); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_LENGTH0_IDX+(sge_index*4), - ib_wr->sg_list[sge_index].length); - set_wqe_32bit_value(wqe->wqe_words,NES_IWARP_RQ_WQE_STAG0_IDX+(sge_index*4), - ib_wr->sg_list[sge_index].lkey); - - total_payload_length += ib_wr->sg_list[sge_index].length; - } - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX, - total_payload_length); - - ib_wr = ib_wr->next; - head++; - wqe_count++; - if (head >= qsize) - head = 0; - } - - nesqp->hwqp.rq_head = head; - barrier(); - while (wqe_count) { - counter = min(wqe_count, ((u32)255)); - wqe_count -= counter; - nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter<<24) | nesqp->hwqp.qp_id); - } - - spin_unlock_irqrestore(&nesqp->lock, flags); - -out: - if (err) - *bad_wr = ib_wr; - return err; -} - -/** - * nes_drain_sq - drain sq - * @ibqp: pointer to ibqp - */ -static void nes_drain_sq(struct ib_qp *ibqp) -{ - struct nes_qp *nesqp = to_nesqp(ibqp); - - if (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head) - wait_for_completion(&nesqp->sq_drained); -} - -/** - * nes_drain_rq - drain rq - * @ibqp: pointer to ibqp - */ -static void nes_drain_rq(struct ib_qp *ibqp) -{ - struct nes_qp *nesqp = to_nesqp(ibqp); - - if (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head) - wait_for_completion(&nesqp->rq_drained); -} - -/** - * nes_poll_cq - */ -static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) -{ - u64 u64temp; - u64 wrid; - unsigned long flags = 0; - struct nes_vnic *nesvnic = to_nesvnic(ibcq->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_cq *nescq = to_nescq(ibcq); - struct nes_qp *nesqp; - struct nes_hw_cqe cqe; - u32 head; - u32 wq_tail = 0; - u32 cq_size; - u32 cqe_count = 0; - u32 wqe_index; - u32 u32temp; - u32 move_cq_head = 1; - u32 err_code; - - nes_debug(NES_DBG_CQ, "\n"); - - spin_lock_irqsave(&nescq->lock, flags); - - head = nescq->hw_cq.cq_head; - cq_size = nescq->hw_cq.cq_size; - - while (cqe_count < num_entries) { - if ((le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & - NES_CQE_VALID) == 0) - break; - - /* - * Make sure we read CQ entry contents *after* - * we've checked the valid bit. 
- */ - rmb(); - - cqe = nescq->hw_cq.cq_vbase[head]; - u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); - wqe_index = u32temp & (nesdev->nesadapter->max_qp_wr - 1); - u32temp &= ~(NES_SW_CONTEXT_ALIGN-1); - /* parse CQE, get completion context from WQE (either rq or sq) */ - u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) | - ((u64)u32temp); - - if (u64temp) { - nesqp = (struct nes_qp *)(unsigned long)u64temp; - memset(entry, 0, sizeof *entry); - if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) { - entry->status = IB_WC_SUCCESS; - } else { - err_code = le32_to_cpu(cqe.cqe_words[NES_CQE_ERROR_CODE_IDX]); - if (NES_IWARP_CQE_MAJOR_DRV == (err_code >> 16)) { - entry->status = err_code & 0x0000ffff; - - /* The rest of the cqe's will be marked as flushed */ - nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX] = - cpu_to_le32((NES_IWARP_CQE_MAJOR_FLUSH << 16) | - NES_IWARP_CQE_MINOR_FLUSH); - } else - entry->status = IB_WC_WR_FLUSH_ERR; - } - - entry->qp = &nesqp->ibqp; - entry->src_qp = nesqp->hwqp.qp_id; - - if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) { - if (nesqp->skip_lsmm) { - nesqp->skip_lsmm = 0; - nesqp->hwqp.sq_tail++; - } - - /* Working on a SQ Completion*/ - wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index]. - wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) | - ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index]. - wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX]))); - entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index]. - wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]); - - switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index]. - wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) { - case NES_IWARP_SQ_OP_RDMAW: - nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n"); - entry->opcode = IB_WC_RDMA_WRITE; - break; - case NES_IWARP_SQ_OP_RDMAR: - nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n"); - entry->opcode = IB_WC_RDMA_READ; - entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index]. 
- wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]); - break; - case NES_IWARP_SQ_OP_SENDINV: - case NES_IWARP_SQ_OP_SENDSEINV: - case NES_IWARP_SQ_OP_SEND: - case NES_IWARP_SQ_OP_SENDSE: - nes_debug(NES_DBG_CQ, "Operation = Send.\n"); - entry->opcode = IB_WC_SEND; - break; - case NES_IWARP_SQ_OP_LOCINV: - entry->opcode = IB_WC_LOCAL_INV; - break; - case NES_IWARP_SQ_OP_FAST_REG: - entry->opcode = IB_WC_REG_MR; - break; - } - - nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1); - if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)) { - move_cq_head = 0; - wq_tail = nesqp->hwqp.sq_tail; - } - } else { - /* Working on a RQ Completion*/ - entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]); - wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) | - ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32); - entry->opcode = IB_WC_RECV; - - nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1); - if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)) { - move_cq_head = 0; - wq_tail = nesqp->hwqp.rq_tail; - } - } - - if (nesqp->iwarp_state > NES_CQP_QP_IWARP_STATE_RTS) { - if (nesqp->hwqp.sq_tail == nesqp->hwqp.sq_head) - complete(&nesqp->sq_drained); - if (nesqp->hwqp.rq_tail == nesqp->hwqp.rq_head) - complete(&nesqp->rq_drained); - } - - entry->wr_id = wrid; - entry++; - cqe_count++; - } - - if (move_cq_head) { - nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; - if (++head >= cq_size) - head = 0; - nescq->polled_completions++; - - if ((nescq->polled_completions > (cq_size / 2)) || - (nescq->polled_completions == 255)) { - nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes" - " are pending %u of %u.\n", - nescq->hw_cq.cq_number, nescq->polled_completions, cq_size); - nes_write32(nesdev->regs+NES_CQE_ALLOC, - nescq->hw_cq.cq_number | (nescq->polled_completions << 16)); - nescq->polled_completions = 0; - } - } else { - /* Update the wqe index and set status to flush */ - wqe_index = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); - wqe_index = (wqe_index & (~(nesdev->nesadapter->max_qp_wr - 1))) | wq_tail; - nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = - cpu_to_le32(wqe_index); - move_cq_head = 1; /* ready for next pass */ - } - } - - if (nescq->polled_completions) { - nes_write32(nesdev->regs+NES_CQE_ALLOC, - nescq->hw_cq.cq_number | (nescq->polled_completions << 16)); - nescq->polled_completions = 0; - } - - nescq->hw_cq.cq_head = head; - nes_debug(NES_DBG_CQ, "Reporting %u completions for CQ%u.\n", - cqe_count, nescq->hw_cq.cq_number); - - spin_unlock_irqrestore(&nescq->lock, flags); - - return cqe_count; -} - - -/** - * nes_req_notify_cq - */ -static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) - { - struct nes_vnic *nesvnic = to_nesvnic(ibcq->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_cq *nescq = to_nescq(ibcq); - u32 cq_arm; - - nes_debug(NES_DBG_CQ, "Requesting notification for CQ%u.\n", - nescq->hw_cq.cq_number); - - cq_arm = nescq->hw_cq.cq_number; - if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP) - cq_arm |= NES_CQE_ALLOC_NOTIFY_NEXT; - else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) - cq_arm |= NES_CQE_ALLOC_NOTIFY_SE; - else - return -EINVAL; - - nes_write32(nesdev->regs+NES_CQE_ALLOC, cq_arm); - 
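[The removed nes_poll_cq() above follows the usual rule for hardware-written CQEs: test the valid bit first, then use rmb() so the rest of the entry is read only after the bit was observed set, since the adapter is expected to write the CQE payload before setting the bit. Reduced to a sketch (process_cqe() and next_cqe() are hypothetical helpers, not driver functions):

	while (le32_to_cpu(cqe->cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
		rmb();			/* read the CQE body only after seeing the valid bit */
		process_cqe(cq, cqe);	/* hypothetical consumer */
		cqe = next_cqe(cq);	/* hypothetical ring advance */
	}
]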
nes_read32(nesdev->regs+NES_CQE_ALLOC); - - return 0; -} - -static int nes_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; - - err = nes_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - - return 0; -} - -static void get_dev_fw_str(struct ib_device *dev, char *str) -{ - struct nes_ib_device *nesibdev = - container_of(dev, struct nes_ib_device, ibdev); - struct nes_vnic *nesvnic = nesibdev->nesvnic; - - nes_debug(NES_DBG_INIT, "\n"); - snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u", - (nesvnic->nesdev->nesadapter->firmware_version >> 16), - (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); -} - -static const struct ib_device_ops nes_dev_ops = { - .alloc_mr = nes_alloc_mr, - .alloc_mw = nes_alloc_mw, - .alloc_pd = nes_alloc_pd, - .alloc_ucontext = nes_alloc_ucontext, - .create_cq = nes_create_cq, - .create_qp = nes_create_qp, - .dealloc_mw = nes_dealloc_mw, - .dealloc_pd = nes_dealloc_pd, - .dealloc_ucontext = nes_dealloc_ucontext, - .dereg_mr = nes_dereg_mr, - .destroy_cq = nes_destroy_cq, - .destroy_qp = nes_destroy_qp, - .drain_rq = nes_drain_rq, - .drain_sq = nes_drain_sq, - .get_dev_fw_str = get_dev_fw_str, - .get_dma_mr = nes_get_dma_mr, - .get_port_immutable = nes_port_immutable, - .iw_accept = nes_accept, - .iw_add_ref = nes_add_ref, - .iw_connect = nes_connect, - .iw_create_listen = nes_create_listen, - .iw_destroy_listen = nes_destroy_listen, - .iw_get_qp = nes_get_qp, - .iw_reject = nes_reject, - .iw_rem_ref = nes_rem_ref, - .map_mr_sg = nes_map_mr_sg, - .mmap = nes_mmap, - .modify_qp = nes_modify_qp, - .poll_cq = nes_poll_cq, - .post_recv = nes_post_recv, - .post_send = nes_post_send, - .query_device = nes_query_device, - .query_gid = nes_query_gid, - .query_pkey = nes_query_pkey, - .query_port = nes_query_port, - .query_qp = nes_query_qp, - .reg_user_mr = nes_reg_user_mr, - .req_notify_cq = nes_req_notify_cq, - INIT_RDMA_OBJ_SIZE(ib_pd, nes_pd, ibpd), - INIT_RDMA_OBJ_SIZE(ib_ucontext, nes_ucontext, ibucontext), -}; - -/** - * nes_init_ofa_device - */ -struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) -{ - struct nes_ib_device *nesibdev; - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - - nesibdev = ib_alloc_device(nes_ib_device, ibdev); - if (nesibdev == NULL) { - return NULL; - } - nesibdev->ibdev.owner = THIS_MODULE; - - nesibdev->ibdev.node_type = RDMA_NODE_RNIC; - memset(&nesibdev->ibdev.node_guid, 0, sizeof(nesibdev->ibdev.node_guid)); - memcpy(&nesibdev->ibdev.node_guid, netdev->dev_addr, 6); - - nesibdev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - 
(1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_BIND_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_POST_SEND); - - nesibdev->ibdev.phys_port_cnt = 1; - nesibdev->ibdev.num_comp_vectors = 1; - nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev; - - ib_set_device_ops(&nesibdev->ibdev, &nes_dev_ops); - memcpy(nesibdev->ibdev.iw_ifname, netdev->name, - sizeof(nesibdev->ibdev.iw_ifname)); - - return nesibdev; -} - - -/** - * nes_handle_delayed_event - */ -static void nes_handle_delayed_event(struct timer_list *t) -{ - struct nes_vnic *nesvnic = from_timer(nesvnic, t, event_timer); - - if (nesvnic->delayed_event != nesvnic->last_dispatched_event) { - struct ib_event event; - - event.device = &nesvnic->nesibdev->ibdev; - if (!event.device) - goto stop_timer; - event.event = nesvnic->delayed_event; - event.element.port_num = nesvnic->logical_port + 1; - ib_dispatch_event(&event); - } - -stop_timer: - nesvnic->event_timer.function = NULL; -} - - -void nes_port_ibevent(struct nes_vnic *nesvnic) -{ - struct nes_ib_device *nesibdev = nesvnic->nesibdev; - struct nes_device *nesdev = nesvnic->nesdev; - struct ib_event event; - event.device = &nesibdev->ibdev; - event.element.port_num = nesvnic->logical_port + 1; - event.event = nesdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; - - if (!nesvnic->event_timer.function) { - ib_dispatch_event(&event); - nesvnic->last_dispatched_event = event.event; - nesvnic->event_timer.function = nes_handle_delayed_event; - nesvnic->event_timer.expires = jiffies + NES_EVENT_DELAY; - add_timer(&nesvnic->event_timer); - } else { - mod_timer(&nesvnic->event_timer, jiffies + NES_EVENT_DELAY); - } - nesvnic->delayed_event = event.event; -} - - -/** - * nes_destroy_ofa_device - */ -void nes_destroy_ofa_device(struct nes_ib_device *nesibdev) -{ - if (nesibdev == NULL) - return; - - nes_unregister_ofa_device(nesibdev); - - ib_dealloc_device(&nesibdev->ibdev); -} - - -/** - * nes_register_ofa_device - */ -int nes_register_ofa_device(struct nes_ib_device *nesibdev) -{ - struct nes_vnic *nesvnic = nesibdev->nesvnic; - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - int ret; - - rdma_set_device_sysfs_group(&nesvnic->nesibdev->ibdev, &nes_attr_group); - nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES; - ret = ib_register_device(&nesvnic->nesibdev->ibdev, "nes%d"); - if (ret) { - return ret; - } - - /* Get the resources allocated to this device */ - nesibdev->max_cq = (nesadapter->max_cq-NES_FIRST_QPN) / nesadapter->port_count; - nesibdev->max_mr = nesadapter->max_mr / nesadapter->port_count; - nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count; - nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count; - - nesvnic->of_device_registered = 1; - - return 0; -} - - -/** - * nes_unregister_ofa_device - */ -static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev) -{ - struct nes_vnic *nesvnic = nesibdev->nesvnic; - - if (nesvnic->of_device_registered) - ib_unregister_device(&nesibdev->ibdev); - - nesvnic->of_device_registered = 0; -} diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h deleted file mode 100644 index 114a9b59fefd..000000000000 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. 
All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef NES_VERBS_H -#define NES_VERBS_H - -struct nes_device; - -#define NES_MAX_USER_DB_REGIONS 4096 -#define NES_MAX_USER_WQ_REGIONS 4096 - -#define NES_TERM_SENT 0x01 -#define NES_TERM_RCVD 0x02 -#define NES_TERM_DONE 0x04 - -struct nes_ucontext { - struct ib_ucontext ibucontext; - struct nes_device *nesdev; - unsigned long mmap_wq_offset; - unsigned long mmap_cq_offset; /* to be removed */ - int index; /* rnic index (minor) */ - unsigned long allocated_doorbells[BITS_TO_LONGS(NES_MAX_USER_DB_REGIONS)]; - u16 mmap_db_index[NES_MAX_USER_DB_REGIONS]; - u16 first_free_db; - unsigned long allocated_wqs[BITS_TO_LONGS(NES_MAX_USER_WQ_REGIONS)]; - struct nes_qp *mmap_nesqp[NES_MAX_USER_WQ_REGIONS]; - u16 first_free_wq; - struct list_head cq_reg_mem_list; - struct list_head qp_reg_mem_list; - u32 mcrqf; -}; - -struct nes_pd { - struct ib_pd ibpd; - u16 pd_id; - atomic_t sqp_count; - u16 mmap_db_index; -}; - -struct nes_mr { - union { - struct ib_mr ibmr; - struct ib_mw ibmw; - struct ib_fmr ibfmr; - }; - struct ib_umem *region; - u16 pbls_used; - u8 mode; - u8 pbl_4k; - __le64 *pages; - dma_addr_t paddr; - u32 max_pages; - u32 npages; -}; - -struct nes_hw_pb { - __le32 pa_low; - __le32 pa_high; -}; - -struct nes_vpbl { - dma_addr_t pbl_pbase; - struct nes_hw_pb *pbl_vbase; -}; - -struct nes_root_vpbl { - dma_addr_t pbl_pbase; - struct nes_hw_pb *pbl_vbase; - struct nes_vpbl *leaf_vpbl; -}; - -struct nes_fmr { - struct nes_mr nesmr; - u32 leaf_pbl_cnt; - struct nes_root_vpbl root_vpbl; - struct ib_qp *ib_qp; - int access_rights; - struct ib_fmr_attr attr; -}; - -struct nes_av; - -struct nes_cq { - struct ib_cq ibcq; - struct nes_hw_cq hw_cq; - u32 polled_completions; - u32 cq_mem_size; - spinlock_t lock; - u8 virtual_cq; - u8 pad[3]; - u32 mcrqf; -}; - -struct nes_wq { - spinlock_t lock; -}; - -struct disconn_work { - struct work_struct work; - struct nes_qp *nesqp; -}; - -struct iw_cm_id; -struct ietf_mpa_frame; - -struct nes_qp { - struct ib_qp ibqp; - void *allocated_buffer; - struct iw_cm_id *cm_id; - struct nes_cq *nesscq; - struct nes_cq *nesrcq; - struct nes_pd *nespd; - void *cm_node; /* handle of the node this QP is associated with */ - void 
*ietf_frame; - u8 ietf_frame_size; - dma_addr_t ietf_frame_pbase; - struct ib_mr *lsmm_mr; - struct nes_hw_qp hwqp; - struct work_struct work; - enum ib_qp_state ibqp_state; - u32 iwarp_state; - u32 hte_index; - u32 last_aeq; - u32 qp_mem_size; - atomic_t refcount; - atomic_t close_timer_started; - u32 mmap_sq_db_index; - u32 mmap_rq_db_index; - spinlock_t lock; - spinlock_t pau_lock; - struct nes_qp_context *nesqp_context; - dma_addr_t nesqp_context_pbase; - void *pbl_vbase; - dma_addr_t pbl_pbase; - struct page *page; - struct timer_list terminate_timer; - enum ib_event_type terminate_eventtype; - struct sk_buff_head pau_list; - u32 pau_rcv_nxt; - u16 active_conn:1; - u16 skip_lsmm:1; - u16 user_mode:1; - u16 hte_added:1; - u16 flush_issued:1; - u16 destroyed:1; - u16 sig_all:1; - u16 pau_mode:1; - u16 rsvd:8; - u16 private_data_len; - u16 term_sq_flush_code; - u16 term_rq_flush_code; - u8 hw_iwarp_state; - u8 hw_tcp_state; - u8 term_flags; - u8 sq_kmapped; - u8 pau_busy; - u8 pau_pending; - u8 pau_state; - __u64 nesuqp_addr; - struct completion sq_drained; - struct completion rq_drained; -}; - -struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, - u64 addr, u64 size, int acc, u64 *iova_start); - -#endif /* NES_VERBS_H */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 5127e2ea4bdd..d82d3ec3649e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1351,7 +1351,6 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa); mqe->u.nonemb_req.sge[0].len = dma.size; - memset(dma.va, 0, dma.size); ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va, OCRDMA_CMD_GET_CTRL_ATTRIBUTES, OCRDMA_SUBSYS_COMMON, @@ -1690,7 +1689,6 @@ static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev) goto mem_err_ah; dev->av_tbl.pa = pa; dev->av_tbl.num_ah = max_ah; - memset(dev->av_tbl.va, 0, dev->av_tbl.size); pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va; for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) { @@ -1888,14 +1886,13 @@ mem_err: return status; } -int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq) +void ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq) { - int status = -ENOMEM; struct ocrdma_destroy_cq *cmd; cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_CQ, sizeof(*cmd)); if (!cmd) - return status; + return; ocrdma_init_mch(&cmd->req, OCRDMA_CMD_DELETE_CQ, OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); @@ -1903,11 +1900,10 @@ int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq) (cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) & OCRDMA_DESTROY_CQ_QID_MASK; - status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); ocrdma_unbind_eq(dev, cq->eqn); dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa); kfree(cmd); - return status; } int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr, @@ -2905,7 +2901,6 @@ static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype, mqe_sge->pa_hi = (u32) upper_32_bits(pa); mqe_sge->len = cmd.hdr.pyld_len; - memset(req, 0, sizeof(struct ocrdma_get_dcbx_cfg_req)); ocrdma_init_mch(&req->hdr, OCRDMA_CMD_GET_DCBX_CONFIG, OCRDMA_SUBSYS_DCBX, cmd.hdr.pyld_len); req->param_type = ptype; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index 06ec59326a90..12c23a7652b9 100644 --- 
a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -122,7 +122,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr, u32 pd_id, int acc); int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *, int entries, int dpp_cq, u16 pd_id); -int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *); +void ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq); int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs, u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index fc6c0962dea9..c15cfc6cef81 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -144,6 +144,10 @@ static const struct attribute_group ocrdma_attr_group = { }; static const struct ib_device_ops ocrdma_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_OCRDMA, + .uverbs_abi_ver = OCRDMA_ABI_VERSION, + .alloc_mr = ocrdma_alloc_mr, .alloc_pd = ocrdma_alloc_pd, .alloc_ucontext = ocrdma_alloc_ucontext, @@ -178,6 +182,7 @@ static const struct ib_device_ops ocrdma_dev_ops = { .resize_cq = ocrdma_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, ocrdma_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, ocrdma_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, ocrdma_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, ocrdma_ucontext, ibucontext), }; @@ -200,8 +205,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); - dev->ibdev.owner = THIS_MODULE; - dev->ibdev.uverbs_abi_ver = OCRDMA_ABI_VERSION; dev->ibdev.uverbs_cmd_mask = OCRDMA_UVERBS(GET_CONTEXT) | OCRDMA_UVERBS(QUERY_DEVICE) | @@ -249,7 +252,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); } rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); - dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA; ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1); if (ret) return ret; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 35ec87015792..bccc11378109 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -925,8 +925,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); /* it could be user registered memory. 
*/ - if (mr->umem) - ib_umem_release(mr->umem); + ib_umem_release(mr->umem); kfree(mr); /* Don't stop cleanup, in case FW is unresponsive */ @@ -977,12 +976,12 @@ err: return status; } -struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; - struct ocrdma_cq *cq; + struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context( udata, struct ocrdma_ucontext, ibucontext); @@ -991,16 +990,13 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, struct ocrdma_create_cq_ureq ureq; if (attr->flags) - return ERR_PTR(-EINVAL); + return -EINVAL; if (udata) { if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) - return ERR_PTR(-EFAULT); + return -EFAULT; } else ureq.dpp_cq = 0; - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); spin_lock_init(&cq->cq_lock); spin_lock_init(&cq->comp_handler_lock); @@ -1011,10 +1007,9 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, pd_id = uctx->cntxt_pd->id; status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id); - if (status) { - kfree(cq); - return ERR_PTR(status); - } + if (status) + return status; + if (udata) { status = ocrdma_copy_cq_uresp(dev, cq, udata); if (status) @@ -1022,12 +1017,11 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, } cq->phase = OCRDMA_CQE_VALID; dev->cq_tbl[cq->id] = cq; - return &cq->ibcq; + return 0; ctx_err: ocrdma_mbx_destroy_cq(dev, cq); - kfree(cq); - return ERR_PTR(status); + return status; } int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt, @@ -1070,7 +1064,7 @@ static void ocrdma_flush_cq(struct ocrdma_cq *cq) spin_unlock_irqrestore(&cq->cq_lock, flags); } -int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_eq *eq = NULL; @@ -1080,14 +1074,13 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) dev->cq_tbl[cq->id] = NULL; indx = ocrdma_get_eq_table_index(dev, cq->eqn); - BUG_ON(indx == -EINVAL); eq = &dev->eq_tbl[indx]; irq = ocrdma_get_irq(dev, eq); synchronize_irq(irq); ocrdma_flush_cq(cq); - (void)ocrdma_mbx_destroy_cq(dev, cq); + ocrdma_mbx_destroy_cq(dev, cq); if (cq->ucontext) { pdid = cq->ucontext->cntxt_pd->id; ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, @@ -1096,9 +1089,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) ocrdma_get_db_addr(dev, pdid), dev->nic_info.db_page_size); } - - kfree(cq); - return 0; } static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index d76aae7ed0d3..32488da1b752 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -71,11 +71,10 @@ int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma); int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); -struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); +int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); 
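[The ocrdma_create_cq()/ocrdma_destroy_cq() changes here, and the matching qedr, usnic and pvrdma hunks further down, are part of the conversion to core-allocated CQ objects: the driver declares the size of its CQ structure through INIT_RDMA_OBJ_SIZE(), ->create_cq() receives an already-allocated struct ib_cq embedded in that structure and returns an int, and ->destroy_cq() returns void with the core freeing the memory. The general shape, as a sketch with invented mydrv_* names:

	struct mydrv_cq {
		struct ib_cq ibcq;	/* embedded core object */
		/* driver-private CQ state */
	};

	static int mydrv_create_cq(struct ib_cq *ibcq,
				   const struct ib_cq_init_attr *attr,
				   struct ib_udata *udata)
	{
		struct mydrv_cq *cq = container_of(ibcq, struct mydrv_cq, ibcq);

		/* set up hardware state; no kzalloc()/kfree() of cq here */
		return 0;
	}

	static const struct ib_device_ops mydrv_dev_ops = {
		.create_cq = mydrv_create_cq,
		INIT_RDMA_OBJ_SIZE(ib_cq, mydrv_cq, ibcq),
	};
]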
int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); -int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); struct ib_qp *ocrdma_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 5ebf3c53b3fb..533157a2a3be 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -183,6 +183,10 @@ static void qedr_roce_register_device(struct qedr_dev *dev) } static const struct ib_device_ops qedr_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_QEDR, + .uverbs_abi_ver = QEDR_ABI_VERSION, + .alloc_mr = qedr_alloc_mr, .alloc_pd = qedr_alloc_pd, .alloc_ucontext = qedr_alloc_ucontext, @@ -220,6 +224,7 @@ static const struct ib_device_ops qedr_dev_ops = { .resize_cq = qedr_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, qedr_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext), @@ -231,8 +236,6 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.node_guid = dev->attr.node_guid; memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); - dev->ibdev.owner = THIS_MODULE; - dev->ibdev.uverbs_abi_ver = QEDR_ABI_VERSION; dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) | QEDR_UVERBS(QUERY_DEVICE) | @@ -274,7 +277,6 @@ static int qedr_register_device(struct qedr_dev *dev) rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group); ib_set_device_ops(&dev->ibdev, &qedr_dev_ops); - dev->ibdev.driver_id = RDMA_DRIVER_QEDR; rc = ib_device_set_netdev(&dev->ibdev, dev->ndev, 1); if (rc) return rc; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 3d7bde19838e..27d90a84ea01 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -159,54 +159,47 @@ int qedr_query_device(struct ib_device *ibdev, return 0; } -#define QEDR_SPEED_SDR (1) -#define QEDR_SPEED_DDR (2) -#define QEDR_SPEED_QDR (4) -#define QEDR_SPEED_FDR10 (8) -#define QEDR_SPEED_FDR (16) -#define QEDR_SPEED_EDR (32) - static inline void get_link_speed_and_width(int speed, u8 *ib_speed, u8 *ib_width) { switch (speed) { case 1000: - *ib_speed = QEDR_SPEED_SDR; + *ib_speed = IB_SPEED_SDR; *ib_width = IB_WIDTH_1X; break; case 10000: - *ib_speed = QEDR_SPEED_QDR; + *ib_speed = IB_SPEED_QDR; *ib_width = IB_WIDTH_1X; break; case 20000: - *ib_speed = QEDR_SPEED_DDR; + *ib_speed = IB_SPEED_DDR; *ib_width = IB_WIDTH_4X; break; case 25000: - *ib_speed = QEDR_SPEED_EDR; + *ib_speed = IB_SPEED_EDR; *ib_width = IB_WIDTH_1X; break; case 40000: - *ib_speed = QEDR_SPEED_QDR; + *ib_speed = IB_SPEED_QDR; *ib_width = IB_WIDTH_4X; break; case 50000: - *ib_speed = QEDR_SPEED_QDR; - *ib_width = IB_WIDTH_4X; + *ib_speed = IB_SPEED_HDR; + *ib_width = IB_WIDTH_1X; break; case 100000: - *ib_speed = QEDR_SPEED_EDR; + *ib_speed = IB_SPEED_EDR; *ib_width = IB_WIDTH_4X; break; default: /* Unsupported */ - *ib_speed = QEDR_SPEED_SDR; + *ib_speed = IB_SPEED_SDR; *ib_width = IB_WIDTH_1X; } } @@ -813,20 +806,20 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) return 0; } -struct ib_cq *qedr_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct 
ib_device *ibdev = ibcq->device; struct qedr_ucontext *ctx = rdma_udata_to_drv_context( udata, struct qedr_ucontext, ibucontext); struct qed_rdma_destroy_cq_out_params destroy_oparams; struct qed_rdma_destroy_cq_in_params destroy_iparams; struct qedr_dev *dev = get_qedr_dev(ibdev); struct qed_rdma_create_cq_in_params params; - struct qedr_create_cq_ureq ureq; + struct qedr_create_cq_ureq ureq = {}; int vector = attr->comp_vector; int entries = attr->cqe; - struct qedr_cq *cq; + struct qedr_cq *cq = get_qedr_cq(ibcq); int chain_entries; int page_cnt; u64 pbl_ptr; @@ -841,18 +834,13 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev, DP_ERR(dev, "create cq: the number of entries %d is too high. Must be equal or below %d.\n", entries, QEDR_MAX_CQES); - return ERR_PTR(-EINVAL); + return -EINVAL; } chain_entries = qedr_align_cq_entries(entries); chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES); - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); - if (udata) { - memset(&ureq, 0, sizeof(ureq)); if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { DP_ERR(dev, "create cq: problem copying data from user space\n"); @@ -930,7 +918,7 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev, "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n", cq->icid, cq, params.cq_size); - return &cq->ibcq; + return 0; err3: destroy_iparams.icid = cq->icid; @@ -945,8 +933,7 @@ err1: if (udata) ib_umem_release(cq->q.umem); err0: - kfree(cq); - return ERR_PTR(-EINVAL); + return -EINVAL; } int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) @@ -962,14 +949,13 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10) #define QEDR_DESTROY_CQ_ITER_DURATION (10) -int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibcq->device); struct qed_rdma_destroy_cq_out_params oparams; struct qed_rdma_destroy_cq_in_params iparams; struct qedr_cq *cq = get_qedr_cq(ibcq); int iter; - int rc; DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid); @@ -977,13 +963,10 @@ int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) /* GSIs CQs are handled by driver, so they don't exist in the FW */ if (cq->cq_type == QEDR_CQ_TYPE_GSI) - goto done; + return; iparams.icid = cq->icid; - rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); - if (rc) - return rc; - + dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); dev->ops->common->chain_free(dev->cdev, &cq->pbl); if (udata) { @@ -1014,27 +997,11 @@ int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) iter--; } - if (oparams.num_cq_notif != cq->cnq_notif) - goto err; - /* Note that we don't need to have explicit code to wait for the * completion of the event handler because it is invoked from the EQ. * Since the destroy CQ ramrod has also been received on the EQ we can * be certain that there's no event handler in process. 
*/ -done: - cq->sig = ~cq->sig; - - kfree(cq); - - return 0; - -err: - DP_ERR(dev, - "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n", - cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif); - - return -EINVAL; } static inline int get_gid_info_from_table(struct ib_qp *ibqp, @@ -1605,12 +1572,10 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev, static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) { - if (qp->usq.umem) - ib_umem_release(qp->usq.umem); + ib_umem_release(qp->usq.umem); qp->usq.umem = NULL; - if (qp->urq.umem) - ib_umem_release(qp->urq.umem); + ib_umem_release(qp->urq.umem); qp->urq.umem = NULL; } @@ -2713,8 +2678,7 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); /* it could be user registered memory. */ - if (mr->umem) - ib_umem_release(mr->umem); + ib_umem_release(mr->umem); kfree(mr); diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 9328c80375ef..9aaa90283d6e 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -50,11 +50,10 @@ int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); -struct ib_cq *qedr_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); +int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); -int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, struct ib_udata *); diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index a81905df2d0f..8d0563ef5be1 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2019 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 
* @@ -398,7 +398,7 @@ int qib_check_send_wqe(struct rvt_qp *qp, case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: - ah = ibah_to_rvtah(wqe->ud_wr.ah); + ah = rvt_get_swqe_ah(wqe); if (wqe->length > (1 << ah->log_pmtu)) return -EINVAL; /* progress hint */ diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 2ac4c67f5ba1..1d5e2d4ee257 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -921,20 +921,11 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) rvt_add_retry_timer(qp); while (qp->s_last != qp->s_acked) { - u32 s_last; - wqe = rvt_get_swqe_ptr(qp, qp->s_last); if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 && qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; - s_last = qp->s_last; - if (++s_last >= qp->s_size) - s_last = 0; - qp->s_last = s_last; - /* see post_send() */ - barrier(); - rvt_put_qp_swqe(qp, wqe); - rvt_qp_swqe_complete(qp, + rvt_qp_complete_swqe(qp, wqe, ib_qib_wc_opcode[wqe->wr.opcode], IB_WC_SUCCESS); @@ -972,21 +963,12 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, * is finished. */ if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 || - qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { - u32 s_last; - - rvt_put_qp_swqe(qp, wqe); - s_last = qp->s_last; - if (++s_last >= qp->s_size) - s_last = 0; - qp->s_last = s_last; - /* see post_send() */ - barrier(); - rvt_qp_swqe_complete(qp, + qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) + rvt_qp_complete_swqe(qp, wqe, ib_qib_wc_opcode[wqe->wr.opcode], IB_WC_SUCCESS); - } else + else this_cpu_inc(*ibp->rvp.rc_delayed_comp); qp->s_retry = qp->s_retry_cnt; @@ -1909,8 +1891,7 @@ send_last: wc.dlid_path_bits = 0; wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - ib_bth_is_solicited(ohdr)); + rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr)); break; case OP(RDMA_WRITE_FIRST): diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 30c70ad0f4bf..e17b91e2c22a 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -400,8 +400,7 @@ last_imm: wc.dlid_path_bits = 0; wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - ib_bth_is_solicited(ohdr)); + rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr)); break; case OP(RDMA_WRITE_FIRST): diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 5cdedba2d164..93ca21347959 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2012 - 2019 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 
* @@ -63,7 +64,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) enum ib_qp_type sqptype, dqptype; rcu_read_lock(); - qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn); + qp = rvt_lookup_qpn(rdi, &ibp->rvp, rvt_get_swqe_remote_qpn(swqe)); if (!qp) { ibp->rvp.n_pkt_drops++; goto drop; @@ -80,7 +81,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) goto drop; } - ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr; + ah_attr = rvt_get_swqe_ah_attr(swqe); ppd = ppd_from_ibp(ibp); if (qp->ibqp.qp_num > 1) { @@ -110,8 +111,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (qp->ibqp.qp_num) { u32 qkey; - qkey = (int)swqe->ud_wr.remote_qkey < 0 ? - sqp->qkey : swqe->ud_wr.remote_qkey; + qkey = (int)rvt_get_swqe_remote_qkey(swqe) < 0 ? + sqp->qkey : rvt_get_swqe_remote_qkey(swqe); if (unlikely(qkey != qp->qkey)) goto drop; } @@ -203,15 +204,14 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) wc.qp = &qp->ibqp; wc.src_qp = sqp->ibqp.qp_num; wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ? - swqe->ud_wr.pkey_index : 0; + rvt_get_swqe_pkey_index(swqe) : 0; wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); wc.sl = rdma_ah_get_sl(ah_attr); wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - swqe->wr.send_flags & IB_SEND_SOLICITED); + rvt_recv_cq(qp, &wc, swqe->wr.send_flags & IB_SEND_SOLICITED); ibp->rvp.n_loop_pkts++; bail_unlock: spin_unlock_irqrestore(&qp->r_lock, flags); @@ -271,7 +271,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) /* Construct the header. */ ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; + ah_attr = rvt_get_swqe_ah_attr(wqe); if (rdma_ah_get_dlid(ah_attr) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { if (rdma_ah_get_dlid(ah_attr) != be16_to_cpu(IB_LID_PERMISSIVE)) @@ -363,7 +363,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) bth0 |= extra_bytes << 20; bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY : qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ? - wqe->ud_wr.pkey_index : qp->s_pkey_index); + rvt_get_swqe_pkey_index(wqe) : qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); /* * Use the multicast QP if the destination LID is a multicast LID. @@ -372,14 +372,15 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) be16_to_cpu(IB_MULTICAST_LID_BASE) && rdma_ah_get_dlid(ah_attr) != be16_to_cpu(IB_LID_PERMISSIVE) ? cpu_to_be32(QIB_MULTICAST_QPN) : - cpu_to_be32(wqe->ud_wr.remote_qpn); + cpu_to_be32(rvt_get_swqe_remote_qpn(wqe)); ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? - qp->qkey : wqe->ud_wr.remote_qkey); + ohdr->u.ud.deth[0] = + cpu_to_be32((int)rvt_get_swqe_remote_qkey(wqe) < 0 ? qp->qkey : + rvt_get_swqe_remote_qkey(wqe)); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: @@ -573,8 +574,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. 
*/ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - ib_bth_is_solicited(ohdr)); + rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr)); return; drop: diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index f712fb7fa82f..bfbfbb7e0ff4 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -40,13 +40,10 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages, int dirty) { - size_t i; - - for (i = 0; i < num_pages; i++) { - if (dirty) - set_page_dirty_lock(p[i]); - put_page(p[i]); - } + if (dirty) + put_user_pages_dirty_lock(p, num_pages); + else + put_user_pages(p, num_pages); } /** diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 0c204776263f..05190edc2611 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -317,7 +317,7 @@ static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, * the caller can ignore this page. */ if (put) { - put_page(page); + put_user_page(page); } else { /* coalesce case */ kunmap(page); @@ -631,7 +631,7 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, kunmap(pkt->addr[i].page); if (pkt->addr[i].put_page) - put_page(pkt->addr[i].page); + put_user_page(pkt->addr[i].page); else __free_page(pkt->addr[i].page); } else if (pkt->addr[i].kvaddr) { @@ -706,7 +706,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, /* if error, return all pages not managed by pkt */ free_pages: while (i < j) - put_page(pages[i++]); + put_user_page(pages[i++]); done: return ret; @@ -904,10 +904,11 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, } if (frag_size) { - int pktsize, tidsmsize, n; + int tidsmsize, n; + size_t pktsize; n = npages*((2*PAGE_SIZE/frag_size)+1); - pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n; + pktsize = struct_size(pkt, addr, n); /* * Determine if this is tid-sdma or just sdma. 
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 2c4e569ce438..33778d451b82 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1480,6 +1480,9 @@ static void qib_fill_device_attr(struct qib_devdata *dd) } static const struct ib_device_ops qib_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_QIB, + .init_port = qib_create_port_files, .modify_device = qib_modify_device, .process_mad = qib_process_mad, @@ -1543,7 +1546,6 @@ int qib_register_ib_device(struct qib_devdata *dd) if (!ib_qib_sys_image_guid) ib_qib_sys_image_guid = ppd->guid; - ibdev->owner = THIS_MODULE; ibdev->node_guid = ppd->guid; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; @@ -1614,7 +1616,7 @@ int qib_register_ib_device(struct qib_devdata *dd) rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group); ib_set_device_ops(ibdev, &qib_dev_ops); - ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB); + ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) goto err_tx; diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h index 525bf272671e..84dd682d2334 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib.h +++ b/drivers/infiniband/hw/usnic/usnic_ib.h @@ -61,6 +61,10 @@ struct usnic_ib_pd { struct usnic_uiom_pd *umem_pd; }; +struct usnic_ib_cq { + struct ib_cq ibcq; +}; + struct usnic_ib_mr { struct ib_mr ibmr; struct usnic_uiom_reg *umem; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 34c1f9d6c915..03f54eb9404b 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -329,6 +329,10 @@ static void usnic_get_dev_fw_str(struct ib_device *device, char *str) } static const struct ib_device_ops usnic_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_USNIC, + .uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION, + .alloc_pd = usnic_ib_alloc_pd, .alloc_ucontext = usnic_ib_alloc_ucontext, .create_cq = usnic_ib_create_cq, @@ -350,6 +354,7 @@ static const struct ib_device_ops usnic_dev_ops = { .query_qp = usnic_ib_query_qp, .reg_user_mr = usnic_ib_reg_mr, INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_cq, usnic_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_ucontext, usnic_ib_ucontext, ibucontext), }; @@ -384,12 +389,10 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->pdev = dev; us_ibdev->netdev = pci_get_drvdata(dev); - us_ibdev->ib_dev.owner = THIS_MODULE; us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP; us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT; us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; us_ibdev->ib_dev.dev.parent = &dev->dev; - us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION; us_ibdev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | @@ -412,7 +415,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev) ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops); - us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC; rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); ret = ib_device_set_netdev(&us_ibdev->ib_dev, us_ibdev->netdev, 1); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index e9352750e029..eeb07b245ef9 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -587,28 +587,18 @@ out_unlock: return status; } 
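[A second recurring conversion in the qib, usnic, ocrdma and qedr hunks: owner and driver_id (and, for the userspace-facing drivers, uverbs_abi_ver) are no longer assigned on struct ib_device at registration time; they move into the driver's const struct ib_device_ops and are picked up via ib_set_device_ops(). Sketch with invented names; the ABI version value is illustrative:

	static const struct ib_device_ops mydrv_dev_ops = {
		.owner		= THIS_MODULE,
		.driver_id	= RDMA_DRIVER_QIB,	/* the driver's own enum rdma_driver_id value */
		.uverbs_abi_ver	= 1,			/* illustrative */

		/* ... verbs callbacks ... */
	};

	ib_set_device_ops(ibdev, &mydrv_dev_ops);
]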
-struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { - struct ib_cq *cq; - - usnic_dbg("\n"); if (attr->flags) - return ERR_PTR(-EINVAL); - - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) - return ERR_PTR(-EBUSY); + return -EINVAL; - return cq; + return 0; } -int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { - usnic_dbg("\n"); - kfree(cq); - return 0; + return; } struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 028f322f8e9b..2aedf78c13cf 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -58,10 +58,9 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); -int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index e312f522a66d..0b0237d41613 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -75,9 +75,10 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) for_each_sg(chunk->page_list, sg, chunk->nents, i) { page = sg_page(sg); pa = sg_phys(sg); - if (!PageDirty(page) && dirty) - set_page_dirty_lock(page); - put_page(page); + if (dirty) + put_user_pages_dirty_lock(&page, 1); + else + put_user_page(page); usnic_dbg("pa: %pa\n", &pa); } kfree(chunk); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 3c633ab58052..c142f5e7f25f 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -456,7 +456,7 @@ static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op) return PVRDMA_WR_MASKED_ATOMIC_CMP_AND_SWP; case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: return PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD; - case IB_WR_REG_SIG_MR: + case IB_WR_REG_MR_INTEGRITY: return PVRDMA_WR_REG_SIG_MR; default: return PVRDMA_WR_ERROR; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index d7deb19a2800..7800e6930502 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -92,20 +92,19 @@ int pvrdma_req_notify_cq(struct ib_cq *ibcq, /** * pvrdma_create_cq - create completion queue - * @ibdev: the device + * @ibcq: Allocated CQ * @attr: completion queue attributes * @udata: user data * - * @return: ib_cq completion queue pointer on success, - * otherwise returns negative errno. 
+ * @return: 0 on success */ -struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; int entries = attr->cqe; struct pvrdma_dev *dev = to_vdev(ibdev); - struct pvrdma_cq *cq; + struct pvrdma_cq *cq = to_vcq(ibcq); int ret; int npages; unsigned long flags; @@ -113,7 +112,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_cq *cmd = &req.create_cq; struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp; - struct pvrdma_create_cq_resp cq_resp = {0}; + struct pvrdma_create_cq_resp cq_resp = {}; struct pvrdma_create_cq ucmd; struct pvrdma_ucontext *context = rdma_udata_to_drv_context( udata, struct pvrdma_ucontext, ibucontext); @@ -122,16 +121,10 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, entries = roundup_pow_of_two(entries); if (entries < 1 || entries > dev->dsr->caps.max_cqe) - return ERR_PTR(-EINVAL); + return -EINVAL; if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq)) - return ERR_PTR(-ENOMEM); - - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) { - atomic_dec(&dev->num_cqs); - return ERR_PTR(-ENOMEM); - } + return -ENOMEM; cq->ibcq.cqe = entries; cq->is_kernel = !udata; @@ -211,22 +204,19 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); pvrdma_destroy_cq(&cq->ibcq, udata); - return ERR_PTR(-EINVAL); + return -EINVAL; } } - return &cq->ibcq; + return 0; err_page_dir: pvrdma_page_dir_cleanup(dev, &cq->pdir); err_umem: - if (!cq->is_kernel) - ib_umem_release(cq->umem); + ib_umem_release(cq->umem); err_cq: atomic_dec(&dev->num_cqs); - kfree(cq); - - return ERR_PTR(ret); + return ret; } static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) @@ -235,21 +225,17 @@ static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) complete(&cq->free); wait_for_completion(&cq->free); - if (!cq->is_kernel) - ib_umem_release(cq->umem); + ib_umem_release(cq->umem); pvrdma_page_dir_cleanup(dev, &cq->pdir); - kfree(cq); } /** * pvrdma_destroy_cq - destroy completion queue * @cq: the completion queue to destroy. * @udata: user data or null for kernel object - * - * @return: 0 for success. 
*/ -int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct pvrdma_cq *vcq = to_vcq(cq); union pvrdma_cmd_req req; @@ -275,8 +261,6 @@ int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) pvrdma_free_cq(dev, vcq); atomic_dec(&dev->num_cqs); - - return ret; } static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 40182297f87f..e580ae9cc55a 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -144,6 +144,10 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num, } static const struct ib_device_ops pvrdma_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_VMW_PVRDMA, + .uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION, + .add_gid = pvrdma_add_gid, .alloc_mr = pvrdma_alloc_mr, .alloc_pd = pvrdma_alloc_pd, @@ -178,6 +182,7 @@ static const struct ib_device_ops pvrdma_dev_ops = { .req_notify_cq = pvrdma_req_notify_cq, INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext), }; @@ -198,10 +203,8 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->ib_dev.node_guid = dev->dsr->caps.node_guid; dev->sys_image_guid = dev->dsr->caps.sys_image_guid; dev->flags = 0; - dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; - dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION; dev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -261,7 +264,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) if (!dev->srq_tbl) goto err_qp_free; } - dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA; ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1); if (ret) return ret; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index 65dc47ffb8f3..f3a3d22ee8d7 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -290,8 +290,7 @@ int pvrdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) "could not deregister mem region, error: %d\n", ret); pvrdma_page_dir_cleanup(dev, &mr->pdir); - if (mr->umem) - ib_umem_release(mr->umem); + ib_umem_release(mr->umem); kfree(mr->pages); kfree(mr); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 0eaaead5baec..bca6a58a442e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -391,12 +391,8 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, err_pdir: pvrdma_page_dir_cleanup(dev, &qp->pdir); err_umem: - if (!qp->is_kernel) { - if (qp->rumem) - ib_umem_release(qp->rumem); - if (qp->sumem) - ib_umem_release(qp->sumem); - } + ib_umem_release(qp->rumem); + ib_umem_release(qp->sumem); err_qp: kfree(qp); atomic_dec(&dev->num_qps); @@ -429,12 +425,8 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) complete(&qp->free); wait_for_completion(&qp->free); - if (!qp->is_kernel) { - if (qp->rumem) - ib_umem_release(qp->rumem); - if (qp->sumem) - ib_umem_release(qp->sumem); - } + ib_umem_release(qp->rumem); + ib_umem_release(qp->sumem); pvrdma_page_dir_cleanup(dev, &qp->pdir); diff --git 
a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 9d7b021e1c59..e4a48f5c0c85 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -409,10 +409,9 @@ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata); int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); -int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile index ab48a9b60844..68e0230f8f31 100644 --- a/drivers/infiniband/sw/Makefile +++ b/drivers/infiniband/sw/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt/ obj-$(CONFIG_RDMA_RXE) += rxe/ +obj-$(CONFIG_RDMA_SIW) += siw/ diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 0e147b32cbe9..fe99da0ff060 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -119,8 +119,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, rdma_copy_ah_attr(&ah->attr, ah_attr); - atomic_set(&ah->refcount, 0); - if (dev->driver_f.notify_new_ah) dev->driver_f.notify_new_ah(ibah->device, ah_attr, ah); @@ -141,8 +139,6 @@ void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) struct rvt_ah *ah = ibah_to_rvtah(ibah); unsigned long flags; - WARN_ON_ONCE(atomic_read(&ah->refcount)); - spin_lock_irqsave(&dev->n_ahs_lock, flags); dev->n_ahs_allocated--; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index a06e6da7a026..a85571a4cf57 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -60,22 +60,39 @@ static struct workqueue_struct *comp_vector_wq; * @solicited: true if @entry is solicited * * This may be called with qp->s_lock held. + * + * Return: return true on success, else return + * false if cq is full. 
*/ -void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) +bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) { - struct rvt_cq_wc *wc; + struct ib_uverbs_wc *uqueue = NULL; + struct ib_wc *kqueue = NULL; + struct rvt_cq_wc *u_wc = NULL; + struct rvt_k_cq_wc *k_wc = NULL; unsigned long flags; u32 head; u32 next; + u32 tail; spin_lock_irqsave(&cq->lock, flags); + if (cq->ip) { + u_wc = cq->queue; + uqueue = &u_wc->uqueue[0]; + head = RDMA_READ_UAPI_ATOMIC(u_wc->head); + tail = RDMA_READ_UAPI_ATOMIC(u_wc->tail); + } else { + k_wc = cq->kqueue; + kqueue = &k_wc->kqueue[0]; + head = k_wc->head; + tail = k_wc->tail; + } + /* - * Note that the head pointer might be writable by user processes. - * Take care to verify it is a sane value. + * Note that the head pointer might be writable by + * user processes.Take care to verify it is a sane value. */ - wc = cq->queue; - head = wc->head; if (head >= (unsigned)cq->ibcq.cqe) { head = cq->ibcq.cqe; next = 0; @@ -83,7 +100,12 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) next = head + 1; } - if (unlikely(next == wc->tail)) { + if (unlikely(next == tail || cq->cq_full)) { + struct rvt_dev_info *rdi = cq->rdi; + + if (!cq->cq_full) + rvt_pr_err_ratelimited(rdi, "CQ is full!\n"); + cq->cq_full = true; spin_unlock_irqrestore(&cq->lock, flags); if (cq->ibcq.event_handler) { struct ib_event ev; @@ -93,30 +115,30 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) ev.event = IB_EVENT_CQ_ERR; cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); } - return; + return false; } trace_rvt_cq_enter(cq, entry, head); - if (cq->ip) { - wc->uqueue[head].wr_id = entry->wr_id; - wc->uqueue[head].status = entry->status; - wc->uqueue[head].opcode = entry->opcode; - wc->uqueue[head].vendor_err = entry->vendor_err; - wc->uqueue[head].byte_len = entry->byte_len; - wc->uqueue[head].ex.imm_data = entry->ex.imm_data; - wc->uqueue[head].qp_num = entry->qp->qp_num; - wc->uqueue[head].src_qp = entry->src_qp; - wc->uqueue[head].wc_flags = entry->wc_flags; - wc->uqueue[head].pkey_index = entry->pkey_index; - wc->uqueue[head].slid = ib_lid_cpu16(entry->slid); - wc->uqueue[head].sl = entry->sl; - wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; - wc->uqueue[head].port_num = entry->port_num; + if (uqueue) { + uqueue[head].wr_id = entry->wr_id; + uqueue[head].status = entry->status; + uqueue[head].opcode = entry->opcode; + uqueue[head].vendor_err = entry->vendor_err; + uqueue[head].byte_len = entry->byte_len; + uqueue[head].ex.imm_data = entry->ex.imm_data; + uqueue[head].qp_num = entry->qp->qp_num; + uqueue[head].src_qp = entry->src_qp; + uqueue[head].wc_flags = entry->wc_flags; + uqueue[head].pkey_index = entry->pkey_index; + uqueue[head].slid = ib_lid_cpu16(entry->slid); + uqueue[head].sl = entry->sl; + uqueue[head].dlid_path_bits = entry->dlid_path_bits; + uqueue[head].port_num = entry->port_num; /* Make sure entry is written before the head index. 
*/ - smp_wmb(); + RDMA_WRITE_UAPI_ATOMIC(u_wc->head, next); } else { - wc->kqueue[head] = *entry; + kqueue[head] = *entry; + k_wc->head = next; } - wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && @@ -132,6 +154,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) } spin_unlock_irqrestore(&cq->lock, flags); + return true; } EXPORT_SYMBOL(rvt_cq_enter); @@ -166,43 +189,38 @@ static void send_complete(struct work_struct *work) /** * rvt_create_cq - create a completion queue - * @ibdev: the device this completion queue is attached to + * @ibcq: Allocated CQ * @attr: creation attributes * @udata: user data for libibverbs.so * * Called by ib_create_cq() in the generic verbs code. * - * Return: pointer to the completion queue or negative errno values - * for failure. + * Return: 0 on success */ -struct ib_cq *rvt_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; struct rvt_dev_info *rdi = ib_to_rvt(ibdev); - struct rvt_cq *cq; - struct rvt_cq_wc *wc; - struct ib_cq *ret; + struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); + struct rvt_cq_wc *u_wc = NULL; + struct rvt_k_cq_wc *k_wc = NULL; u32 sz; unsigned int entries = attr->cqe; int comp_vector = attr->comp_vector; + int err; if (attr->flags) - return ERR_PTR(-EINVAL); + return -EINVAL; if (entries < 1 || entries > rdi->dparms.props.max_cqe) - return ERR_PTR(-EINVAL); + return -EINVAL; if (comp_vector < 0) comp_vector = 0; comp_vector = comp_vector % rdi->ibdev.num_comp_vectors; - /* Allocate the completion queue structure. */ - cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node); - if (!cq) - return ERR_PTR(-ENOMEM); - /* * Allocate the completion queue entries and head/tail pointers. * This is allocated separately so that it can be resized and @@ -210,17 +228,18 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, * We need to use vmalloc() in order to support mmap and large * numbers of entries. */ - sz = sizeof(*wc); - if (udata && udata->outlen >= sizeof(__u64)) - sz += sizeof(struct ib_uverbs_wc) * (entries + 1); - else - sz += sizeof(struct ib_wc) * (entries + 1); - wc = udata ? - vmalloc_user(sz) : - vzalloc_node(sz, rdi->dparms.node); - if (!wc) { - ret = ERR_PTR(-ENOMEM); - goto bail_cq; + if (udata && udata->outlen >= sizeof(__u64)) { + sz = sizeof(struct ib_uverbs_wc) * (entries + 1); + sz += sizeof(*u_wc); + u_wc = vmalloc_user(sz); + if (!u_wc) + return -ENOMEM; + } else { + sz = sizeof(struct ib_wc) * (entries + 1); + sz += sizeof(*k_wc); + k_wc = vzalloc_node(sz, rdi->dparms.node); + if (!k_wc) + return -ENOMEM; } /* @@ -228,26 +247,22 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, * See rvt_mmap() for details. 
*/ if (udata && udata->outlen >= sizeof(__u64)) { - int err; - - cq->ip = rvt_create_mmap_info(rdi, sz, udata, wc); + cq->ip = rvt_create_mmap_info(rdi, sz, udata, u_wc); if (!cq->ip) { - ret = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto bail_wc; } err = ib_copy_to_udata(udata, &cq->ip->offset, sizeof(cq->ip->offset)); - if (err) { - ret = ERR_PTR(err); + if (err) goto bail_ip; - } } spin_lock_irq(&rdi->n_cqs_lock); if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { spin_unlock_irq(&rdi->n_cqs_lock); - ret = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto bail_ip; } @@ -277,21 +292,20 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, cq->notify = RVT_CQ_NONE; spin_lock_init(&cq->lock); INIT_WORK(&cq->comptask, send_complete); - cq->queue = wc; - - ret = &cq->ibcq; + if (u_wc) + cq->queue = u_wc; + else + cq->kqueue = k_wc; trace_rvt_create_cq(cq, attr); - goto done; + return 0; bail_ip: kfree(cq->ip); bail_wc: - vfree(wc); -bail_cq: - kfree(cq); -done: - return ret; + vfree(u_wc); + vfree(k_wc); + return err; } /** @@ -300,10 +314,8 @@ done: * @udata: user data or NULL for kernel object * * Called by ib_destroy_cq() in the generic verbs code. - * - * Return: always 0 */ -int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); struct rvt_dev_info *rdi = cq->rdi; @@ -316,9 +328,6 @@ int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) kref_put(&cq->ip->ref, rvt_release_mmap_info); else vfree(cq->queue); - kfree(cq); - - return 0; } /** @@ -345,9 +354,16 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; - if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && - cq->queue->head != cq->queue->tail) - ret = 1; + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + if (cq->queue) { + if (RDMA_READ_UAPI_ATOMIC(cq->queue->head) != + RDMA_READ_UAPI_ATOMIC(cq->queue->tail)) + ret = 1; + } else { + if (cq->kqueue->head != cq->kqueue->tail) + ret = 1; + } + } spin_unlock_irqrestore(&cq->lock, flags); @@ -363,12 +379,14 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); - struct rvt_cq_wc *old_wc; - struct rvt_cq_wc *wc; u32 head, tail, n; int ret; u32 sz; struct rvt_dev_info *rdi = cq->rdi; + struct rvt_cq_wc *u_wc = NULL; + struct rvt_cq_wc *old_u_wc = NULL; + struct rvt_k_cq_wc *k_wc = NULL; + struct rvt_k_cq_wc *old_k_wc = NULL; if (cqe < 1 || cqe > rdi->dparms.props.max_cqe) return -EINVAL; @@ -376,17 +394,19 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) /* * Need to use vmalloc() if we want to support large #s of entries. */ - sz = sizeof(*wc); - if (udata && udata->outlen >= sizeof(__u64)) - sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); - else - sz += sizeof(struct ib_wc) * (cqe + 1); - wc = udata ? - vmalloc_user(sz) : - vzalloc_node(sz, rdi->dparms.node); - if (!wc) - return -ENOMEM; - + if (udata && udata->outlen >= sizeof(__u64)) { + sz = sizeof(struct ib_uverbs_wc) * (cqe + 1); + sz += sizeof(*u_wc); + u_wc = vmalloc_user(sz); + if (!u_wc) + return -ENOMEM; + } else { + sz = sizeof(struct ib_wc) * (cqe + 1); + sz += sizeof(*k_wc); + k_wc = vzalloc_node(sz, rdi->dparms.node); + if (!k_wc) + return -ENOMEM; + } /* Check that we can write the offset to mmap. 
*/ if (udata && udata->outlen >= sizeof(__u64)) { __u64 offset = 0; @@ -401,11 +421,18 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) * Make sure head and tail are sane since they * might be user writable. */ - old_wc = cq->queue; - head = old_wc->head; + if (u_wc) { + old_u_wc = cq->queue; + head = RDMA_READ_UAPI_ATOMIC(old_u_wc->head); + tail = RDMA_READ_UAPI_ATOMIC(old_u_wc->tail); + } else { + old_k_wc = cq->kqueue; + head = old_k_wc->head; + tail = old_k_wc->tail; + } + if (head > (u32)cq->ibcq.cqe) head = (u32)cq->ibcq.cqe; - tail = old_wc->tail; if (tail > (u32)cq->ibcq.cqe) tail = (u32)cq->ibcq.cqe; if (head < tail) @@ -417,27 +444,36 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) goto bail_unlock; } for (n = 0; tail != head; n++) { - if (cq->ip) - wc->uqueue[n] = old_wc->uqueue[tail]; + if (u_wc) + u_wc->uqueue[n] = old_u_wc->uqueue[tail]; else - wc->kqueue[n] = old_wc->kqueue[tail]; + k_wc->kqueue[n] = old_k_wc->kqueue[tail]; if (tail == (u32)cq->ibcq.cqe) tail = 0; else tail++; } cq->ibcq.cqe = cqe; - wc->head = n; - wc->tail = 0; - cq->queue = wc; + if (u_wc) { + RDMA_WRITE_UAPI_ATOMIC(u_wc->head, n); + RDMA_WRITE_UAPI_ATOMIC(u_wc->tail, 0); + cq->queue = u_wc; + } else { + k_wc->head = n; + k_wc->tail = 0; + cq->kqueue = k_wc; + } spin_unlock_irq(&cq->lock); - vfree(old_wc); + if (u_wc) + vfree(old_u_wc); + else + vfree(old_k_wc); if (cq->ip) { struct rvt_mmap_info *ip = cq->ip; - rvt_update_mmap_info(rdi, ip, sz, wc); + rvt_update_mmap_info(rdi, ip, sz, u_wc); /* * Return the offset to mmap. @@ -461,7 +497,9 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) bail_unlock: spin_unlock_irq(&cq->lock); bail_free: - vfree(wc); + vfree(u_wc); + vfree(k_wc); + return ret; } @@ -479,7 +517,7 @@ bail_free: int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); - struct rvt_cq_wc *wc; + struct rvt_k_cq_wc *wc; unsigned long flags; int npolled; u32 tail; @@ -490,7 +528,7 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) spin_lock_irqsave(&cq->lock, flags); - wc = cq->queue; + wc = cq->kqueue; tail = wc->tail; if (tail > (u32)cq->ibcq.cqe) tail = (u32)cq->ibcq.cqe; diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 3ad6faf18ecb..5e26a2eb19a4 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -51,10 +51,9 @@ #include <rdma/rdma_vt.h> #include <rdma/rdmavt_cq.h> -struct ib_cq *rvt_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); -int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index f48240f66b8f..a6a39f01dca3 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -562,8 +562,7 @@ int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) if (ret) goto out; rvt_deinit_mregion(&mr->mr); - if (mr->umem) - ib_umem_release(mr->umem); + ib_umem_release(mr->umem); kfree(mr); out: return 
ret; @@ -613,8 +612,8 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; mr->mr.map[m]->segs[n].length = ps; - trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); mr->mr.length += ps; + trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); return 0; } @@ -643,6 +642,7 @@ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, mr->mr.iova = ibmr->iova; mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; mr->mr.length = (size_t)ibmr->length; + trace_rvt_map_mr_sg(ibmr, sg_nents, sg_offset); return ret; } diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index c5a50614a6c6..0b0a241c57ff 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 - 2018 Intel Corporation. + * Copyright(c) 2016 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -58,6 +58,8 @@ #include "vt.h" #include "trace.h" +#define RVT_RWQ_COUNT_THRESHOLD 16 + static void rvt_rc_timeout(struct timer_list *t); /* @@ -803,6 +805,47 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) } /** + * rvt_alloc_rq - allocate memory for user or kernel buffer + * @rq: receive queue data structure + * @size: number of request queue entries + * @node: The NUMA node + * @udata: True if user data is available or not false + * + * Return: If memory allocation failed, return -ENONEM + * This function is used by both shared receive + * queues and non-shared receive queues to allocate + * memory. + */ +int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node, + struct ib_udata *udata) +{ + if (udata) { + rq->wq = vmalloc_user(sizeof(struct rvt_rwq) + size); + if (!rq->wq) + goto bail; + /* need kwq with no buffers */ + rq->kwq = kzalloc_node(sizeof(*rq->kwq), GFP_KERNEL, node); + if (!rq->kwq) + goto bail; + rq->kwq->curr_wq = rq->wq->wq; + } else { + /* need kwq with buffers */ + rq->kwq = + vzalloc_node(sizeof(struct rvt_krwq) + size, node); + if (!rq->kwq) + goto bail; + rq->kwq->curr_wq = rq->kwq->wq; + } + + spin_lock_init(&rq->kwq->p_lock); + spin_lock_init(&rq->kwq->c_lock); + return 0; +bail: + rvt_free_rq(rq); + return -ENOMEM; +} + +/** * rvt_init_qp - initialize the QP state to the reset state * @qp: the QP to init or reinit * @type: the QP type @@ -852,10 +895,8 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_tail_ack_queue = 0; qp->s_acked_ack_queue = 0; qp->s_num_rd_atomic = 0; - if (qp->r_rq.wq) { - qp->r_rq.wq->head = 0; - qp->r_rq.wq->tail = 0; - } + if (qp->r_rq.kwq) + qp->r_rq.kwq->count = qp->r_rq.size; qp->r_sge.num_sge = 0; atomic_set(&qp->s_reserved_used, 0); } @@ -928,6 +969,61 @@ static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) } /** + * get_allowed_ops - Given a QP type return the appropriate allowed OP + * @type: valid, supported, QP type + */ +static u8 get_allowed_ops(enum ib_qp_type type) +{ + return type == IB_QPT_RC ? IB_OPCODE_RC : type == IB_QPT_UC ? + IB_OPCODE_UC : IB_OPCODE_UD; +} + +/** + * free_ud_wq_attr - Clean up AH attribute cache for UD QPs + * @qp: Valid QP with allowed_ops set + * + * The rvt_swqe data structure being used is a union, so this is + * only valid for UD QPs. 
+ */ +static void free_ud_wq_attr(struct rvt_qp *qp) +{ + struct rvt_swqe *wqe; + int i; + + for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) { + wqe = rvt_get_swqe_ptr(qp, i); + kfree(wqe->ud_wr.attr); + wqe->ud_wr.attr = NULL; + } +} + +/** + * alloc_ud_wq_attr - AH attribute cache for UD QPs + * @qp: Valid QP with allowed_ops set + * @node: Numa node for allocation + * + * The rvt_swqe data structure being used is a union, so this is + * only valid for UD QPs. + */ +static int alloc_ud_wq_attr(struct rvt_qp *qp, int node) +{ + struct rvt_swqe *wqe; + int i; + + for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) { + wqe = rvt_get_swqe_ptr(qp, i); + wqe->ud_wr.attr = kzalloc_node(sizeof(*wqe->ud_wr.attr), + GFP_KERNEL, node); + if (!wqe->ud_wr.attr) { + free_ud_wq_attr(qp); + return -ENOMEM; + } + } + + return 0; +} + +/** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair @@ -989,9 +1085,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: - sz = sizeof(struct rvt_sge) * - init_attr->cap.max_send_sge + - sizeof(struct rvt_swqe); + sz = struct_size(swq, sg_list, init_attr->cap.max_send_sge); swq = vzalloc_node(array_size(sz, sqsize), rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); @@ -1011,6 +1105,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, rdi->dparms.node); if (!qp) goto bail_swq; + qp->allowed_ops = get_allowed_ops(init_attr->qp_type); RCU_INIT_POINTER(qp->next, NULL); if (init_attr->qp_type == IB_QPT_RC) { @@ -1048,17 +1143,12 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct rvt_rwqe); - if (udata) - qp->r_rq.wq = vmalloc_user( - sizeof(struct rvt_rwq) + - qp->r_rq.size * sz); - else - qp->r_rq.wq = vzalloc_node( - sizeof(struct rvt_rwq) + - qp->r_rq.size * sz, - rdi->dparms.node); - if (!qp->r_rq.wq) + err = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz, + rdi->dparms.node, udata); + if (err) { + ret = ERR_PTR(err); goto bail_driver_priv; + } } /* @@ -1068,7 +1158,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->r_lock); spin_lock_init(&qp->s_hlock); spin_lock_init(&qp->s_lock); - spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); atomic_set(&qp->local_ops_pending, 0); init_waitqueue_head(&qp->wait); @@ -1080,6 +1169,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = RVT_S_SIGNAL_REQ_WR; + err = alloc_ud_wq_attr(qp, rdi->dparms.node); + if (err) { + ret = (ERR_PTR(err)); + goto bail_driver_priv; + } err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, @@ -1172,28 +1266,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, ret = &qp->ibqp; - /* - * We have our QP and its good, now keep track of what types of opcodes - * can be processed on this QP. We do this by keeping track of what the - * 3 high order bits of the opcode are. 
- */ - switch (init_attr->qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - case IB_QPT_UD: - qp->allowed_ops = IB_OPCODE_UD; - break; - case IB_QPT_RC: - qp->allowed_ops = IB_OPCODE_RC; - break; - case IB_QPT_UC: - qp->allowed_ops = IB_OPCODE_UC; - break; - default: - ret = ERR_PTR(-EINVAL); - goto bail_ip; - } - return ret; bail_ip: @@ -1204,8 +1276,8 @@ bail_qpn: rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - if (!qp->ip) - vfree(qp->r_rq.wq); + rvt_free_rq(&qp->r_rq); + free_ud_wq_attr(qp); bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); @@ -1271,19 +1343,26 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) } wc.status = IB_WC_WR_FLUSH_ERR; - if (qp->r_rq.wq) { - struct rvt_rwq *wq; + if (qp->r_rq.kwq) { u32 head; u32 tail; - - spin_lock(&qp->r_rq.lock); - + struct rvt_rwq *wq = NULL; + struct rvt_krwq *kwq = NULL; + + spin_lock(&qp->r_rq.kwq->c_lock); + /* qp->ip used to validate if there is a user buffer mmaped */ + if (qp->ip) { + wq = qp->r_rq.wq; + head = RDMA_READ_UAPI_ATOMIC(wq->head); + tail = RDMA_READ_UAPI_ATOMIC(wq->tail); + } else { + kwq = qp->r_rq.kwq; + head = kwq->head; + tail = kwq->tail; + } /* sanity check pointers before trusting them */ - wq = qp->r_rq.wq; - head = wq->head; if (head >= qp->r_rq.size) head = 0; - tail = wq->tail; if (tail >= qp->r_rq.size) tail = 0; while (tail != head) { @@ -1292,9 +1371,11 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) tail = 0; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); } - wq->tail = tail; - - spin_unlock(&qp->r_rq.lock); + if (qp->ip) + RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail); + else + kwq->tail = tail; + spin_unlock(&qp->r_rq.kwq->c_lock); } else if (qp->ibqp.event_handler) { ret = 1; } @@ -1636,12 +1717,12 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (qp->ip) kref_put(&qp->ip->ref, rvt_release_mmap_info); - else - vfree(qp->r_rq.wq); + kvfree(qp->r_rq.kwq); rdi->driver_f.qp_priv_free(rdi, qp); kfree(qp->s_ack_queue); rdma_destroy_ah_attr(&qp->remote_ah_attr); rdma_destroy_ah_attr(&qp->alt_ah_attr); + free_ud_wq_attr(qp); vfree(qp->s_wq); kfree(qp); return 0; @@ -1723,7 +1804,7 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct rvt_rwq *wq = qp->r_rq.wq; + struct rvt_krwq *wq = qp->r_rq.kwq; unsigned long flags; int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) && !qp->ibqp.srq; @@ -1744,12 +1825,12 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, return -EINVAL; } - spin_lock_irqsave(&qp->r_rq.lock, flags); + spin_lock_irqsave(&qp->r_rq.kwq->p_lock, flags); next = wq->head + 1; if (next >= qp->r_rq.size) next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&qp->r_rq.lock, flags); + if (next == READ_ONCE(wq->tail)) { + spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags); *bad_wr = wr; return -ENOMEM; } @@ -1766,16 +1847,18 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; + for (i = 0; i < wr->num_sge; i++) { + wqe->sg_list[i].addr = wr->sg_list[i].addr; + wqe->sg_list[i].length = wr->sg_list[i].length; + wqe->sg_list[i].lkey = wr->sg_list[i].lkey; + } /* * Make sure queue entry is written * before the head index. 
*/ - smp_wmb(); - wq->head = next; + smp_store_release(&wq->head, next); } - spin_unlock_irqrestore(&qp->r_rq.lock, flags); + spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags); } return 0; } @@ -1856,10 +1939,9 @@ static inline int rvt_qp_is_avail( /* see rvt_qp_wqe_unreserve() */ smp_mb__before_atomic(); - reserved_used = atomic_read(&qp->s_reserved_used); if (unlikely(reserved_op)) { /* see rvt_qp_wqe_unreserve() */ - smp_mb__before_atomic(); + reserved_used = atomic_read(&qp->s_reserved_used); if (reserved_used >= rdi->dparms.reserved_operations) return -ENOMEM; return 0; @@ -1867,14 +1949,13 @@ static inline int rvt_qp_is_avail( /* non-reserved operations */ if (likely(qp->s_avail)) return 0; - slast = READ_ONCE(qp->s_last); + /* See rvt_qp_complete_swqe() */ + slast = smp_load_acquire(&qp->s_last); if (qp->s_head >= slast) avail = qp->s_size - (qp->s_head - slast); else avail = slast - qp->s_head; - /* see rvt_qp_wqe_unreserve() */ - smp_mb__before_atomic(); reserved_used = atomic_read(&qp->s_reserved_used); avail = avail - 1 - (rdi->dparms.reserved_operations - reserved_used); @@ -2011,10 +2092,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp, */ log_pmtu = qp->log_pmtu; if (qp->allowed_ops == IB_OPCODE_UD) { - struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah); + struct rvt_ah *ah = rvt_get_swqe_ah(wqe); log_pmtu = ah->log_pmtu; - atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); + rdma_copy_ah_attr(wqe->ud_wr.attr, &ah->attr); } if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) { @@ -2059,7 +2140,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, bail_inval_free_ref: if (qp->allowed_ops == IB_OPCODE_UD) - atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); + rdma_destroy_ah_attr(wqe->ud_wr.attr); bail_inval_free: /* release mr holds */ while (j) { @@ -2145,7 +2226,7 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); - struct rvt_rwq *wq; + struct rvt_krwq *wq; unsigned long flags; for (; wr; wr = wr->next) { @@ -2158,13 +2239,13 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, return -EINVAL; } - spin_lock_irqsave(&srq->rq.lock, flags); - wq = srq->rq.wq; + spin_lock_irqsave(&srq->rq.kwq->p_lock, flags); + wq = srq->rq.kwq; next = wq->head + 1; if (next >= srq->rq.size) next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&srq->rq.lock, flags); + if (next == READ_ONCE(wq->tail)) { + spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags); *bad_wr = wr; return -ENOMEM; } @@ -2172,17 +2253,35 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; + for (i = 0; i < wr->num_sge; i++) { + wqe->sg_list[i].addr = wr->sg_list[i].addr; + wqe->sg_list[i].length = wr->sg_list[i].length; + wqe->sg_list[i].lkey = wr->sg_list[i].lkey; + } /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; - spin_unlock_irqrestore(&srq->rq.lock, flags); + smp_store_release(&wq->head, next); + spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags); } return 0; } /* + * rvt used the internal kernel struct as part of its ABI, for now make sure + * the kernel struct does not change layout. FIXME: rvt should never cast the + * user struct to a kernel struct. 
+ */ +static struct ib_sge *rvt_cast_sge(struct rvt_wqe_sge *sge) +{ + BUILD_BUG_ON(offsetof(struct ib_sge, addr) != + offsetof(struct rvt_wqe_sge, addr)); + BUILD_BUG_ON(offsetof(struct ib_sge, length) != + offsetof(struct rvt_wqe_sge, length)); + BUILD_BUG_ON(offsetof(struct ib_sge, lkey) != + offsetof(struct rvt_wqe_sge, lkey)); + return (struct ib_sge *)sge; +} + +/* * Validate a RWQE and fill in the SGE state. * Return 1 if OK. */ @@ -2205,7 +2304,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) continue; /* Check LKEY */ ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], + NULL, rvt_cast_sge(&wqe->sg_list[i]), IB_ACCESS_LOCAL_WRITE); if (unlikely(ret <= 0)) goto bad_lkey; @@ -2234,6 +2333,50 @@ bad_lkey: } /** + * get_count - count numbers of request work queue entries + * in circular buffer + * @rq: data structure for request queue entry + * @tail: tail indices of the circular buffer + * @head: head indices of the circular buffer + * + * Return - total number of entries in the circular buffer + */ +static u32 get_count(struct rvt_rq *rq, u32 tail, u32 head) +{ + u32 count; + + count = head; + + if (count >= rq->size) + count = 0; + if (count < tail) + count += rq->size - tail; + else + count -= tail; + + return count; +} + +/** + * get_rvt_head - get head indices of the circular buffer + * @rq: data structure for request queue entry + * @ip: the QP + * + * Return - head index value + */ +static inline u32 get_rvt_head(struct rvt_rq *rq, void *ip) +{ + u32 head; + + if (ip) + head = RDMA_READ_UAPI_ATOMIC(rq->wq->head); + else + head = rq->kwq->head; + + return head; +} + +/** * rvt_get_rwqe - copy the next RWQE into the QP's RWQE * @qp: the QP * @wr_id_only: update qp->r_wr_id only, not qp->r_sge @@ -2247,39 +2390,54 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) { unsigned long flags; struct rvt_rq *rq; + struct rvt_krwq *kwq = NULL; struct rvt_rwq *wq; struct rvt_srq *srq; struct rvt_rwqe *wqe; void (*handler)(struct ib_event *, void *); u32 tail; + u32 head; int ret; + void *ip = NULL; if (qp->ibqp.srq) { srq = ibsrq_to_rvtsrq(qp->ibqp.srq); handler = srq->ibsrq.event_handler; rq = &srq->rq; + ip = srq->ip; } else { srq = NULL; handler = NULL; rq = &qp->r_rq; + ip = qp->ip; } - spin_lock_irqsave(&rq->lock, flags); + spin_lock_irqsave(&rq->kwq->c_lock, flags); if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { ret = 0; goto unlock; } + kwq = rq->kwq; + if (ip) { + wq = rq->wq; + tail = RDMA_READ_UAPI_ATOMIC(wq->tail); + } else { + tail = kwq->tail; + } - wq = rq->wq; - tail = wq->tail; /* Validate tail before using it since it is user writable. */ if (tail >= rq->size) tail = 0; - if (unlikely(tail == wq->head)) { + + if (kwq->count < RVT_RWQ_COUNT_THRESHOLD) { + head = get_rvt_head(rq, ip); + kwq->count = get_count(rq, tail, head); + } + if (unlikely(kwq->count == 0)) { ret = 0; goto unlock; } - /* Make sure entry is read after head index is read. */ + /* Make sure entry is read after the count is read. 
*/ smp_rmb(); wqe = rvt_get_rwqe_ptr(rq, tail); /* @@ -2289,43 +2447,41 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) */ if (++tail >= rq->size) tail = 0; - wq->tail = tail; + if (ip) + RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail); + else + kwq->tail = tail; if (!wr_id_only && !init_sge(qp, wqe)) { ret = -1; goto unlock; } qp->r_wr_id = wqe->wr_id; + kwq->count--; ret = 1; set_bit(RVT_R_WRID_VALID, &qp->r_aflags); if (handler) { - u32 n; - /* * Validate head pointer value and compute * the number of remaining WQEs. */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - goto bail; + if (kwq->count < srq->limit) { + kwq->count = get_count(rq, tail, get_rvt_head(rq, ip)); + if (kwq->count < srq->limit) { + struct ib_event ev; + + srq->limit = 0; + spin_unlock_irqrestore(&rq->kwq->c_lock, flags); + ev.device = qp->ibqp.device; + ev.element.srq = qp->ibqp.srq; + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; + handler(&ev, srq->ibsrq.srq_context); + goto bail; + } } } unlock: - spin_unlock_irqrestore(&rq->lock, flags); + spin_unlock_irqrestore(&rq->kwq->c_lock, flags); bail: return ret; } @@ -2667,27 +2823,16 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; - struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + struct rvt_dev_info *rdi; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; + rdi = ib_to_rvt(qp->ibqp.device); - last = qp->s_last; - old_last = last; - trace_rvt_qp_send_completion(qp, wqe, last); - if (++last >= qp->s_size) - last = 0; - trace_rvt_qp_send_completion(qp, wqe, last); - qp->s_last = last; - /* See post_send() */ - barrier(); - rvt_put_qp_swqe(qp, wqe); - - rvt_qp_swqe_complete(qp, - wqe, - rdi->wc_opcode[wqe->wr.opcode], - status); - + old_last = qp->s_last; + trace_rvt_qp_send_completion(qp, wqe, old_last); + last = rvt_qp_complete_swqe(qp, wqe, rdi->wc_opcode[wqe->wr.opcode], + status); if (qp->s_acked == old_last) qp->s_acked = last; if (qp->s_cur == old_last) @@ -3021,8 +3166,7 @@ do_write: wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); wc.port_num = 1; /* Signal completion event if the solicited bit is set. 
*/ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - wqe->wr.send_flags & IB_SEND_SOLICITED); + rvt_recv_cq(qp, &wc, wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: spin_unlock_irqrestore(&qp->r_lock, flags); diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 6db1619389b0..2cdba1283bf6 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -68,4 +68,6 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int rvt_wss_init(struct rvt_dev_info *rdi); void rvt_wss_exit(struct rvt_dev_info *rdi); +int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node, + struct ib_udata *udata); #endif /* DEF_RVTQP_H */ diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c index 09f0cf538be6..890d7b760d2e 100644 --- a/drivers/infiniband/sw/rdmavt/rc.c +++ b/drivers/infiniband/sw/rdmavt/rc.c @@ -104,26 +104,33 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp) } else { u32 min, max, x; u32 credits; - struct rvt_rwq *wq = qp->r_rq.wq; u32 head; u32 tail; - /* sanity check pointers before trusting them */ - head = wq->head; - if (head >= qp->r_rq.size) - head = 0; - tail = wq->tail; - if (tail >= qp->r_rq.size) - tail = 0; - /* - * Compute the number of credits available (RWQEs). - * There is a small chance that the pair of reads are - * not atomic, which is OK, since the fuzziness is - * resolved as further ACKs go out. - */ - credits = head - tail; - if ((int)credits < 0) - credits += qp->r_rq.size; + credits = READ_ONCE(qp->r_rq.kwq->count); + if (credits == 0) { + /* sanity check pointers before trusting them */ + if (qp->ip) { + head = RDMA_READ_UAPI_ATOMIC(qp->r_rq.wq->head); + tail = RDMA_READ_UAPI_ATOMIC(qp->r_rq.wq->tail); + } else { + head = READ_ONCE(qp->r_rq.kwq->head); + tail = READ_ONCE(qp->r_rq.kwq->tail); + } + if (head >= qp->r_rq.size) + head = 0; + if (tail >= qp->r_rq.size) + tail = 0; + /* + * Compute the number of credits available (RWQEs). + * There is a small chance that the pair of reads are + * not atomic, which is OK, since the fuzziness is + * resolved as further ACKs go out. + */ + credits = head - tail; + if ((int)credits < 0) + credits += qp->r_rq.size; + } /* * Binary search the credit table to find the code to * use. diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 8d6b3e764255..24fef021d51d 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -52,7 +52,7 @@ #include "srq.h" #include "vt.h" - +#include "qp.h" /** * rvt_driver_srq_init - init srq resources on a per driver basis * @rdi: rvt dev structure @@ -97,11 +97,8 @@ int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr, srq->rq.max_sge = srq_init_attr->attr.max_sge; sz = sizeof(struct ib_sge) * srq->rq.max_sge + sizeof(struct rvt_rwqe); - srq->rq.wq = udata ? 
- vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz) : - vzalloc_node(sizeof(struct rvt_rwq) + srq->rq.size * sz, - dev->dparms.node); - if (!srq->rq.wq) { + if (rvt_alloc_rq(&srq->rq, srq->rq.size * sz, + dev->dparms.node, udata)) { ret = -ENOMEM; goto bail_srq; } @@ -152,7 +149,7 @@ int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr, bail_ip: kfree(srq->ip); bail_wq: - vfree(srq->rq.wq); + rvt_free_rq(&srq->rq); bail_srq: return ret; } @@ -172,11 +169,12 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); - struct rvt_rwq *wq; + struct rvt_rq tmp_rq = {}; int ret = 0; if (attr_mask & IB_SRQ_MAX_WR) { - struct rvt_rwq *owq; + struct rvt_krwq *okwq = NULL; + struct rvt_rwq *owq = NULL; struct rvt_rwqe *p; u32 sz, size, n, head, tail; @@ -185,17 +183,12 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, ((attr_mask & IB_SRQ_LIMIT) ? attr->srq_limit : srq->limit) > attr->max_wr) return -EINVAL; - sz = sizeof(struct rvt_rwqe) + srq->rq.max_sge * sizeof(struct ib_sge); size = attr->max_wr + 1; - wq = udata ? - vmalloc_user(sizeof(struct rvt_rwq) + size * sz) : - vzalloc_node(sizeof(struct rvt_rwq) + size * sz, - dev->dparms.node); - if (!wq) + if (rvt_alloc_rq(&tmp_rq, size * sz, dev->dparms.node, + udata)) return -ENOMEM; - /* Check that we can write the offset to mmap. */ if (udata && udata->inlen >= sizeof(__u64)) { __u64 offset_addr; @@ -213,14 +206,20 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto bail_free; } - spin_lock_irq(&srq->rq.lock); + spin_lock_irq(&srq->rq.kwq->c_lock); /* * validate head and tail pointer values and compute * the number of remaining WQEs. */ - owq = srq->rq.wq; - head = owq->head; - tail = owq->tail; + if (udata) { + owq = srq->rq.wq; + head = RDMA_READ_UAPI_ATOMIC(owq->head); + tail = RDMA_READ_UAPI_ATOMIC(owq->tail); + } else { + okwq = srq->rq.kwq; + head = okwq->head; + tail = okwq->tail; + } if (head >= srq->rq.size || tail >= srq->rq.size) { ret = -EINVAL; goto bail_unlock; @@ -235,7 +234,7 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto bail_unlock; } n = 0; - p = wq->wq; + p = tmp_rq.kwq->curr_wq; while (tail != head) { struct rvt_rwqe *wqe; int i; @@ -250,22 +249,29 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, if (++tail >= srq->rq.size) tail = 0; } - srq->rq.wq = wq; + srq->rq.kwq = tmp_rq.kwq; + if (udata) { + srq->rq.wq = tmp_rq.wq; + RDMA_WRITE_UAPI_ATOMIC(tmp_rq.wq->head, n); + RDMA_WRITE_UAPI_ATOMIC(tmp_rq.wq->tail, 0); + } else { + tmp_rq.kwq->head = n; + tmp_rq.kwq->tail = 0; + } srq->rq.size = size; - wq->head = n; - wq->tail = 0; if (attr_mask & IB_SRQ_LIMIT) srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); + spin_unlock_irq(&srq->rq.kwq->c_lock); vfree(owq); + kvfree(okwq); if (srq->ip) { struct rvt_mmap_info *ip = srq->ip; struct rvt_dev_info *dev = ib_to_rvt(srq->ibsrq.device); u32 s = sizeof(struct rvt_rwq) + size * sz; - rvt_update_mmap_info(dev, ip, s, wq); + rvt_update_mmap_info(dev, ip, s, tmp_rq.wq); /* * Return the offset to mmap. 
@@ -289,19 +295,19 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, spin_unlock_irq(&dev->pending_lock); } } else if (attr_mask & IB_SRQ_LIMIT) { - spin_lock_irq(&srq->rq.lock); + spin_lock_irq(&srq->rq.kwq->c_lock); if (attr->srq_limit >= srq->rq.size) ret = -EINVAL; else srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); + spin_unlock_irq(&srq->rq.kwq->c_lock); } return ret; bail_unlock: - spin_unlock_irq(&srq->rq.lock); + spin_unlock_irq(&srq->rq.kwq->c_lock); bail_free: - vfree(wq); + rvt_free_rq(&tmp_rq); return ret; } @@ -336,6 +342,5 @@ void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) spin_unlock(&dev->n_srqs_lock); if (srq->ip) kref_put(&srq->ip->ref, rvt_release_mmap_info); - else - vfree(srq->rq.wq); + kvfree(srq->rq.kwq); } diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h index 976e482930a3..95b8a0e3b8bd 100644 --- a/drivers/infiniband/sw/rdmavt/trace_mr.h +++ b/drivers/infiniband/sw/rdmavt/trace_mr.h @@ -54,6 +54,8 @@ #include <rdma/rdma_vt.h> #include <rdma/rdmavt_mr.h> +#include "mr.h" + #undef TRACE_SYSTEM #define TRACE_SYSTEM rvt_mr DECLARE_EVENT_CLASS( @@ -64,8 +66,12 @@ DECLARE_EVENT_CLASS( RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device)) __field(void *, vaddr) __field(struct page *, page) + __field(u64, iova) + __field(u64, user_base) __field(size_t, len) + __field(size_t, length) __field(u32, lkey) + __field(u32, offset) __field(u16, m) __field(u16, n) ), @@ -73,18 +79,28 @@ DECLARE_EVENT_CLASS( RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device)); __entry->vaddr = v; __entry->page = virt_to_page(v); + __entry->iova = mr->iova; + __entry->user_base = mr->user_base; + __entry->lkey = mr->lkey; __entry->m = m; __entry->n = n; __entry->len = len; + __entry->length = mr->length; + __entry->offset = mr->offset; ), TP_printk( - "[%s] vaddr %p page %p m %u n %u len %ld", + "[%s] lkey %x iova %llx user_base %llx mr_len %lu vaddr %llx page %p m %u n %u len %lu off %u", __get_str(dev), - __entry->vaddr, + __entry->lkey, + __entry->iova, + __entry->user_base, + __entry->length, + (unsigned long long)__entry->vaddr, __entry->page, __entry->m, __entry->n, - __entry->len + __entry->len, + __entry->offset ) ); @@ -165,6 +181,40 @@ DEFINE_EVENT( TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge), TP_ARGS(sge, isge)); +TRACE_EVENT( + rvt_map_mr_sg, + TP_PROTO(struct ib_mr *ibmr, int sg_nents, unsigned int *sg_offset), + TP_ARGS(ibmr, sg_nents, sg_offset), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(to_imr(ibmr)->mr.pd->device)) + __field(u64, iova) + __field(u64, ibmr_iova) + __field(u64, user_base) + __field(u64, ibmr_length) + __field(int, sg_nents) + __field(uint, sg_offset) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(to_imr(ibmr)->mr.pd->device)) + __entry->ibmr_iova = ibmr->iova; + __entry->iova = to_imr(ibmr)->mr.iova; + __entry->user_base = to_imr(ibmr)->mr.user_base; + __entry->ibmr_length = to_imr(ibmr)->mr.length; + __entry->sg_nents = sg_nents; + __entry->sg_offset = sg_offset ? 
*sg_offset : 0; + ), + TP_printk( + "[%s] ibmr_iova %llx iova %llx user_base %llx length %llx sg_nents %d sg_offset %u", + __get_str(dev), + __entry->ibmr_iova, + __entry->iova, + __entry->user_base, + __entry->ibmr_length, + __entry->sg_nents, + __entry->sg_offset + ) +); + #endif /* __RVT_TRACE_MR_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 9546a837a8ac..18da1e1ea979 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -382,6 +382,8 @@ enum { }; static const struct ib_device_ops rvt_dev_ops = { + .uverbs_abi_ver = RVT_UVERBS_ABI_VERSION, + .alloc_fmr = rvt_alloc_fmr, .alloc_mr = rvt_alloc_mr, .alloc_pd = rvt_alloc_pd, @@ -427,6 +429,7 @@ static const struct ib_device_ops rvt_dev_ops = { .unmap_fmr = rvt_unmap_fmr, INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, rvt_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, rvt_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext), @@ -530,7 +533,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) * * Return: 0 on success otherwise an errno. */ -int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) +int rvt_register_device(struct rvt_dev_info *rdi) { int ret = 0, i; @@ -600,7 +603,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) * exactly which functions rdmavt supports, nor do they know the ABI * version, so we do all of this sort of stuff here. */ - rdi->ibdev.uverbs_abi_ver = RVT_UVERBS_ABI_VERSION; rdi->ibdev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -636,7 +638,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) if (!rdi->ibdev.num_comp_vectors) rdi->ibdev.num_comp_vectors = 1; - rdi->ibdev.driver_id = driver_id; /* We are now good to announce we exist */ ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev)); if (ret) { diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 0675ea6c3872..d19ff817c2c7 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -78,6 +78,12 @@ fmt, \ ##__VA_ARGS__) +#define rvt_pr_err_ratelimited(rdi, fmt, ...) \ + __rvt_pr_err_ratelimited((rdi)->driver_f.get_pci_dev(rdi), \ + rvt_get_ibdev_name(rdi), \ + fmt, \ + ##__VA_ARGS__) + #define __rvt_pr_info(pdev, name, fmt, ...) \ dev_info(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) @@ -87,6 +93,9 @@ #define __rvt_pr_err(pdev, name, fmt, ...) \ dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) +#define __rvt_pr_err_ratelimited(pdev, name, fmt, ...) 
\ + dev_err_ratelimited(&(pdev)->dev, "%s: " fmt, name, ##__VA_ARGS__) + static inline int ibport_num_to_idx(struct ib_device *ibdev, u8 port_num) { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 00eb99d3df86..116cafc9afcf 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -558,7 +558,7 @@ int rxe_completer(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - struct rxe_send_wqe *wqe = wqe; + struct rxe_send_wqe *wqe = NULL; struct sk_buff *skb = NULL; struct rxe_pkt_info *pkt = NULL; enum comp_state state; diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index f501f72489d8..ea6a819b7167 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -96,8 +96,7 @@ void rxe_mem_cleanup(struct rxe_pool_entry *arg) struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem); int i; - if (mem->umem) - ib_umem_release(mem->umem); + ib_umem_release(mem->umem); if (mem->map) { for (i = 0; i < mem->num_map; i++) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 56cf18af016a..fbcbac52290b 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -72,6 +72,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_CQ] = { .name = "rxe-cq", .size = sizeof(struct rxe_cq), + .flags = RXE_POOL_NO_ALLOC, .cleanup = rxe_cq_cleanup, }, [RXE_TYPE_MR] = { diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index aca9f60f9b21..1cbfbd98eb22 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -431,6 +431,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, qp->resp.va = reth_va(pkt); qp->resp.rkey = reth_rkey(pkt); qp->resp.resid = reth_len(pkt); + qp->resp.length = reth_len(pkt); } access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ : IB_ACCESS_REMOTE_WRITE; @@ -856,7 +857,9 @@ static enum resp_states do_complete(struct rxe_qp *qp, pkt->mask & RXE_WRITE_MASK) ? IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; wc->vendor_err = 0; - wc->byte_len = wqe->dma.length - wqe->dma.resid; + wc->byte_len = (pkt->mask & RXE_IMMDT_MASK && + pkt->mask & RXE_WRITE_MASK) ? 
+ qp->resp.length : wqe->dma.length - wqe->dma.resid; /* fields after byte_len are different between kernel and user * space diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 8c3e2a18cfe4..4ebdfcf4d33e 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -778,55 +778,43 @@ err1: return err; } -static struct ib_cq *rxe_create_cq(struct ib_device *dev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { int err; + struct ib_device *dev = ibcq->device; struct rxe_dev *rxe = to_rdev(dev); - struct rxe_cq *cq; + struct rxe_cq *cq = to_rcq(ibcq); struct rxe_create_cq_resp __user *uresp = NULL; if (udata) { if (udata->outlen < sizeof(*uresp)) - return ERR_PTR(-EINVAL); + return -EINVAL; uresp = udata->outbuf; } if (attr->flags) - return ERR_PTR(-EINVAL); + return -EINVAL; err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector); if (err) - goto err1; - - cq = rxe_alloc(&rxe->cq_pool); - if (!cq) { - err = -ENOMEM; - goto err1; - } + return err; err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata, uresp); if (err) - goto err2; - - return &cq->ibcq; + return err; -err2: - rxe_drop_ref(cq); -err1: - return ERR_PTR(err); + return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem); } -static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +static void rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rxe_cq *cq = to_rcq(ibcq); rxe_cq_disable(cq); rxe_drop_ref(cq); - return 0; } static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) @@ -1111,6 +1099,10 @@ static int rxe_enable_driver(struct ib_device *ib_dev) } static const struct ib_device_ops rxe_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_RXE, + .uverbs_abi_ver = RXE_UVERBS_ABI_VERSION, + .alloc_hw_stats = rxe_ib_alloc_hw_stats, .alloc_mr = rxe_alloc_mr, .alloc_pd = rxe_alloc_pd, @@ -1157,6 +1149,7 @@ static const struct ib_device_ops rxe_dev_ops = { .resize_cq = rxe_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc), @@ -1170,7 +1163,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); - dev->owner = THIS_MODULE; dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->num_comp_vectors = num_possible_cpus(); @@ -1182,7 +1174,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dma_coerce_mask_and_coherent(&dev->dev, dma_get_required_mask(&dev->dev)); - dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION; dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) @@ -1230,7 +1221,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) rxe->tfm = tfm; rdma_set_device_sysfs_group(dev, &rxe_attr_group); - dev->driver_id = RDMA_DRIVER_RXE; err = ib_register_device(dev, ibdev_name); if (err) pr_warn("%s failed with error %d\n", __func__, err); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index e8be7f44e3be..5c4b2239129c 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -85,8 +85,8 
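The rxe_create_cq() rewrite above follows the series-wide move to core-allocated verbs objects: with INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq) the core allocates the driver-sized structure and hands the driver a pointer to the embedded ib_cq, which container_of() maps back to the rxe_cq. A self-contained userspace sketch of that embedding pattern (demo types, not the rdma-core API):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ib_cq_demo { int cqe; };			/* stand-in for struct ib_cq */

struct rxe_cq_demo {
	struct ib_cq_demo ibcq;			/* base object embedded first */
	int notify;
};

int main(void)
{
	/* the "core" allocates the full driver-sized object ... */
	struct rxe_cq_demo *cq = calloc(1, sizeof(*cq));
	struct ib_cq_demo *ibcq = &cq->ibcq;

	/* ... and the "driver" recovers its private struct from the base */
	struct rxe_cq_demo *back = container_of(ibcq, struct rxe_cq_demo, ibcq);

	back->notify = 1;
	printf("same object: %d\n", back == cq);
	free(cq);
	return 0;
}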
@@ struct rxe_cqe { }; struct rxe_cq { - struct rxe_pool_entry pelem; struct ib_cq ibcq; + struct rxe_pool_entry pelem; struct rxe_queue *queue; spinlock_t cq_lock; u8 notify; @@ -213,6 +213,7 @@ struct rxe_resp_info { struct rxe_mem *mr; u32 resid; u32 rkey; + u32 length; u64 atomic_orig; /* SRQ only */ diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig new file mode 100644 index 000000000000..dace276aea14 --- /dev/null +++ b/drivers/infiniband/sw/siw/Kconfig @@ -0,0 +1,18 @@ +config RDMA_SIW + tristate "Software RDMA over TCP/IP (iWARP) driver" + depends on INET && INFINIBAND && LIBCRC32C && 64BIT + select DMA_VIRT_OPS + help + This driver implements the iWARP RDMA transport over + the Linux TCP/IP network stack. It enables a system with a + standard Ethernet adapter to interoperate with a iWARP + adapter or with another system running the SIW driver. + (See also RXE which is a similar software driver for RoCE.) + + The driver interfaces with the Linux RDMA stack and + implements both a kernel and user space RDMA verbs API. + The user space verbs API requires a support + library named libsiw which is loaded by the generic user + space verbs API, libibverbs. To implement RDMA over + TCP/IP, the driver further interfaces with the Linux + in-kernel TCP socket layer. diff --git a/drivers/infiniband/sw/siw/Makefile b/drivers/infiniband/sw/siw/Makefile new file mode 100644 index 000000000000..f5f7e3867889 --- /dev/null +++ b/drivers/infiniband/sw/siw/Makefile @@ -0,0 +1,11 @@ +obj-$(CONFIG_RDMA_SIW) += siw.o + +siw-y := \ + siw_cm.o \ + siw_cq.o \ + siw_main.o \ + siw_mem.o \ + siw_qp.o \ + siw_qp_tx.o \ + siw_qp_rx.o \ + siw_verbs.o diff --git a/drivers/infiniband/sw/siw/iwarp.h b/drivers/infiniband/sw/siw/iwarp.h new file mode 100644 index 000000000000..e8a04d9c89cb --- /dev/null +++ b/drivers/infiniband/sw/siw/iwarp.h @@ -0,0 +1,380 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#ifndef _IWARP_H +#define _IWARP_H + +#include <rdma/rdma_user_cm.h> /* RDMA_MAX_PRIVATE_DATA */ +#include <linux/types.h> +#include <asm/byteorder.h> + +#define RDMAP_VERSION 1 +#define DDP_VERSION 1 +#define MPA_REVISION_1 1 +#define MPA_REVISION_2 2 +#define MPA_MAX_PRIVDATA RDMA_MAX_PRIVATE_DATA +#define MPA_KEY_REQ "MPA ID Req Frame" +#define MPA_KEY_REP "MPA ID Rep Frame" +#define MPA_IRD_ORD_MASK 0x3fff + +struct mpa_rr_params { + __be16 bits; + __be16 pd_len; +}; + +/* + * MPA request/response header bits & fields + */ +enum { + MPA_RR_FLAG_MARKERS = cpu_to_be16(0x8000), + MPA_RR_FLAG_CRC = cpu_to_be16(0x4000), + MPA_RR_FLAG_REJECT = cpu_to_be16(0x2000), + MPA_RR_FLAG_ENHANCED = cpu_to_be16(0x1000), + MPA_RR_FLAG_GSO_EXP = cpu_to_be16(0x0800), + MPA_RR_MASK_REVISION = cpu_to_be16(0x00ff) +}; + +/* + * MPA request/reply header + */ +struct mpa_rr { + __u8 key[16]; + struct mpa_rr_params params; +}; + +static inline void __mpa_rr_set_revision(__be16 *bits, u8 rev) +{ + *bits = (*bits & ~MPA_RR_MASK_REVISION) | + (cpu_to_be16(rev) & MPA_RR_MASK_REVISION); +} + +static inline u8 __mpa_rr_revision(__be16 mpa_rr_bits) +{ + __be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION; + + return be16_to_cpu(rev); +} + +enum mpa_v2_ctrl { + MPA_V2_PEER_TO_PEER = cpu_to_be16(0x8000), + MPA_V2_ZERO_LENGTH_RTR = cpu_to_be16(0x4000), + MPA_V2_RDMA_WRITE_RTR = cpu_to_be16(0x8000), + MPA_V2_RDMA_READ_RTR = cpu_to_be16(0x4000), + MPA_V2_RDMA_NO_RTR = cpu_to_be16(0x0000), + 
MPA_V2_MASK_IRD_ORD = cpu_to_be16(0x3fff) +}; + +struct mpa_v2_data { + __be16 ird; + __be16 ord; +}; + +struct mpa_marker { + __be16 rsvd; + __be16 fpdu_hmd; /* FPDU header-marker distance (= MPA's FPDUPTR) */ +}; + +/* + * maximum MPA trailer + */ +struct mpa_trailer { + __u8 pad[4]; + __be32 crc; +}; + +#define MPA_HDR_SIZE 2 +#define MPA_CRC_SIZE 4 + +/* + * Common portion of iWARP headers (MPA, DDP, RDMAP) + * for any FPDU + */ +struct iwarp_ctrl { + __be16 mpa_len; + __be16 ddp_rdmap_ctrl; +}; + +/* + * DDP/RDMAP Hdr bits & fields + */ +enum { + DDP_FLAG_TAGGED = cpu_to_be16(0x8000), + DDP_FLAG_LAST = cpu_to_be16(0x4000), + DDP_MASK_RESERVED = cpu_to_be16(0x3C00), + DDP_MASK_VERSION = cpu_to_be16(0x0300), + RDMAP_MASK_VERSION = cpu_to_be16(0x00C0), + RDMAP_MASK_RESERVED = cpu_to_be16(0x0030), + RDMAP_MASK_OPCODE = cpu_to_be16(0x000f) +}; + +static inline u8 __ddp_get_version(struct iwarp_ctrl *ctrl) +{ + return be16_to_cpu(ctrl->ddp_rdmap_ctrl & DDP_MASK_VERSION) >> 8; +} + +static inline void __ddp_set_version(struct iwarp_ctrl *ctrl, u8 version) +{ + ctrl->ddp_rdmap_ctrl = + (ctrl->ddp_rdmap_ctrl & ~DDP_MASK_VERSION) | + (cpu_to_be16((u16)version << 8) & DDP_MASK_VERSION); +} + +static inline u8 __rdmap_get_version(struct iwarp_ctrl *ctrl) +{ + __be16 ver = ctrl->ddp_rdmap_ctrl & RDMAP_MASK_VERSION; + + return be16_to_cpu(ver) >> 6; +} + +static inline void __rdmap_set_version(struct iwarp_ctrl *ctrl, u8 version) +{ + ctrl->ddp_rdmap_ctrl = (ctrl->ddp_rdmap_ctrl & ~RDMAP_MASK_VERSION) | + (cpu_to_be16(version << 6) & RDMAP_MASK_VERSION); +} + +static inline u8 __rdmap_get_opcode(struct iwarp_ctrl *ctrl) +{ + return be16_to_cpu(ctrl->ddp_rdmap_ctrl & RDMAP_MASK_OPCODE); +} + +static inline void __rdmap_set_opcode(struct iwarp_ctrl *ctrl, u8 opcode) +{ + ctrl->ddp_rdmap_ctrl = (ctrl->ddp_rdmap_ctrl & ~RDMAP_MASK_OPCODE) | + (cpu_to_be16(opcode) & RDMAP_MASK_OPCODE); +} + +struct iwarp_rdma_write { + struct iwarp_ctrl ctrl; + __be32 sink_stag; + __be64 sink_to; +}; + +struct iwarp_rdma_rreq { + struct iwarp_ctrl ctrl; + __be32 rsvd; + __be32 ddp_qn; + __be32 ddp_msn; + __be32 ddp_mo; + __be32 sink_stag; + __be64 sink_to; + __be32 read_size; + __be32 source_stag; + __be64 source_to; +}; + +struct iwarp_rdma_rresp { + struct iwarp_ctrl ctrl; + __be32 sink_stag; + __be64 sink_to; +}; + +struct iwarp_send { + struct iwarp_ctrl ctrl; + __be32 rsvd; + __be32 ddp_qn; + __be32 ddp_msn; + __be32 ddp_mo; +}; + +struct iwarp_send_inv { + struct iwarp_ctrl ctrl; + __be32 inval_stag; + __be32 ddp_qn; + __be32 ddp_msn; + __be32 ddp_mo; +}; + +struct iwarp_terminate { + struct iwarp_ctrl ctrl; + __be32 rsvd; + __be32 ddp_qn; + __be32 ddp_msn; + __be32 ddp_mo; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __be32 layer : 4; + __be32 etype : 4; + __be32 ecode : 8; + __be32 flag_m : 1; + __be32 flag_d : 1; + __be32 flag_r : 1; + __be32 reserved : 13; +#elif defined(__BIG_ENDIAN_BITFIELD) + __be32 reserved : 13; + __be32 flag_r : 1; + __be32 flag_d : 1; + __be32 flag_m : 1; + __be32 ecode : 8; + __be32 etype : 4; + __be32 layer : 4; +#else +#error "undefined byte order" +#endif +}; + +/* + * Terminate Hdr bits & fields + */ +enum { + TERM_MASK_LAYER = cpu_to_be32(0xf0000000), + TERM_MASK_ETYPE = cpu_to_be32(0x0f000000), + TERM_MASK_ECODE = cpu_to_be32(0x00ff0000), + TERM_FLAG_M = cpu_to_be32(0x00008000), + TERM_FLAG_D = cpu_to_be32(0x00004000), + TERM_FLAG_R = cpu_to_be32(0x00002000), + TERM_MASK_RESVD = cpu_to_be32(0x00001fff) +}; + +static inline u8 __rdmap_term_layer(struct iwarp_terminate *term) +{ + 
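The masks above are kept in network byte order so they can be applied directly to the on-the-wire __be16, and the accessors shift only after byteswapping. A small userspace sketch that packs and unpacks the DDP/RDMAP control half-word with the same mask values (illustrative program, not driver code):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

#define DDP_FLAG_TAGGED		0x8000
#define DDP_FLAG_LAST		0x4000
#define DDP_MASK_VERSION	0x0300
#define RDMAP_MASK_VERSION	0x00C0
#define RDMAP_MASK_OPCODE	0x000f

int main(void)
{
	uint16_t host = 0;

	host |= DDP_FLAG_TAGGED | DDP_FLAG_LAST;	/* tagged, last DDP segment */
	host |= (1 << 8) & DDP_MASK_VERSION;		/* DDP version 1 */
	host |= (1 << 6) & RDMAP_MASK_VERSION;		/* RDMAP version 1 */
	host |= 0 & RDMAP_MASK_OPCODE;			/* RDMAP_RDMA_WRITE */

	uint16_t wire = htons(host);			/* as carried in iwarp_ctrl */

	printf("ddp version %d, rdmap opcode %d\n",
	       (ntohs(wire) & DDP_MASK_VERSION) >> 8,
	       ntohs(wire) & RDMAP_MASK_OPCODE);
	return 0;
}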
return term->layer; +} + +static inline void __rdmap_term_set_layer(struct iwarp_terminate *term, + u8 layer) +{ + term->layer = layer & 0xf; +} + +static inline u8 __rdmap_term_etype(struct iwarp_terminate *term) +{ + return term->etype; +} + +static inline void __rdmap_term_set_etype(struct iwarp_terminate *term, + u8 etype) +{ + term->etype = etype & 0xf; +} + +static inline u8 __rdmap_term_ecode(struct iwarp_terminate *term) +{ + return term->ecode; +} + +static inline void __rdmap_term_set_ecode(struct iwarp_terminate *term, + u8 ecode) +{ + term->ecode = ecode; +} + +/* + * Common portion of iWARP headers (MPA, DDP, RDMAP) + * for an FPDU carrying an untagged DDP segment + */ +struct iwarp_ctrl_untagged { + struct iwarp_ctrl ctrl; + __be32 rsvd; + __be32 ddp_qn; + __be32 ddp_msn; + __be32 ddp_mo; +}; + +/* + * Common portion of iWARP headers (MPA, DDP, RDMAP) + * for an FPDU carrying a tagged DDP segment + */ +struct iwarp_ctrl_tagged { + struct iwarp_ctrl ctrl; + __be32 ddp_stag; + __be64 ddp_to; +}; + +union iwarp_hdr { + struct iwarp_ctrl ctrl; + struct iwarp_ctrl_untagged c_untagged; + struct iwarp_ctrl_tagged c_tagged; + struct iwarp_rdma_write rwrite; + struct iwarp_rdma_rreq rreq; + struct iwarp_rdma_rresp rresp; + struct iwarp_terminate terminate; + struct iwarp_send send; + struct iwarp_send_inv send_inv; +}; + +enum term_elayer { + TERM_ERROR_LAYER_RDMAP = 0x00, + TERM_ERROR_LAYER_DDP = 0x01, + TERM_ERROR_LAYER_LLP = 0x02 /* eg., MPA */ +}; + +enum ddp_etype { + DDP_ETYPE_CATASTROPHIC = 0x0, + DDP_ETYPE_TAGGED_BUF = 0x1, + DDP_ETYPE_UNTAGGED_BUF = 0x2, + DDP_ETYPE_RSVD = 0x3 +}; + +enum ddp_ecode { + /* unspecified, set to zero */ + DDP_ECODE_CATASTROPHIC = 0x00, + /* Tagged Buffer Errors */ + DDP_ECODE_T_INVALID_STAG = 0x00, + DDP_ECODE_T_BASE_BOUNDS = 0x01, + DDP_ECODE_T_STAG_NOT_ASSOC = 0x02, + DDP_ECODE_T_TO_WRAP = 0x03, + DDP_ECODE_T_VERSION = 0x04, + /* Untagged Buffer Errors */ + DDP_ECODE_UT_INVALID_QN = 0x01, + DDP_ECODE_UT_INVALID_MSN_NOBUF = 0x02, + DDP_ECODE_UT_INVALID_MSN_RANGE = 0x03, + DDP_ECODE_UT_INVALID_MO = 0x04, + DDP_ECODE_UT_MSG_TOOLONG = 0x05, + DDP_ECODE_UT_VERSION = 0x06 +}; + +enum rdmap_untagged_qn { + RDMAP_UNTAGGED_QN_SEND = 0, + RDMAP_UNTAGGED_QN_RDMA_READ = 1, + RDMAP_UNTAGGED_QN_TERMINATE = 2, + RDMAP_UNTAGGED_QN_COUNT = 3 +}; + +enum rdmap_etype { + RDMAP_ETYPE_CATASTROPHIC = 0x0, + RDMAP_ETYPE_REMOTE_PROTECTION = 0x1, + RDMAP_ETYPE_REMOTE_OPERATION = 0x2 +}; + +enum rdmap_ecode { + RDMAP_ECODE_INVALID_STAG = 0x00, + RDMAP_ECODE_BASE_BOUNDS = 0x01, + RDMAP_ECODE_ACCESS_RIGHTS = 0x02, + RDMAP_ECODE_STAG_NOT_ASSOC = 0x03, + RDMAP_ECODE_TO_WRAP = 0x04, + RDMAP_ECODE_VERSION = 0x05, + RDMAP_ECODE_OPCODE = 0x06, + RDMAP_ECODE_CATASTROPHIC_STREAM = 0x07, + RDMAP_ECODE_CATASTROPHIC_GLOBAL = 0x08, + RDMAP_ECODE_CANNOT_INVALIDATE = 0x09, + RDMAP_ECODE_UNSPECIFIED = 0xff +}; + +enum llp_ecode { + LLP_ECODE_TCP_STREAM_LOST = 0x01, /* How to transfer this ?? 
*/ + LLP_ECODE_RECEIVED_CRC = 0x02, + LLP_ECODE_FPDU_START = 0x03, + LLP_ECODE_INVALID_REQ_RESP = 0x04, + + /* Errors for Enhanced Connection Establishment only */ + LLP_ECODE_LOCAL_CATASTROPHIC = 0x05, + LLP_ECODE_INSUFFICIENT_IRD = 0x06, + LLP_ECODE_NO_MATCHING_RTR = 0x07 +}; + +enum llp_etype { LLP_ETYPE_MPA = 0x00 }; + +enum rdma_opcode { + RDMAP_RDMA_WRITE = 0x0, + RDMAP_RDMA_READ_REQ = 0x1, + RDMAP_RDMA_READ_RESP = 0x2, + RDMAP_SEND = 0x3, + RDMAP_SEND_INVAL = 0x4, + RDMAP_SEND_SE = 0x5, + RDMAP_SEND_SE_INVAL = 0x6, + RDMAP_TERMINATE = 0x7, + RDMAP_NOT_SUPPORTED = RDMAP_TERMINATE + 1 +}; + +#endif diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h new file mode 100644 index 000000000000..03fd7b2f595f --- /dev/null +++ b/drivers/infiniband/sw/siw/siw.h @@ -0,0 +1,745 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#ifndef _SIW_H +#define _SIW_H + +#include <rdma/ib_verbs.h> +#include <linux/socket.h> +#include <linux/skbuff.h> +#include <crypto/hash.h> +#include <linux/crc32.h> +#include <linux/crc32c.h> + +#include <rdma/siw-abi.h> +#include "iwarp.h" + +#define SIW_VENDOR_ID 0x626d74 /* ascii 'bmt' for now */ +#define SIW_VENDORT_PART_ID 0 +#define SIW_MAX_QP (1024 * 100) +#define SIW_MAX_QP_WR (1024 * 32) +#define SIW_MAX_ORD_QP 128 +#define SIW_MAX_IRD_QP 128 +#define SIW_MAX_SGE_PBL 256 /* max num sge's for PBL */ +#define SIW_MAX_SGE_RD 1 /* iwarp limitation. we could relax */ +#define SIW_MAX_CQ (1024 * 100) +#define SIW_MAX_CQE (SIW_MAX_QP_WR * 100) +#define SIW_MAX_MR (SIW_MAX_QP * 10) +#define SIW_MAX_PD SIW_MAX_QP +#define SIW_MAX_MW 0 /* to be set if MW's are supported */ +#define SIW_MAX_FMR SIW_MAX_MR +#define SIW_MAX_SRQ SIW_MAX_QP +#define SIW_MAX_SRQ_WR (SIW_MAX_QP_WR * 10) +#define SIW_MAX_CONTEXT SIW_MAX_PD + +/* Min number of bytes for using zero copy transmit */ +#define SENDPAGE_THRESH PAGE_SIZE + +/* Maximum number of frames which can be send in one SQ processing */ +#define SQ_USER_MAXBURST 100 + +/* Maximum number of consecutive IRQ elements which get served + * if SQ has pending work. Prevents starving local SQ processing + * by serving peer Read Requests. + */ +#define SIW_IRQ_MAXBURST_SQ_ACTIVE 4 + +struct siw_dev_cap { + int max_qp; + int max_qp_wr; + int max_ord; /* max. outbound read queue depth */ + int max_ird; /* max. 
inbound read queue depth */ + int max_sge; + int max_sge_rd; + int max_cq; + int max_cqe; + int max_mr; + int max_pd; + int max_mw; + int max_fmr; + int max_srq; + int max_srq_wr; + int max_srq_sge; +}; + +struct siw_pd { + struct ib_pd base_pd; +}; + +struct siw_device { + struct ib_device base_dev; + struct net_device *netdev; + struct siw_dev_cap attrs; + + u32 vendor_part_id; + int numa_node; + + /* physical port state (only one port per device) */ + enum ib_port_state state; + + spinlock_t lock; + + struct xarray qp_xa; + struct xarray mem_xa; + + struct list_head cep_list; + struct list_head qp_list; + + /* active objects statistics to enforce limits */ + atomic_t num_qp; + atomic_t num_cq; + atomic_t num_pd; + atomic_t num_mr; + atomic_t num_srq; + atomic_t num_ctx; + + struct work_struct netdev_down; +}; + +struct siw_uobj { + void *addr; + u32 size; +}; + +struct siw_ucontext { + struct ib_ucontext base_ucontext; + struct siw_device *sdev; + + /* xarray of user mappable objects */ + struct xarray xa; + u32 uobj_nextkey; +}; + +/* + * The RDMA core does not define LOCAL_READ access, which is always + * enabled implictely. + */ +#define IWARP_ACCESS_MASK \ + (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | \ + IB_ACCESS_REMOTE_READ) + +/* + * siw presentation of user memory registered as source + * or target of RDMA operations. + */ + +struct siw_page_chunk { + struct page **plist; +}; + +struct siw_umem { + struct siw_page_chunk *page_chunk; + int num_pages; + bool writable; + u64 fp_addr; /* First page base address */ + struct mm_struct *owning_mm; +}; + +struct siw_pble { + u64 addr; /* Address of assigned user buffer */ + u64 size; /* Size of this entry */ + u64 pbl_off; /* Total offset from start of PBL */ +}; + +struct siw_pbl { + unsigned int num_buf; + unsigned int max_buf; + struct siw_pble pbe[1]; +}; + +struct siw_mr; + +/* + * Generic memory representation for registered siw memory. + * Memory lookup always via higher 24 bit of STag (STag index). 
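For the lookup rule in the comment above: an iWARP STag is conventionally a 24-bit index plus an 8-bit consumer-owned key, so the value used for the table lookup is simply the STag shifted right by eight. A toy illustration (example values, not driver code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t stag = 0x00abcd07;		/* example STag */
	uint32_t index = stag >> 8;		/* upper 24 bits: lookup index */
	uint32_t key = stag & 0xff;		/* lower 8 bits: consumer key */

	printf("stag 0x%08x -> index 0x%06x, key 0x%02x\n", stag, index, key);
	return 0;
}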
+ */ +struct siw_mem { + struct siw_device *sdev; + struct kref ref; + u64 va; /* VA of memory */ + u64 len; /* length of the memory buffer in bytes */ + u32 stag; /* iWarp memory access steering tag */ + u8 stag_valid; /* VALID or INVALID */ + u8 is_pbl; /* PBL or user space mem */ + u8 is_mw; /* Memory Region or Memory Window */ + enum ib_access_flags perms; /* local/remote READ & WRITE */ + union { + struct siw_umem *umem; + struct siw_pbl *pbl; + void *mem_obj; + }; + struct ib_pd *pd; +}; + +struct siw_mr { + struct ib_mr base_mr; + struct siw_mem *mem; + struct rcu_head rcu; +}; + +/* + * Error codes for local or remote + * access to registered memory + */ +enum siw_access_state { + E_ACCESS_OK, + E_STAG_INVALID, + E_BASE_BOUNDS, + E_ACCESS_PERM, + E_PD_MISMATCH +}; + +enum siw_wr_state { + SIW_WR_IDLE, + SIW_WR_QUEUED, /* processing has not started yet */ + SIW_WR_INPROGRESS /* initiated processing of the WR */ +}; + +/* The WQE currently being processed (RX or TX) */ +struct siw_wqe { + /* Copy of applications SQE or RQE */ + union { + struct siw_sqe sqe; + struct siw_rqe rqe; + }; + struct siw_mem *mem[SIW_MAX_SGE]; /* per sge's resolved mem */ + enum siw_wr_state wr_status; + enum siw_wc_status wc_status; + u32 bytes; /* total bytes to process */ + u32 processed; /* bytes processed */ +}; + +struct siw_cq { + struct ib_cq base_cq; + spinlock_t lock; + u64 *notify; + struct siw_cqe *queue; + u32 cq_put; + u32 cq_get; + u32 num_cqe; + bool kernel_verbs; + u32 xa_cq_index; /* mmap information for CQE array */ + u32 id; /* For debugging only */ +}; + +enum siw_qp_state { + SIW_QP_STATE_IDLE, + SIW_QP_STATE_RTR, + SIW_QP_STATE_RTS, + SIW_QP_STATE_CLOSING, + SIW_QP_STATE_TERMINATE, + SIW_QP_STATE_ERROR, + SIW_QP_STATE_COUNT +}; + +enum siw_qp_flags { + SIW_RDMA_BIND_ENABLED = (1 << 0), + SIW_RDMA_WRITE_ENABLED = (1 << 1), + SIW_RDMA_READ_ENABLED = (1 << 2), + SIW_SIGNAL_ALL_WR = (1 << 3), + SIW_MPA_CRC = (1 << 4), + SIW_QP_IN_DESTROY = (1 << 5) +}; + +enum siw_qp_attr_mask { + SIW_QP_ATTR_STATE = (1 << 0), + SIW_QP_ATTR_ACCESS_FLAGS = (1 << 1), + SIW_QP_ATTR_LLP_HANDLE = (1 << 2), + SIW_QP_ATTR_ORD = (1 << 3), + SIW_QP_ATTR_IRD = (1 << 4), + SIW_QP_ATTR_SQ_SIZE = (1 << 5), + SIW_QP_ATTR_RQ_SIZE = (1 << 6), + SIW_QP_ATTR_MPA = (1 << 7) +}; + +struct siw_srq { + struct ib_srq base_srq; + spinlock_t lock; + u32 max_sge; + u32 limit; /* low watermark for async event */ + struct siw_rqe *recvq; + u32 rq_put; + u32 rq_get; + u32 num_rqe; /* max # of wqe's allowed */ + u32 xa_srq_index; /* mmap information for SRQ array */ + char armed; /* inform user if limit hit */ + char kernel_verbs; /* '1' if kernel client */ +}; + +struct siw_qp_attrs { + enum siw_qp_state state; + u32 sq_size; + u32 rq_size; + u32 orq_size; + u32 irq_size; + u32 sq_max_sges; + u32 rq_max_sges; + enum siw_qp_flags flags; + + struct socket *sk; +}; + +enum siw_tx_ctx { + SIW_SEND_HDR, /* start or continue sending HDR */ + SIW_SEND_DATA, /* start or continue sending DDP payload */ + SIW_SEND_TRAILER, /* start or continue sending TRAILER */ + SIW_SEND_SHORT_FPDU/* send whole FPDU hdr|data|trailer at once */ +}; + +enum siw_rx_state { + SIW_GET_HDR, /* await new hdr or within hdr */ + SIW_GET_DATA_START, /* start of inbound DDP payload */ + SIW_GET_DATA_MORE, /* continuation of (misaligned) DDP payload */ + SIW_GET_TRAILER/* await new trailer or within trailer */ +}; + +struct siw_rx_stream { + struct sk_buff *skb; + int skb_new; /* pending unread bytes in skb */ + int skb_offset; /* offset in skb */ + int skb_copied; /* 
processed bytes in skb */ + + union iwarp_hdr hdr; + struct mpa_trailer trailer; + + enum siw_rx_state state; + + /* + * For each FPDU, main RX loop runs through 3 stages: + * Receiving protocol headers, placing DDP payload and receiving + * trailer information (CRC + possibly padding). + * Next two variables keep state on receive status of the + * current FPDU part (hdr, data, trailer). + */ + int fpdu_part_rcvd; /* bytes in pkt part copied */ + int fpdu_part_rem; /* bytes in pkt part not seen */ + + /* + * Next expected DDP MSN for each QN + + * expected steering tag + + * expected DDP tagget offset (all HBO) + */ + u32 ddp_msn[RDMAP_UNTAGGED_QN_COUNT]; + u32 ddp_stag; + u64 ddp_to; + u32 inval_stag; /* Stag to be invalidated */ + + struct shash_desc *mpa_crc_hd; + u8 rx_suspend : 1; + u8 pad : 2; /* # of pad bytes expected */ + u8 rdmap_op : 4; /* opcode of current frame */ +}; + +struct siw_rx_fpdu { + /* + * Local destination memory of inbound RDMA operation. + * Valid, according to wqe->wr_status + */ + struct siw_wqe wqe_active; + + unsigned int pbl_idx; /* Index into current PBL */ + unsigned int sge_idx; /* current sge in rx */ + unsigned int sge_off; /* already rcvd in curr. sge */ + + char first_ddp_seg; /* this is the first DDP seg */ + char more_ddp_segs; /* more DDP segs expected */ + u8 prev_rdmap_op : 4; /* opcode of prev frame */ +}; + +/* + * Shorthands for short packets w/o payload + * to be transmitted more efficient. + */ +struct siw_send_pkt { + struct iwarp_send send; + __be32 crc; +}; + +struct siw_write_pkt { + struct iwarp_rdma_write write; + __be32 crc; +}; + +struct siw_rreq_pkt { + struct iwarp_rdma_rreq rreq; + __be32 crc; +}; + +struct siw_rresp_pkt { + struct iwarp_rdma_rresp rresp; + __be32 crc; +}; + +struct siw_iwarp_tx { + union { + union iwarp_hdr hdr; + + /* Generic part of FPDU header */ + struct iwarp_ctrl ctrl; + struct iwarp_ctrl_untagged c_untagged; + struct iwarp_ctrl_tagged c_tagged; + + /* FPDU headers */ + struct iwarp_rdma_write rwrite; + struct iwarp_rdma_rreq rreq; + struct iwarp_rdma_rresp rresp; + struct iwarp_terminate terminate; + struct iwarp_send send; + struct iwarp_send_inv send_inv; + + /* complete short FPDUs */ + struct siw_send_pkt send_pkt; + struct siw_write_pkt write_pkt; + struct siw_rreq_pkt rreq_pkt; + struct siw_rresp_pkt rresp_pkt; + } pkt; + + struct mpa_trailer trailer; + /* DDP MSN for untagged messages */ + u32 ddp_msn[RDMAP_UNTAGGED_QN_COUNT]; + + enum siw_tx_ctx state; + u16 ctrl_len; /* ddp+rdmap hdr */ + u16 ctrl_sent; + int burst; + int bytes_unsent; /* ddp payload bytes */ + + struct shash_desc *mpa_crc_hd; + + u8 do_crc : 1; /* do crc for segment */ + u8 use_sendpage : 1; /* send w/o copy */ + u8 tx_suspend : 1; /* stop sending DDP segs. */ + u8 pad : 2; /* # pad in current fpdu */ + u8 orq_fence : 1; /* ORQ full or Send fenced */ + u8 in_syscall : 1; /* TX out of user context */ + u8 zcopy_tx : 1; /* Use TCP_SENDPAGE if possible */ + u8 gso_seg_limit; /* Maximum segments for GSO, 0 = unbound */ + + u16 fpdu_len; /* len of FPDU to tx */ + unsigned int tcp_seglen; /* remaining tcp seg space */ + + struct siw_wqe wqe_active; + + int pbl_idx; /* Index into current PBL */ + int sge_idx; /* current sge in tx */ + u32 sge_off; /* already sent in curr. 
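The fpdu_part_rcvd/fpdu_part_rem pair above tracks how much of the current FPDU part is still missing while TCP delivers bytes in arbitrary chunks. A toy userspace sketch of the three-stage loop described in the comment (made-up byte counts, no real protocol parsing):

#include <stdio.h>

enum rx_state { GET_HDR, GET_DATA, GET_TRAILER, DONE };

int main(void)
{
	int part_len[] = { 16, 100, 8 };	/* hdr, payload, pad + CRC */
	int chunks[] = { 10, 40, 40, 40, 2 };	/* bytes as TCP hands them over */
	enum rx_state state = GET_HDR;
	int rem = part_len[GET_HDR];		/* like fpdu_part_rem */

	for (unsigned int i = 0; i < sizeof(chunks) / sizeof(chunks[0]); i++) {
		int avail = chunks[i];

		while (avail && state != DONE) {
			int consumed = avail < rem ? avail : rem;

			avail -= consumed;
			rem -= consumed;
			if (!rem) {		/* finished hdr, data or trailer */
				state++;
				if (state != DONE)
					rem = part_len[state];
			}
		}
		printf("after chunk %u: state %d, %d bytes of part missing\n",
		       i, state, rem);
	}
	return 0;
}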
sge */ +}; + +struct siw_qp { + struct siw_device *sdev; + struct ib_qp *ib_qp; + struct kref ref; + u32 qp_num; + struct list_head devq; + int tx_cpu; + bool kernel_verbs; + struct siw_qp_attrs attrs; + + struct siw_cep *cep; + struct rw_semaphore state_lock; + + struct ib_pd *pd; + struct siw_cq *scq; + struct siw_cq *rcq; + struct siw_srq *srq; + + struct siw_iwarp_tx tx_ctx; /* Transmit context */ + spinlock_t sq_lock; + struct siw_sqe *sendq; /* send queue element array */ + uint32_t sq_get; /* consumer index into sq array */ + uint32_t sq_put; /* kernel prod. index into sq array */ + struct llist_node tx_list; + + struct siw_sqe *orq; /* outbound read queue element array */ + spinlock_t orq_lock; + uint32_t orq_get; /* consumer index into orq array */ + uint32_t orq_put; /* shared producer index for ORQ */ + + struct siw_rx_stream rx_stream; + struct siw_rx_fpdu *rx_fpdu; + struct siw_rx_fpdu rx_tagged; + struct siw_rx_fpdu rx_untagged; + spinlock_t rq_lock; + struct siw_rqe *recvq; /* recv queue element array */ + uint32_t rq_get; /* consumer index into rq array */ + uint32_t rq_put; /* kernel prod. index into rq array */ + + struct siw_sqe *irq; /* inbound read queue element array */ + uint32_t irq_get; /* consumer index into irq array */ + uint32_t irq_put; /* producer index into irq array */ + int irq_burst; + + struct { /* information to be carried in TERMINATE pkt, if valid */ + u8 valid; + u8 in_tx; + u8 layer : 4, etype : 4; + u8 ecode; + } term_info; + u32 xa_sq_index; /* mmap information for SQE array */ + u32 xa_rq_index; /* mmap information for RQE array */ + struct rcu_head rcu; +}; + +struct siw_base_qp { + struct ib_qp base_qp; + struct siw_qp *qp; +}; + +/* helper macros */ +#define rx_qp(rx) container_of(rx, struct siw_qp, rx_stream) +#define tx_qp(tx) container_of(tx, struct siw_qp, tx_ctx) +#define tx_wqe(qp) (&(qp)->tx_ctx.wqe_active) +#define rx_wqe(rctx) (&(rctx)->wqe_active) +#define rx_mem(rctx) ((rctx)->wqe_active.mem[0]) +#define tx_type(wqe) ((wqe)->sqe.opcode) +#define rx_type(wqe) ((wqe)->rqe.opcode) +#define tx_flags(wqe) ((wqe)->sqe.flags) + +struct iwarp_msg_info { + int hdr_len; + struct iwarp_ctrl ctrl; + int (*rx_data)(struct siw_qp *qp); +}; + +/* Global siw parameters. 
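The send queue above is indexed with free-running sq_get/sq_put counters taken modulo the queue size, and an element's flags word marks whether the slot is currently posted (compare siw_sq_empty() and sq_get_next() further down in this header). A standalone sketch of that convention (demo structures only, not the driver's types):

#include <stdint.h>
#include <stdio.h>

#define DEMO_WQE_VALID	1
#define DEMO_SQ_SIZE	4

struct demo_sqe { uint32_t flags; uint32_t id; };

static struct demo_sqe sendq[DEMO_SQ_SIZE];

static int post_one(uint32_t *sq_put, uint32_t id)
{
	struct demo_sqe *sqe = &sendq[*sq_put % DEMO_SQ_SIZE];

	if (sqe->flags)
		return -1;		/* slot still owned by the consumer */
	sqe->id = id;
	sqe->flags = DEMO_WQE_VALID;	/* hand the slot to the consumer */
	(*sq_put)++;
	return 0;
}

static int reap_one(uint32_t *sq_get)
{
	struct demo_sqe *sqe = &sendq[*sq_get % DEMO_SQ_SIZE];

	if (!sqe->flags)
		return -1;		/* queue empty */
	printf("processed wqe %u\n", sqe->id);
	sqe->flags = 0;			/* return the slot to the producer */
	(*sq_get)++;
	return 0;
}

int main(void)
{
	uint32_t sq_put = 0, sq_get = 0;

	post_one(&sq_put, 1);
	post_one(&sq_put, 2);
	while (!reap_one(&sq_get))
		;
	return 0;
}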
Currently set in siw_main.c */ +extern const bool zcopy_tx; +extern const bool try_gso; +extern const bool loopback_enabled; +extern const bool mpa_crc_required; +extern const bool mpa_crc_strict; +extern const bool siw_tcp_nagle; +extern u_char mpa_version; +extern const bool peer_to_peer; +extern struct task_struct *siw_tx_thread[]; + +extern struct crypto_shash *siw_crypto_shash; +extern struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1]; + +/* QP general functions */ +int siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attr, + enum siw_qp_attr_mask mask); +int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl); +void siw_qp_llp_close(struct siw_qp *qp); +void siw_qp_cm_drop(struct siw_qp *qp, int schedule); +void siw_send_terminate(struct siw_qp *qp); + +void siw_qp_get_ref(struct ib_qp *qp); +void siw_qp_put_ref(struct ib_qp *qp); +int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp); +void siw_free_qp(struct kref *ref); + +void siw_init_terminate(struct siw_qp *qp, enum term_elayer layer, + u8 etype, u8 ecode, int in_tx); +enum ddp_ecode siw_tagged_error(enum siw_access_state state); +enum rdmap_ecode siw_rdmap_error(enum siw_access_state state); + +void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe); +int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes, + enum siw_wc_status status); +int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes, + u32 inval_stag, enum siw_wc_status status); +void siw_qp_llp_data_ready(struct sock *sk); +void siw_qp_llp_write_space(struct sock *sk); + +/* QP TX path functions */ +int siw_run_sq(void *arg); +int siw_qp_sq_process(struct siw_qp *qp); +int siw_sq_start(struct siw_qp *qp); +int siw_activate_tx(struct siw_qp *qp); +void siw_stop_tx_thread(int nr_cpu); +int siw_get_tx_cpu(struct siw_device *sdev); +void siw_put_tx_cpu(int cpu); + +/* QP RX path functions */ +int siw_proc_send(struct siw_qp *qp); +int siw_proc_rreq(struct siw_qp *qp); +int siw_proc_rresp(struct siw_qp *qp); +int siw_proc_write(struct siw_qp *qp); +int siw_proc_terminate(struct siw_qp *qp); + +int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb, + unsigned int off, size_t len); + +static inline void set_rx_fpdu_context(struct siw_qp *qp, u8 opcode) +{ + if (opcode == RDMAP_RDMA_WRITE || opcode == RDMAP_RDMA_READ_RESP) + qp->rx_fpdu = &qp->rx_tagged; + else + qp->rx_fpdu = &qp->rx_untagged; + + qp->rx_stream.rdmap_op = opcode; +} + +static inline struct siw_ucontext *to_siw_ctx(struct ib_ucontext *base_ctx) +{ + return container_of(base_ctx, struct siw_ucontext, base_ucontext); +} + +static inline struct siw_base_qp *to_siw_base_qp(struct ib_qp *base_qp) +{ + return container_of(base_qp, struct siw_base_qp, base_qp); +} + +static inline struct siw_qp *to_siw_qp(struct ib_qp *base_qp) +{ + return to_siw_base_qp(base_qp)->qp; +} + +static inline struct siw_cq *to_siw_cq(struct ib_cq *base_cq) +{ + return container_of(base_cq, struct siw_cq, base_cq); +} + +static inline struct siw_srq *to_siw_srq(struct ib_srq *base_srq) +{ + return container_of(base_srq, struct siw_srq, base_srq); +} + +static inline struct siw_device *to_siw_dev(struct ib_device *base_dev) +{ + return container_of(base_dev, struct siw_device, base_dev); +} + +static inline struct siw_mr *to_siw_mr(struct ib_mr *base_mr) +{ + return container_of(base_mr, struct siw_mr, base_mr); +} + +static inline struct siw_qp *siw_qp_id2obj(struct siw_device *sdev, int id) +{ + struct siw_qp *qp; + + rcu_read_lock(); + qp = xa_load(&sdev->qp_xa, 
id); + if (likely(qp && kref_get_unless_zero(&qp->ref))) { + rcu_read_unlock(); + return qp; + } + rcu_read_unlock(); + return NULL; +} + +static inline u32 qp_id(struct siw_qp *qp) +{ + return qp->qp_num; +} + +static inline void siw_qp_get(struct siw_qp *qp) +{ + kref_get(&qp->ref); +} + +static inline void siw_qp_put(struct siw_qp *qp) +{ + kref_put(&qp->ref, siw_free_qp); +} + +static inline int siw_sq_empty(struct siw_qp *qp) +{ + struct siw_sqe *sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size]; + + return READ_ONCE(sqe->flags) == 0; +} + +static inline struct siw_sqe *sq_get_next(struct siw_qp *qp) +{ + struct siw_sqe *sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size]; + + if (READ_ONCE(sqe->flags) & SIW_WQE_VALID) + return sqe; + + return NULL; +} + +static inline struct siw_sqe *orq_get_current(struct siw_qp *qp) +{ + return &qp->orq[qp->orq_get % qp->attrs.orq_size]; +} + +static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp) +{ + return &qp->orq[qp->orq_put % qp->attrs.orq_size]; +} + +static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) +{ + struct siw_sqe *orq_e = orq_get_tail(qp); + + if (orq_e && READ_ONCE(orq_e->flags) == 0) + return orq_e; + + return NULL; +} + +static inline int siw_orq_empty(struct siw_qp *qp) +{ + return qp->orq[qp->orq_get % qp->attrs.orq_size].flags == 0 ? 1 : 0; +} + +static inline struct siw_sqe *irq_alloc_free(struct siw_qp *qp) +{ + struct siw_sqe *irq_e = &qp->irq[qp->irq_put % qp->attrs.irq_size]; + + if (READ_ONCE(irq_e->flags) == 0) { + qp->irq_put++; + return irq_e; + } + return NULL; +} + +static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum) +{ + return (__force __wsum)crc32c((__force __u32)sum, buff, len); +} + +static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset, + int len) +{ + return (__force __wsum)__crc32c_le_combine((__force __u32)csum, + (__force __u32)csum2, len); +} + +static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len) +{ + const struct skb_checksum_ops siw_cs_ops = { + .update = siw_csum_update, + .combine = siw_csum_combine, + }; + __wsum crc = *(u32 *)shash_desc_ctx(srx->mpa_crc_hd); + + crc = __skb_checksum(srx->skb, srx->skb_offset, len, crc, + &siw_cs_ops); + *(u32 *)shash_desc_ctx(srx->mpa_crc_hd) = crc; +} + +#define siw_dbg(ibdev, fmt, ...) \ + ibdev_dbg(ibdev, "%s: " fmt, __func__, ##__VA_ARGS__) + +#define siw_dbg_qp(qp, fmt, ...) \ + ibdev_dbg(&qp->sdev->base_dev, "QP[%u] %s: " fmt, qp_id(qp), __func__, \ + ##__VA_ARGS__) + +#define siw_dbg_cq(cq, fmt, ...) \ + ibdev_dbg(cq->base_cq.device, "CQ[%u] %s: " fmt, cq->id, __func__, \ + ##__VA_ARGS__) + +#define siw_dbg_pd(pd, fmt, ...) \ + ibdev_dbg(pd->device, "PD[%u] %s: " fmt, pd->res.id, __func__, \ + ##__VA_ARGS__) + +#define siw_dbg_mem(mem, fmt, ...) \ + ibdev_dbg(&mem->sdev->base_dev, \ + "MEM[0x%08x] %s: " fmt, mem->stag, __func__, ##__VA_ARGS__) + +#define siw_dbg_cep(cep, fmt, ...) 
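siw_qp_id2obj() above only hands out a QP if its reference count can still be raised from a non-zero value; a count that has already reached zero means the object is being torn down and the lookup must fail. A userspace sketch of that rule with C11 atomics (illustrative only, without the RCU and xarray machinery):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj { atomic_int ref; int id; };

/* simplified stand-in for kref_get_unless_zero() */
static bool get_unless_zero(struct obj *o)
{
	int c = atomic_load(&o->ref);

	while (c) {
		if (atomic_compare_exchange_weak(&o->ref, &c, c + 1))
			return true;	/* reference taken, object stays valid */
	}
	return false;			/* already zero: object is going away */
}

int main(void)
{
	struct obj live = { .ref = 2, .id = 1 };
	struct obj dying = { .ref = 0, .id = 2 };

	printf("live lookup: %d, dying lookup: %d\n",
	       get_unless_zero(&live), get_unless_zero(&dying));
	return 0;
}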
\ + ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%p] %s: " fmt, \ + cep, __func__, ##__VA_ARGS__) + +void siw_cq_flush(struct siw_cq *cq); +void siw_sq_flush(struct siw_qp *qp); +void siw_rq_flush(struct siw_qp *qp); +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc); + +#endif diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c new file mode 100644 index 000000000000..a7cde98e73e8 --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -0,0 +1,2070 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Fredy Neeser */ +/* Greg Joyce <greg@opengridcomputing.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ +/* Copyright (c) 2017, Open Grid Computing, Inc. */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/net.h> +#include <linux/inetdevice.h> +#include <net/addrconf.h> +#include <linux/workqueue.h> +#include <net/sock.h> +#include <net/tcp.h> +#include <linux/inet.h> +#include <linux/tcp.h> + +#include <rdma/iw_cm.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> + +#include "siw.h" +#include "siw_cm.h" + +/* + * Set to any combination of + * MPA_V2_RDMA_NO_RTR, MPA_V2_RDMA_READ_RTR, MPA_V2_RDMA_WRITE_RTR + */ +static __be16 rtr_type = MPA_V2_RDMA_READ_RTR | MPA_V2_RDMA_WRITE_RTR; +static const bool relaxed_ird_negotiation = 1; + +static void siw_cm_llp_state_change(struct sock *s); +static void siw_cm_llp_data_ready(struct sock *s); +static void siw_cm_llp_write_space(struct sock *s); +static void siw_cm_llp_error_report(struct sock *s); +static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, + int status); + +static void siw_sk_assign_cm_upcalls(struct sock *sk) +{ + write_lock_bh(&sk->sk_callback_lock); + sk->sk_state_change = siw_cm_llp_state_change; + sk->sk_data_ready = siw_cm_llp_data_ready; + sk->sk_write_space = siw_cm_llp_write_space; + sk->sk_error_report = siw_cm_llp_error_report; + write_unlock_bh(&sk->sk_callback_lock); +} + +static void siw_sk_save_upcalls(struct sock *sk) +{ + struct siw_cep *cep = sk_to_cep(sk); + + write_lock_bh(&sk->sk_callback_lock); + cep->sk_state_change = sk->sk_state_change; + cep->sk_data_ready = sk->sk_data_ready; + cep->sk_write_space = sk->sk_write_space; + cep->sk_error_report = sk->sk_error_report; + write_unlock_bh(&sk->sk_callback_lock); +} + +static void siw_sk_restore_upcalls(struct sock *sk, struct siw_cep *cep) +{ + sk->sk_state_change = cep->sk_state_change; + sk->sk_data_ready = cep->sk_data_ready; + sk->sk_write_space = cep->sk_write_space; + sk->sk_error_report = cep->sk_error_report; + sk->sk_user_data = NULL; +} + +static void siw_qp_socket_assoc(struct siw_cep *cep, struct siw_qp *qp) +{ + struct socket *s = cep->sock; + struct sock *sk = s->sk; + + write_lock_bh(&sk->sk_callback_lock); + + qp->attrs.sk = s; + sk->sk_data_ready = siw_qp_llp_data_ready; + sk->sk_write_space = siw_qp_llp_write_space; + + write_unlock_bh(&sk->sk_callback_lock); +} + +static void siw_socket_disassoc(struct socket *s) +{ + struct sock *sk = s->sk; + struct siw_cep *cep; + + if (sk) { + write_lock_bh(&sk->sk_callback_lock); + cep = sk_to_cep(sk); + if (cep) { + siw_sk_restore_upcalls(sk, cep); + siw_cep_put(cep); + } else { + pr_warn("siw: cannot restore sk callbacks: no ep\n"); + } + write_unlock_bh(&sk->sk_callback_lock); + } else { + pr_warn("siw: cannot restore sk callbacks: no sk\n"); + } +} + +static void siw_rtr_data_ready(struct sock *sk) +{ + struct siw_cep *cep; + struct siw_qp *qp = NULL; 
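The helpers above install connection-manager upcalls on the socket and stash the originals in the CEP so they can be put back on teardown, with sk_user_data pointing from the socket back to the endpoint. A plain-C sketch of that save/replace/restore pattern (function pointers stand in for the struct sock callbacks):

#include <stdio.h>

struct demo_sock {
	void (*data_ready)(struct demo_sock *);
	void *user_data;
};

struct demo_cep {
	void (*saved_data_ready)(struct demo_sock *);
};

static void original_data_ready(struct demo_sock *s)
{
	(void)s;
	printf("original handler\n");
}

static void cm_data_ready(struct demo_sock *s)
{
	struct demo_cep *cep = s->user_data;

	printf("cm handler, cep %p\n", (void *)cep);
}

int main(void)
{
	struct demo_sock sk = { .data_ready = original_data_ready };
	struct demo_cep cep = { 0 };

	/* associate: save the old upcall, install the CM upcall */
	cep.saved_data_ready = sk.data_ready;
	sk.user_data = &cep;
	sk.data_ready = cm_data_ready;
	sk.data_ready(&sk);

	/* disassociate: restore the saved upcall */
	sk.data_ready = cep.saved_data_ready;
	sk.user_data = NULL;
	sk.data_ready(&sk);
	return 0;
}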
+ read_descriptor_t rd_desc; + + read_lock(&sk->sk_callback_lock); + + cep = sk_to_cep(sk); + if (!cep) { + WARN(1, "No connection endpoint\n"); + goto out; + } + qp = sk_to_qp(sk); + + memset(&rd_desc, 0, sizeof(rd_desc)); + rd_desc.arg.data = qp; + rd_desc.count = 1; + + tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data); + /* + * Check if first frame was successfully processed. + * Signal connection full establishment if yes. + * Failed data processing would have already scheduled + * connection drop. + */ + if (!qp->rx_stream.rx_suspend) + siw_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0); +out: + read_unlock(&sk->sk_callback_lock); + if (qp) + siw_qp_socket_assoc(cep, qp); +} + +static void siw_sk_assign_rtr_upcalls(struct siw_cep *cep) +{ + struct sock *sk = cep->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + sk->sk_data_ready = siw_rtr_data_ready; + sk->sk_write_space = siw_qp_llp_write_space; + write_unlock_bh(&sk->sk_callback_lock); +} + +static void siw_cep_socket_assoc(struct siw_cep *cep, struct socket *s) +{ + cep->sock = s; + siw_cep_get(cep); + s->sk->sk_user_data = cep; + + siw_sk_save_upcalls(s->sk); + siw_sk_assign_cm_upcalls(s->sk); +} + +static struct siw_cep *siw_cep_alloc(struct siw_device *sdev) +{ + struct siw_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL); + unsigned long flags; + + if (!cep) + return NULL; + + INIT_LIST_HEAD(&cep->listenq); + INIT_LIST_HEAD(&cep->devq); + INIT_LIST_HEAD(&cep->work_freelist); + + kref_init(&cep->ref); + cep->state = SIW_EPSTATE_IDLE; + init_waitqueue_head(&cep->waitq); + spin_lock_init(&cep->lock); + cep->sdev = sdev; + cep->enhanced_rdma_conn_est = false; + + spin_lock_irqsave(&sdev->lock, flags); + list_add_tail(&cep->devq, &sdev->cep_list); + spin_unlock_irqrestore(&sdev->lock, flags); + + siw_dbg_cep(cep, "new endpoint\n"); + return cep; +} + +static void siw_cm_free_work(struct siw_cep *cep) +{ + struct list_head *w, *tmp; + struct siw_cm_work *work; + + list_for_each_safe(w, tmp, &cep->work_freelist) { + work = list_entry(w, struct siw_cm_work, list); + list_del(&work->list); + kfree(work); + } +} + +static void siw_cancel_mpatimer(struct siw_cep *cep) +{ + spin_lock_bh(&cep->lock); + if (cep->mpa_timer) { + if (cancel_delayed_work(&cep->mpa_timer->work)) { + siw_cep_put(cep); + kfree(cep->mpa_timer); /* not needed again */ + } + cep->mpa_timer = NULL; + } + spin_unlock_bh(&cep->lock); +} + +static void siw_put_work(struct siw_cm_work *work) +{ + INIT_LIST_HEAD(&work->list); + spin_lock_bh(&work->cep->lock); + list_add(&work->list, &work->cep->work_freelist); + spin_unlock_bh(&work->cep->lock); +} + +static void siw_cep_set_inuse(struct siw_cep *cep) +{ + unsigned long flags; + int rv; +retry: + spin_lock_irqsave(&cep->lock, flags); + + if (cep->in_use) { + spin_unlock_irqrestore(&cep->lock, flags); + rv = wait_event_interruptible(cep->waitq, !cep->in_use); + if (signal_pending(current)) + flush_signals(current); + goto retry; + } else { + cep->in_use = 1; + spin_unlock_irqrestore(&cep->lock, flags); + } +} + +static void siw_cep_set_free(struct siw_cep *cep) +{ + unsigned long flags; + + spin_lock_irqsave(&cep->lock, flags); + cep->in_use = 0; + spin_unlock_irqrestore(&cep->lock, flags); + + wake_up(&cep->waitq); +} + +static void __siw_cep_dealloc(struct kref *ref) +{ + struct siw_cep *cep = container_of(ref, struct siw_cep, ref); + struct siw_device *sdev = cep->sdev; + unsigned long flags; + + WARN_ON(cep->listen_cep); + + /* kfree(NULL) is safe */ + kfree(cep->mpa.pdata); + spin_lock_bh(&cep->lock); + if 
(!list_empty(&cep->work_freelist)) + siw_cm_free_work(cep); + spin_unlock_bh(&cep->lock); + + spin_lock_irqsave(&sdev->lock, flags); + list_del(&cep->devq); + spin_unlock_irqrestore(&sdev->lock, flags); + + siw_dbg_cep(cep, "free endpoint\n"); + kfree(cep); +} + +static struct siw_cm_work *siw_get_work(struct siw_cep *cep) +{ + struct siw_cm_work *work = NULL; + + spin_lock_bh(&cep->lock); + if (!list_empty(&cep->work_freelist)) { + work = list_entry(cep->work_freelist.next, struct siw_cm_work, + list); + list_del_init(&work->list); + } + spin_unlock_bh(&cep->lock); + return work; +} + +static int siw_cm_alloc_work(struct siw_cep *cep, int num) +{ + struct siw_cm_work *work; + + while (num--) { + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) { + if (!(list_empty(&cep->work_freelist))) + siw_cm_free_work(cep); + return -ENOMEM; + } + work->cep = cep; + INIT_LIST_HEAD(&work->list); + list_add(&work->list, &cep->work_freelist); + } + return 0; +} + +/* + * siw_cm_upcall() + * + * Upcall to IWCM to inform about async connection events + */ +static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, + int status) +{ + struct iw_cm_event event; + struct iw_cm_id *id; + + memset(&event, 0, sizeof(event)); + event.status = status; + event.event = reason; + + if (reason == IW_CM_EVENT_CONNECT_REQUEST) { + event.provider_data = cep; + id = cep->listen_cep->cm_id; + } else { + id = cep->cm_id; + } + /* Signal IRD and ORD */ + if (reason == IW_CM_EVENT_ESTABLISHED || + reason == IW_CM_EVENT_CONNECT_REPLY) { + /* Signal negotiated IRD/ORD values we will use */ + event.ird = cep->ird; + event.ord = cep->ord; + } else if (reason == IW_CM_EVENT_CONNECT_REQUEST) { + event.ird = cep->ord; + event.ord = cep->ird; + } + /* Signal private data and address information */ + if (reason == IW_CM_EVENT_CONNECT_REQUEST || + reason == IW_CM_EVENT_CONNECT_REPLY) { + u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len); + + if (pd_len) { + /* + * hand over MPA private data + */ + event.private_data_len = pd_len; + event.private_data = cep->mpa.pdata; + + /* Hide MPA V2 IRD/ORD control */ + if (cep->enhanced_rdma_conn_est) { + event.private_data_len -= + sizeof(struct mpa_v2_data); + event.private_data += + sizeof(struct mpa_v2_data); + } + } + getname_local(cep->sock, &event.local_addr); + getname_peer(cep->sock, &event.remote_addr); + } + siw_dbg_cep(cep, "[QP %u]: id 0x%p, reason=%d, status=%d\n", + cep->qp ? qp_id(cep->qp) : -1, id, reason, status); + + return id->event_handler(id, &event); +} + +/* + * siw_qp_cm_drop() + * + * Drops established LLP connection if present and not already + * scheduled for dropping. Called from user context, SQ workqueue + * or receive IRQ. Caller signals if socket can be immediately + * closed (basically, if not in IRQ). 
+ */ +void siw_qp_cm_drop(struct siw_qp *qp, int schedule) +{ + struct siw_cep *cep = qp->cep; + + qp->rx_stream.rx_suspend = 1; + qp->tx_ctx.tx_suspend = 1; + + if (!qp->cep) + return; + + if (schedule) { + siw_cm_queue_work(cep, SIW_CM_WORK_CLOSE_LLP); + } else { + siw_cep_set_inuse(cep); + + if (cep->state == SIW_EPSTATE_CLOSED) { + siw_dbg_cep(cep, "already closed\n"); + goto out; + } + siw_dbg_cep(cep, "immediate close, state %d\n", cep->state); + + if (qp->term_info.valid) + siw_send_terminate(qp); + + if (cep->cm_id) { + switch (cep->state) { + case SIW_EPSTATE_AWAIT_MPAREP: + siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, + -EINVAL); + break; + + case SIW_EPSTATE_RDMA_MODE: + siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0); + break; + + case SIW_EPSTATE_IDLE: + case SIW_EPSTATE_LISTENING: + case SIW_EPSTATE_CONNECTING: + case SIW_EPSTATE_AWAIT_MPAREQ: + case SIW_EPSTATE_RECVD_MPAREQ: + case SIW_EPSTATE_CLOSED: + default: + break; + } + cep->cm_id->rem_ref(cep->cm_id); + cep->cm_id = NULL; + siw_cep_put(cep); + } + cep->state = SIW_EPSTATE_CLOSED; + + if (cep->sock) { + siw_socket_disassoc(cep->sock); + /* + * Immediately close socket + */ + sock_release(cep->sock); + cep->sock = NULL; + } + if (cep->qp) { + cep->qp = NULL; + siw_qp_put(qp); + } +out: + siw_cep_set_free(cep); + } +} + +void siw_cep_put(struct siw_cep *cep) +{ + WARN_ON(kref_read(&cep->ref) < 1); + kref_put(&cep->ref, __siw_cep_dealloc); +} + +void siw_cep_get(struct siw_cep *cep) +{ + kref_get(&cep->ref); +} + +/* + * Expects params->pd_len in host byte order + */ +static int siw_send_mpareqrep(struct siw_cep *cep, const void *pdata, u8 pd_len) +{ + struct socket *s = cep->sock; + struct mpa_rr *rr = &cep->mpa.hdr; + struct kvec iov[3]; + struct msghdr msg; + int rv; + int iovec_num = 0; + int mpa_len; + + memset(&msg, 0, sizeof(msg)); + + iov[iovec_num].iov_base = rr; + iov[iovec_num].iov_len = sizeof(*rr); + mpa_len = sizeof(*rr); + + if (cep->enhanced_rdma_conn_est) { + iovec_num++; + iov[iovec_num].iov_base = &cep->mpa.v2_ctrl; + iov[iovec_num].iov_len = sizeof(cep->mpa.v2_ctrl); + mpa_len += sizeof(cep->mpa.v2_ctrl); + } + if (pd_len) { + iovec_num++; + iov[iovec_num].iov_base = (char *)pdata; + iov[iovec_num].iov_len = pd_len; + mpa_len += pd_len; + } + if (cep->enhanced_rdma_conn_est) + pd_len += sizeof(cep->mpa.v2_ctrl); + + rr->params.pd_len = cpu_to_be16(pd_len); + + rv = kernel_sendmsg(s, &msg, iov, iovec_num + 1, mpa_len); + + return rv < 0 ? rv : 0; +} + +/* + * Receive MPA Request/Reply header. + * + * Returns 0 if complete MPA Request/Reply header including + * eventual private data was received. Returns -EAGAIN if + * header was partially received or negative error code otherwise. + * + * Context: May be called in process context only + */ +static int siw_recv_mpa_rr(struct siw_cep *cep) +{ + struct mpa_rr *hdr = &cep->mpa.hdr; + struct socket *s = cep->sock; + u16 pd_len; + int rcvd, to_rcv; + + if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) { + rcvd = ksock_recv(s, (char *)hdr + cep->mpa.bytes_rcvd, + sizeof(struct mpa_rr) - cep->mpa.bytes_rcvd, + 0); + if (rcvd <= 0) + return -ECONNABORTED; + + cep->mpa.bytes_rcvd += rcvd; + + if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) + return -EAGAIN; + + if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA) + return -EPROTO; + } + pd_len = be16_to_cpu(hdr->params.pd_len); + + /* + * At least the MPA Request/Reply header (frame not including + * private data) has been received. + * Receive (or continue receiving) any private data. 
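In siw_send_mpareqrep() above, the on-wire pd_len grows by the 4-byte MPA v2 ird/ord control block when enhanced connection setup is in use, and siw_cm_upcall() strips those bytes again before the private data reaches the IWCM. A sketch of just that length arithmetic (toy numbers, demo struct name):

#include <stdint.h>
#include <stdio.h>

struct mpa_v2_data_demo { uint16_t ird, ord; };	/* 4 bytes on the wire */

int main(void)
{
	unsigned int consumer_pd_len = 24;	/* bytes from the application */
	unsigned int wire_pd_len =
		consumer_pd_len + sizeof(struct mpa_v2_data_demo);
	unsigned int reported =
		wire_pd_len - sizeof(struct mpa_v2_data_demo);

	printf("wire pd_len %u -> private data handed to IWCM %u bytes\n",
	       wire_pd_len, reported);
	return 0;
}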
+ */ + to_rcv = pd_len - (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr)); + + if (!to_rcv) { + /* + * We must have hdr->params.pd_len == 0 and thus received a + * complete MPA Request/Reply frame. + * Check against peer protocol violation. + */ + u32 word; + + rcvd = ksock_recv(s, (char *)&word, sizeof(word), MSG_DONTWAIT); + if (rcvd == -EAGAIN) + return 0; + + if (rcvd == 0) { + siw_dbg_cep(cep, "peer EOF\n"); + return -EPIPE; + } + if (rcvd < 0) { + siw_dbg_cep(cep, "error: %d\n", rcvd); + return rcvd; + } + siw_dbg_cep(cep, "peer sent extra data: %d\n", rcvd); + + return -EPROTO; + } + + /* + * At this point, we must have hdr->params.pd_len != 0. + * A private data buffer gets allocated if hdr->params.pd_len != 0. + */ + if (!cep->mpa.pdata) { + cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL); + if (!cep->mpa.pdata) + return -ENOMEM; + } + rcvd = ksock_recv( + s, cep->mpa.pdata + cep->mpa.bytes_rcvd - sizeof(struct mpa_rr), + to_rcv + 4, MSG_DONTWAIT); + + if (rcvd < 0) + return rcvd; + + if (rcvd > to_rcv) + return -EPROTO; + + cep->mpa.bytes_rcvd += rcvd; + + if (to_rcv == rcvd) { + siw_dbg_cep(cep, "%d bytes private data received\n", pd_len); + return 0; + } + return -EAGAIN; +} + +/* + * siw_proc_mpareq() + * + * Read MPA Request from socket and signal new connection to IWCM + * if success. Caller must hold lock on corresponding listening CEP. + */ +static int siw_proc_mpareq(struct siw_cep *cep) +{ + struct mpa_rr *req; + int version, rv; + u16 pd_len; + + rv = siw_recv_mpa_rr(cep); + if (rv) + return rv; + + req = &cep->mpa.hdr; + + version = __mpa_rr_revision(req->params.bits); + pd_len = be16_to_cpu(req->params.pd_len); + + if (version > MPA_REVISION_2) + /* allow for 0, 1, and 2 only */ + return -EPROTO; + + if (memcmp(req->key, MPA_KEY_REQ, 16)) + return -EPROTO; + + /* Prepare for sending MPA reply */ + memcpy(req->key, MPA_KEY_REP, 16); + + if (version == MPA_REVISION_2 && + (req->params.bits & MPA_RR_FLAG_ENHANCED)) { + /* + * MPA version 2 must signal IRD/ORD values and P2P mode + * in private data if header flag MPA_RR_FLAG_ENHANCED + * is set. + */ + if (pd_len < sizeof(struct mpa_v2_data)) + goto reject_conn; + + cep->enhanced_rdma_conn_est = true; + } + + /* MPA Markers: currently not supported. Marker TX to be added. */ + if (req->params.bits & MPA_RR_FLAG_MARKERS) + goto reject_conn; + + if (req->params.bits & MPA_RR_FLAG_CRC) { + /* + * RFC 5044, page 27: CRC MUST be used if peer requests it. + * siw specific: 'mpa_crc_strict' parameter to reject + * connection with CRC if local CRC off enforced by + * 'mpa_crc_strict' module parameter. + */ + if (!mpa_crc_required && mpa_crc_strict) + goto reject_conn; + + /* Enable CRC if requested by module parameter */ + if (mpa_crc_required) + req->params.bits |= MPA_RR_FLAG_CRC; + } + if (cep->enhanced_rdma_conn_est) { + struct mpa_v2_data *v2 = (struct mpa_v2_data *)cep->mpa.pdata; + + /* + * Peer requested ORD becomes requested local IRD, + * peer requested IRD becomes requested local ORD. + * IRD and ORD get limited by global maximum values. + */ + cep->ord = ntohs(v2->ird) & MPA_IRD_ORD_MASK; + cep->ord = min(cep->ord, SIW_MAX_ORD_QP); + cep->ird = ntohs(v2->ord) & MPA_IRD_ORD_MASK; + cep->ird = min(cep->ird, SIW_MAX_IRD_QP); + + /* May get overwritten by locally negotiated values */ + cep->mpa.v2_ctrl.ird = htons(cep->ird); + cep->mpa.v2_ctrl.ord = htons(cep->ord); + + /* + * Support for peer sent zero length Write or Read to + * let local side enter RTS. Writes are preferred. 
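The enhanced-mode branch above turns the peer's requested ORD into the local IRD and vice versa, masking each to the 14-bit MPA v2 field and clamping to the SIW_MAX_* limits defined earlier in siw.h. A standalone restatement of that arithmetic (example values only):

#include <stdint.h>
#include <stdio.h>

#define MPA_IRD_ORD_MASK	0x3fff
#define SIW_MAX_ORD_QP		128
#define SIW_MAX_IRD_QP		128

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* peer values, already converted to host byte order */
	uint16_t peer_ird = 300, peer_ord = 64;

	unsigned int local_ord = min_u(peer_ird & MPA_IRD_ORD_MASK,
				       SIW_MAX_ORD_QP);
	unsigned int local_ird = min_u(peer_ord & MPA_IRD_ORD_MASK,
				       SIW_MAX_IRD_QP);

	printf("negotiated: local ird %u, local ord %u\n",
	       local_ird, local_ord);
	return 0;
}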
+ * Sends would require pre-posting a Receive and are + * not supported. + * Propose zero length Write if none of Read and Write + * is indicated. + */ + if (v2->ird & MPA_V2_PEER_TO_PEER) { + cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER; + + if (v2->ord & MPA_V2_RDMA_WRITE_RTR) + cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR; + else if (v2->ord & MPA_V2_RDMA_READ_RTR) + cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_READ_RTR; + else + cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR; + } + } + + cep->state = SIW_EPSTATE_RECVD_MPAREQ; + + /* Keep reference until IWCM accepts/rejects */ + siw_cep_get(cep); + rv = siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0); + if (rv) + siw_cep_put(cep); + + return rv; + +reject_conn: + siw_dbg_cep(cep, "reject: crc %d:%d:%d, m %d:%d\n", + req->params.bits & MPA_RR_FLAG_CRC ? 1 : 0, + mpa_crc_required, mpa_crc_strict, + req->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0); + + req->params.bits &= ~MPA_RR_FLAG_MARKERS; + req->params.bits |= MPA_RR_FLAG_REJECT; + + if (!mpa_crc_required && mpa_crc_strict) + req->params.bits &= ~MPA_RR_FLAG_CRC; + + if (pd_len) + kfree(cep->mpa.pdata); + + cep->mpa.pdata = NULL; + + siw_send_mpareqrep(cep, NULL, 0); + + return -EOPNOTSUPP; +} + +static int siw_proc_mpareply(struct siw_cep *cep) +{ + struct siw_qp_attrs qp_attrs; + enum siw_qp_attr_mask qp_attr_mask; + struct siw_qp *qp = cep->qp; + struct mpa_rr *rep; + int rv; + u16 rep_ord; + u16 rep_ird; + bool ird_insufficient = false; + enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR; + + rv = siw_recv_mpa_rr(cep); + if (rv != -EAGAIN) + siw_cancel_mpatimer(cep); + if (rv) + goto out_err; + + rep = &cep->mpa.hdr; + + if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) { + /* allow for 0, 1, and 2 only */ + rv = -EPROTO; + goto out_err; + } + if (memcmp(rep->key, MPA_KEY_REP, 16)) { + siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, LLP_ETYPE_MPA, + LLP_ECODE_INVALID_REQ_RESP, 0); + siw_send_terminate(qp); + rv = -EPROTO; + goto out_err; + } + if (rep->params.bits & MPA_RR_FLAG_REJECT) { + siw_dbg_cep(cep, "got mpa reject\n"); + siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET); + + return -ECONNRESET; + } + if (try_gso && rep->params.bits & MPA_RR_FLAG_GSO_EXP) { + siw_dbg_cep(cep, "peer allows GSO on TX\n"); + qp->tx_ctx.gso_seg_limit = 0; + } + if ((rep->params.bits & MPA_RR_FLAG_MARKERS) || + (mpa_crc_required && !(rep->params.bits & MPA_RR_FLAG_CRC)) || + (mpa_crc_strict && !mpa_crc_required && + (rep->params.bits & MPA_RR_FLAG_CRC))) { + siw_dbg_cep(cep, "reply unsupp: crc %d:%d:%d, m %d:%d\n", + rep->params.bits & MPA_RR_FLAG_CRC ? 1 : 0, + mpa_crc_required, mpa_crc_strict, + rep->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0); + + siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED); + + return -EINVAL; + } + if (cep->enhanced_rdma_conn_est) { + struct mpa_v2_data *v2; + + if (__mpa_rr_revision(rep->params.bits) < MPA_REVISION_2 || + !(rep->params.bits & MPA_RR_FLAG_ENHANCED)) { + /* + * Protocol failure: The responder MUST reply with + * MPA version 2 and MUST set MPA_RR_FLAG_ENHANCED. + */ + siw_dbg_cep(cep, "mpa reply error: vers %d, enhcd %d\n", + __mpa_rr_revision(rep->params.bits), + rep->params.bits & MPA_RR_FLAG_ENHANCED ? 
+ 1 : + 0); + + siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, + -ECONNRESET); + return -EINVAL; + } + v2 = (struct mpa_v2_data *)cep->mpa.pdata; + rep_ird = ntohs(v2->ird) & MPA_IRD_ORD_MASK; + rep_ord = ntohs(v2->ord) & MPA_IRD_ORD_MASK; + + if (cep->ird < rep_ord && + (relaxed_ird_negotiation == false || + rep_ord > cep->sdev->attrs.max_ird)) { + siw_dbg_cep(cep, "ird %d, rep_ord %d, max_ord %d\n", + cep->ird, rep_ord, + cep->sdev->attrs.max_ord); + ird_insufficient = true; + } + if (cep->ord > rep_ird && relaxed_ird_negotiation == false) { + siw_dbg_cep(cep, "ord %d, rep_ird %d\n", cep->ord, + rep_ird); + ird_insufficient = true; + } + /* + * Always report negotiated peer values to user, + * even if IRD/ORD negotiation failed + */ + cep->ird = rep_ord; + cep->ord = rep_ird; + + if (ird_insufficient) { + /* + * If the initiator IRD is insuffient for the + * responder ORD, send a TERM. + */ + siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, + LLP_ETYPE_MPA, + LLP_ECODE_INSUFFICIENT_IRD, 0); + siw_send_terminate(qp); + rv = -ENOMEM; + goto out_err; + } + if (cep->mpa.v2_ctrl_req.ird & MPA_V2_PEER_TO_PEER) + mpa_p2p_mode = + cep->mpa.v2_ctrl_req.ord & + (MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR); + + /* + * Check if we requested P2P mode, and if peer agrees + */ + if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) { + if ((mpa_p2p_mode & v2->ord) == 0) { + /* + * We requested RTR mode(s), but the peer + * did not pick any mode we support. + */ + siw_dbg_cep(cep, + "rtr mode: req %2x, got %2x\n", + mpa_p2p_mode, + v2->ord & (MPA_V2_RDMA_WRITE_RTR | + MPA_V2_RDMA_READ_RTR)); + + siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, + LLP_ETYPE_MPA, + LLP_ECODE_NO_MATCHING_RTR, + 0); + siw_send_terminate(qp); + rv = -EPROTO; + goto out_err; + } + mpa_p2p_mode = v2->ord & (MPA_V2_RDMA_WRITE_RTR | + MPA_V2_RDMA_READ_RTR); + } + } + memset(&qp_attrs, 0, sizeof(qp_attrs)); + + if (rep->params.bits & MPA_RR_FLAG_CRC) + qp_attrs.flags = SIW_MPA_CRC; + + qp_attrs.irq_size = cep->ird; + qp_attrs.orq_size = cep->ord; + qp_attrs.sk = cep->sock; + qp_attrs.state = SIW_QP_STATE_RTS; + + qp_attr_mask = SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE | + SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD | SIW_QP_ATTR_MPA; + + /* Move socket RX/TX under QP control */ + down_write(&qp->state_lock); + if (qp->attrs.state > SIW_QP_STATE_RTR) { + rv = -EINVAL; + up_write(&qp->state_lock); + goto out_err; + } + rv = siw_qp_modify(qp, &qp_attrs, qp_attr_mask); + + siw_qp_socket_assoc(cep, qp); + + up_write(&qp->state_lock); + + /* Send extra RDMA frame to trigger peer RTS if negotiated */ + if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) { + rv = siw_qp_mpa_rts(qp, mpa_p2p_mode); + if (rv) + goto out_err; + } + if (!rv) { + rv = siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0); + if (!rv) + cep->state = SIW_EPSTATE_RDMA_MODE; + + return 0; + } + +out_err: + siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL); + + return rv; +} + +/* + * siw_accept_newconn - accept an incoming pending connection + * + */ +static void siw_accept_newconn(struct siw_cep *cep) +{ + struct socket *s = cep->sock; + struct socket *new_s = NULL; + struct siw_cep *new_cep = NULL; + int rv = 0; /* debug only. should disappear */ + + if (cep->state != SIW_EPSTATE_LISTENING) + goto error; + + new_cep = siw_cep_alloc(cep->sdev); + if (!new_cep) + goto error; + + /* + * 4: Allocate a sufficient number of work elements + * to allow concurrent handling of local + peer close + * events, MPA header processing + MPA timeout. 
+ */ + if (siw_cm_alloc_work(new_cep, 4) != 0) + goto error; + + /* + * Copy saved socket callbacks from listening CEP + * and assign new socket with new CEP + */ + new_cep->sk_state_change = cep->sk_state_change; + new_cep->sk_data_ready = cep->sk_data_ready; + new_cep->sk_write_space = cep->sk_write_space; + new_cep->sk_error_report = cep->sk_error_report; + + rv = kernel_accept(s, &new_s, O_NONBLOCK); + if (rv != 0) { + /* + * Connection already aborted by peer..? + */ + siw_dbg_cep(cep, "kernel_accept() error: %d\n", rv); + goto error; + } + new_cep->sock = new_s; + siw_cep_get(new_cep); + new_s->sk->sk_user_data = new_cep; + + siw_dbg_cep(cep, "listen socket 0x%p, new 0x%p\n", s, new_s); + + if (siw_tcp_nagle == false) { + int val = 1; + + rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY, + (char *)&val, sizeof(val)); + if (rv) { + siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv); + goto error; + } + } + new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ; + + rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT); + if (rv) + goto error; + /* + * See siw_proc_mpareq() etc. for the use of new_cep->listen_cep. + */ + new_cep->listen_cep = cep; + siw_cep_get(cep); + + if (atomic_read(&new_s->sk->sk_rmem_alloc)) { + /* + * MPA REQ already queued + */ + siw_dbg_cep(cep, "immediate mpa request\n"); + + siw_cep_set_inuse(new_cep); + rv = siw_proc_mpareq(new_cep); + siw_cep_set_free(new_cep); + + if (rv != -EAGAIN) { + siw_cep_put(cep); + new_cep->listen_cep = NULL; + if (rv) + goto error; + } + } + return; + +error: + if (new_cep) + siw_cep_put(new_cep); + + if (new_s) { + siw_socket_disassoc(new_s); + sock_release(new_s); + new_cep->sock = NULL; + } + siw_dbg_cep(cep, "error %d\n", rv); +} + +static void siw_cm_work_handler(struct work_struct *w) +{ + struct siw_cm_work *work; + struct siw_cep *cep; + int release_cep = 0, rv = 0; + + work = container_of(w, struct siw_cm_work, work.work); + cep = work->cep; + + siw_dbg_cep(cep, "[QP %u]: work type: %d, state %d\n", + cep->qp ? qp_id(cep->qp) : -1, work->type, cep->state); + + siw_cep_set_inuse(cep); + + switch (work->type) { + case SIW_CM_WORK_ACCEPT: + siw_accept_newconn(cep); + break; + + case SIW_CM_WORK_READ_MPAHDR: + if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) { + if (cep->listen_cep) { + siw_cep_set_inuse(cep->listen_cep); + + if (cep->listen_cep->state == + SIW_EPSTATE_LISTENING) + rv = siw_proc_mpareq(cep); + else + rv = -EFAULT; + + siw_cep_set_free(cep->listen_cep); + + if (rv != -EAGAIN) { + siw_cep_put(cep->listen_cep); + cep->listen_cep = NULL; + if (rv) + siw_cep_put(cep); + } + } + } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) { + rv = siw_proc_mpareply(cep); + } else { + /* + * CEP already moved out of MPA handshake. + * any connection management already done. + * silently ignore the mpa packet. 
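[Editor's note] The CEP code here lives entirely off TCP socket callbacks: the listening socket's original sk_data_ready/sk_state_change handlers are saved, siw's own handlers are installed, and the CEP pointer is parked in sk->sk_user_data, all under sk_callback_lock. A stripped-down sketch of that interception pattern, with a hypothetical demo_ep standing in for struct siw_cep:

#include <linux/kernel.h>
#include <net/sock.h>

struct demo_ep {
	struct socket *sock;
	void (*orig_data_ready)(struct sock *sk);
};

static void demo_data_ready(struct sock *sk)
{
	struct demo_ep *ep;

	read_lock(&sk->sk_callback_lock);
	ep = sk->sk_user_data;
	if (ep)
		pr_info("demo: data ready on ep %p\n", ep);
	read_unlock(&sk->sk_callback_lock);
}

/* Attach: remember the original callback, install our own. */
static void demo_socket_assoc(struct demo_ep *ep, struct socket *s)
{
	struct sock *sk = s->sk;

	write_lock_bh(&sk->sk_callback_lock);
	ep->sock = s;
	ep->orig_data_ready = sk->sk_data_ready;
	sk->sk_user_data = ep;
	sk->sk_data_ready = demo_data_ready;
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Detach: restore the original callback before releasing the socket. */
static void demo_socket_disassoc(struct demo_ep *ep)
{
	struct sock *sk = ep->sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_data_ready = ep->orig_data_ready;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}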
+ */ + if (cep->state == SIW_EPSTATE_RDMA_MODE) { + cep->sock->sk->sk_data_ready(cep->sock->sk); + siw_dbg_cep(cep, "already in RDMA mode"); + } else { + siw_dbg_cep(cep, "out of state: %d\n", + cep->state); + } + } + if (rv && rv != EAGAIN) + release_cep = 1; + break; + + case SIW_CM_WORK_CLOSE_LLP: + /* + * QP scheduled LLP close + */ + if (cep->qp && cep->qp->term_info.valid) + siw_send_terminate(cep->qp); + + if (cep->cm_id) + siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0); + + release_cep = 1; + break; + + case SIW_CM_WORK_PEER_CLOSE: + if (cep->cm_id) { + if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) { + /* + * MPA reply not received, but connection drop + */ + siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, + -ECONNRESET); + } else if (cep->state == SIW_EPSTATE_RDMA_MODE) { + /* + * NOTE: IW_CM_EVENT_DISCONNECT is given just + * to transition IWCM into CLOSING. + */ + siw_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0); + siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0); + } + /* + * for other states there is no connection + * known to the IWCM. + */ + } else { + if (cep->state == SIW_EPSTATE_RECVD_MPAREQ) { + /* + * Wait for the ulp/CM to call accept/reject + */ + siw_dbg_cep(cep, + "mpa req recvd, wait for ULP\n"); + } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) { + /* + * Socket close before MPA request received. + */ + siw_dbg_cep(cep, "no mpareq: drop listener\n"); + siw_cep_put(cep->listen_cep); + cep->listen_cep = NULL; + } + } + release_cep = 1; + break; + + case SIW_CM_WORK_MPATIMEOUT: + cep->mpa_timer = NULL; + + if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) { + /* + * MPA request timed out: + * Hide any partially received private data and signal + * timeout + */ + cep->mpa.hdr.params.pd_len = 0; + + if (cep->cm_id) + siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, + -ETIMEDOUT); + release_cep = 1; + + } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) { + /* + * No MPA request received after peer TCP stream setup. + */ + if (cep->listen_cep) { + siw_cep_put(cep->listen_cep); + cep->listen_cep = NULL; + } + release_cep = 1; + } + break; + + default: + WARN(1, "Undefined CM work type: %d\n", work->type); + } + if (release_cep) { + siw_dbg_cep(cep, + "release: timer=%s, QP[%u], id 0x%p\n", + cep->mpa_timer ? "y" : "n", + cep->qp ? 
qp_id(cep->qp) : -1, cep->cm_id); + + siw_cancel_mpatimer(cep); + + cep->state = SIW_EPSTATE_CLOSED; + + if (cep->qp) { + struct siw_qp *qp = cep->qp; + /* + * Serialize a potential race with application + * closing the QP and calling siw_qp_cm_drop() + */ + siw_qp_get(qp); + siw_cep_set_free(cep); + + siw_qp_llp_close(qp); + siw_qp_put(qp); + + siw_cep_set_inuse(cep); + cep->qp = NULL; + siw_qp_put(qp); + } + if (cep->sock) { + siw_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + } + if (cep->cm_id) { + cep->cm_id->rem_ref(cep->cm_id); + cep->cm_id = NULL; + siw_cep_put(cep); + } + } + siw_cep_set_free(cep); + siw_put_work(work); + siw_cep_put(cep); +} + +static struct workqueue_struct *siw_cm_wq; + +int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type) +{ + struct siw_cm_work *work = siw_get_work(cep); + unsigned long delay = 0; + + if (!work) { + siw_dbg_cep(cep, "failed with no work available\n"); + return -ENOMEM; + } + work->type = type; + work->cep = cep; + + siw_cep_get(cep); + + INIT_DELAYED_WORK(&work->work, siw_cm_work_handler); + + if (type == SIW_CM_WORK_MPATIMEOUT) { + cep->mpa_timer = work; + + if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) + delay = MPAREQ_TIMEOUT; + else + delay = MPAREP_TIMEOUT; + } + siw_dbg_cep(cep, "[QP %u]: work type: %d, work 0x%p, timeout %lu\n", + cep->qp ? qp_id(cep->qp) : -1, type, work, delay); + + queue_delayed_work(siw_cm_wq, &work->work, delay); + + return 0; +} + +static void siw_cm_llp_data_ready(struct sock *sk) +{ + struct siw_cep *cep; + + read_lock(&sk->sk_callback_lock); + + cep = sk_to_cep(sk); + if (!cep) { + WARN_ON(1); + goto out; + } + siw_dbg_cep(cep, "state: %d\n", cep->state); + + switch (cep->state) { + case SIW_EPSTATE_RDMA_MODE: + /* fall through */ + case SIW_EPSTATE_LISTENING: + break; + + case SIW_EPSTATE_AWAIT_MPAREQ: + /* fall through */ + case SIW_EPSTATE_AWAIT_MPAREP: + siw_cm_queue_work(cep, SIW_CM_WORK_READ_MPAHDR); + break; + + default: + siw_dbg_cep(cep, "unexpected data, state %d\n", cep->state); + break; + } +out: + read_unlock(&sk->sk_callback_lock); +} + +static void siw_cm_llp_write_space(struct sock *sk) +{ + struct siw_cep *cep = sk_to_cep(sk); + + if (cep) + siw_dbg_cep(cep, "state: %d\n", cep->state); +} + +static void siw_cm_llp_error_report(struct sock *sk) +{ + struct siw_cep *cep = sk_to_cep(sk); + + if (cep) { + siw_dbg_cep(cep, "error %d, socket state: %d, cep state: %d\n", + sk->sk_err, sk->sk_state, cep->state); + cep->sk_error_report(sk); + } +} + +static void siw_cm_llp_state_change(struct sock *sk) +{ + struct siw_cep *cep; + void (*orig_state_change)(struct sock *s); + + read_lock(&sk->sk_callback_lock); + + cep = sk_to_cep(sk); + if (!cep) { + /* endpoint already disassociated */ + read_unlock(&sk->sk_callback_lock); + return; + } + orig_state_change = cep->sk_state_change; + + siw_dbg_cep(cep, "state: %d\n", cep->state); + + switch (sk->sk_state) { + case TCP_ESTABLISHED: + /* + * handle accepting socket as special case where only + * new connection is possible + */ + siw_cm_queue_work(cep, SIW_CM_WORK_ACCEPT); + break; + + case TCP_CLOSE: + case TCP_CLOSE_WAIT: + if (cep->qp) + cep->qp->tx_ctx.tx_suspend = 1; + siw_cm_queue_work(cep, SIW_CM_WORK_PEER_CLOSE); + break; + + default: + siw_dbg_cep(cep, "unexpected socket state %d\n", sk->sk_state); + } + read_unlock(&sk->sk_callback_lock); + orig_state_change(sk); +} + +static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, + struct sockaddr *raddr) +{ + int rv, flags = 0, s_val = 1; + 
size_t size = laddr->sa_family == AF_INET ? + sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); + + /* + * Make address available again asap. + */ + rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val, + sizeof(s_val)); + if (rv < 0) + return rv; + + rv = s->ops->bind(s, laddr, size); + if (rv < 0) + return rv; + + rv = s->ops->connect(s, raddr, size, flags); + + return rv < 0 ? rv : 0; +} + +int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) +{ + struct siw_device *sdev = to_siw_dev(id->device); + struct siw_qp *qp; + struct siw_cep *cep = NULL; + struct socket *s = NULL; + struct sockaddr *laddr = (struct sockaddr *)&id->local_addr, + *raddr = (struct sockaddr *)&id->remote_addr; + bool p2p_mode = peer_to_peer, v4 = true; + u16 pd_len = params->private_data_len; + int version = mpa_version, rv; + + if (pd_len > MPA_MAX_PRIVDATA) + return -EINVAL; + + if (params->ird > sdev->attrs.max_ird || + params->ord > sdev->attrs.max_ord) + return -ENOMEM; + + if (laddr->sa_family == AF_INET6) + v4 = false; + else if (laddr->sa_family != AF_INET) + return -EAFNOSUPPORT; + + /* + * Respect any iwarp port mapping: Use mapped remote address + * if valid. Local address must not be mapped, since siw + * uses kernel TCP stack. + */ + if ((v4 && to_sockaddr_in(id->remote_addr).sin_port != 0) || + to_sockaddr_in6(id->remote_addr).sin6_port != 0) + raddr = (struct sockaddr *)&id->m_remote_addr; + + qp = siw_qp_id2obj(sdev, params->qpn); + if (!qp) { + WARN(1, "[QP %u] does not exist\n", params->qpn); + rv = -EINVAL; + goto error; + } + if (v4) + siw_dbg_qp(qp, + "id 0x%p, pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n", + id, pd_len, + &((struct sockaddr_in *)(laddr))->sin_addr, + ntohs(((struct sockaddr_in *)(laddr))->sin_port), + &((struct sockaddr_in *)(raddr))->sin_addr, + ntohs(((struct sockaddr_in *)(raddr))->sin_port)); + else + siw_dbg_qp(qp, + "id 0x%p, pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n", + id, pd_len, + &((struct sockaddr_in6 *)(laddr))->sin6_addr, + ntohs(((struct sockaddr_in6 *)(laddr))->sin6_port), + &((struct sockaddr_in6 *)(raddr))->sin6_addr, + ntohs(((struct sockaddr_in6 *)(raddr))->sin6_port)); + + rv = sock_create(v4 ? AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s); + if (rv < 0) + goto error; + + /* + * NOTE: For simplification, connect() is called in blocking + * mode. Might be reconsidered for async connection setup at + * TCP level. + */ + rv = kernel_bindconnect(s, laddr, raddr); + if (rv != 0) { + siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv); + goto error; + } + if (siw_tcp_nagle == false) { + int val = 1; + + rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val, + sizeof(val)); + if (rv) { + siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv); + goto error; + } + } + cep = siw_cep_alloc(sdev); + if (!cep) { + rv = -ENOMEM; + goto error; + } + siw_cep_set_inuse(cep); + + /* Associate QP with CEP */ + siw_cep_get(cep); + qp->cep = cep; + + /* siw_qp_get(qp) already done by QP lookup */ + cep->qp = qp; + + id->add_ref(id); + cep->cm_id = id; + + /* + * 4: Allocate a sufficient number of work elements + * to allow concurrent handling of local + peer close + * events, MPA header processing + MPA timeout. 
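[Editor's note] kernel_bindconnect() above is the whole of the client-side TCP setup: allow address reuse, bind to the local address, then a blocking connect; TCP_NODELAY is set separately when siw_tcp_nagle is off. A condensed IPv4-only sketch of the same steps; kernel_setsockopt() matches the kernel generation this driver targets, later kernels use sock_set_reuseaddr() and tcp_sock_set_nodelay() instead:

#include <linux/net.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <net/sock.h>

static int demo_tcp_connect(struct sockaddr_in *laddr,
			    struct sockaddr_in *raddr, struct socket **out)
{
	struct socket *s;
	int one = 1, rv;

	rv = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
	if (rv < 0)
		return rv;

	/* Make the local address reusable right after a close. */
	rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
			       (char *)&one, sizeof(one));
	if (rv)
		goto err;

	rv = kernel_bind(s, (struct sockaddr *)laddr, sizeof(*laddr));
	if (rv)
		goto err;

	/* Blocking connect, as in siw_connect(). */
	rv = kernel_connect(s, (struct sockaddr *)raddr, sizeof(*raddr), 0);
	if (rv)
		goto err;

	/* Disable Nagle so small MPA/DDP segments go out immediately. */
	rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY,
			       (char *)&one, sizeof(one));
	if (rv)
		goto err;

	*out = s;
	return 0;
err:
	sock_release(s);
	return rv;
}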
+ */ + rv = siw_cm_alloc_work(cep, 4); + if (rv != 0) { + rv = -ENOMEM; + goto error; + } + cep->ird = params->ird; + cep->ord = params->ord; + + if (p2p_mode && cep->ord == 0) + cep->ord = 1; + + cep->state = SIW_EPSTATE_CONNECTING; + + /* + * Associate CEP with socket + */ + siw_cep_socket_assoc(cep, s); + + cep->state = SIW_EPSTATE_AWAIT_MPAREP; + + /* + * Set MPA Request bits: CRC if required, no MPA Markers, + * MPA Rev. according to module parameter 'mpa_version', Key 'Request'. + */ + cep->mpa.hdr.params.bits = 0; + if (version > MPA_REVISION_2) { + pr_warn("Setting MPA version to %u\n", MPA_REVISION_2); + version = MPA_REVISION_2; + /* Adjust also module parameter */ + mpa_version = MPA_REVISION_2; + } + __mpa_rr_set_revision(&cep->mpa.hdr.params.bits, version); + + if (try_gso) + cep->mpa.hdr.params.bits |= MPA_RR_FLAG_GSO_EXP; + + if (mpa_crc_required) + cep->mpa.hdr.params.bits |= MPA_RR_FLAG_CRC; + + /* + * If MPA version == 2: + * o Include ORD and IRD. + * o Indicate peer-to-peer mode, if required by module + * parameter 'peer_to_peer'. + */ + if (version == MPA_REVISION_2) { + cep->enhanced_rdma_conn_est = true; + cep->mpa.hdr.params.bits |= MPA_RR_FLAG_ENHANCED; + + cep->mpa.v2_ctrl.ird = htons(cep->ird); + cep->mpa.v2_ctrl.ord = htons(cep->ord); + + if (p2p_mode) { + cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER; + cep->mpa.v2_ctrl.ord |= rtr_type; + } + /* Remember own P2P mode requested */ + cep->mpa.v2_ctrl_req.ird = cep->mpa.v2_ctrl.ird; + cep->mpa.v2_ctrl_req.ord = cep->mpa.v2_ctrl.ord; + } + memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, 16); + + rv = siw_send_mpareqrep(cep, params->private_data, pd_len); + /* + * Reset private data. + */ + cep->mpa.hdr.params.pd_len = 0; + + if (rv >= 0) { + rv = siw_cm_queue_work(cep, SIW_CM_WORK_MPATIMEOUT); + if (!rv) { + siw_dbg_cep(cep, "id 0x%p, [QP %u]: exit\n", id, + qp_id(qp)); + siw_cep_set_free(cep); + return 0; + } + } +error: + siw_dbg_qp(qp, "failed: %d\n", rv); + + if (cep) { + siw_socket_disassoc(s); + sock_release(s); + cep->sock = NULL; + + cep->qp = NULL; + + cep->cm_id = NULL; + id->rem_ref(id); + siw_cep_put(cep); + + qp->cep = NULL; + siw_cep_put(cep); + + cep->state = SIW_EPSTATE_CLOSED; + + siw_cep_set_free(cep); + + siw_cep_put(cep); + + } else if (s) { + sock_release(s); + } + siw_qp_put(qp); + + return rv; +} + +/* + * siw_accept - Let SoftiWARP accept an RDMA connection request + * + * @id: New connection management id to be used for accepted + * connection request + * @params: Connection parameters provided by ULP for accepting connection + * + * Transition QP to RTS state, associate new CM id @id with accepted CEP + * and get prepared for TCP input by installing socket callbacks. + * Then send MPA Reply and generate the "connection established" event. + * Socket callbacks must be installed before sending MPA Reply, because + * the latter may cause a first RDMA message to arrive from the RDMA Initiator + * side very quickly, at which time the socket callbacks must be ready. 
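[Editor's note] siw_send_mpareqrep(), used by both the active and the passive path in this file, writes the MPA request/reply frame whose bits are negotiated above. As a reading aid, a rough model of the RFC 5044 frame that precedes the private data; the driver itself keeps flags and revision together in struct mpa_rr, so the field split and constants below are illustrative only:

#include <stdint.h>

/* Approximate MPA req/rep frame (RFC 5044). 'pd_len' bytes of private
 * data follow; with enhanced connection setup (RFC 6581) that private
 * data begins with the IRD/ORD control words shown earlier.
 */
struct demo_mpa_frame {
	uint8_t  key[16];	/* "MPA ID Req Frame" or "MPA ID Rep Frame" */
	uint8_t  flags;		/* M (markers), C (CRC), R (reject), rest reserved */
	uint8_t  revision;	/* 1 = plain MPA, 2 = enhanced mode */
	uint16_t pd_len;	/* private data length, network byte order */
};

#define DEMO_MPA_MARKERS	0x80	/* assumed bit positions */
#define DEMO_MPA_CRC		0x40
#define DEMO_MPA_REJECT		0x20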
+ */ +int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) +{ + struct siw_device *sdev = to_siw_dev(id->device); + struct siw_cep *cep = (struct siw_cep *)id->provider_data; + struct siw_qp *qp; + struct siw_qp_attrs qp_attrs; + int rv, max_priv_data = MPA_MAX_PRIVDATA; + bool wait_for_peer_rts = false; + + siw_cep_set_inuse(cep); + siw_cep_put(cep); + + /* Free lingering inbound private data */ + if (cep->mpa.hdr.params.pd_len) { + cep->mpa.hdr.params.pd_len = 0; + kfree(cep->mpa.pdata); + cep->mpa.pdata = NULL; + } + siw_cancel_mpatimer(cep); + + if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { + siw_dbg_cep(cep, "id 0x%p: out of state\n", id); + + siw_cep_set_free(cep); + siw_cep_put(cep); + + return -ECONNRESET; + } + qp = siw_qp_id2obj(sdev, params->qpn); + if (!qp) { + WARN(1, "[QP %d] does not exist\n", params->qpn); + siw_cep_set_free(cep); + siw_cep_put(cep); + + return -EINVAL; + } + down_write(&qp->state_lock); + if (qp->attrs.state > SIW_QP_STATE_RTR) { + rv = -EINVAL; + up_write(&qp->state_lock); + goto error; + } + siw_dbg_cep(cep, "id 0x%p\n", id); + + if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) { + siw_dbg_cep(cep, "peer allows GSO on TX\n"); + qp->tx_ctx.gso_seg_limit = 0; + } + if (params->ord > sdev->attrs.max_ord || + params->ird > sdev->attrs.max_ird) { + siw_dbg_cep( + cep, + "id 0x%p, [QP %u]: ord %d (max %d), ird %d (max %d)\n", + id, qp_id(qp), params->ord, sdev->attrs.max_ord, + params->ird, sdev->attrs.max_ird); + rv = -EINVAL; + up_write(&qp->state_lock); + goto error; + } + if (cep->enhanced_rdma_conn_est) + max_priv_data -= sizeof(struct mpa_v2_data); + + if (params->private_data_len > max_priv_data) { + siw_dbg_cep( + cep, + "id 0x%p, [QP %u]: private data length: %d (max %d)\n", + id, qp_id(qp), params->private_data_len, max_priv_data); + rv = -EINVAL; + up_write(&qp->state_lock); + goto error; + } + if (cep->enhanced_rdma_conn_est) { + if (params->ord > cep->ord) { + if (relaxed_ird_negotiation) { + params->ord = cep->ord; + } else { + cep->ird = params->ird; + cep->ord = params->ord; + rv = -EINVAL; + up_write(&qp->state_lock); + goto error; + } + } + if (params->ird < cep->ird) { + if (relaxed_ird_negotiation && + cep->ird <= sdev->attrs.max_ird) + params->ird = cep->ird; + else { + rv = -ENOMEM; + up_write(&qp->state_lock); + goto error; + } + } + if (cep->mpa.v2_ctrl.ord & + (MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR)) + wait_for_peer_rts = true; + /* + * Signal back negotiated IRD and ORD values + */ + cep->mpa.v2_ctrl.ord = + htons(params->ord & MPA_IRD_ORD_MASK) | + (cep->mpa.v2_ctrl.ord & ~MPA_V2_MASK_IRD_ORD); + cep->mpa.v2_ctrl.ird = + htons(params->ird & MPA_IRD_ORD_MASK) | + (cep->mpa.v2_ctrl.ird & ~MPA_V2_MASK_IRD_ORD); + } + cep->ird = params->ird; + cep->ord = params->ord; + + cep->cm_id = id; + id->add_ref(id); + + memset(&qp_attrs, 0, sizeof(qp_attrs)); + qp_attrs.orq_size = cep->ord; + qp_attrs.irq_size = cep->ird; + qp_attrs.sk = cep->sock; + if (cep->mpa.hdr.params.bits & MPA_RR_FLAG_CRC) + qp_attrs.flags = SIW_MPA_CRC; + qp_attrs.state = SIW_QP_STATE_RTS; + + siw_dbg_cep(cep, "id 0x%p, [QP%u]: moving to rts\n", id, qp_id(qp)); + + /* Associate QP with CEP */ + siw_cep_get(cep); + qp->cep = cep; + + /* siw_qp_get(qp) already done by QP lookup */ + cep->qp = qp; + + cep->state = SIW_EPSTATE_RDMA_MODE; + + /* Move socket RX/TX under QP control */ + rv = siw_qp_modify(qp, &qp_attrs, + SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE | + SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD | + SIW_QP_ATTR_MPA); + 
up_write(&qp->state_lock); + + if (rv) + goto error; + + siw_dbg_cep(cep, "id 0x%p, [QP %u]: send mpa reply, %d byte pdata\n", + id, qp_id(qp), params->private_data_len); + + rv = siw_send_mpareqrep(cep, params->private_data, + params->private_data_len); + if (rv != 0) + goto error; + + if (wait_for_peer_rts) { + siw_sk_assign_rtr_upcalls(cep); + } else { + siw_qp_socket_assoc(cep, qp); + rv = siw_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0); + if (rv) + goto error; + } + siw_cep_set_free(cep); + + return 0; +error: + siw_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + + cep->state = SIW_EPSTATE_CLOSED; + + if (cep->cm_id) { + cep->cm_id->rem_ref(id); + cep->cm_id = NULL; + } + if (qp->cep) { + siw_cep_put(cep); + qp->cep = NULL; + } + cep->qp = NULL; + siw_qp_put(qp); + + siw_cep_set_free(cep); + siw_cep_put(cep); + + return rv; +} + +/* + * siw_reject() + * + * Local connection reject case. Send private data back to peer, + * close connection and dereference connection id. + */ +int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) +{ + struct siw_cep *cep = (struct siw_cep *)id->provider_data; + + siw_cep_set_inuse(cep); + siw_cep_put(cep); + + siw_cancel_mpatimer(cep); + + if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { + siw_dbg_cep(cep, "id 0x%p: out of state\n", id); + + siw_cep_set_free(cep); + siw_cep_put(cep); /* put last reference */ + + return -ECONNRESET; + } + siw_dbg_cep(cep, "id 0x%p, cep->state %d, pd_len %d\n", id, cep->state, + pd_len); + + if (__mpa_rr_revision(cep->mpa.hdr.params.bits) >= MPA_REVISION_1) { + cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */ + siw_send_mpareqrep(cep, pdata, pd_len); + } + siw_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + + cep->state = SIW_EPSTATE_CLOSED; + + siw_cep_set_free(cep); + siw_cep_put(cep); + + return 0; +} + +static int siw_listen_address(struct iw_cm_id *id, int backlog, + struct sockaddr *laddr, int addr_family) +{ + struct socket *s; + struct siw_cep *cep = NULL; + struct siw_device *sdev = to_siw_dev(id->device); + int rv = 0, s_val; + + rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s); + if (rv < 0) + return rv; + + /* + * Allow binding local port when still in TIME_WAIT from last close. + */ + s_val = 1; + rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val, + sizeof(s_val)); + if (rv) { + siw_dbg(id->device, "id 0x%p: setsockopt error: %d\n", id, rv); + goto error; + } + rv = s->ops->bind(s, laddr, addr_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)); + if (rv) { + siw_dbg(id->device, "id 0x%p: socket bind error: %d\n", id, rv); + goto error; + } + cep = siw_cep_alloc(sdev); + if (!cep) { + rv = -ENOMEM; + goto error; + } + siw_cep_socket_assoc(cep, s); + + rv = siw_cm_alloc_work(cep, backlog); + if (rv) { + siw_dbg(id->device, + "id 0x%p: alloc_work error %d, backlog %d\n", id, + rv, backlog); + goto error; + } + rv = s->ops->listen(s, backlog); + if (rv) { + siw_dbg(id->device, "id 0x%p: listen error %d\n", id, rv); + goto error; + } + cep->cm_id = id; + id->add_ref(id); + + /* + * In case of a wildcard rdma_listen on a multi-homed device, + * a listener's IWCM id is associated with more than one listening CEP. + * + * We currently use id->provider_data in three different ways: + * + * o For a listener's IWCM id, id->provider_data points to + * the list_head of the list of listening CEPs. 
+ * Uses: siw_create_listen(), siw_destroy_listen() + * + * o For each accepted passive-side IWCM id, id->provider_data + * points to the CEP itself. This is a consequence of + * - siw_cm_upcall() setting event.provider_data = cep and + * - the IWCM's cm_conn_req_handler() setting provider_data of the + * new passive-side IWCM id equal to event.provider_data + * Uses: siw_accept(), siw_reject() + * + * o For an active-side IWCM id, id->provider_data is not used at all. + * + */ + if (!id->provider_data) { + id->provider_data = + kmalloc(sizeof(struct list_head), GFP_KERNEL); + if (!id->provider_data) { + rv = -ENOMEM; + goto error; + } + INIT_LIST_HEAD((struct list_head *)id->provider_data); + } + list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); + cep->state = SIW_EPSTATE_LISTENING; + + if (addr_family == AF_INET) + siw_dbg(id->device, "Listen at laddr %pI4 %u\n", + &(((struct sockaddr_in *)laddr)->sin_addr), + ((struct sockaddr_in *)laddr)->sin_port); + else + siw_dbg(id->device, "Listen at laddr %pI6 %u\n", + &(((struct sockaddr_in6 *)laddr)->sin6_addr), + ((struct sockaddr_in6 *)laddr)->sin6_port); + + return 0; + +error: + siw_dbg(id->device, "failed: %d\n", rv); + + if (cep) { + siw_cep_set_inuse(cep); + + if (cep->cm_id) { + cep->cm_id->rem_ref(cep->cm_id); + cep->cm_id = NULL; + } + cep->sock = NULL; + siw_socket_disassoc(s); + cep->state = SIW_EPSTATE_CLOSED; + + siw_cep_set_free(cep); + siw_cep_put(cep); + } + sock_release(s); + + return rv; +} + +static void siw_drop_listeners(struct iw_cm_id *id) +{ + struct list_head *p, *tmp; + + /* + * In case of a wildcard rdma_listen on a multi-homed device, + * a listener's IWCM id is associated with more than one listening CEP. + */ + list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) { + struct siw_cep *cep = list_entry(p, struct siw_cep, listenq); + + list_del(p); + + siw_dbg_cep(cep, "id 0x%p: drop cep, state %d\n", id, + cep->state); + + siw_cep_set_inuse(cep); + + if (cep->cm_id) { + cep->cm_id->rem_ref(cep->cm_id); + cep->cm_id = NULL; + } + if (cep->sock) { + siw_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + } + cep->state = SIW_EPSTATE_CLOSED; + siw_cep_set_free(cep); + siw_cep_put(cep); + } +} + +/* + * siw_create_listen - Create resources for a listener's IWCM ID @id + * + * Listens on the socket addresses id->local_addr and id->remote_addr. + * + * If the listener's @id provides a specific local IP address, at most one + * listening socket is created and associated with @id. + * + * If the listener's @id provides the wildcard (zero) local IP address, + * a separate listen is performed for each local IP address of the device + * by creating a listening socket and binding to that local IP address. + * + */ +int siw_create_listen(struct iw_cm_id *id, int backlog) +{ + struct net_device *dev = to_siw_dev(id->device)->netdev; + int rv = 0, listeners = 0; + + siw_dbg(id->device, "id 0x%p: backlog %d\n", id, backlog); + + /* + * For each attached address of the interface, create a + * listening socket, if id->local_addr is the wildcard + * IP address or matches the IP address. 
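[Editor's note] As the comment above says, a wildcard listen on a multi-homed device leaves several listening CEPs hanging off one IWCM id, so id->provider_data holds nothing more than a kmalloc'ed list head. Reduced to the bare list_head idiom, with a hypothetical demo_listener in place of struct siw_cep:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/errno.h>

struct demo_listener {
	struct list_head listenq;	/* links all listeners of one cm id */
	int port;
};

/* Add one listener to the cm id's private list, allocating the head
 * on first use (mirrors the id->provider_data handling above).
 */
static int demo_add_listener(void **provider_data, int port)
{
	struct list_head *head = *provider_data;
	struct demo_listener *l;

	if (!head) {
		head = kmalloc(sizeof(*head), GFP_KERNEL);
		if (!head)
			return -ENOMEM;
		INIT_LIST_HEAD(head);
		*provider_data = head;
	}
	l = kzalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return -ENOMEM;
	l->port = port;
	list_add_tail(&l->listenq, head);
	return 0;
}

/* Drop all listeners, then the list head itself. */
static void demo_drop_listeners(void **provider_data)
{
	struct list_head *head = *provider_data, *p, *tmp;

	if (!head)
		return;
	list_for_each_safe(p, tmp, head) {
		struct demo_listener *l =
			list_entry(p, struct demo_listener, listenq);

		list_del(p);
		kfree(l);
	}
	kfree(head);
	*provider_data = NULL;
}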
+ */ + if (id->local_addr.ss_family == AF_INET) { + struct in_device *in_dev = in_dev_get(dev); + struct sockaddr_in s_laddr, *s_raddr; + const struct in_ifaddr *ifa; + + memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr)); + s_raddr = (struct sockaddr_in *)&id->remote_addr; + + siw_dbg(id->device, + "id 0x%p: laddr %pI4:%d, raddr %pI4:%d\n", + id, &s_laddr.sin_addr, ntohs(s_laddr.sin_port), + &s_raddr->sin_addr, ntohs(s_raddr->sin_port)); + + rtnl_lock(); + in_dev_for_each_ifa_rtnl(ifa, in_dev) { + if (ipv4_is_zeronet(s_laddr.sin_addr.s_addr) || + s_laddr.sin_addr.s_addr == ifa->ifa_address) { + s_laddr.sin_addr.s_addr = ifa->ifa_address; + + rv = siw_listen_address(id, backlog, + (struct sockaddr *)&s_laddr, + AF_INET); + if (!rv) + listeners++; + } + } + rtnl_unlock(); + in_dev_put(in_dev); + } else if (id->local_addr.ss_family == AF_INET6) { + struct inet6_dev *in6_dev = in6_dev_get(dev); + struct inet6_ifaddr *ifp; + struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr), + *s_raddr = &to_sockaddr_in6(id->remote_addr); + + siw_dbg(id->device, + "id 0x%p: laddr %pI6:%d, raddr %pI6:%d\n", + id, &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port), + &s_raddr->sin6_addr, ntohs(s_raddr->sin6_port)); + + read_lock_bh(&in6_dev->lock); + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { + struct sockaddr_in6 bind_addr; + + if (ipv6_addr_any(&s_laddr->sin6_addr) || + ipv6_addr_equal(&s_laddr->sin6_addr, &ifp->addr)) { + bind_addr.sin6_family = AF_INET6; + bind_addr.sin6_port = s_laddr->sin6_port; + bind_addr.sin6_flowinfo = 0; + bind_addr.sin6_addr = ifp->addr; + bind_addr.sin6_scope_id = dev->ifindex; + + rv = siw_listen_address(id, backlog, + (struct sockaddr *)&bind_addr, + AF_INET6); + if (!rv) + listeners++; + } + } + read_unlock_bh(&in6_dev->lock); + + in6_dev_put(in6_dev); + } else { + return -EAFNOSUPPORT; + } + if (listeners) + rv = 0; + else if (!rv) + rv = -EINVAL; + + siw_dbg(id->device, "id 0x%p: %s\n", id, rv ? "FAIL" : "OK"); + + return rv; +} + +int siw_destroy_listen(struct iw_cm_id *id) +{ + siw_dbg(id->device, "id 0x%p\n", id); + + if (!id->provider_data) { + siw_dbg(id->device, "id 0x%p: no cep(s)\n", id); + return 0; + } + siw_drop_listeners(id); + kfree(id->provider_data); + id->provider_data = NULL; + + return 0; +} + +int siw_cm_init(void) +{ + /* + * create_single_workqueue for strict ordering + */ + siw_cm_wq = create_singlethread_workqueue("siw_cm_wq"); + if (!siw_cm_wq) + return -ENOMEM; + + return 0; +} + +void siw_cm_exit(void) +{ + if (siw_cm_wq) { + flush_workqueue(siw_cm_wq); + destroy_workqueue(siw_cm_wq); + } +} diff --git a/drivers/infiniband/sw/siw/siw_cm.h b/drivers/infiniband/sw/siw/siw_cm.h new file mode 100644 index 000000000000..8c59cb3e2868 --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_cm.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Greg Joyce <greg@opengridcomputing.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ +/* Copyright (c) 2017, Open Grid Computing, Inc. 
*/ + +#ifndef _SIW_CM_H +#define _SIW_CM_H + +#include <net/sock.h> +#include <linux/tcp.h> + +#include <rdma/iw_cm.h> + +enum siw_cep_state { + SIW_EPSTATE_IDLE = 1, + SIW_EPSTATE_LISTENING, + SIW_EPSTATE_CONNECTING, + SIW_EPSTATE_AWAIT_MPAREQ, + SIW_EPSTATE_RECVD_MPAREQ, + SIW_EPSTATE_AWAIT_MPAREP, + SIW_EPSTATE_RDMA_MODE, + SIW_EPSTATE_CLOSED +}; + +struct siw_mpa_info { + struct mpa_rr hdr; /* peer mpa hdr in host byte order */ + struct mpa_v2_data v2_ctrl; + struct mpa_v2_data v2_ctrl_req; + char *pdata; + int bytes_rcvd; +}; + +struct siw_device; + +struct siw_cep { + struct iw_cm_id *cm_id; + struct siw_device *sdev; + struct list_head devq; + spinlock_t lock; + struct kref ref; + int in_use; + wait_queue_head_t waitq; + enum siw_cep_state state; + + struct list_head listenq; + struct siw_cep *listen_cep; + + struct siw_qp *qp; + struct socket *sock; + + struct siw_cm_work *mpa_timer; + struct list_head work_freelist; + + struct siw_mpa_info mpa; + int ord; + int ird; + bool enhanced_rdma_conn_est; + + /* Saved upcalls of socket */ + void (*sk_state_change)(struct sock *sk); + void (*sk_data_ready)(struct sock *sk); + void (*sk_write_space)(struct sock *sk); + void (*sk_error_report)(struct sock *sk); +}; + +/* + * Connection initiator waits 10 seconds to receive an + * MPA reply after sending out MPA request. Reponder waits for + * 5 seconds for MPA request to arrive if new TCP connection + * was set up. + */ +#define MPAREQ_TIMEOUT (HZ * 10) +#define MPAREP_TIMEOUT (HZ * 5) + +enum siw_work_type { + SIW_CM_WORK_ACCEPT = 1, + SIW_CM_WORK_READ_MPAHDR, + SIW_CM_WORK_CLOSE_LLP, /* close socket */ + SIW_CM_WORK_PEER_CLOSE, /* socket indicated peer close */ + SIW_CM_WORK_MPATIMEOUT +}; + +struct siw_cm_work { + struct delayed_work work; + struct list_head list; + enum siw_work_type type; + struct siw_cep *cep; +}; + +#define to_sockaddr_in(a) (*(struct sockaddr_in *)(&(a))) +#define to_sockaddr_in6(a) (*(struct sockaddr_in6 *)(&(a))) + +static inline int getname_peer(struct socket *s, struct sockaddr_storage *a) +{ + return s->ops->getname(s, (struct sockaddr *)a, 1); +} + +static inline int getname_local(struct socket *s, struct sockaddr_storage *a) +{ + return s->ops->getname(s, (struct sockaddr *)a, 0); +} + +static inline int ksock_recv(struct socket *sock, char *buf, size_t size, + int flags) +{ + struct kvec iov = { buf, size }; + struct msghdr msg = { .msg_name = NULL, .msg_flags = flags }; + + return kernel_recvmsg(sock, &msg, &iov, 1, size, flags); +} + +int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *parm); +int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *param); +int siw_reject(struct iw_cm_id *id, const void *data, u8 len); +int siw_create_listen(struct iw_cm_id *id, int backlog); +int siw_destroy_listen(struct iw_cm_id *id); + +void siw_cep_get(struct siw_cep *cep); +void siw_cep_put(struct siw_cep *cep); +int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type); + +int siw_cm_init(void); +void siw_cm_exit(void); + +/* + * TCP socket interface + */ +#define sk_to_qp(sk) (((struct siw_cep *)((sk)->sk_user_data))->qp) +#define sk_to_cep(sk) ((struct siw_cep *)((sk)->sk_user_data)) + +#endif diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c new file mode 100644 index 000000000000..e381ae9b7d62 --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM 
Corporation */ + +#include <linux/errno.h> +#include <linux/types.h> + +#include <rdma/ib_verbs.h> + +#include "siw.h" + +static int map_wc_opcode[SIW_NUM_OPCODES] = { + [SIW_OP_WRITE] = IB_WC_RDMA_WRITE, + [SIW_OP_SEND] = IB_WC_SEND, + [SIW_OP_SEND_WITH_IMM] = IB_WC_SEND, + [SIW_OP_READ] = IB_WC_RDMA_READ, + [SIW_OP_READ_LOCAL_INV] = IB_WC_RDMA_READ, + [SIW_OP_COMP_AND_SWAP] = IB_WC_COMP_SWAP, + [SIW_OP_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [SIW_OP_INVAL_STAG] = IB_WC_LOCAL_INV, + [SIW_OP_REG_MR] = IB_WC_REG_MR, + [SIW_OP_RECEIVE] = IB_WC_RECV, + [SIW_OP_READ_RESPONSE] = -1 /* not used */ +}; + +static struct { + enum siw_wc_status siw; + enum ib_wc_status ib; +} map_cqe_status[SIW_NUM_WC_STATUS] = { + { SIW_WC_SUCCESS, IB_WC_SUCCESS }, + { SIW_WC_LOC_LEN_ERR, IB_WC_LOC_LEN_ERR }, + { SIW_WC_LOC_PROT_ERR, IB_WC_LOC_PROT_ERR }, + { SIW_WC_LOC_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR }, + { SIW_WC_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR }, + { SIW_WC_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR }, + { SIW_WC_LOC_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR }, + { SIW_WC_REM_ACCESS_ERR, IB_WC_REM_ACCESS_ERR }, + { SIW_WC_REM_INV_REQ_ERR, IB_WC_REM_INV_REQ_ERR }, + { SIW_WC_GENERAL_ERR, IB_WC_GENERAL_ERR } +}; + +/* + * Reap one CQE from the CQ. Only used by kernel clients + * during CQ normal operation. Might be called during CQ + * flush for user mapped CQE array as well. + */ +int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) +{ + struct siw_cqe *cqe; + unsigned long flags; + + spin_lock_irqsave(&cq->lock, flags); + + cqe = &cq->queue[cq->cq_get % cq->num_cqe]; + if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { + memset(wc, 0, sizeof(*wc)); + wc->wr_id = cqe->id; + wc->status = map_cqe_status[cqe->status].ib; + wc->opcode = map_wc_opcode[cqe->opcode]; + wc->byte_len = cqe->bytes; + + /* + * During CQ flush, also user land CQE's may get + * reaped here, which do not hold a QP reference + * and do not qualify for memory extension verbs. + */ + if (likely(cq->kernel_verbs)) { + if (cqe->flags & SIW_WQE_REM_INVAL) { + wc->ex.invalidate_rkey = cqe->inval_stag; + wc->wc_flags = IB_WC_WITH_INVALIDATE; + } + wc->qp = cqe->base_qp; + siw_dbg_cq(cq, "idx %u, type %d, flags %2x, id 0x%p\n", + cq->cq_get % cq->num_cqe, cqe->opcode, + cqe->flags, (void *)cqe->id); + } + WRITE_ONCE(cqe->flags, 0); + cq->cq_get++; + + spin_unlock_irqrestore(&cq->lock, flags); + + return 1; + } + spin_unlock_irqrestore(&cq->lock, flags); + + return 0; +} + +/* + * siw_cq_flush() + * + * Flush all CQ elements. 
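[Editor's note] siw_reap_cqe() above hands completions to the consumer through a single flags word: the producer fills the CQE and sets SIW_WQE_VALID last, the reaper copies the fields and clears the flag, which is also what allows a user-mapped CQ to be read without the kernel lock. A minimal single-producer/single-consumer sketch of that handshake (the real code additionally serializes kernel reapers with cq->lock):

#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/barrier.h>

#define DEMO_CQE_VALID	1

struct demo_cqe {
	u64 id;
	u32 bytes;
	u32 flags;	/* DEMO_CQE_VALID while the entry is ready to reap */
};

/* Producer: fill the payload first, then publish it via the flag. */
static void demo_post_cqe(struct demo_cqe *cqe, u64 id, u32 bytes)
{
	cqe->id = id;
	cqe->bytes = bytes;
	smp_wmb();	/* order payload stores before the flag store */
	WRITE_ONCE(cqe->flags, DEMO_CQE_VALID);
}

/* Consumer: touch the payload only after seeing the flag, then return
 * the slot by clearing it.
 */
static bool demo_reap_cqe(struct demo_cqe *cqe, u64 *id, u32 *bytes)
{
	if (!(READ_ONCE(cqe->flags) & DEMO_CQE_VALID))
		return false;
	smp_rmb();	/* pairs with the producer's smp_wmb() */
	*id = cqe->id;
	*bytes = cqe->bytes;
	WRITE_ONCE(cqe->flags, 0);
	return true;
}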
+ */ +void siw_cq_flush(struct siw_cq *cq) +{ + struct ib_wc wc; + + while (siw_reap_cqe(cq, &wc)) + ; +} diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c new file mode 100644 index 000000000000..f55c4e80aea4 --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -0,0 +1,685 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <net/net_namespace.h> +#include <linux/rtnetlink.h> +#include <linux/if_arp.h> +#include <linux/list.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/dma-mapping.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/rdma_netlink.h> +#include <linux/kthread.h> + +#include "siw.h" +#include "siw_verbs.h" + +MODULE_AUTHOR("Bernard Metzler"); +MODULE_DESCRIPTION("Software iWARP Driver"); +MODULE_LICENSE("Dual BSD/GPL"); + +/* transmit from user buffer, if possible */ +const bool zcopy_tx = true; + +/* Restrict usage of GSO, if hardware peer iwarp is unable to process + * large packets. try_gso = true lets siw try to use local GSO, + * if peer agrees. Not using GSO severly limits siw maximum tx bandwidth. + */ +const bool try_gso; + +/* Attach siw also with loopback devices */ +const bool loopback_enabled = true; + +/* We try to negotiate CRC on, if true */ +const bool mpa_crc_required; + +/* MPA CRC on/off enforced */ +const bool mpa_crc_strict; + +/* Control TCP_NODELAY socket option */ +const bool siw_tcp_nagle; + +/* Select MPA version to be used during connection setup */ +u_char mpa_version = MPA_REVISION_2; + +/* Selects MPA P2P mode (additional handshake during connection + * setup, if true. + */ +const bool peer_to_peer; + +struct task_struct *siw_tx_thread[NR_CPUS]; +struct crypto_shash *siw_crypto_shash; + +static int siw_device_register(struct siw_device *sdev, const char *name) +{ + struct ib_device *base_dev = &sdev->base_dev; + static int dev_id = 1; + int rv; + + rv = ib_register_device(base_dev, name); + if (rv) { + pr_warn("siw: device registration error %d\n", rv); + return rv; + } + sdev->vendor_part_id = dev_id++; + + siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr); + + return 0; +} + +static void siw_device_cleanup(struct ib_device *base_dev) +{ + struct siw_device *sdev = to_siw_dev(base_dev); + + xa_destroy(&sdev->qp_xa); + xa_destroy(&sdev->mem_xa); +} + +static int siw_create_tx_threads(void) +{ + int cpu, assigned = 0; + + for_each_online_cpu(cpu) { + /* Skip HT cores */ + if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) + continue; + + siw_tx_thread[cpu] = + kthread_create(siw_run_sq, (unsigned long *)(long)cpu, + "siw_tx/%d", cpu); + if (IS_ERR(siw_tx_thread[cpu])) { + siw_tx_thread[cpu] = NULL; + continue; + } + kthread_bind(siw_tx_thread[cpu], cpu); + + wake_up_process(siw_tx_thread[cpu]); + assigned++; + } + return assigned; +} + +static int siw_dev_qualified(struct net_device *netdev) +{ + /* + * Additional hardware support can be added here + * (e.g. ARPHRD_FDDI, ARPHRD_ATM, ...) - see + * <linux/if_arp.h> for type identifiers. 
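[Editor's note] siw_create_tx_threads() above starts one transmit kthread per physical core and skips the remaining SMT siblings. A condensed sketch of the create/bind/wake and stop halves of that pattern; demo_run stands in for siw_run_sq, and the sibling test uses cpumask_first(), which expresses the same intent as the modulo check above:

#include <linux/kthread.h>
#include <linux/cpumask.h>
#include <linux/topology.h>
#include <linux/sched.h>
#include <linux/err.h>

static struct task_struct *demo_thread[NR_CPUS];

static int demo_run(void *arg)
{
	long cpu = (long)arg;

	while (!kthread_should_stop()) {
		/* ... drain per-CPU transmit work for 'cpu' ... */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}

static int demo_start_threads(void)
{
	int cpu, started = 0;

	for_each_online_cpu(cpu) {
		/* Only the first sibling of each core gets a thread. */
		if (cpu != cpumask_first(topology_sibling_cpumask(cpu)))
			continue;

		demo_thread[cpu] = kthread_create(demo_run, (void *)(long)cpu,
						  "demo_tx/%d", cpu);
		if (IS_ERR(demo_thread[cpu])) {
			demo_thread[cpu] = NULL;
			continue;
		}
		kthread_bind(demo_thread[cpu], cpu);
		wake_up_process(demo_thread[cpu]);
		started++;
	}
	return started;
}

static void demo_stop_threads(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		if (demo_thread[cpu])
			kthread_stop(demo_thread[cpu]);
}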
+ */ + if (netdev->type == ARPHRD_ETHER || netdev->type == ARPHRD_IEEE802 || + (netdev->type == ARPHRD_LOOPBACK && loopback_enabled)) + return 1; + + return 0; +} + +static DEFINE_PER_CPU(atomic_t, siw_use_cnt); + +static struct { + struct cpumask **tx_valid_cpus; + int num_nodes; +} siw_cpu_info; + +static int siw_init_cpulist(void) +{ + int i, num_nodes = num_possible_nodes(); + + memset(siw_tx_thread, 0, sizeof(siw_tx_thread)); + + siw_cpu_info.num_nodes = num_nodes; + + siw_cpu_info.tx_valid_cpus = + kcalloc(num_nodes, sizeof(struct cpumask *), GFP_KERNEL); + if (!siw_cpu_info.tx_valid_cpus) { + siw_cpu_info.num_nodes = 0; + return -ENOMEM; + } + for (i = 0; i < siw_cpu_info.num_nodes; i++) { + siw_cpu_info.tx_valid_cpus[i] = + kzalloc(sizeof(struct cpumask), GFP_KERNEL); + if (!siw_cpu_info.tx_valid_cpus[i]) + goto out_err; + + cpumask_clear(siw_cpu_info.tx_valid_cpus[i]); + } + for_each_possible_cpu(i) + cpumask_set_cpu(i, siw_cpu_info.tx_valid_cpus[cpu_to_node(i)]); + + return 0; + +out_err: + siw_cpu_info.num_nodes = 0; + while (i) { + kfree(siw_cpu_info.tx_valid_cpus[i]); + siw_cpu_info.tx_valid_cpus[i--] = NULL; + } + kfree(siw_cpu_info.tx_valid_cpus); + siw_cpu_info.tx_valid_cpus = NULL; + + return -ENOMEM; +} + +static void siw_destroy_cpulist(void) +{ + int i = 0; + + while (i < siw_cpu_info.num_nodes) + kfree(siw_cpu_info.tx_valid_cpus[i++]); + + kfree(siw_cpu_info.tx_valid_cpus); +} + +/* + * Choose CPU with least number of active QP's from NUMA node of + * TX interface. + */ +int siw_get_tx_cpu(struct siw_device *sdev) +{ + const struct cpumask *tx_cpumask; + int i, num_cpus, cpu, min_use, node = sdev->numa_node, tx_cpu = -1; + + if (node < 0) + tx_cpumask = cpu_online_mask; + else + tx_cpumask = siw_cpu_info.tx_valid_cpus[node]; + + num_cpus = cpumask_weight(tx_cpumask); + if (!num_cpus) { + /* no CPU on this NUMA node */ + tx_cpumask = cpu_online_mask; + num_cpus = cpumask_weight(tx_cpumask); + } + if (!num_cpus) + goto out; + + cpu = cpumask_first(tx_cpumask); + + for (i = 0, min_use = SIW_MAX_QP; i < num_cpus; + i++, cpu = cpumask_next(cpu, tx_cpumask)) { + int usage; + + /* Skip any cores which have no TX thread */ + if (!siw_tx_thread[cpu]) + continue; + + usage = atomic_read(&per_cpu(siw_use_cnt, cpu)); + if (usage <= min_use) { + tx_cpu = cpu; + min_use = usage; + } + } + siw_dbg(&sdev->base_dev, + "tx cpu %d, node %d, %d qp's\n", tx_cpu, node, min_use); + +out: + if (tx_cpu >= 0) + atomic_inc(&per_cpu(siw_use_cnt, tx_cpu)); + else + pr_warn("siw: no tx cpu found\n"); + + return tx_cpu; +} + +void siw_put_tx_cpu(int cpu) +{ + atomic_dec(&per_cpu(siw_use_cnt, cpu)); +} + +static struct ib_qp *siw_get_base_qp(struct ib_device *base_dev, int id) +{ + struct siw_qp *qp = siw_qp_id2obj(to_siw_dev(base_dev), id); + + if (qp) { + /* + * siw_qp_id2obj() increments object reference count + */ + siw_qp_put(qp); + return qp->ib_qp; + } + return NULL; +} + +static void siw_verbs_sq_flush(struct ib_qp *base_qp) +{ + struct siw_qp *qp = to_siw_qp(base_qp); + + down_write(&qp->state_lock); + siw_sq_flush(qp); + up_write(&qp->state_lock); +} + +static void siw_verbs_rq_flush(struct ib_qp *base_qp) +{ + struct siw_qp *qp = to_siw_qp(base_qp); + + down_write(&qp->state_lock); + siw_rq_flush(qp); + up_write(&qp->state_lock); +} + +static const struct ib_device_ops siw_device_ops = { + .owner = THIS_MODULE, + .uverbs_abi_ver = SIW_ABI_VERSION, + .driver_id = RDMA_DRIVER_SIW, + + .alloc_mr = siw_alloc_mr, + .alloc_pd = siw_alloc_pd, + .alloc_ucontext = siw_alloc_ucontext, + .create_cq = 
siw_create_cq, + .create_qp = siw_create_qp, + .create_srq = siw_create_srq, + .dealloc_driver = siw_device_cleanup, + .dealloc_pd = siw_dealloc_pd, + .dealloc_ucontext = siw_dealloc_ucontext, + .dereg_mr = siw_dereg_mr, + .destroy_cq = siw_destroy_cq, + .destroy_qp = siw_destroy_qp, + .destroy_srq = siw_destroy_srq, + .drain_rq = siw_verbs_rq_flush, + .drain_sq = siw_verbs_sq_flush, + .get_dma_mr = siw_get_dma_mr, + .get_port_immutable = siw_get_port_immutable, + .iw_accept = siw_accept, + .iw_add_ref = siw_qp_get_ref, + .iw_connect = siw_connect, + .iw_create_listen = siw_create_listen, + .iw_destroy_listen = siw_destroy_listen, + .iw_get_qp = siw_get_base_qp, + .iw_reject = siw_reject, + .iw_rem_ref = siw_qp_put_ref, + .map_mr_sg = siw_map_mr_sg, + .mmap = siw_mmap, + .modify_qp = siw_verbs_modify_qp, + .modify_srq = siw_modify_srq, + .poll_cq = siw_poll_cq, + .post_recv = siw_post_receive, + .post_send = siw_post_send, + .post_srq_recv = siw_post_srq_recv, + .query_device = siw_query_device, + .query_gid = siw_query_gid, + .query_pkey = siw_query_pkey, + .query_port = siw_query_port, + .query_qp = siw_query_qp, + .query_srq = siw_query_srq, + .req_notify_cq = siw_req_notify_cq, + .reg_user_mr = siw_reg_user_mr, + + INIT_RDMA_OBJ_SIZE(ib_cq, siw_cq, base_cq), + INIT_RDMA_OBJ_SIZE(ib_pd, siw_pd, base_pd), + INIT_RDMA_OBJ_SIZE(ib_srq, siw_srq, base_srq), + INIT_RDMA_OBJ_SIZE(ib_ucontext, siw_ucontext, base_ucontext), +}; + +static struct siw_device *siw_device_create(struct net_device *netdev) +{ + struct siw_device *sdev = NULL; + struct ib_device *base_dev; + struct device *parent = netdev->dev.parent; + int rv; + + if (!parent) { + /* + * The loopback device has no parent device, + * so it appears as a top-level device. To support + * loopback device connectivity, take this device + * as the parent device. Skip all other devices + * w/o parent device. 
+ */ + if (netdev->type != ARPHRD_LOOPBACK) { + pr_warn("siw: device %s error: no parent device\n", + netdev->name); + return NULL; + } + parent = &netdev->dev; + } + sdev = ib_alloc_device(siw_device, base_dev); + if (!sdev) + return NULL; + + base_dev = &sdev->base_dev; + + sdev->netdev = netdev; + + if (netdev->type != ARPHRD_LOOPBACK) { + memcpy(&base_dev->node_guid, netdev->dev_addr, 6); + } else { + /* + * The loopback device does not have a HW address, + * but connection mangagement lib expects gid != 0 + */ + size_t gidlen = min_t(size_t, strlen(base_dev->name), 6); + + memcpy(&base_dev->node_guid, base_dev->name, gidlen); + } + base_dev->uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + base_dev->node_type = RDMA_NODE_RNIC; + memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON, + sizeof(SIW_NODE_DESC_COMMON)); + + /* + * Current model (one-to-one device association): + * One Softiwarp device per net_device or, equivalently, + * per physical port. + */ + base_dev->phys_port_cnt = 1; + base_dev->dev.parent = parent; + base_dev->dev.dma_ops = &dma_virt_ops; + base_dev->num_comp_vectors = num_possible_cpus(); + + ib_set_device_ops(base_dev, &siw_device_ops); + rv = ib_device_set_netdev(base_dev, netdev, 1); + if (rv) + goto error; + + memcpy(base_dev->iw_ifname, netdev->name, + sizeof(base_dev->iw_ifname)); + + /* Disable TCP port mapping */ + base_dev->iw_driver_flags = IW_F_NO_PORT_MAP, + + sdev->attrs.max_qp = SIW_MAX_QP; + sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; + sdev->attrs.max_ord = SIW_MAX_ORD_QP; + sdev->attrs.max_ird = SIW_MAX_IRD_QP; + sdev->attrs.max_sge = SIW_MAX_SGE; + sdev->attrs.max_sge_rd = SIW_MAX_SGE_RD; + sdev->attrs.max_cq = SIW_MAX_CQ; + sdev->attrs.max_cqe = SIW_MAX_CQE; + sdev->attrs.max_mr = SIW_MAX_MR; + sdev->attrs.max_pd = SIW_MAX_PD; + sdev->attrs.max_mw = SIW_MAX_MW; + sdev->attrs.max_fmr = SIW_MAX_FMR; + sdev->attrs.max_srq = SIW_MAX_SRQ; + sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; + sdev->attrs.max_srq_sge = SIW_MAX_SGE; + + xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); + xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1); + + INIT_LIST_HEAD(&sdev->cep_list); + INIT_LIST_HEAD(&sdev->qp_list); + + atomic_set(&sdev->num_ctx, 0); + atomic_set(&sdev->num_srq, 0); + atomic_set(&sdev->num_qp, 0); + atomic_set(&sdev->num_cq, 0); + atomic_set(&sdev->num_mr, 0); + atomic_set(&sdev->num_pd, 0); + + sdev->numa_node = dev_to_node(parent); + spin_lock_init(&sdev->lock); + + return sdev; +error: + ib_dealloc_device(base_dev); + + return NULL; +} + +/* + * Network link becomes unavailable. Mark all + * affected QP's accordingly. 
+ */ +static void siw_netdev_down(struct work_struct *work) +{ + struct siw_device *sdev = + container_of(work, struct siw_device, netdev_down); + + struct siw_qp_attrs qp_attrs; + struct list_head *pos, *tmp; + + memset(&qp_attrs, 0, sizeof(qp_attrs)); + qp_attrs.state = SIW_QP_STATE_ERROR; + + list_for_each_safe(pos, tmp, &sdev->qp_list) { + struct siw_qp *qp = list_entry(pos, struct siw_qp, devq); + + down_write(&qp->state_lock); + WARN_ON(siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE)); + up_write(&qp->state_lock); + } + ib_device_put(&sdev->base_dev); +} + +static void siw_device_goes_down(struct siw_device *sdev) +{ + if (ib_device_try_get(&sdev->base_dev)) { + INIT_WORK(&sdev->netdev_down, siw_netdev_down); + schedule_work(&sdev->netdev_down); + } +} + +static int siw_netdev_event(struct notifier_block *nb, unsigned long event, + void *arg) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(arg); + struct ib_device *base_dev; + struct siw_device *sdev; + + dev_dbg(&netdev->dev, "siw: event %lu\n", event); + + if (dev_net(netdev) != &init_net) + return NOTIFY_OK; + + base_dev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_SIW); + if (!base_dev) + return NOTIFY_OK; + + sdev = to_siw_dev(base_dev); + + switch (event) { + case NETDEV_UP: + sdev->state = IB_PORT_ACTIVE; + siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); + break; + + case NETDEV_GOING_DOWN: + siw_device_goes_down(sdev); + break; + + case NETDEV_DOWN: + sdev->state = IB_PORT_DOWN; + siw_port_event(sdev, 1, IB_EVENT_PORT_ERR); + break; + + case NETDEV_REGISTER: + /* + * Device registration now handled only by + * rdma netlink commands. So it shall be impossible + * to end up here with a valid siw device. + */ + siw_dbg(base_dev, "unexpected NETDEV_REGISTER event\n"); + break; + + case NETDEV_UNREGISTER: + ib_unregister_device_queued(&sdev->base_dev); + break; + + case NETDEV_CHANGEADDR: + siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE); + break; + /* + * Todo: Below netdev events are currently not handled. + */ + case NETDEV_CHANGEMTU: + case NETDEV_CHANGE: + break; + + default: + break; + } + ib_device_put(&sdev->base_dev); + + return NOTIFY_OK; +} + +static struct notifier_block siw_netdev_nb = { + .notifier_call = siw_netdev_event, +}; + +static int siw_newlink(const char *basedev_name, struct net_device *netdev) +{ + struct ib_device *base_dev; + struct siw_device *sdev = NULL; + int rv = -ENOMEM; + + if (!siw_dev_qualified(netdev)) + return -EINVAL; + + base_dev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_SIW); + if (base_dev) { + ib_device_put(base_dev); + return -EEXIST; + } + sdev = siw_device_create(netdev); + if (sdev) { + dev_dbg(&netdev->dev, "siw: new device\n"); + + if (netif_running(netdev) && netif_carrier_ok(netdev)) + sdev->state = IB_PORT_ACTIVE; + else + sdev->state = IB_PORT_DOWN; + + rv = siw_device_register(sdev, basedev_name); + if (rv) + ib_dealloc_device(&sdev->base_dev); + } + return rv; +} + +static struct rdma_link_ops siw_link_ops = { + .type = "siw", + .newlink = siw_newlink, +}; + +/* + * siw_init_module - Initialize Softiwarp module and register with netdev + * subsystem. 
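[Editor's note] siw_netdev_event() above runs inside the netdev notifier chain, so it cannot do the heavy QP teardown itself; NETDEV_GOING_DOWN only grabs a device reference and schedules siw_netdev_down() as a work item. The skeleton of that defer-from-notifier pattern, tracking a single device statically for brevity:

#include <linux/netdevice.h>
#include <linux/workqueue.h>

struct demo_dev {
	struct net_device *netdev;
	struct work_struct down_work;
};

static struct demo_dev *demo_the_dev;	/* one tracked device, for brevity */

/* Runs later in process context, free to sleep and take locks. */
static void demo_netdev_down(struct work_struct *work)
{
	struct demo_dev *dd = container_of(work, struct demo_dev, down_work);

	pr_info("demo: %s going down, flushing queues\n", dd->netdev->name);
	/* ... move affected QPs to error state here ... */
}

static int demo_netdev_event(struct notifier_block *nb, unsigned long event,
			     void *arg)
{
	struct net_device *netdev = netdev_notifier_info_to_dev(arg);

	if (!demo_the_dev || demo_the_dev->netdev != netdev)
		return NOTIFY_OK;

	if (event == NETDEV_GOING_DOWN) {
		INIT_WORK(&demo_the_dev->down_work, demo_netdev_down);
		schedule_work(&demo_the_dev->down_work);
	}
	return NOTIFY_OK;
}

static struct notifier_block demo_nb = {
	.notifier_call = demo_netdev_event,
};

/* register_netdevice_notifier(&demo_nb) at module init,
 * unregister_netdevice_notifier(&demo_nb) at exit.
 */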
+ */ +static __init int siw_init_module(void) +{ + int rv; + int nr_cpu; + + if (SENDPAGE_THRESH < SIW_MAX_INLINE) { + pr_info("siw: sendpage threshold too small: %u\n", + (int)SENDPAGE_THRESH); + rv = -EINVAL; + goto out_error; + } + rv = siw_init_cpulist(); + if (rv) + goto out_error; + + rv = siw_cm_init(); + if (rv) + goto out_error; + + if (!siw_create_tx_threads()) { + pr_info("siw: Could not start any TX thread\n"); + goto out_error; + } + /* + * Locate CRC32 algorithm. If unsuccessful, fail + * loading siw only, if CRC is required. + */ + siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0); + if (IS_ERR(siw_crypto_shash)) { + pr_info("siw: Loading CRC32c failed: %ld\n", + PTR_ERR(siw_crypto_shash)); + siw_crypto_shash = NULL; + if (mpa_crc_required) { + rv = -EOPNOTSUPP; + goto out_error; + } + } + rv = register_netdevice_notifier(&siw_netdev_nb); + if (rv) + goto out_error; + + rdma_link_register(&siw_link_ops); + + pr_info("SoftiWARP attached\n"); + return 0; + +out_error: + for (nr_cpu = 0; nr_cpu < nr_cpu_ids; nr_cpu++) { + if (siw_tx_thread[nr_cpu]) { + siw_stop_tx_thread(nr_cpu); + siw_tx_thread[nr_cpu] = NULL; + } + } + if (siw_crypto_shash) + crypto_free_shash(siw_crypto_shash); + + pr_info("SoftIWARP attach failed. Error: %d\n", rv); + + siw_cm_exit(); + siw_destroy_cpulist(); + + return rv; +} + +static void __exit siw_exit_module(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (siw_tx_thread[cpu]) { + siw_stop_tx_thread(cpu); + siw_tx_thread[cpu] = NULL; + } + } + unregister_netdevice_notifier(&siw_netdev_nb); + rdma_link_unregister(&siw_link_ops); + ib_unregister_driver(RDMA_DRIVER_SIW); + + siw_cm_exit(); + + siw_destroy_cpulist(); + + if (siw_crypto_shash) + crypto_free_shash(siw_crypto_shash); + + pr_info("SoftiWARP detached\n"); +} + +module_init(siw_init_module); +module_exit(siw_exit_module); + +MODULE_ALIAS_RDMA_LINK("siw"); diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c new file mode 100644 index 000000000000..67171c82b0c4 --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#include <linux/gfp.h> +#include <rdma/ib_verbs.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/sched/mm.h> +#include <linux/resource.h> + +#include "siw.h" +#include "siw_mem.h" + +/* + * Stag lookup is based on its index part only (24 bits). + * The code avoids special Stag of zero and tries to randomize + * STag values between 1 and SIW_STAG_MAX_INDEX. + */ +int siw_mem_add(struct siw_device *sdev, struct siw_mem *m) +{ + struct xa_limit limit = XA_LIMIT(1, 0x00ffffff); + u32 id, next; + + get_random_bytes(&next, 4); + next &= 0x00ffffff; + + if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next, + GFP_KERNEL) < 0) + return -ENOMEM; + + /* Set the STag index part */ + m->stag = id << 8; + + siw_dbg_mem(m, "new MEM object\n"); + + return 0; +} + +/* + * siw_mem_id2obj() + * + * resolves memory from stag given by id. 
might be called from: + * o process context before sending out of sgl, or + * o in softirq when resolving target memory + */ +struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index) +{ + struct siw_mem *mem; + + rcu_read_lock(); + mem = xa_load(&sdev->mem_xa, stag_index); + if (likely(mem && kref_get_unless_zero(&mem->ref))) { + rcu_read_unlock(); + return mem; + } + rcu_read_unlock(); + + return NULL; +} + +static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages, + bool dirty) +{ + struct page **p = chunk->plist; + + while (num_pages--) { + if (!PageDirty(*p) && dirty) + put_user_pages_dirty_lock(p, 1); + else + put_user_page(*p); + p++; + } +} + +void siw_umem_release(struct siw_umem *umem, bool dirty) +{ + struct mm_struct *mm_s = umem->owning_mm; + int i, num_pages = umem->num_pages; + + for (i = 0; num_pages; i++) { + int to_free = min_t(int, PAGES_PER_CHUNK, num_pages); + + siw_free_plist(&umem->page_chunk[i], to_free, + umem->writable && dirty); + kfree(umem->page_chunk[i].plist); + num_pages -= to_free; + } + atomic64_sub(umem->num_pages, &mm_s->pinned_vm); + + mmdrop(mm_s); + kfree(umem->page_chunk); + kfree(umem); +} + +int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, + u64 start, u64 len, int rights) +{ + struct siw_device *sdev = to_siw_dev(pd->device); + struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL); + struct xa_limit limit = XA_LIMIT(1, 0x00ffffff); + u32 id, next; + + if (!mem) + return -ENOMEM; + + mem->mem_obj = mem_obj; + mem->stag_valid = 0; + mem->sdev = sdev; + mem->va = start; + mem->len = len; + mem->pd = pd; + mem->perms = rights & IWARP_ACCESS_MASK; + kref_init(&mem->ref); + + mr->mem = mem; + + get_random_bytes(&next, 4); + next &= 0x00ffffff; + + if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next, + GFP_KERNEL) < 0) { + kfree(mem); + return -ENOMEM; + } + /* Set the STag index part */ + mem->stag = id << 8; + mr->base_mr.lkey = mr->base_mr.rkey = mem->stag; + + return 0; +} + +void siw_mr_drop_mem(struct siw_mr *mr) +{ + struct siw_mem *mem = mr->mem, *found; + + mem->stag_valid = 0; + + /* make STag invalid visible asap */ + smp_mb(); + + found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8); + WARN_ON(found != mem); + siw_mem_put(mem); +} + +void siw_free_mem(struct kref *ref) +{ + struct siw_mem *mem = container_of(ref, struct siw_mem, ref); + + siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n"); + + if (!mem->is_mw && mem->mem_obj) { + if (mem->is_pbl == 0) + siw_umem_release(mem->umem, true); + else + kfree(mem->pbl); + } + kfree(mem); +} + +/* + * siw_check_mem() + * + * Check protection domain, STAG state, access permissions and + * address range for memory object. + * + * @pd: Protection Domain memory should belong to + * @mem: memory to be checked + * @addr: starting addr of mem + * @perms: requested access permissions + * @len: len of memory interval to be checked + * + */ +int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr, + enum ib_access_flags perms, int len) +{ + if (!mem->stag_valid) { + siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag); + return -E_STAG_INVALID; + } + if (mem->pd != pd) { + siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag); + return -E_PD_MISMATCH; + } + /* + * check access permissions + */ + if ((mem->perms & perms) < perms) { + siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n", + mem->perms, perms); + return -E_ACCESS_PERM; + } + /* + * Check if access falls into valid memory interval. 
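[Editor's note] siw_mem_add() and siw_mr_add_mem() above hand out the 24-bit STag index cyclically from a random starting point, and siw_mem_id2obj() resolves it back without a lock, succeeding only if the object's refcount can still be raised. Both halves reduced to the XArray/kref API; the kfree_rcu() makes the lockless lookup safe in this standalone sketch, while the driver relies on its own lifetime rules:

#include <linux/xarray.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/random.h>
#include <linux/slab.h>

struct demo_mem {
	struct kref ref;
	struct rcu_head rcu;
	u32 stag;
};

static DEFINE_XARRAY_ALLOC1(demo_mem_xa);	/* index 0 is never handed out */

static void demo_mem_free(struct kref *ref)
{
	struct demo_mem *m = container_of(ref, struct demo_mem, ref);

	kfree_rcu(m, rcu);	/* defer the free so RCU readers stay safe */
}

/* Allocate a 24-bit index, starting the cyclic scan at a random point.
 * The caller is expected to have done kref_init(&m->ref).
 */
static int demo_mem_add(struct demo_mem *m)
{
	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
	u32 id, next;
	int rv;

	get_random_bytes(&next, sizeof(next));
	next &= 0x00ffffff;

	rv = xa_alloc_cyclic(&demo_mem_xa, &id, m, limit, &next, GFP_KERNEL);
	if (rv < 0)
		return rv;

	m->stag = id << 8;	/* low byte carries the STag key */
	return 0;
}

/* Lockless lookup: valid only while the refcount can still be taken. */
static struct demo_mem *demo_mem_lookup(u32 stag_index)
{
	struct demo_mem *m;

	rcu_read_lock();
	m = xa_load(&demo_mem_xa, stag_index);
	if (m && !kref_get_unless_zero(&m->ref))
		m = NULL;
	rcu_read_unlock();

	return m;	/* caller drops it with kref_put(&m->ref, demo_mem_free) */
}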
+ */ + if (addr < mem->va || addr + len > mem->va + mem->len) { + siw_dbg_pd(pd, "MEM interval len %d\n", len); + siw_dbg_pd(pd, "[0x%016llx, 0x%016llx] out of bounds\n", + (unsigned long long)addr, + (unsigned long long)(addr + len)); + siw_dbg_pd(pd, "[0x%016llx, 0x%016llx] STag=0x%08x\n", + (unsigned long long)mem->va, + (unsigned long long)(mem->va + mem->len), + mem->stag); + + return -E_BASE_BOUNDS; + } + return E_ACCESS_OK; +} + +/* + * siw_check_sge() + * + * Check SGE for access rights in given interval + * + * @pd: Protection Domain memory should belong to + * @sge: SGE to be checked + * @mem: location of memory reference within array + * @perms: requested access permissions + * @off: starting offset in SGE + * @len: len of memory interval to be checked + * + * NOTE: Function references SGE's memory object (mem->obj) + * if not yet done. New reference is kept if check went ok and + * released if check failed. If mem->obj is already valid, no new + * lookup is being done and mem is not released it check fails. + */ +int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[], + enum ib_access_flags perms, u32 off, int len) +{ + struct siw_device *sdev = to_siw_dev(pd->device); + struct siw_mem *new = NULL; + int rv = E_ACCESS_OK; + + if (len + off > sge->length) { + rv = -E_BASE_BOUNDS; + goto fail; + } + if (*mem == NULL) { + new = siw_mem_id2obj(sdev, sge->lkey >> 8); + if (unlikely(!new)) { + siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey); + rv = -E_STAG_INVALID; + goto fail; + } + *mem = new; + } + /* Check if user re-registered with different STag key */ + if (unlikely((*mem)->stag != sge->lkey)) { + siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey); + rv = -E_STAG_INVALID; + goto fail; + } + rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len); + if (unlikely(rv)) + goto fail; + + return 0; + +fail: + if (new) { + *mem = NULL; + siw_mem_put(new); + } + return rv; +} + +void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op) +{ + switch (op) { + case SIW_OP_SEND: + case SIW_OP_WRITE: + case SIW_OP_SEND_WITH_IMM: + case SIW_OP_SEND_REMOTE_INV: + case SIW_OP_READ: + case SIW_OP_READ_LOCAL_INV: + if (!(wqe->sqe.flags & SIW_WQE_INLINE)) + siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge); + break; + + case SIW_OP_RECEIVE: + siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge); + break; + + case SIW_OP_READ_RESPONSE: + siw_unref_mem_sgl(wqe->mem, 1); + break; + + default: + /* + * SIW_OP_INVAL_STAG and SIW_OP_REG_MR + * do not hold memory references + */ + break; + } +} + +int siw_invalidate_stag(struct ib_pd *pd, u32 stag) +{ + struct siw_device *sdev = to_siw_dev(pd->device); + struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8); + int rv = 0; + + if (unlikely(!mem)) { + siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag); + return -EINVAL; + } + if (unlikely(mem->pd != pd)) { + siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag); + rv = -EACCES; + goto out; + } + /* + * Per RDMA verbs definition, an STag may already be in invalid + * state if invalidation is requested. So no state check here. + */ + mem->stag_valid = 0; + + siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag); +out: + siw_mem_put(mem); + return rv; +} + +/* + * Gets physical address backed by PBL element. Address is referenced + * by linear byte offset into list of variably sized PB elements. + * Optionally, provides remaining len within current element, and + * current PBL index for later resume at same element. 
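[Editor's note] siw_pbl_get_buffer(), defined just below, maps a linear byte offset onto the physical buffer list by walking its variably sized elements, optionally resuming from a remembered index. The same walk as a self-contained userspace model:

#include <stdint.h>
#include <stdio.h>

struct demo_pble {
	uint64_t addr;		/* start address of this buffer element */
	uint64_t size;		/* element length in bytes */
	uint64_t pbl_off;	/* cumulative byte offset of the element */
};

/* Translate 'off' bytes into the list to an address; 0 means not found.
 * '*idx' lets the caller resume the next lookup at the last hit.
 */
static uint64_t demo_pbl_get(const struct demo_pble *pbl, int num,
			     uint64_t off, int *idx)
{
	int i = idx ? *idx : 0;

	for (; i < num; i++) {
		if (pbl[i].pbl_off + pbl[i].size > off) {
			if (idx)
				*idx = i;
			return pbl[i].addr + (off - pbl[i].pbl_off);
		}
	}
	return 0;
}

int main(void)
{
	struct demo_pble pbl[] = {
		{ .addr = 0x1000, .size = 0x2000, .pbl_off = 0 },
		{ .addr = 0x9000, .size = 0x1000, .pbl_off = 0x2000 },
	};
	int idx = 0;

	/* 0x2100 bytes in: 0x100 into the second element -> 0x9100 */
	printf("0x%llx\n",
	       (unsigned long long)demo_pbl_get(pbl, 2, 0x2100, &idx));
	return 0;
}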
+ */ +u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx) +{ + int i = idx ? *idx : 0; + + while (i < pbl->num_buf) { + struct siw_pble *pble = &pbl->pbe[i]; + + if (pble->pbl_off + pble->size > off) { + u64 pble_off = off - pble->pbl_off; + + if (len) + *len = pble->size - pble_off; + if (idx) + *idx = i; + + return pble->addr + pble_off; + } + i++; + } + if (len) + *len = 0; + return 0; +} + +struct siw_pbl *siw_pbl_alloc(u32 num_buf) +{ + struct siw_pbl *pbl; + int buf_size = sizeof(*pbl); + + if (num_buf == 0) + return ERR_PTR(-EINVAL); + + buf_size += ((num_buf - 1) * sizeof(struct siw_pble)); + + pbl = kzalloc(buf_size, GFP_KERNEL); + if (!pbl) + return ERR_PTR(-ENOMEM); + + pbl->max_buf = num_buf; + + return pbl; +} + +struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) +{ + struct siw_umem *umem; + struct mm_struct *mm_s; + u64 first_page_va; + unsigned long mlock_limit; + unsigned int foll_flags = FOLL_WRITE; + int num_pages, num_chunks, i, rv = 0; + + if (!can_do_mlock()) + return ERR_PTR(-EPERM); + + if (!len) + return ERR_PTR(-EINVAL); + + first_page_va = start & PAGE_MASK; + num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT; + num_chunks = (num_pages >> CHUNK_SHIFT) + 1; + + umem = kzalloc(sizeof(*umem), GFP_KERNEL); + if (!umem) + return ERR_PTR(-ENOMEM); + + mm_s = current->mm; + umem->owning_mm = mm_s; + umem->writable = writable; + + mmgrab(mm_s); + + if (!writable) + foll_flags |= FOLL_FORCE; + + down_read(&mm_s->mmap_sem); + + mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) { + rv = -ENOMEM; + goto out_sem_up; + } + umem->fp_addr = first_page_va; + + umem->page_chunk = + kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL); + if (!umem->page_chunk) { + rv = -ENOMEM; + goto out_sem_up; + } + for (i = 0; num_pages; i++) { + int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK); + + umem->page_chunk[i].plist = + kcalloc(nents, sizeof(struct page *), GFP_KERNEL); + if (!umem->page_chunk[i].plist) { + rv = -ENOMEM; + goto out_sem_up; + } + got = 0; + while (nents) { + struct page **plist = &umem->page_chunk[i].plist[got]; + + rv = get_user_pages(first_page_va, nents, + foll_flags | FOLL_LONGTERM, + plist, NULL); + if (rv < 0) + goto out_sem_up; + + umem->num_pages += rv; + atomic64_add(rv, &mm_s->pinned_vm); + first_page_va += rv * PAGE_SIZE; + nents -= rv; + got += rv; + } + num_pages -= got; + } +out_sem_up: + up_read(&mm_s->mmap_sem); + + if (rv > 0) + return umem; + + siw_umem_release(umem, false); + + return ERR_PTR(rv); +} diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h new file mode 100644 index 000000000000..f43daf280891 --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_mem.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#ifndef _SIW_MEM_H +#define _SIW_MEM_H + +struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable); +void siw_umem_release(struct siw_umem *umem, bool dirty); +struct siw_pbl *siw_pbl_alloc(u32 num_buf); +u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx); +struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index); +int siw_mem_add(struct siw_device *sdev, struct siw_mem *m); +int siw_invalidate_stag(struct ib_pd *pd, u32 stag); +int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr, + enum 
ib_access_flags perms, int len); +int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, + struct siw_mem *mem[], enum ib_access_flags perms, + u32 off, int len); +void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op); +int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, + u64 start, u64 len, int rights); +void siw_mr_drop_mem(struct siw_mr *mr); +void siw_free_mem(struct kref *ref); + +static inline void siw_mem_put(struct siw_mem *mem) +{ + kref_put(&mem->ref, siw_free_mem); +} + +static inline struct siw_mr *siw_mem2mr(struct siw_mem *m) +{ + return container_of(m, struct siw_mr, mem); +} + +static inline void siw_unref_mem_sgl(struct siw_mem **mem, unsigned int num_sge) +{ + while (num_sge) { + if (*mem == NULL) + break; + + siw_mem_put(*mem); + *mem = NULL; + mem++; + num_sge--; + } +} + +#define CHUNK_SHIFT 9 /* sets number of pages per chunk */ +#define PAGES_PER_CHUNK (_AC(1, UL) << CHUNK_SHIFT) +#define CHUNK_MASK (~(PAGES_PER_CHUNK - 1)) +#define PAGE_CHUNK_SIZE (PAGES_PER_CHUNK * sizeof(struct page *)) + +/* + * siw_get_upage() + * + * Get page pointer for address on given umem. + * + * @umem: two dimensional list of page pointers + * @addr: user virtual address + */ +static inline struct page *siw_get_upage(struct siw_umem *umem, u64 addr) +{ + unsigned int page_idx = (addr - umem->fp_addr) >> PAGE_SHIFT, + chunk_idx = page_idx >> CHUNK_SHIFT, + page_in_chunk = page_idx & ~CHUNK_MASK; + + if (likely(page_idx < umem->num_pages)) + return umem->page_chunk[chunk_idx].plist[page_in_chunk]; + + return NULL; +} +#endif diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c new file mode 100644 index 000000000000..11383d9f95ef --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -0,0 +1,1322 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/net.h> +#include <linux/scatterlist.h> +#include <linux/llist.h> +#include <asm/barrier.h> +#include <net/tcp.h> + +#include "siw.h" +#include "siw_verbs.h" +#include "siw_mem.h" + +static char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = { + [SIW_QP_STATE_IDLE] = "IDLE", + [SIW_QP_STATE_RTR] = "RTR", + [SIW_QP_STATE_RTS] = "RTS", + [SIW_QP_STATE_CLOSING] = "CLOSING", + [SIW_QP_STATE_TERMINATE] = "TERMINATE", + [SIW_QP_STATE_ERROR] = "ERROR" +}; + +/* + * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a + * per-RDMAP message basis. Please keep order of initializer. All MPA len + * is initialized to minimum packet size. 
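+ *
+ * Each entry precomputes what is known at compile time: the DDP/RDMAP
+ * control word (tagged/last flags, DDP and RDMAP protocol versions and
+ * the RDMAP opcode) in network byte order, plus the MPA length of a
+ * zero-data FPDU, i.e. the header size minus the 2-byte MPA length
+ * field itself. A sender can then copy the template and only patch the
+ * MPA length (see siw_send_terminate() below).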
+ */ +struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = { + { /* RDMAP_RDMA_WRITE */ + .hdr_len = sizeof(struct iwarp_rdma_write), + .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2), + .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST | + cpu_to_be16(DDP_VERSION << 8) | + cpu_to_be16(RDMAP_VERSION << 6) | + cpu_to_be16(RDMAP_RDMA_WRITE), + .rx_data = siw_proc_write }, + { /* RDMAP_RDMA_READ_REQ */ + .hdr_len = sizeof(struct iwarp_rdma_rreq), + .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2), + .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) | + cpu_to_be16(RDMAP_VERSION << 6) | + cpu_to_be16(RDMAP_RDMA_READ_REQ), + .rx_data = siw_proc_rreq }, + { /* RDMAP_RDMA_READ_RESP */ + .hdr_len = sizeof(struct iwarp_rdma_rresp), + .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2), + .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST | + cpu_to_be16(DDP_VERSION << 8) | + cpu_to_be16(RDMAP_VERSION << 6) | + cpu_to_be16(RDMAP_RDMA_READ_RESP), + .rx_data = siw_proc_rresp }, + { /* RDMAP_SEND */ + .hdr_len = sizeof(struct iwarp_send), + .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2), + .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) | + cpu_to_be16(RDMAP_VERSION << 6) | + cpu_to_be16(RDMAP_SEND), + .rx_data = siw_proc_send }, + { /* RDMAP_SEND_INVAL */ + .hdr_len = sizeof(struct iwarp_send_inv), + .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2), + .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) | + cpu_to_be16(RDMAP_VERSION << 6) | + cpu_to_be16(RDMAP_SEND_INVAL), + .rx_data = siw_proc_send }, + { /* RDMAP_SEND_SE */ + .hdr_len = sizeof(struct iwarp_send), + .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2), + .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) | + cpu_to_be16(RDMAP_VERSION << 6) | + cpu_to_be16(RDMAP_SEND_SE), + .rx_data = siw_proc_send }, + { /* RDMAP_SEND_SE_INVAL */ + .hdr_len = sizeof(struct iwarp_send_inv), + .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2), + .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) | + cpu_to_be16(RDMAP_VERSION << 6) | + cpu_to_be16(RDMAP_SEND_SE_INVAL), + .rx_data = siw_proc_send }, + { /* RDMAP_TERMINATE */ + .hdr_len = sizeof(struct iwarp_terminate), + .ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2), + .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) | + cpu_to_be16(RDMAP_VERSION << 6) | + cpu_to_be16(RDMAP_TERMINATE), + .rx_data = siw_proc_terminate } +}; + +void siw_qp_llp_data_ready(struct sock *sk) +{ + struct siw_qp *qp; + + read_lock(&sk->sk_callback_lock); + + if (unlikely(!sk->sk_user_data || !sk_to_qp(sk))) + goto done; + + qp = sk_to_qp(sk); + + if (likely(!qp->rx_stream.rx_suspend && + down_read_trylock(&qp->state_lock))) { + read_descriptor_t rd_desc = { .arg.data = qp, .count = 1 }; + + if (likely(qp->attrs.state == SIW_QP_STATE_RTS)) + /* + * Implements data receive operation during + * socket callback. TCP gracefully catches + * the case where there is nothing to receive + * (not calling siw_tcp_rx_data() then). 
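+ * tcp_read_sock() walks the in-order receive queue and invokes
+ * siw_tcp_rx_data() as its read actor for each available data chunk,
+ * so inbound FPDU processing normally runs directly in this
+ * softirq context.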
+ */ + tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data); + + up_read(&qp->state_lock); + } else { + siw_dbg_qp(qp, "unable to process RX, suspend: %d\n", + qp->rx_stream.rx_suspend); + } +done: + read_unlock(&sk->sk_callback_lock); +} + +void siw_qp_llp_close(struct siw_qp *qp) +{ + siw_dbg_qp(qp, "enter llp close, state = %s\n", + siw_qp_state_to_string[qp->attrs.state]); + + down_write(&qp->state_lock); + + qp->rx_stream.rx_suspend = 1; + qp->tx_ctx.tx_suspend = 1; + qp->attrs.sk = NULL; + + switch (qp->attrs.state) { + case SIW_QP_STATE_RTS: + case SIW_QP_STATE_RTR: + case SIW_QP_STATE_IDLE: + case SIW_QP_STATE_TERMINATE: + qp->attrs.state = SIW_QP_STATE_ERROR; + break; + /* + * SIW_QP_STATE_CLOSING: + * + * This is a forced close. shall the QP be moved to + * ERROR or IDLE ? + */ + case SIW_QP_STATE_CLOSING: + if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) + qp->attrs.state = SIW_QP_STATE_ERROR; + else + qp->attrs.state = SIW_QP_STATE_IDLE; + break; + + default: + siw_dbg_qp(qp, "llp close: no state transition needed: %s\n", + siw_qp_state_to_string[qp->attrs.state]); + break; + } + siw_sq_flush(qp); + siw_rq_flush(qp); + + /* + * Dereference closing CEP + */ + if (qp->cep) { + siw_cep_put(qp->cep); + qp->cep = NULL; + } + + up_write(&qp->state_lock); + + siw_dbg_qp(qp, "llp close exit: state %s\n", + siw_qp_state_to_string[qp->attrs.state]); +} + +/* + * socket callback routine informing about newly available send space. + * Function schedules SQ work for processing SQ items. + */ +void siw_qp_llp_write_space(struct sock *sk) +{ + struct siw_cep *cep = sk_to_cep(sk); + + cep->sk_write_space(sk); + + if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + (void)siw_sq_start(cep->qp); +} + +static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) +{ + irq_size = roundup_pow_of_two(irq_size); + orq_size = roundup_pow_of_two(orq_size); + + qp->attrs.irq_size = irq_size; + qp->attrs.orq_size = orq_size; + + qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); + if (!qp->irq) { + siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size); + qp->attrs.irq_size = 0; + return -ENOMEM; + } + qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); + if (!qp->orq) { + siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size); + qp->attrs.orq_size = 0; + qp->attrs.irq_size = 0; + vfree(qp->irq); + return -ENOMEM; + } + siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size); + return 0; +} + +static int siw_qp_enable_crc(struct siw_qp *qp) +{ + struct siw_rx_stream *c_rx = &qp->rx_stream; + struct siw_iwarp_tx *c_tx = &qp->tx_ctx; + int size = crypto_shash_descsize(siw_crypto_shash) + + sizeof(struct shash_desc); + + if (siw_crypto_shash == NULL) + return -ENOENT; + + c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL); + c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL); + if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) { + kfree(c_tx->mpa_crc_hd); + kfree(c_rx->mpa_crc_hd); + c_tx->mpa_crc_hd = NULL; + c_rx->mpa_crc_hd = NULL; + return -ENOMEM; + } + c_tx->mpa_crc_hd->tfm = siw_crypto_shash; + c_rx->mpa_crc_hd->tfm = siw_crypto_shash; + + return 0; +} + +/* + * Send a non signalled READ or WRITE to peer side as negotiated + * with MPAv2 P2P setup protocol. The work request is only created + * as a current active WR and does not consume Send Queue space. + * + * Caller must hold QP state lock. 
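+ *
+ * Illustrative (hypothetical) caller, shown only to make the locking
+ * contract concrete:
+ *
+ *	down_read(&qp->state_lock);
+ *	rv = siw_qp_mpa_rts(qp, MPA_V2_RDMA_WRITE_RTR);
+ *	up_read(&qp->state_lock);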
+ */ +int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl) +{ + struct siw_wqe *wqe = tx_wqe(qp); + unsigned long flags; + int rv = 0; + + spin_lock_irqsave(&qp->sq_lock, flags); + + if (unlikely(wqe->wr_status != SIW_WR_IDLE)) { + spin_unlock_irqrestore(&qp->sq_lock, flags); + return -EIO; + } + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + + wqe->wr_status = SIW_WR_QUEUED; + wqe->sqe.flags = 0; + wqe->sqe.num_sge = 1; + wqe->sqe.sge[0].length = 0; + wqe->sqe.sge[0].laddr = 0; + wqe->sqe.sge[0].lkey = 0; + /* + * While it must not be checked for inbound zero length + * READ/WRITE, some HW may treat STag 0 special. + */ + wqe->sqe.rkey = 1; + wqe->sqe.raddr = 0; + wqe->processed = 0; + + if (ctrl & MPA_V2_RDMA_WRITE_RTR) + wqe->sqe.opcode = SIW_OP_WRITE; + else if (ctrl & MPA_V2_RDMA_READ_RTR) { + struct siw_sqe *rreq; + + wqe->sqe.opcode = SIW_OP_READ; + + spin_lock(&qp->orq_lock); + + rreq = orq_get_free(qp); + if (rreq) { + siw_read_to_orq(rreq, &wqe->sqe); + qp->orq_put++; + } else + rv = -EIO; + + spin_unlock(&qp->orq_lock); + } else + rv = -EINVAL; + + if (rv) + wqe->wr_status = SIW_WR_IDLE; + + spin_unlock_irqrestore(&qp->sq_lock, flags); + + if (!rv) + rv = siw_sq_start(qp); + + return rv; +} + +/* + * Map memory access error to DDP tagged error + */ +enum ddp_ecode siw_tagged_error(enum siw_access_state state) +{ + switch (state) { + case E_STAG_INVALID: + return DDP_ECODE_T_INVALID_STAG; + case E_BASE_BOUNDS: + return DDP_ECODE_T_BASE_BOUNDS; + case E_PD_MISMATCH: + return DDP_ECODE_T_STAG_NOT_ASSOC; + case E_ACCESS_PERM: + /* + * RFC 5041 (DDP) lacks an ecode for insufficient access + * permissions. 'Invalid STag' seem to be the closest + * match though. + */ + return DDP_ECODE_T_INVALID_STAG; + default: + WARN_ON(1); + return DDP_ECODE_T_INVALID_STAG; + } +} + +/* + * Map memory access error to RDMAP protection error + */ +enum rdmap_ecode siw_rdmap_error(enum siw_access_state state) +{ + switch (state) { + case E_STAG_INVALID: + return RDMAP_ECODE_INVALID_STAG; + case E_BASE_BOUNDS: + return RDMAP_ECODE_BASE_BOUNDS; + case E_PD_MISMATCH: + return RDMAP_ECODE_STAG_NOT_ASSOC; + case E_ACCESS_PERM: + return RDMAP_ECODE_ACCESS_RIGHTS; + default: + return RDMAP_ECODE_UNSPECIFIED; + } +} + +void siw_init_terminate(struct siw_qp *qp, enum term_elayer layer, u8 etype, + u8 ecode, int in_tx) +{ + if (!qp->term_info.valid) { + memset(&qp->term_info, 0, sizeof(qp->term_info)); + qp->term_info.layer = layer; + qp->term_info.etype = etype; + qp->term_info.ecode = ecode; + qp->term_info.in_tx = in_tx; + qp->term_info.valid = 1; + } + siw_dbg_qp(qp, "init TERM: layer %d, type %d, code %d, in tx %s\n", + layer, etype, ecode, in_tx ? "yes" : "no"); +} + +/* + * Send a TERMINATE message, as defined in RFC's 5040/5041/5044/6581. + * Sending TERMINATE messages is best effort - such messages + * can only be send if the QP is still connected and it does + * not have another outbound message in-progress, i.e. the + * TERMINATE message must not interfer with an incomplete current + * transmit operation. 
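+ *
+ * In outline, the message is assembled from up to three fragments and
+ * pushed out with kernel_sendmsg():
+ *   [0] the TERMINATE header itself,
+ *   [1] optionally a copy of the offending DDP/RDMAP header (flag_m,
+ *       flag_d and flag_r indicate what is included),
+ *   [2] the trailing MPA CRC (zero if CRC was not negotiated).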
+ */ +void siw_send_terminate(struct siw_qp *qp) +{ + struct kvec iov[3]; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR }; + struct iwarp_terminate *term = NULL; + union iwarp_hdr *err_hdr = NULL; + struct socket *s = qp->attrs.sk; + struct siw_rx_stream *srx = &qp->rx_stream; + union iwarp_hdr *rx_hdr = &srx->hdr; + u32 crc = 0; + int num_frags, len_terminate, rv; + + if (!qp->term_info.valid) + return; + + qp->term_info.valid = 0; + + if (tx_wqe(qp)->wr_status == SIW_WR_INPROGRESS) { + siw_dbg_qp(qp, "cannot send TERMINATE: op %d in progress\n", + tx_type(tx_wqe(qp))); + return; + } + if (!s && qp->cep) + /* QP not yet in RTS. Take socket from connection end point */ + s = qp->cep->sock; + + if (!s) { + siw_dbg_qp(qp, "cannot send TERMINATE: not connected\n"); + return; + } + + term = kzalloc(sizeof(*term), GFP_KERNEL); + if (!term) + return; + + term->ddp_qn = cpu_to_be32(RDMAP_UNTAGGED_QN_TERMINATE); + term->ddp_mo = 0; + term->ddp_msn = cpu_to_be32(1); + + iov[0].iov_base = term; + iov[0].iov_len = sizeof(*term); + + if ((qp->term_info.layer == TERM_ERROR_LAYER_DDP) || + ((qp->term_info.layer == TERM_ERROR_LAYER_RDMAP) && + (qp->term_info.etype != RDMAP_ETYPE_CATASTROPHIC))) { + err_hdr = kzalloc(sizeof(*err_hdr), GFP_KERNEL); + if (!err_hdr) { + kfree(term); + return; + } + } + memcpy(&term->ctrl, &iwarp_pktinfo[RDMAP_TERMINATE].ctrl, + sizeof(struct iwarp_ctrl)); + + __rdmap_term_set_layer(term, qp->term_info.layer); + __rdmap_term_set_etype(term, qp->term_info.etype); + __rdmap_term_set_ecode(term, qp->term_info.ecode); + + switch (qp->term_info.layer) { + case TERM_ERROR_LAYER_RDMAP: + if (qp->term_info.etype == RDMAP_ETYPE_CATASTROPHIC) + /* No additional DDP/RDMAP header to be included */ + break; + + if (qp->term_info.etype == RDMAP_ETYPE_REMOTE_PROTECTION) { + /* + * Complete RDMAP frame will get attached, and + * DDP segment length is valid + */ + term->flag_m = 1; + term->flag_d = 1; + term->flag_r = 1; + + if (qp->term_info.in_tx) { + struct iwarp_rdma_rreq *rreq; + struct siw_wqe *wqe = tx_wqe(qp); + + /* Inbound RREQ error, detected during + * RRESP creation. Take state from + * current TX work queue element to + * reconstruct peers RREQ. + */ + rreq = (struct iwarp_rdma_rreq *)err_hdr; + + memcpy(&rreq->ctrl, + &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl, + sizeof(struct iwarp_ctrl)); + + rreq->rsvd = 0; + rreq->ddp_qn = + htonl(RDMAP_UNTAGGED_QN_RDMA_READ); + + /* Provide RREQ's MSN as kept aside */ + rreq->ddp_msn = htonl(wqe->sqe.sge[0].length); + + rreq->ddp_mo = htonl(wqe->processed); + rreq->sink_stag = htonl(wqe->sqe.rkey); + rreq->sink_to = cpu_to_be64(wqe->sqe.raddr); + rreq->read_size = htonl(wqe->sqe.sge[0].length); + rreq->source_stag = htonl(wqe->sqe.sge[0].lkey); + rreq->source_to = + cpu_to_be64(wqe->sqe.sge[0].laddr); + + iov[1].iov_base = rreq; + iov[1].iov_len = sizeof(*rreq); + + rx_hdr = (union iwarp_hdr *)rreq; + } else { + /* Take RDMAP/DDP information from + * current (failed) inbound frame. 
+ */ + iov[1].iov_base = rx_hdr; + + if (__rdmap_get_opcode(&rx_hdr->ctrl) == + RDMAP_RDMA_READ_REQ) + iov[1].iov_len = + sizeof(struct iwarp_rdma_rreq); + else /* SEND type */ + iov[1].iov_len = + sizeof(struct iwarp_send); + } + } else { + /* Do not report DDP hdr information if packet + * layout is unknown + */ + if ((qp->term_info.ecode == RDMAP_ECODE_VERSION) || + (qp->term_info.ecode == RDMAP_ECODE_OPCODE)) + break; + + iov[1].iov_base = rx_hdr; + + /* Only DDP frame will get attached */ + if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED) + iov[1].iov_len = + sizeof(struct iwarp_rdma_write); + else + iov[1].iov_len = sizeof(struct iwarp_send); + + term->flag_m = 1; + term->flag_d = 1; + } + term->ctrl.mpa_len = cpu_to_be16(iov[1].iov_len); + break; + + case TERM_ERROR_LAYER_DDP: + /* Report error encountered while DDP processing. + * This can only happen as a result of inbound + * DDP processing + */ + + /* Do not report DDP hdr information if packet + * layout is unknown + */ + if (((qp->term_info.etype == DDP_ETYPE_TAGGED_BUF) && + (qp->term_info.ecode == DDP_ECODE_T_VERSION)) || + ((qp->term_info.etype == DDP_ETYPE_UNTAGGED_BUF) && + (qp->term_info.ecode == DDP_ECODE_UT_VERSION))) + break; + + iov[1].iov_base = rx_hdr; + + if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED) + iov[1].iov_len = sizeof(struct iwarp_ctrl_tagged); + else + iov[1].iov_len = sizeof(struct iwarp_ctrl_untagged); + + term->flag_m = 1; + term->flag_d = 1; + break; + + default: + break; + } + if (term->flag_m || term->flag_d || term->flag_r) { + iov[2].iov_base = &crc; + iov[2].iov_len = sizeof(crc); + len_terminate = sizeof(*term) + iov[1].iov_len + MPA_CRC_SIZE; + num_frags = 3; + } else { + iov[1].iov_base = &crc; + iov[1].iov_len = sizeof(crc); + len_terminate = sizeof(*term) + MPA_CRC_SIZE; + num_frags = 2; + } + + /* Adjust DDP Segment Length parameter, if valid */ + if (term->flag_m) { + u32 real_ddp_len = be16_to_cpu(rx_hdr->ctrl.mpa_len); + enum rdma_opcode op = __rdmap_get_opcode(&rx_hdr->ctrl); + + real_ddp_len -= iwarp_pktinfo[op].hdr_len - MPA_HDR_SIZE; + rx_hdr->ctrl.mpa_len = cpu_to_be16(real_ddp_len); + } + + term->ctrl.mpa_len = + cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE)); + if (qp->tx_ctx.mpa_crc_hd) { + crypto_shash_init(qp->tx_ctx.mpa_crc_hd); + if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd, + (u8 *)iov[0].iov_base, + iov[0].iov_len)) + goto out; + + if (num_frags == 3) { + if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd, + (u8 *)iov[1].iov_base, + iov[1].iov_len)) + goto out; + } + crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc); + } + + rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate); + siw_dbg_qp(qp, "sent TERM: %s, layer %d, type %d, code %d (%d bytes)\n", + rv == len_terminate ? 
"success" : "failure", + __rdmap_term_layer(term), __rdmap_term_etype(term), + __rdmap_term_ecode(term), rv); +out: + kfree(term); + kfree(err_hdr); +} + +/* + * Handle all attrs other than state + */ +static void siw_qp_modify_nonstate(struct siw_qp *qp, + struct siw_qp_attrs *attrs, + enum siw_qp_attr_mask mask) +{ + if (mask & SIW_QP_ATTR_ACCESS_FLAGS) { + if (attrs->flags & SIW_RDMA_BIND_ENABLED) + qp->attrs.flags |= SIW_RDMA_BIND_ENABLED; + else + qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED; + + if (attrs->flags & SIW_RDMA_WRITE_ENABLED) + qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED; + else + qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED; + + if (attrs->flags & SIW_RDMA_READ_ENABLED) + qp->attrs.flags |= SIW_RDMA_READ_ENABLED; + else + qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED; + } +} + +static int siw_qp_nextstate_from_idle(struct siw_qp *qp, + struct siw_qp_attrs *attrs, + enum siw_qp_attr_mask mask) +{ + int rv = 0; + + switch (attrs->state) { + case SIW_QP_STATE_RTS: + if (attrs->flags & SIW_MPA_CRC) { + rv = siw_qp_enable_crc(qp); + if (rv) + break; + } + if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) { + siw_dbg_qp(qp, "no socket\n"); + rv = -EINVAL; + break; + } + if (!(mask & SIW_QP_ATTR_MPA)) { + siw_dbg_qp(qp, "no MPA\n"); + rv = -EINVAL; + break; + } + /* + * Initialize iWARP TX state + */ + qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0; + qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0; + qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0; + + /* + * Initialize iWARP RX state + */ + qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1; + qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1; + qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1; + + /* + * init IRD free queue, caller has already checked + * limits. + */ + rv = siw_qp_readq_init(qp, attrs->irq_size, + attrs->orq_size); + if (rv) + break; + + qp->attrs.sk = attrs->sk; + qp->attrs.state = SIW_QP_STATE_RTS; + + siw_dbg_qp(qp, "enter RTS: crc=%s, ord=%u, ird=%u\n", + attrs->flags & SIW_MPA_CRC ? "y" : "n", + qp->attrs.orq_size, qp->attrs.irq_size); + break; + + case SIW_QP_STATE_ERROR: + siw_rq_flush(qp); + qp->attrs.state = SIW_QP_STATE_ERROR; + if (qp->cep) { + siw_cep_put(qp->cep); + qp->cep = NULL; + } + break; + + default: + break; + } + return rv; +} + +static int siw_qp_nextstate_from_rts(struct siw_qp *qp, + struct siw_qp_attrs *attrs) +{ + int drop_conn = 0; + + switch (attrs->state) { + case SIW_QP_STATE_CLOSING: + /* + * Verbs: move to IDLE if SQ and ORQ are empty. + * Move to ERROR otherwise. But first of all we must + * close the connection. So we keep CLOSING or ERROR + * as a transient state, schedule connection drop work + * and wait for the socket state change upcall to + * come back closed. + */ + if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) { + qp->attrs.state = SIW_QP_STATE_CLOSING; + } else { + qp->attrs.state = SIW_QP_STATE_ERROR; + siw_sq_flush(qp); + } + siw_rq_flush(qp); + + drop_conn = 1; + break; + + case SIW_QP_STATE_TERMINATE: + qp->attrs.state = SIW_QP_STATE_TERMINATE; + + siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, + RDMAP_ETYPE_CATASTROPHIC, + RDMAP_ECODE_UNSPECIFIED, 1); + drop_conn = 1; + break; + + case SIW_QP_STATE_ERROR: + /* + * This is an emergency close. + * + * Any in progress transmit operation will get + * cancelled. + * This will likely result in a protocol failure, + * if a TX operation is in transit. The caller + * could unconditional wait to give the current + * operation a chance to complete. + * Esp., how to handle the non-empty IRQ case? 
+ * The peer was asking for data transfer at a valid + * point in time. + */ + siw_sq_flush(qp); + siw_rq_flush(qp); + qp->attrs.state = SIW_QP_STATE_ERROR; + drop_conn = 1; + break; + + default: + break; + } + return drop_conn; +} + +static void siw_qp_nextstate_from_term(struct siw_qp *qp, + struct siw_qp_attrs *attrs) +{ + switch (attrs->state) { + case SIW_QP_STATE_ERROR: + siw_rq_flush(qp); + qp->attrs.state = SIW_QP_STATE_ERROR; + + if (tx_wqe(qp)->wr_status != SIW_WR_IDLE) + siw_sq_flush(qp); + break; + + default: + break; + } +} + +static int siw_qp_nextstate_from_close(struct siw_qp *qp, + struct siw_qp_attrs *attrs) +{ + int rv = 0; + + switch (attrs->state) { + case SIW_QP_STATE_IDLE: + WARN_ON(tx_wqe(qp)->wr_status != SIW_WR_IDLE); + qp->attrs.state = SIW_QP_STATE_IDLE; + break; + + case SIW_QP_STATE_CLOSING: + /* + * The LLP may already moved the QP to closing + * due to graceful peer close init + */ + break; + + case SIW_QP_STATE_ERROR: + /* + * QP was moved to CLOSING by LLP event + * not yet seen by user. + */ + qp->attrs.state = SIW_QP_STATE_ERROR; + + if (tx_wqe(qp)->wr_status != SIW_WR_IDLE) + siw_sq_flush(qp); + + siw_rq_flush(qp); + break; + + default: + siw_dbg_qp(qp, "state transition undefined: %s => %s\n", + siw_qp_state_to_string[qp->attrs.state], + siw_qp_state_to_string[attrs->state]); + + rv = -ECONNABORTED; + } + return rv; +} + +/* + * Caller must hold qp->state_lock + */ +int siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs, + enum siw_qp_attr_mask mask) +{ + int drop_conn = 0, rv = 0; + + if (!mask) + return 0; + + siw_dbg_qp(qp, "state: %s => %s\n", + siw_qp_state_to_string[qp->attrs.state], + siw_qp_state_to_string[attrs->state]); + + if (mask != SIW_QP_ATTR_STATE) + siw_qp_modify_nonstate(qp, attrs, mask); + + if (!(mask & SIW_QP_ATTR_STATE)) + return 0; + + switch (qp->attrs.state) { + case SIW_QP_STATE_IDLE: + case SIW_QP_STATE_RTR: + rv = siw_qp_nextstate_from_idle(qp, attrs, mask); + break; + + case SIW_QP_STATE_RTS: + drop_conn = siw_qp_nextstate_from_rts(qp, attrs); + break; + + case SIW_QP_STATE_TERMINATE: + siw_qp_nextstate_from_term(qp, attrs); + break; + + case SIW_QP_STATE_CLOSING: + siw_qp_nextstate_from_close(qp, attrs); + break; + default: + break; + } + if (drop_conn) + siw_qp_cm_drop(qp, 0); + + return rv; +} + +void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe) +{ + rreq->id = sqe->id; + rreq->opcode = sqe->opcode; + rreq->sge[0].laddr = sqe->sge[0].laddr; + rreq->sge[0].length = sqe->sge[0].length; + rreq->sge[0].lkey = sqe->sge[0].lkey; + rreq->sge[1].lkey = sqe->sge[1].lkey; + rreq->flags = sqe->flags | SIW_WQE_VALID; + rreq->num_sge = 1; +} + +/* + * Must be called with SQ locked. + * To avoid complete SQ starvation by constant inbound READ requests, + * the active IRQ will not be served after qp->irq_burst, if the + * SQ has pending work. 
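+ * Concretely: once SIW_IRQ_MAXBURST_SQ_ACTIVE inbound READ responses
+ * have been scheduled back-to-back while the SQ holds work, irq_burst
+ * is reset and the next SQE is activated before the IRQ is served again.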
+ */ +int siw_activate_tx(struct siw_qp *qp) +{ + struct siw_sqe *irqe, *sqe; + struct siw_wqe *wqe = tx_wqe(qp); + int rv = 1; + + irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; + + if (irqe->flags & SIW_WQE_VALID) { + sqe = sq_get_next(qp); + + /* + * Avoid local WQE processing starvation in case + * of constant inbound READ request stream + */ + if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { + qp->irq_burst = 0; + goto skip_irq; + } + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; + + /* start READ RESPONSE */ + wqe->sqe.opcode = SIW_OP_READ_RESPONSE; + wqe->sqe.flags = 0; + if (irqe->num_sge) { + wqe->sqe.num_sge = 1; + wqe->sqe.sge[0].length = irqe->sge[0].length; + wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; + wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; + } else { + wqe->sqe.num_sge = 0; + } + + /* Retain original RREQ's message sequence number for + * potential error reporting cases. + */ + wqe->sqe.sge[1].length = irqe->sge[1].length; + + wqe->sqe.rkey = irqe->rkey; + wqe->sqe.raddr = irqe->raddr; + + wqe->processed = 0; + qp->irq_get++; + + /* mark current IRQ entry free */ + smp_store_mb(irqe->flags, 0); + + goto out; + } + sqe = sq_get_next(qp); + if (sqe) { +skip_irq: + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; + + /* First copy SQE to kernel private memory */ + memcpy(&wqe->sqe, sqe, sizeof(*sqe)); + + if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { + rv = -EINVAL; + goto out; + } + if (wqe->sqe.flags & SIW_WQE_INLINE) { + if (wqe->sqe.opcode != SIW_OP_SEND && + wqe->sqe.opcode != SIW_OP_WRITE) { + rv = -EINVAL; + goto out; + } + if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { + rv = -EINVAL; + goto out; + } + wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].lkey = 0; + wqe->sqe.num_sge = 1; + } + if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { + /* A READ cannot be fenced */ + if (unlikely(wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == + SIW_OP_READ_LOCAL_INV)) { + siw_dbg_qp(qp, "cannot fence read\n"); + rv = -EINVAL; + goto out; + } + spin_lock(&qp->orq_lock); + + if (!siw_orq_empty(qp)) { + qp->tx_ctx.orq_fence = 1; + rv = 0; + } + spin_unlock(&qp->orq_lock); + + } else if (wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { + struct siw_sqe *rreq; + + wqe->sqe.num_sge = 1; + + spin_lock(&qp->orq_lock); + + rreq = orq_get_free(qp); + if (rreq) { + /* + * Make an immediate copy in ORQ to be ready + * to process loopback READ reply + */ + siw_read_to_orq(rreq, &wqe->sqe); + qp->orq_put++; + } else { + qp->tx_ctx.orq_fence = 1; + rv = 0; + } + spin_unlock(&qp->orq_lock); + } + + /* Clear SQE, can be re-used by application */ + smp_store_mb(sqe->flags, 0); + qp->sq_get++; + } else { + rv = 0; + } +out: + if (unlikely(rv < 0)) { + siw_dbg_qp(qp, "error %d\n", rv); + wqe->wr_status = SIW_WR_IDLE; + } + return rv; +} + +/* + * Check if current CQ state qualifies for calling CQ completion + * handler. Must be called with CQ lock held. 
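+ *
+ * The handler is due if the CQ is armed for SIW_NOTIFY_NEXT_COMPLETION,
+ * or armed for SIW_NOTIFY_SOLICITED and the completing WQE carries
+ * SIW_WQE_SOLICITED. In that case the CQ is disarmed (SIW_NOTIFY_NOT)
+ * before true is returned, so each arming fires at most once.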
+ */ +static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags) +{ + u64 cq_notify; + + if (!cq->base_cq.comp_handler) + return false; + + cq_notify = READ_ONCE(*cq->notify); + + if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) || + ((cq_notify & SIW_NOTIFY_SOLICITED) && + (flags & SIW_WQE_SOLICITED))) { + /* dis-arm CQ */ + smp_store_mb(*cq->notify, SIW_NOTIFY_NOT); + + return true; + } + return false; +} + +int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes, + enum siw_wc_status status) +{ + struct siw_cq *cq = qp->scq; + int rv = 0; + + if (cq) { + u32 sqe_flags = sqe->flags; + struct siw_cqe *cqe; + u32 idx; + unsigned long flags; + + spin_lock_irqsave(&cq->lock, flags); + + idx = cq->cq_put % cq->num_cqe; + cqe = &cq->queue[idx]; + + if (!READ_ONCE(cqe->flags)) { + bool notify; + + cqe->id = sqe->id; + cqe->opcode = sqe->opcode; + cqe->status = status; + cqe->imm_data = 0; + cqe->bytes = bytes; + + if (cq->kernel_verbs) + cqe->base_qp = qp->ib_qp; + else + cqe->qp_id = qp_id(qp); + + /* mark CQE valid for application */ + WRITE_ONCE(cqe->flags, SIW_WQE_VALID); + /* recycle SQE */ + smp_store_mb(sqe->flags, 0); + + cq->cq_put++; + notify = siw_cq_notify_now(cq, sqe_flags); + + spin_unlock_irqrestore(&cq->lock, flags); + + if (notify) { + siw_dbg_cq(cq, "Call completion handler\n"); + cq->base_cq.comp_handler(&cq->base_cq, + cq->base_cq.cq_context); + } + } else { + spin_unlock_irqrestore(&cq->lock, flags); + rv = -ENOMEM; + siw_cq_event(cq, IB_EVENT_CQ_ERR); + } + } else { + /* recycle SQE */ + smp_store_mb(sqe->flags, 0); + } + return rv; +} + +int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes, + u32 inval_stag, enum siw_wc_status status) +{ + struct siw_cq *cq = qp->rcq; + int rv = 0; + + if (cq) { + struct siw_cqe *cqe; + u32 idx; + unsigned long flags; + + spin_lock_irqsave(&cq->lock, flags); + + idx = cq->cq_put % cq->num_cqe; + cqe = &cq->queue[idx]; + + if (!READ_ONCE(cqe->flags)) { + bool notify; + u8 cqe_flags = SIW_WQE_VALID; + + cqe->id = rqe->id; + cqe->opcode = SIW_OP_RECEIVE; + cqe->status = status; + cqe->imm_data = 0; + cqe->bytes = bytes; + + if (cq->kernel_verbs) { + cqe->base_qp = qp->ib_qp; + if (inval_stag) { + cqe_flags |= SIW_WQE_REM_INVAL; + cqe->inval_stag = inval_stag; + } + } else { + cqe->qp_id = qp_id(qp); + } + /* mark CQE valid for application */ + WRITE_ONCE(cqe->flags, cqe_flags); + /* recycle RQE */ + smp_store_mb(rqe->flags, 0); + + cq->cq_put++; + notify = siw_cq_notify_now(cq, SIW_WQE_SIGNALLED); + + spin_unlock_irqrestore(&cq->lock, flags); + + if (notify) { + siw_dbg_cq(cq, "Call completion handler\n"); + cq->base_cq.comp_handler(&cq->base_cq, + cq->base_cq.cq_context); + } + } else { + spin_unlock_irqrestore(&cq->lock, flags); + rv = -ENOMEM; + siw_cq_event(cq, IB_EVENT_CQ_ERR); + } + } else { + /* recycle RQE */ + smp_store_mb(rqe->flags, 0); + } + return rv; +} + +/* + * siw_sq_flush() + * + * Flush SQ and ORRQ entries to CQ. + * + * Must be called with QP state write lock held. + * Therefore, SQ and ORQ lock must not be taken. 
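+ *
+ * Flush order: first complete pending ORQ entries, then a possibly
+ * in-progress current WQE, then all remaining SQEs, each with status
+ * SIW_WC_WR_FLUSH_ERR. If any SQE was flushed, IB_EVENT_SQ_DRAINED
+ * is signalled afterwards.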
+ */ +void siw_sq_flush(struct siw_qp *qp) +{ + struct siw_sqe *sqe; + struct siw_wqe *wqe = tx_wqe(qp); + int async_event = 0; + + /* + * Start with completing any work currently on the ORQ + */ + while (qp->attrs.orq_size) { + sqe = &qp->orq[qp->orq_get % qp->attrs.orq_size]; + if (!READ_ONCE(sqe->flags)) + break; + + if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0) + break; + + WRITE_ONCE(sqe->flags, 0); + qp->orq_get++; + } + /* + * Flush an in-progress WQE if present + */ + if (wqe->wr_status != SIW_WR_IDLE) { + siw_dbg_qp(qp, "flush current SQE, type %d, status %d\n", + tx_type(wqe), wqe->wr_status); + + siw_wqe_put_mem(wqe, tx_type(wqe)); + + if (tx_type(wqe) != SIW_OP_READ_RESPONSE && + ((tx_type(wqe) != SIW_OP_READ && + tx_type(wqe) != SIW_OP_READ_LOCAL_INV) || + wqe->wr_status == SIW_WR_QUEUED)) + /* + * An in-progress Read Request is already in + * the ORQ + */ + siw_sqe_complete(qp, &wqe->sqe, wqe->bytes, + SIW_WC_WR_FLUSH_ERR); + + wqe->wr_status = SIW_WR_IDLE; + } + /* + * Flush the Send Queue + */ + while (qp->attrs.sq_size) { + sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size]; + if (!READ_ONCE(sqe->flags)) + break; + + async_event = 1; + if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0) + /* + * Shall IB_EVENT_SQ_DRAINED be supressed if work + * completion fails? + */ + break; + + WRITE_ONCE(sqe->flags, 0); + qp->sq_get++; + } + if (async_event) + siw_qp_event(qp, IB_EVENT_SQ_DRAINED); +} + +/* + * siw_rq_flush() + * + * Flush recv queue entries to CQ. Also + * takes care of pending active tagged and untagged + * inbound transfers, which have target memory + * referenced. + * + * Must be called with QP state write lock held. + * Therefore, RQ lock must not be taken. + */ +void siw_rq_flush(struct siw_qp *qp) +{ + struct siw_wqe *wqe = &qp->rx_untagged.wqe_active; + + /* + * Flush an in-progress untagged operation if present + */ + if (wqe->wr_status != SIW_WR_IDLE) { + siw_dbg_qp(qp, "flush current rqe, type %d, status %d\n", + rx_type(wqe), wqe->wr_status); + + siw_wqe_put_mem(wqe, rx_type(wqe)); + + if (rx_type(wqe) == SIW_OP_RECEIVE) { + siw_rqe_complete(qp, &wqe->rqe, wqe->bytes, + 0, SIW_WC_WR_FLUSH_ERR); + } else if (rx_type(wqe) != SIW_OP_READ && + rx_type(wqe) != SIW_OP_READ_RESPONSE && + rx_type(wqe) != SIW_OP_WRITE) { + siw_sqe_complete(qp, &wqe->sqe, 0, SIW_WC_WR_FLUSH_ERR); + } + wqe->wr_status = SIW_WR_IDLE; + } + wqe = &qp->rx_tagged.wqe_active; + + if (wqe->wr_status != SIW_WR_IDLE) { + siw_wqe_put_mem(wqe, rx_type(wqe)); + wqe->wr_status = SIW_WR_IDLE; + } + /* + * Flush the Receive Queue + */ + while (qp->attrs.rq_size) { + struct siw_rqe *rqe = + &qp->recvq[qp->rq_get % qp->attrs.rq_size]; + + if (!READ_ONCE(rqe->flags)) + break; + + if (siw_rqe_complete(qp, rqe, 0, 0, SIW_WC_WR_FLUSH_ERR) != 0) + break; + + WRITE_ONCE(rqe->flags, 0); + qp->rq_get++; + } +} + +int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp) +{ + int rv = xa_alloc(&sdev->qp_xa, &qp->ib_qp->qp_num, qp, xa_limit_32b, + GFP_KERNEL); + + if (!rv) { + kref_init(&qp->ref); + qp->sdev = sdev; + qp->qp_num = qp->ib_qp->qp_num; + siw_dbg_qp(qp, "new QP\n"); + } + return rv; +} + +void siw_free_qp(struct kref *ref) +{ + struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref); + struct siw_device *sdev = qp->sdev; + unsigned long flags; + + if (qp->cep) + siw_cep_put(qp->cep); + + found = xa_erase(&sdev->qp_xa, qp_id(qp)); + WARN_ON(found != qp); + spin_lock_irqsave(&sdev->lock, flags); + list_del(&qp->devq); + spin_unlock_irqrestore(&sdev->lock, flags); + + 
vfree(qp->sendq); + vfree(qp->recvq); + vfree(qp->irq); + vfree(qp->orq); + + siw_put_tx_cpu(qp->tx_cpu); + + atomic_dec(&sdev->num_qp); + siw_dbg_qp(qp, "free QP\n"); + kfree_rcu(qp, rcu); +} diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c new file mode 100644 index 000000000000..f87657a11657 --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -0,0 +1,1458 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/net.h> +#include <linux/scatterlist.h> +#include <linux/highmem.h> + +#include <rdma/iw_cm.h> +#include <rdma/ib_verbs.h> + +#include "siw.h" +#include "siw_verbs.h" +#include "siw_mem.h" + +/* + * siw_rx_umem() + * + * Receive data of @len into target referenced by @dest_addr. + * + * @srx: Receive Context + * @umem: siw representation of target memory + * @dest_addr: user virtual address + * @len: number of bytes to place + */ +static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, + u64 dest_addr, int len) +{ + int copied = 0; + + while (len) { + struct page *p; + int pg_off, bytes, rv; + void *dest; + + p = siw_get_upage(umem, dest_addr); + if (unlikely(!p)) { + pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n", + __func__, qp_id(rx_qp(srx)), + (void *)dest_addr, (void *)umem->fp_addr); + /* siw internal error */ + srx->skb_copied += copied; + srx->skb_new -= copied; + + return -EFAULT; + } + pg_off = dest_addr & ~PAGE_MASK; + bytes = min(len, (int)PAGE_SIZE - pg_off); + + siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes); + + dest = kmap_atomic(p); + rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off, + bytes); + + if (unlikely(rv)) { + kunmap_atomic(dest); + srx->skb_copied += copied; + srx->skb_new -= copied; + + pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n", + qp_id(rx_qp(srx)), __func__, len, p, rv); + + return -EFAULT; + } + if (srx->mpa_crc_hd) { + if (rx_qp(srx)->kernel_verbs) { + crypto_shash_update(srx->mpa_crc_hd, + (u8 *)(dest + pg_off), bytes); + kunmap_atomic(dest); + } else { + kunmap_atomic(dest); + /* + * Do CRC on original, not target buffer. + * Some user land applications may + * concurrently write the target buffer, + * which would yield a broken CRC. + * Walking the skb twice is very ineffcient. + * Folding the CRC into skb_copy_bits() + * would be much better, but is currently + * not supported. 
+ */ + siw_crc_skb(srx, bytes); + } + } else { + kunmap_atomic(dest); + } + srx->skb_offset += bytes; + copied += bytes; + len -= bytes; + dest_addr += bytes; + pg_off = 0; + } + srx->skb_copied += copied; + srx->skb_new -= copied; + + return copied; +} + +static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len) +{ + int rv; + + siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len); + + rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len); + if (unlikely(rv)) { + pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n", + qp_id(rx_qp(srx)), __func__, len, kva, rv); + + return rv; + } + if (srx->mpa_crc_hd) + crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len); + + srx->skb_offset += len; + srx->skb_copied += len; + srx->skb_new -= len; + + return len; +} + +static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, + struct siw_mem *mem, u64 addr, int len) +{ + struct siw_pbl *pbl = mem->pbl; + u64 offset = addr - mem->va; + int copied = 0; + + while (len) { + int bytes; + u64 buf_addr = + siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx); + if (!buf_addr) + break; + + bytes = min(bytes, len); + if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) { + copied += bytes; + offset += bytes; + len -= bytes; + } else { + break; + } + } + return copied; +} + +/* + * siw_rresp_check_ntoh() + * + * Check incoming RRESP fragment header against expected + * header values and update expected values for potential next + * fragment. + * + * NOTE: This function must be called only if a RRESP DDP segment + * starts but not for fragmented consecutive pieces of an + * already started DDP segment. + */ +static int siw_rresp_check_ntoh(struct siw_rx_stream *srx, + struct siw_rx_fpdu *frx) +{ + struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp; + struct siw_wqe *wqe = &frx->wqe_active; + enum ddp_ecode ecode; + + u32 sink_stag = be32_to_cpu(rresp->sink_stag); + u64 sink_to = be64_to_cpu(rresp->sink_to); + + if (frx->first_ddp_seg) { + srx->ddp_stag = wqe->sqe.sge[0].lkey; + srx->ddp_to = wqe->sqe.sge[0].laddr; + frx->pbl_idx = 0; + } + /* Below checks extend beyond the semantics of DDP, and + * into RDMAP: + * We check if the read response matches exactly the + * read request which was send to the remote peer to + * trigger this read response. RFC5040/5041 do not + * always have a proper error code for the detected + * error cases. We choose 'base or bounds error' for + * cases where the inbound STag is valid, but offset + * or length do not match our response receive state. 
+ */ + if (unlikely(srx->ddp_stag != sink_stag)) { + pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n", + qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag); + ecode = DDP_ECODE_T_INVALID_STAG; + goto error; + } + if (unlikely(srx->ddp_to != sink_to)) { + pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n", + qp_id(rx_qp(srx)), (unsigned long long)sink_to, + (unsigned long long)srx->ddp_to); + ecode = DDP_ECODE_T_BASE_BOUNDS; + goto error; + } + if (unlikely(!frx->more_ddp_segs && + (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) { + pr_warn("siw: [QP %u]: rresp len: %d != %d\n", + qp_id(rx_qp(srx)), + wqe->processed + srx->fpdu_part_rem, wqe->bytes); + ecode = DDP_ECODE_T_BASE_BOUNDS; + goto error; + } + return 0; +error: + siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP, + DDP_ETYPE_TAGGED_BUF, ecode, 0); + return -EINVAL; +} + +/* + * siw_write_check_ntoh() + * + * Check incoming WRITE fragment header against expected + * header values and update expected values for potential next + * fragment + * + * NOTE: This function must be called only if a WRITE DDP segment + * starts but not for fragmented consecutive pieces of an + * already started DDP segment. + */ +static int siw_write_check_ntoh(struct siw_rx_stream *srx, + struct siw_rx_fpdu *frx) +{ + struct iwarp_rdma_write *write = &srx->hdr.rwrite; + enum ddp_ecode ecode; + + u32 sink_stag = be32_to_cpu(write->sink_stag); + u64 sink_to = be64_to_cpu(write->sink_to); + + if (frx->first_ddp_seg) { + srx->ddp_stag = sink_stag; + srx->ddp_to = sink_to; + frx->pbl_idx = 0; + } else { + if (unlikely(srx->ddp_stag != sink_stag)) { + pr_warn("siw: [QP %u]: write stag: %08x != %08x\n", + qp_id(rx_qp(srx)), sink_stag, + srx->ddp_stag); + ecode = DDP_ECODE_T_INVALID_STAG; + goto error; + } + if (unlikely(srx->ddp_to != sink_to)) { + pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n", + qp_id(rx_qp(srx)), + (unsigned long long)sink_to, + (unsigned long long)srx->ddp_to); + ecode = DDP_ECODE_T_BASE_BOUNDS; + goto error; + } + } + return 0; +error: + siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP, + DDP_ETYPE_TAGGED_BUF, ecode, 0); + return -EINVAL; +} + +/* + * siw_send_check_ntoh() + * + * Check incoming SEND fragment header against expected + * header values and update expected MSN if no next + * fragment expected + * + * NOTE: This function must be called only if a SEND DDP segment + * starts but not for fragmented consecutive pieces of an + * already started DDP segment. 
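+ *
+ * For the untagged SEND queue this boils down to three checks: the DDP
+ * queue number must be RDMAP_UNTAGGED_QN_SEND, the MSN must equal the
+ * next expected receive MSN, and the MO must equal the number of bytes
+ * already placed for the current receive WQE (wqe->processed).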
+ */ +static int siw_send_check_ntoh(struct siw_rx_stream *srx, + struct siw_rx_fpdu *frx) +{ + struct iwarp_send_inv *send = &srx->hdr.send_inv; + struct siw_wqe *wqe = &frx->wqe_active; + enum ddp_ecode ecode; + + u32 ddp_msn = be32_to_cpu(send->ddp_msn); + u32 ddp_mo = be32_to_cpu(send->ddp_mo); + u32 ddp_qn = be32_to_cpu(send->ddp_qn); + + if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) { + pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n", + qp_id(rx_qp(srx)), ddp_qn); + ecode = DDP_ECODE_UT_INVALID_QN; + goto error; + } + if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) { + pr_warn("siw: [QP %u]: send msn: %u != %u\n", + qp_id(rx_qp(srx)), ddp_msn, + srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]); + ecode = DDP_ECODE_UT_INVALID_MSN_RANGE; + goto error; + } + if (unlikely(ddp_mo != wqe->processed)) { + pr_warn("siw: [QP %u], send mo: %u != %u\n", + qp_id(rx_qp(srx)), ddp_mo, wqe->processed); + ecode = DDP_ECODE_UT_INVALID_MO; + goto error; + } + if (frx->first_ddp_seg) { + /* initialize user memory write position */ + frx->sge_idx = 0; + frx->sge_off = 0; + frx->pbl_idx = 0; + + /* only valid for SEND_INV and SEND_SE_INV operations */ + srx->inval_stag = be32_to_cpu(send->inval_stag); + } + if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) { + siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n", + wqe->bytes, wqe->processed, srx->fpdu_part_rem); + wqe->wc_status = SIW_WC_LOC_LEN_ERR; + ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF; + goto error; + } + return 0; +error: + siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP, + DDP_ETYPE_UNTAGGED_BUF, ecode, 0); + return -EINVAL; +} + +static struct siw_wqe *siw_rqe_get(struct siw_qp *qp) +{ + struct siw_rqe *rqe; + struct siw_srq *srq; + struct siw_wqe *wqe = NULL; + bool srq_event = false; + unsigned long flags; + + srq = qp->srq; + if (srq) { + spin_lock_irqsave(&srq->lock, flags); + if (unlikely(!srq->num_rqe)) + goto out; + + rqe = &srq->recvq[srq->rq_get % srq->num_rqe]; + } else { + if (unlikely(!qp->recvq)) + goto out; + + rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size]; + } + if (likely(rqe->flags == SIW_WQE_VALID)) { + int num_sge = rqe->num_sge; + + if (likely(num_sge <= SIW_MAX_SGE)) { + int i = 0; + + wqe = rx_wqe(&qp->rx_untagged); + rx_type(wqe) = SIW_OP_RECEIVE; + wqe->wr_status = SIW_WR_INPROGRESS; + wqe->bytes = 0; + wqe->processed = 0; + + wqe->rqe.id = rqe->id; + wqe->rqe.num_sge = num_sge; + + while (i < num_sge) { + wqe->rqe.sge[i].laddr = rqe->sge[i].laddr; + wqe->rqe.sge[i].lkey = rqe->sge[i].lkey; + wqe->rqe.sge[i].length = rqe->sge[i].length; + wqe->bytes += wqe->rqe.sge[i].length; + wqe->mem[i] = NULL; + i++; + } + /* can be re-used by appl */ + smp_store_mb(rqe->flags, 0); + } else { + siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge); + if (srq) + spin_unlock_irqrestore(&srq->lock, flags); + return NULL; + } + if (!srq) { + qp->rq_get++; + } else { + if (srq->armed) { + /* Test SRQ limit */ + u32 off = (srq->rq_get + srq->limit) % + srq->num_rqe; + struct siw_rqe *rqe2 = &srq->recvq[off]; + + if (!(rqe2->flags & SIW_WQE_VALID)) { + srq->armed = 0; + srq_event = true; + } + } + srq->rq_get++; + } + } +out: + if (srq) { + spin_unlock_irqrestore(&srq->lock, flags); + if (srq_event) + siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED); + } + return wqe; +} + +/* + * siw_proc_send: + * + * Process one incoming SEND and place data into memory referenced by + * receive wqe. 
+ * + * Function supports partially received sends (suspending/resuming + * current receive wqe processing) + * + * return value: + * 0: reached the end of a DDP segment + * -EAGAIN: to be called again to finish the DDP segment + */ +int siw_proc_send(struct siw_qp *qp) +{ + struct siw_rx_stream *srx = &qp->rx_stream; + struct siw_rx_fpdu *frx = &qp->rx_untagged; + struct siw_wqe *wqe; + u32 data_bytes; /* all data bytes available */ + u32 rcvd_bytes; /* sum of data bytes rcvd */ + int rv = 0; + + if (frx->first_ddp_seg) { + wqe = siw_rqe_get(qp); + if (unlikely(!wqe)) { + siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, + DDP_ETYPE_UNTAGGED_BUF, + DDP_ECODE_UT_INVALID_MSN_NOBUF, 0); + return -ENOENT; + } + } else { + wqe = rx_wqe(frx); + } + if (srx->state == SIW_GET_DATA_START) { + rv = siw_send_check_ntoh(srx, frx); + if (unlikely(rv)) { + siw_qp_event(qp, IB_EVENT_QP_FATAL); + return rv; + } + if (!srx->fpdu_part_rem) /* zero length SEND */ + return 0; + } + data_bytes = min(srx->fpdu_part_rem, srx->skb_new); + rcvd_bytes = 0; + + /* A zero length SEND will skip below loop */ + while (data_bytes) { + struct ib_pd *pd; + struct siw_mem **mem, *mem_p; + struct siw_sge *sge; + u32 sge_bytes; /* data bytes avail for SGE */ + + sge = &wqe->rqe.sge[frx->sge_idx]; + + if (!sge->length) { + /* just skip empty sge's */ + frx->sge_idx++; + frx->sge_off = 0; + frx->pbl_idx = 0; + continue; + } + sge_bytes = min(data_bytes, sge->length - frx->sge_off); + mem = &wqe->mem[frx->sge_idx]; + + /* + * check with QP's PD if no SRQ present, SRQ's PD otherwise + */ + pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd; + + rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE, + frx->sge_off, sge_bytes); + if (unlikely(rv)) { + siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, + DDP_ETYPE_CATASTROPHIC, + DDP_ECODE_CATASTROPHIC, 0); + + siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR); + break; + } + mem_p = *mem; + if (mem_p->mem_obj == NULL) + rv = siw_rx_kva(srx, + (void *)(sge->laddr + frx->sge_off), + sge_bytes); + else if (!mem_p->is_pbl) + rv = siw_rx_umem(srx, mem_p->umem, + sge->laddr + frx->sge_off, sge_bytes); + else + rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p, + sge->laddr + frx->sge_off, sge_bytes); + + if (unlikely(rv != sge_bytes)) { + wqe->processed += rcvd_bytes; + + siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, + DDP_ETYPE_CATASTROPHIC, + DDP_ECODE_CATASTROPHIC, 0); + return -EINVAL; + } + frx->sge_off += rv; + + if (frx->sge_off == sge->length) { + frx->sge_idx++; + frx->sge_off = 0; + frx->pbl_idx = 0; + } + data_bytes -= rv; + rcvd_bytes += rv; + + srx->fpdu_part_rem -= rv; + srx->fpdu_part_rcvd += rv; + } + wqe->processed += rcvd_bytes; + + if (!srx->fpdu_part_rem) + return 0; + + return (rv < 0) ? 
rv : -EAGAIN; +} + +/* + * siw_proc_write: + * + * Place incoming WRITE after referencing and checking target buffer + + * Function supports partially received WRITEs (suspending/resuming + * current receive processing) + * + * return value: + * 0: reached the end of a DDP segment + * -EAGAIN: to be called again to finish the DDP segment + */ +int siw_proc_write(struct siw_qp *qp) +{ + struct siw_rx_stream *srx = &qp->rx_stream; + struct siw_rx_fpdu *frx = &qp->rx_tagged; + struct siw_mem *mem; + int bytes, rv; + + if (srx->state == SIW_GET_DATA_START) { + if (!srx->fpdu_part_rem) /* zero length WRITE */ + return 0; + + rv = siw_write_check_ntoh(srx, frx); + if (unlikely(rv)) { + siw_qp_event(qp, IB_EVENT_QP_FATAL); + return rv; + } + } + bytes = min(srx->fpdu_part_rem, srx->skb_new); + + if (frx->first_ddp_seg) { + struct siw_wqe *wqe = rx_wqe(frx); + + rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8); + if (unlikely(!rx_mem(frx))) { + siw_dbg_qp(qp, + "sink stag not found/invalid, stag 0x%08x\n", + srx->ddp_stag); + + siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, + DDP_ETYPE_TAGGED_BUF, + DDP_ECODE_T_INVALID_STAG, 0); + return -EINVAL; + } + wqe->rqe.num_sge = 1; + rx_type(wqe) = SIW_OP_WRITE; + wqe->wr_status = SIW_WR_INPROGRESS; + } + mem = rx_mem(frx); + + /* + * Check if application re-registered memory with different + * key field of STag. + */ + if (unlikely(mem->stag != srx->ddp_stag)) { + siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, + DDP_ETYPE_TAGGED_BUF, + DDP_ECODE_T_INVALID_STAG, 0); + return -EINVAL; + } + rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd, + IB_ACCESS_REMOTE_WRITE, bytes); + if (unlikely(rv)) { + siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, + DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv), + 0); + + siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR); + + return -EINVAL; + } + + if (mem->mem_obj == NULL) + rv = siw_rx_kva(srx, + (void *)(srx->ddp_to + srx->fpdu_part_rcvd), + bytes); + else if (!mem->is_pbl) + rv = siw_rx_umem(srx, mem->umem, + srx->ddp_to + srx->fpdu_part_rcvd, bytes); + else + rv = siw_rx_pbl(srx, &frx->pbl_idx, mem, + srx->ddp_to + srx->fpdu_part_rcvd, bytes); + + if (unlikely(rv != bytes)) { + siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, + DDP_ETYPE_CATASTROPHIC, + DDP_ECODE_CATASTROPHIC, 0); + return -EINVAL; + } + srx->fpdu_part_rem -= rv; + srx->fpdu_part_rcvd += rv; + + if (!srx->fpdu_part_rem) { + srx->ddp_to += srx->fpdu_part_rcvd; + return 0; + } + return -EAGAIN; +} + +/* + * Inbound RREQ's cannot carry user data. + */ +int siw_proc_rreq(struct siw_qp *qp) +{ + struct siw_rx_stream *srx = &qp->rx_stream; + + if (!srx->fpdu_part_rem) + return 0; + + pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp), + be16_to_cpu(srx->hdr.ctrl.mpa_len)); + + return -EPROTO; +} + +/* + * siw_init_rresp: + * + * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE. + * Put it at the tail of the IRQ, if there is another WQE currently in + * transmit processing. If not, make it the current WQE to be processed + * and schedule transmit processing. + * + * Can be called from softirq context and from process + * context (RREAD socket loopback case!) 
+ * + * return value: + * 0: success, + * failure code otherwise + */ + +static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) +{ + struct siw_wqe *tx_work = tx_wqe(qp); + struct siw_sqe *resp; + + uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to), + laddr = be64_to_cpu(srx->hdr.rreq.source_to); + uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size), + lkey = be32_to_cpu(srx->hdr.rreq.source_stag), + rkey = be32_to_cpu(srx->hdr.rreq.sink_stag), + msn = be32_to_cpu(srx->hdr.rreq.ddp_msn); + + int run_sq = 1, rv = 0; + unsigned long flags; + + if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) { + siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, + DDP_ETYPE_UNTAGGED_BUF, + DDP_ECODE_UT_INVALID_MSN_RANGE, 0); + return -EPROTO; + } + spin_lock_irqsave(&qp->sq_lock, flags); + + if (tx_work->wr_status == SIW_WR_IDLE) { + /* + * immediately schedule READ response w/o + * consuming IRQ entry: IRQ must be empty. + */ + tx_work->processed = 0; + tx_work->mem[0] = NULL; + tx_work->wr_status = SIW_WR_QUEUED; + resp = &tx_work->sqe; + } else { + resp = irq_alloc_free(qp); + run_sq = 0; + } + if (likely(resp)) { + resp->opcode = SIW_OP_READ_RESPONSE; + + resp->sge[0].length = length; + resp->sge[0].laddr = laddr; + resp->sge[0].lkey = lkey; + + /* Keep aside message sequence number for potential + * error reporting during Read Response generation. + */ + resp->sge[1].length = msn; + + resp->raddr = raddr; + resp->rkey = rkey; + resp->num_sge = length ? 1 : 0; + + /* RRESP now valid as current TX wqe or placed into IRQ */ + smp_store_mb(resp->flags, SIW_WQE_VALID); + } else { + pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp), + qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size); + + siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, + RDMAP_ETYPE_REMOTE_OPERATION, + RDMAP_ECODE_CATASTROPHIC_STREAM, 0); + rv = -EPROTO; + } + + spin_unlock_irqrestore(&qp->sq_lock, flags); + + if (run_sq) + rv = siw_sq_start(qp); + + return rv; +} + +/* + * Only called at start of Read.Resonse processing. + * Transfer pending Read from tip of ORQ into currrent rx wqe, + * but keep ORQ entry valid until Read.Response processing done. + * No Queue locking needed. 
+ */ +static int siw_orqe_start_rx(struct siw_qp *qp) +{ + struct siw_sqe *orqe; + struct siw_wqe *wqe = NULL; + + /* make sure ORQ indices are current */ + smp_mb(); + + orqe = orq_get_current(qp); + if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) { + /* RRESP is a TAGGED RDMAP operation */ + wqe = rx_wqe(&qp->rx_tagged); + wqe->sqe.id = orqe->id; + wqe->sqe.opcode = orqe->opcode; + wqe->sqe.sge[0].laddr = orqe->sge[0].laddr; + wqe->sqe.sge[0].lkey = orqe->sge[0].lkey; + wqe->sqe.sge[0].length = orqe->sge[0].length; + wqe->sqe.flags = orqe->flags; + wqe->sqe.num_sge = 1; + wqe->bytes = orqe->sge[0].length; + wqe->processed = 0; + wqe->mem[0] = NULL; + /* make sure WQE is completely written before valid */ + smp_wmb(); + wqe->wr_status = SIW_WR_INPROGRESS; + + return 0; + } + return -EPROTO; +} + +/* + * siw_proc_rresp: + * + * Place incoming RRESP data into memory referenced by RREQ WQE + * which is at the tip of the ORQ + * + * Function supports partially received RRESP's (suspending/resuming + * current receive processing) + */ +int siw_proc_rresp(struct siw_qp *qp) +{ + struct siw_rx_stream *srx = &qp->rx_stream; + struct siw_rx_fpdu *frx = &qp->rx_tagged; + struct siw_wqe *wqe = rx_wqe(frx); + struct siw_mem **mem, *mem_p; + struct siw_sge *sge; + int bytes, rv; + + if (frx->first_ddp_seg) { + if (unlikely(wqe->wr_status != SIW_WR_IDLE)) { + pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n", + qp_id(qp), wqe->wr_status, wqe->sqe.opcode); + rv = -EPROTO; + goto error_term; + } + /* + * fetch pending RREQ from orq + */ + rv = siw_orqe_start_rx(qp); + if (rv) { + pr_warn("siw: [QP %u]: ORQ empty at idx %d\n", + qp_id(qp), qp->orq_get % qp->attrs.orq_size); + goto error_term; + } + rv = siw_rresp_check_ntoh(srx, frx); + if (unlikely(rv)) { + siw_qp_event(qp, IB_EVENT_QP_FATAL); + return rv; + } + } else { + if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) { + pr_warn("siw: [QP %u]: resume RRESP: status %d\n", + qp_id(qp), wqe->wr_status); + rv = -EPROTO; + goto error_term; + } + } + if (!srx->fpdu_part_rem) /* zero length RRESPONSE */ + return 0; + + sge = wqe->sqe.sge; /* there is only one */ + mem = &wqe->mem[0]; + + if (!(*mem)) { + /* + * check target memory which resolves memory on first fragment + */ + rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0, + wqe->bytes); + if (unlikely(rv)) { + siw_dbg_qp(qp, "target mem check: %d\n", rv); + wqe->wc_status = SIW_WC_LOC_PROT_ERR; + + siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, + DDP_ETYPE_TAGGED_BUF, + siw_tagged_error(-rv), 0); + + siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR); + + return -EINVAL; + } + } + mem_p = *mem; + + bytes = min(srx->fpdu_part_rem, srx->skb_new); + + if (mem_p->mem_obj == NULL) + rv = siw_rx_kva(srx, (void *)(sge->laddr + wqe->processed), + bytes); + else if (!mem_p->is_pbl) + rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed, + bytes); + else + rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p, + sge->laddr + wqe->processed, bytes); + if (rv != bytes) { + wqe->wc_status = SIW_WC_GENERAL_ERR; + rv = -EINVAL; + goto error_term; + } + srx->fpdu_part_rem -= rv; + srx->fpdu_part_rcvd += rv; + wqe->processed += rv; + + if (!srx->fpdu_part_rem) { + srx->ddp_to += srx->fpdu_part_rcvd; + return 0; + } + return -EAGAIN; + +error_term: + siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC, + DDP_ECODE_CATASTROPHIC, 0); + return rv; +} + +int siw_proc_terminate(struct siw_qp *qp) +{ + struct siw_rx_stream *srx = &qp->rx_stream; + struct sk_buff *skb = srx->skb; + struct iwarp_terminate *term 
= &srx->hdr.terminate; + union iwarp_hdr term_info; + u8 *infop = (u8 *)&term_info; + enum rdma_opcode op; + u16 to_copy = sizeof(struct iwarp_ctrl); + + pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n", + __rdmap_term_layer(term), __rdmap_term_etype(term), + __rdmap_term_ecode(term)); + + if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE || + be32_to_cpu(term->ddp_msn) != + qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] || + be32_to_cpu(term->ddp_mo) != 0) { + pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n", + be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn), + be32_to_cpu(term->ddp_mo)); + return -ECONNRESET; + } + /* + * Receive remaining pieces of TERM if indicated + */ + if (!term->flag_m) + return -ECONNRESET; + + /* Do not take the effort to reassemble a network fragmented + * TERM message + */ + if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged)) + return -ECONNRESET; + + memset(infop, 0, sizeof(term_info)); + + skb_copy_bits(skb, srx->skb_offset, infop, to_copy); + + op = __rdmap_get_opcode(&term_info.ctrl); + if (op >= RDMAP_TERMINATE) + goto out; + + infop += to_copy; + srx->skb_offset += to_copy; + srx->skb_new -= to_copy; + srx->skb_copied += to_copy; + srx->fpdu_part_rcvd += to_copy; + srx->fpdu_part_rem -= to_copy; + + to_copy = iwarp_pktinfo[op].hdr_len - to_copy; + + /* Again, no network fragmented TERM's */ + if (to_copy + MPA_CRC_SIZE > srx->skb_new) + return -ECONNRESET; + + skb_copy_bits(skb, srx->skb_offset, infop, to_copy); + + if (term->flag_r) { + siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n", + op, be16_to_cpu(term_info.ctrl.mpa_len), + term->flag_m ? "valid" : "invalid"); + } else if (term->flag_d) { + siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n", + op, be16_to_cpu(term_info.ctrl.mpa_len), + term->flag_m ? "valid" : "invalid"); + } +out: + srx->skb_new -= to_copy; + srx->skb_offset += to_copy; + srx->skb_copied += to_copy; + srx->fpdu_part_rcvd += to_copy; + srx->fpdu_part_rem -= to_copy; + + return -ECONNRESET; +} + +static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx) +{ + struct sk_buff *skb = srx->skb; + u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad; + __wsum crc_in, crc_own = 0; + + siw_dbg_qp(qp, "expected %d, available %d, pad %u\n", + srx->fpdu_part_rem, srx->skb_new, srx->pad); + + if (srx->skb_new < srx->fpdu_part_rem) + return -EAGAIN; + + skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem); + + if (srx->mpa_crc_hd && srx->pad) + crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad); + + srx->skb_new -= srx->fpdu_part_rem; + srx->skb_offset += srx->fpdu_part_rem; + srx->skb_copied += srx->fpdu_part_rem; + + if (!srx->mpa_crc_hd) + return 0; + + /* + * CRC32 is computed, transmitted and received directly in NBO, + * so there's never a reason to convert byte order. + */ + crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own); + crc_in = (__force __wsum)srx->trailer.crc; + + if (unlikely(crc_in != crc_own)) { + pr_warn("siw: crc error. 
in: %08x, own %08x, op %u\n", + crc_in, crc_own, qp->rx_stream.rdmap_op); + + siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, + LLP_ETYPE_MPA, + LLP_ECODE_RECEIVED_CRC, 0); + return -EINVAL; + } + return 0; +} + +#define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged) + +static int siw_get_hdr(struct siw_rx_stream *srx) +{ + struct sk_buff *skb = srx->skb; + struct siw_qp *qp = rx_qp(srx); + struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl; + struct siw_rx_fpdu *frx; + u8 opcode; + int bytes; + + if (srx->fpdu_part_rcvd < MIN_DDP_HDR) { + /* + * copy a mimimum sized (tagged) DDP frame control part + */ + bytes = min_t(int, srx->skb_new, + MIN_DDP_HDR - srx->fpdu_part_rcvd); + + skb_copy_bits(skb, srx->skb_offset, + (char *)c_hdr + srx->fpdu_part_rcvd, bytes); + + srx->fpdu_part_rcvd += bytes; + + srx->skb_new -= bytes; + srx->skb_offset += bytes; + srx->skb_copied += bytes; + + if (srx->fpdu_part_rcvd < MIN_DDP_HDR) + return -EAGAIN; + + if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) { + enum ddp_etype etype; + enum ddp_ecode ecode; + + pr_warn("siw: received ddp version unsupported %d\n", + __ddp_get_version(c_hdr)); + + if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) { + etype = DDP_ETYPE_TAGGED_BUF; + ecode = DDP_ECODE_T_VERSION; + } else { + etype = DDP_ETYPE_UNTAGGED_BUF; + ecode = DDP_ECODE_UT_VERSION; + } + siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP, + etype, ecode, 0); + return -EINVAL; + } + if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) { + pr_warn("siw: received rdmap version unsupported %d\n", + __rdmap_get_version(c_hdr)); + + siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP, + RDMAP_ETYPE_REMOTE_OPERATION, + RDMAP_ECODE_VERSION, 0); + return -EINVAL; + } + opcode = __rdmap_get_opcode(c_hdr); + + if (opcode > RDMAP_TERMINATE) { + pr_warn("siw: received unknown packet type %u\n", + opcode); + + siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP, + RDMAP_ETYPE_REMOTE_OPERATION, + RDMAP_ECODE_OPCODE, 0); + return -EINVAL; + } + siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode); + } else { + opcode = __rdmap_get_opcode(c_hdr); + } + set_rx_fpdu_context(qp, opcode); + frx = qp->rx_fpdu; + + /* + * Figure out len of current hdr: variable length of + * iwarp hdr may force us to copy hdr information in + * two steps. Only tagged DDP messages are already + * completely received. + */ + if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) { + bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR; + + if (srx->skb_new < bytes) + return -EAGAIN; + + skb_copy_bits(skb, srx->skb_offset, + (char *)c_hdr + srx->fpdu_part_rcvd, bytes); + + srx->fpdu_part_rcvd += bytes; + + srx->skb_new -= bytes; + srx->skb_offset += bytes; + srx->skb_copied += bytes; + } + + /* + * DDP/RDMAP header receive completed. Check if the current + * DDP segment starts a new RDMAP message or continues a previously + * started RDMAP message. + * + * Alternating reception of DDP segments (or FPDUs) from incomplete + * tagged and untagged RDMAP messages is supported, as long as + * the current tagged or untagged message gets eventually completed + * w/o intersection from another message of the same type + * (tagged/untagged). E.g., a WRITE can get intersected by a SEND, + * but not by a READ RESPONSE etc. 
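Editor's note (illustration, not part of the patch): the comment above permits DDP segments of one tagged message (WRITE, READ RESPONSE) and one untagged message (SEND) to interleave on the wire, so the receive path keeps one partial-message context per class and selects it by opcode. A minimal sketch of that selection, assuming the rx_tagged/rx_untagged fields of struct siw_qp:

	static struct siw_rx_fpdu *pick_rx_ctx(struct siw_qp *qp, u8 opcode)
	{
		if (opcode == RDMAP_RDMA_WRITE || opcode == RDMAP_RDMA_READ_RESP)
			return &qp->rx_tagged;		/* tagged buffer model */
		return &qp->rx_untagged;		/* untagged (MSN ordered) model */
	}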
+ */ + if (srx->mpa_crc_hd) { + /* + * Restart CRC computation + */ + crypto_shash_init(srx->mpa_crc_hd); + crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr, + srx->fpdu_part_rcvd); + } + if (frx->more_ddp_segs) { + frx->first_ddp_seg = 0; + if (frx->prev_rdmap_op != opcode) { + pr_warn("siw: packet intersection: %u : %u\n", + frx->prev_rdmap_op, opcode); + /* + * The last inbound RDMA operation of same type + * (tagged or untagged) is left unfinished. + * To complete it in error, make it the current + * operation again, even with the header already + * overwritten. For error handling, only the opcode + * and current rx context are relevant. + */ + set_rx_fpdu_context(qp, frx->prev_rdmap_op); + __rdmap_set_opcode(c_hdr, frx->prev_rdmap_op); + return -EPROTO; + } + } else { + frx->prev_rdmap_op = opcode; + frx->first_ddp_seg = 1; + } + frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1; + + return 0; +} + +static int siw_check_tx_fence(struct siw_qp *qp) +{ + struct siw_wqe *tx_waiting = tx_wqe(qp); + struct siw_sqe *rreq; + int resume_tx = 0, rv = 0; + unsigned long flags; + + spin_lock_irqsave(&qp->orq_lock, flags); + + rreq = orq_get_current(qp); + + /* free current orq entry */ + WRITE_ONCE(rreq->flags, 0); + + if (qp->tx_ctx.orq_fence) { + if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) { + pr_warn("siw: [QP %u]: fence resume: bad status %d\n", + qp_id(qp), tx_waiting->wr_status); + rv = -EPROTO; + goto out; + } + /* resume SQ processing */ + if (tx_waiting->sqe.opcode == SIW_OP_READ || + tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) { + rreq = orq_get_tail(qp); + if (unlikely(!rreq)) { + pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp)); + rv = -EPROTO; + goto out; + } + siw_read_to_orq(rreq, &tx_waiting->sqe); + + qp->orq_put++; + qp->tx_ctx.orq_fence = 0; + resume_tx = 1; + + } else if (siw_orq_empty(qp)) { + qp->tx_ctx.orq_fence = 0; + resume_tx = 1; + } else { + pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n", + qp_id(qp), qp->orq_get, qp->orq_put); + rv = -EPROTO; + } + } + qp->orq_get++; +out: + spin_unlock_irqrestore(&qp->orq_lock, flags); + + if (resume_tx) + rv = siw_sq_start(qp); + + return rv; +} + +/* + * siw_rdmap_complete() + * + * Complete processing of an RDMA message after receiving all + * DDP segmens or ABort processing after encountering error case. + * + * o SENDs + RRESPs will need for completion, + * o RREQs need for READ RESPONSE initialization + * o WRITEs need memory dereferencing + * + * TODO: Failed WRITEs need local error to be surfaced. + */ +static int siw_rdmap_complete(struct siw_qp *qp, int error) +{ + struct siw_rx_stream *srx = &qp->rx_stream; + struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu); + enum siw_wc_status wc_status = wqe->wc_status; + u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl); + int rv = 0; + + switch (opcode) { + case RDMAP_SEND_SE: + case RDMAP_SEND_SE_INVAL: + wqe->rqe.flags |= SIW_WQE_SOLICITED; + /* Fall through */ + + case RDMAP_SEND: + case RDMAP_SEND_INVAL: + if (wqe->wr_status == SIW_WR_IDLE) + break; + + srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++; + + if (error != 0 && wc_status == SIW_WC_SUCCESS) + wc_status = SIW_WC_GENERAL_ERR; + /* + * Handle STag invalidation request + */ + if (wc_status == SIW_WC_SUCCESS && + (opcode == RDMAP_SEND_INVAL || + opcode == RDMAP_SEND_SE_INVAL)) { + rv = siw_invalidate_stag(qp->pd, srx->inval_stag); + if (rv) { + siw_init_terminate( + qp, TERM_ERROR_LAYER_RDMAP, + rv == -EACCES ? 
+ RDMAP_ETYPE_REMOTE_PROTECTION : + RDMAP_ETYPE_REMOTE_OPERATION, + RDMAP_ECODE_CANNOT_INVALIDATE, 0); + + wc_status = SIW_WC_REM_INV_REQ_ERR; + } + rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed, + rv ? 0 : srx->inval_stag, + wc_status); + } else { + rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed, + 0, wc_status); + } + siw_wqe_put_mem(wqe, SIW_OP_RECEIVE); + break; + + case RDMAP_RDMA_READ_RESP: + if (wqe->wr_status == SIW_WR_IDLE) + break; + + if (error != 0) { + if ((srx->state == SIW_GET_HDR && + qp->rx_fpdu->first_ddp_seg) || error == -ENODATA) + /* possible RREQ in ORQ left untouched */ + break; + + if (wc_status == SIW_WC_SUCCESS) + wc_status = SIW_WC_GENERAL_ERR; + } else if (qp->kernel_verbs && + rx_type(wqe) == SIW_OP_READ_LOCAL_INV) { + /* + * Handle any STag invalidation request + */ + rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey); + if (rv) { + siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, + RDMAP_ETYPE_CATASTROPHIC, + RDMAP_ECODE_UNSPECIFIED, 0); + + if (wc_status == SIW_WC_SUCCESS) { + wc_status = SIW_WC_GENERAL_ERR; + error = rv; + } + } + } + /* + * All errors turn the wqe into signalled. + */ + if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0) + rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed, + wc_status); + siw_wqe_put_mem(wqe, SIW_OP_READ); + + if (!error) + rv = siw_check_tx_fence(qp); + else + /* Disable current ORQ eleement */ + WRITE_ONCE(orq_get_current(qp)->flags, 0); + break; + + case RDMAP_RDMA_READ_REQ: + if (!error) { + rv = siw_init_rresp(qp, srx); + srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++; + } + break; + + case RDMAP_RDMA_WRITE: + if (wqe->wr_status == SIW_WR_IDLE) + break; + + /* + * Free References from memory object if + * attached to receive context (inbound WRITE). + * While a zero-length WRITE is allowed, + * no memory reference got created. + */ + if (rx_mem(&qp->rx_tagged)) { + siw_mem_put(rx_mem(&qp->rx_tagged)); + rx_mem(&qp->rx_tagged) = NULL; + } + break; + + default: + break; + } + wqe->wr_status = SIW_WR_IDLE; + + return rv; +} + +/* + * siw_tcp_rx_data() + * + * Main routine to consume inbound TCP payload + * + * @rd_desc: read descriptor + * @skb: socket buffer + * @off: offset in skb + * @len: skb->len - offset : payload in skb + */ +int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb, + unsigned int off, size_t len) +{ + struct siw_qp *qp = rd_desc->arg.data; + struct siw_rx_stream *srx = &qp->rx_stream; + int rv; + + srx->skb = skb; + srx->skb_new = skb->len - off; + srx->skb_offset = off; + srx->skb_copied = 0; + + siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new); + + while (srx->skb_new) { + int run_completion = 1; + + if (unlikely(srx->rx_suspend)) { + /* Do not process any more data */ + srx->skb_copied += srx->skb_new; + break; + } + switch (srx->state) { + case SIW_GET_HDR: + rv = siw_get_hdr(srx); + if (!rv) { + srx->fpdu_part_rem = + be16_to_cpu(srx->hdr.ctrl.mpa_len) - + srx->fpdu_part_rcvd + MPA_HDR_SIZE; + + if (srx->fpdu_part_rem) + srx->pad = -srx->fpdu_part_rem & 0x3; + else + srx->pad = 0; + + srx->state = SIW_GET_DATA_START; + srx->fpdu_part_rcvd = 0; + } + break; + + case SIW_GET_DATA_MORE: + /* + * Another data fragment of the same DDP segment. + * Setting first_ddp_seg = 0 avoids repeating + * initializations that shall occur only once per + * DDP segment. + */ + qp->rx_fpdu->first_ddp_seg = 0; + /* Fall through */ + + case SIW_GET_DATA_START: + /* + * Headers will be checked by the opcode-specific + * data receive function below. 
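Editor's note (illustration, not part of the patch): the switch statement here implements a small per-FPDU state machine; stripped of partial-read and error handling it cycles header, payload, trailer for every FPDU:

	enum rx_phase { GET_HDR, GET_DATA, GET_TRAILER };

	static enum rx_phase next_phase(enum rx_phase p)
	{
		switch (p) {
		case GET_HDR:	return GET_DATA;	/* DDP/RDMAP header complete */
		case GET_DATA:	return GET_TRAILER;	/* payload complete, read pad + CRC */
		default:	return GET_HDR;		/* trailer checked, next FPDU */
		}
	}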
+ */ + rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp); + if (!rv) { + int mpa_len = + be16_to_cpu(srx->hdr.ctrl.mpa_len) + + MPA_HDR_SIZE; + + srx->fpdu_part_rem = (-mpa_len & 0x3) + + MPA_CRC_SIZE; + srx->fpdu_part_rcvd = 0; + srx->state = SIW_GET_TRAILER; + } else { + if (unlikely(rv == -ECONNRESET)) + run_completion = 0; + else + srx->state = SIW_GET_DATA_MORE; + } + break; + + case SIW_GET_TRAILER: + /* + * read CRC + any padding + */ + rv = siw_get_trailer(qp, srx); + if (likely(!rv)) { + /* + * FPDU completed. + * complete RDMAP message if last fragment + */ + srx->state = SIW_GET_HDR; + srx->fpdu_part_rcvd = 0; + + if (!(srx->hdr.ctrl.ddp_rdmap_ctrl & + DDP_FLAG_LAST)) + /* more frags */ + break; + + rv = siw_rdmap_complete(qp, 0); + run_completion = 0; + } + break; + + default: + pr_warn("QP[%u]: RX out of state\n", qp_id(qp)); + rv = -EPROTO; + run_completion = 0; + } + if (unlikely(rv != 0 && rv != -EAGAIN)) { + if ((srx->state > SIW_GET_HDR || + qp->rx_fpdu->more_ddp_segs) && run_completion) + siw_rdmap_complete(qp, rv); + + siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv, + srx->state); + + siw_qp_cm_drop(qp, 1); + + break; + } + if (rv) { + siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n", + srx->state, srx->fpdu_part_rem); + break; + } + } + return srx->skb_copied; +} diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c new file mode 100644 index 000000000000..43020d2040fc --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -0,0 +1,1269 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/net.h> +#include <linux/scatterlist.h> +#include <linux/highmem.h> +#include <net/tcp.h> + +#include <rdma/iw_cm.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> + +#include "siw.h" +#include "siw_verbs.h" +#include "siw_mem.h" + +#define MAX_HDR_INLINE \ + (((uint32_t)(sizeof(struct siw_rreq_pkt) - \ + sizeof(struct iwarp_send))) & 0xF8) + +static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) +{ + struct siw_pbl *pbl = mem->pbl; + u64 offset = addr - mem->va; + u64 paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); + + if (paddr) + return virt_to_page(paddr); + + return NULL; +} + +/* + * Copy short payload at provided destination payload address + */ +static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) +{ + struct siw_wqe *wqe = &c_tx->wqe_active; + struct siw_sge *sge = &wqe->sqe.sge[0]; + u32 bytes = sge->length; + + if (bytes > MAX_HDR_INLINE || wqe->sqe.num_sge != 1) + return MAX_HDR_INLINE + 1; + + if (!bytes) + return 0; + + if (tx_flags(wqe) & SIW_WQE_INLINE) { + memcpy((void *)paddr, &wqe->sqe.sge[1], bytes); + } else { + struct siw_mem *mem = wqe->mem[0]; + + if (!mem->mem_obj) { + /* Kernel client using kva */ + memcpy((void *)paddr, (void *)sge->laddr, bytes); + } else if (c_tx->in_syscall) { + if (copy_from_user((void *)paddr, + (const void __user *)sge->laddr, + bytes)) + return -EFAULT; + } else { + unsigned int off = sge->laddr & ~PAGE_MASK; + struct page *p; + char *buffer; + int pbl_idx = 0; + + if (!mem->is_pbl) + p = siw_get_upage(mem->umem, sge->laddr); + else + p = siw_get_pblpage(mem, sge->laddr, &pbl_idx); + + if (unlikely(!p)) + return -EFAULT; + + buffer = kmap_atomic(p); + + if (likely(PAGE_SIZE - off >= bytes)) { + memcpy((void *)paddr, buffer + off, bytes); + 
kunmap_atomic(buffer); + } else { + unsigned long part = bytes - (PAGE_SIZE - off); + + memcpy((void *)paddr, buffer + off, part); + kunmap_atomic(buffer); + + if (!mem->is_pbl) + p = siw_get_upage(mem->umem, + sge->laddr + part); + else + p = siw_get_pblpage(mem, + sge->laddr + part, + &pbl_idx); + if (unlikely(!p)) + return -EFAULT; + + buffer = kmap_atomic(p); + memcpy((void *)(paddr + part), buffer, + bytes - part); + kunmap_atomic(buffer); + } + } + } + return (int)bytes; +} + +#define PKT_FRAGMENTED 1 +#define PKT_COMPLETE 0 + +/* + * siw_qp_prepare_tx() + * + * Prepare tx state for sending out one fpdu. Builds complete pkt + * if no user data or only immediate data are present. + * + * returns PKT_COMPLETE if complete pkt built, PKT_FRAGMENTED otherwise. + */ +static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) +{ + struct siw_wqe *wqe = &c_tx->wqe_active; + char *crc = NULL; + int data = 0; + + switch (tx_type(wqe)) { + case SIW_OP_READ: + case SIW_OP_READ_LOCAL_INV: + memcpy(&c_tx->pkt.ctrl, + &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl, + sizeof(struct iwarp_ctrl)); + + c_tx->pkt.rreq.rsvd = 0; + c_tx->pkt.rreq.ddp_qn = htonl(RDMAP_UNTAGGED_QN_RDMA_READ); + c_tx->pkt.rreq.ddp_msn = + htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]); + c_tx->pkt.rreq.ddp_mo = 0; + c_tx->pkt.rreq.sink_stag = htonl(wqe->sqe.sge[0].lkey); + c_tx->pkt.rreq.sink_to = + cpu_to_be64(wqe->sqe.sge[0].laddr); + c_tx->pkt.rreq.source_stag = htonl(wqe->sqe.rkey); + c_tx->pkt.rreq.source_to = cpu_to_be64(wqe->sqe.raddr); + c_tx->pkt.rreq.read_size = htonl(wqe->sqe.sge[0].length); + + c_tx->ctrl_len = sizeof(struct iwarp_rdma_rreq); + crc = (char *)&c_tx->pkt.rreq_pkt.crc; + break; + + case SIW_OP_SEND: + if (tx_flags(wqe) & SIW_WQE_SOLICITED) + memcpy(&c_tx->pkt.ctrl, + &iwarp_pktinfo[RDMAP_SEND_SE].ctrl, + sizeof(struct iwarp_ctrl)); + else + memcpy(&c_tx->pkt.ctrl, &iwarp_pktinfo[RDMAP_SEND].ctrl, + sizeof(struct iwarp_ctrl)); + + c_tx->pkt.send.ddp_qn = RDMAP_UNTAGGED_QN_SEND; + c_tx->pkt.send.ddp_msn = + htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]); + c_tx->pkt.send.ddp_mo = 0; + + c_tx->pkt.send_inv.inval_stag = 0; + + c_tx->ctrl_len = sizeof(struct iwarp_send); + + crc = (char *)&c_tx->pkt.send_pkt.crc; + data = siw_try_1seg(c_tx, (u64)crc); + break; + + case SIW_OP_SEND_REMOTE_INV: + if (tx_flags(wqe) & SIW_WQE_SOLICITED) + memcpy(&c_tx->pkt.ctrl, + &iwarp_pktinfo[RDMAP_SEND_SE_INVAL].ctrl, + sizeof(struct iwarp_ctrl)); + else + memcpy(&c_tx->pkt.ctrl, + &iwarp_pktinfo[RDMAP_SEND_INVAL].ctrl, + sizeof(struct iwarp_ctrl)); + + c_tx->pkt.send.ddp_qn = RDMAP_UNTAGGED_QN_SEND; + c_tx->pkt.send.ddp_msn = + htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]); + c_tx->pkt.send.ddp_mo = 0; + + c_tx->pkt.send_inv.inval_stag = cpu_to_be32(wqe->sqe.rkey); + + c_tx->ctrl_len = sizeof(struct iwarp_send_inv); + + crc = (char *)&c_tx->pkt.send_pkt.crc; + data = siw_try_1seg(c_tx, (u64)crc); + break; + + case SIW_OP_WRITE: + memcpy(&c_tx->pkt.ctrl, &iwarp_pktinfo[RDMAP_RDMA_WRITE].ctrl, + sizeof(struct iwarp_ctrl)); + + c_tx->pkt.rwrite.sink_stag = htonl(wqe->sqe.rkey); + c_tx->pkt.rwrite.sink_to = cpu_to_be64(wqe->sqe.raddr); + c_tx->ctrl_len = sizeof(struct iwarp_rdma_write); + + crc = (char *)&c_tx->pkt.write_pkt.crc; + data = siw_try_1seg(c_tx, (u64)crc); + break; + + case SIW_OP_READ_RESPONSE: + memcpy(&c_tx->pkt.ctrl, + &iwarp_pktinfo[RDMAP_RDMA_READ_RESP].ctrl, + sizeof(struct iwarp_ctrl)); + + /* NBO */ + c_tx->pkt.rresp.sink_stag = cpu_to_be32(wqe->sqe.rkey); + c_tx->pkt.rresp.sink_to = 
cpu_to_be64(wqe->sqe.raddr); + + c_tx->ctrl_len = sizeof(struct iwarp_rdma_rresp); + + crc = (char *)&c_tx->pkt.write_pkt.crc; + data = siw_try_1seg(c_tx, (u64)crc); + break; + + default: + siw_dbg_qp(tx_qp(c_tx), "stale wqe type %d\n", tx_type(wqe)); + return -EOPNOTSUPP; + } + if (unlikely(data < 0)) + return data; + + c_tx->ctrl_sent = 0; + + if (data <= MAX_HDR_INLINE) { + if (data) { + wqe->processed = data; + + c_tx->pkt.ctrl.mpa_len = + htons(c_tx->ctrl_len + data - MPA_HDR_SIZE); + + /* Add pad, if needed */ + data += -(int)data & 0x3; + /* advance CRC location after payload */ + crc += data; + c_tx->ctrl_len += data; + + if (!(c_tx->pkt.ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)) + c_tx->pkt.c_untagged.ddp_mo = 0; + else + c_tx->pkt.c_tagged.ddp_to = + cpu_to_be64(wqe->sqe.raddr); + } + + *(u32 *)crc = 0; + /* + * Do complete CRC if enabled and short packet + */ + if (c_tx->mpa_crc_hd) { + crypto_shash_init(c_tx->mpa_crc_hd); + if (crypto_shash_update(c_tx->mpa_crc_hd, + (u8 *)&c_tx->pkt, + c_tx->ctrl_len)) + return -EINVAL; + crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)crc); + } + c_tx->ctrl_len += MPA_CRC_SIZE; + + return PKT_COMPLETE; + } + c_tx->ctrl_len += MPA_CRC_SIZE; + c_tx->sge_idx = 0; + c_tx->sge_off = 0; + c_tx->pbl_idx = 0; + + /* + * Allow direct sending out of user buffer if WR is non signalled + * and payload is over threshold. + * Per RDMA verbs, the application should not change the send buffer + * until the work completed. In iWarp, work completion is only + * local delivery to TCP. TCP may reuse the buffer for + * retransmission. Changing unsent data also breaks the CRC, + * if applied. + */ + if (c_tx->zcopy_tx && wqe->bytes >= SENDPAGE_THRESH && + !(tx_flags(wqe) & SIW_WQE_SIGNALLED)) + c_tx->use_sendpage = 1; + else + c_tx->use_sendpage = 0; + + return PKT_FRAGMENTED; +} + +/* + * Send out one complete control type FPDU, or header of FPDU carrying + * data. Used for fixed sized packets like Read.Requests or zero length + * SENDs, WRITEs, READ.Responses, or header only. + */ +static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s, + int flags) +{ + struct msghdr msg = { .msg_flags = flags }; + struct kvec iov = { .iov_base = + (char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent, + .iov_len = c_tx->ctrl_len - c_tx->ctrl_sent }; + + int rv = kernel_sendmsg(s, &msg, &iov, 1, + c_tx->ctrl_len - c_tx->ctrl_sent); + + if (rv >= 0) { + c_tx->ctrl_sent += rv; + + if (c_tx->ctrl_sent == c_tx->ctrl_len) + rv = 0; + else + rv = -EAGAIN; + } + return rv; +} + +/* + * 0copy TCP transmit interface: Use do_tcp_sendpages. + * + * Using sendpage to push page by page appears to be less efficient + * than using sendmsg, even if data are copied. + * + * A general performance limitation might be the extra four bytes + * trailer checksum segment to be pushed after user data. 
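Editor's note (illustration, not part of the patch): the framing built by siw_qp_prepare_tx() above follows MPA (RFC 5044): a 2-byte ULPDU length, the DDP/RDMAP header plus payload, 0 to 3 zero pad bytes up to a 4-byte boundary, and a trailing CRC32c. The pad computations of the form "-len & 0x3" used throughout reduce to:

	static inline unsigned int mpa_pad(unsigned int len)
	{
		return -len & 3;	/* 5 -> 3, 6 -> 2, 7 -> 1, 8 -> 0 */
	}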
+ */ +static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset, + size_t size) +{ + struct sock *sk = s->sk; + int i = 0, rv = 0, sent = 0, + flags = MSG_MORE | MSG_DONTWAIT | MSG_SENDPAGE_NOTLAST; + + while (size) { + size_t bytes = min_t(size_t, PAGE_SIZE - offset, size); + + if (size + offset <= PAGE_SIZE) + flags = MSG_MORE | MSG_DONTWAIT; + + tcp_rate_check_app_limited(sk); +try_page_again: + lock_sock(sk); + rv = do_tcp_sendpages(sk, page[i], offset, bytes, flags); + release_sock(sk); + + if (rv > 0) { + size -= rv; + sent += rv; + if (rv != bytes) { + offset += rv; + bytes -= rv; + goto try_page_again; + } + offset = 0; + } else { + if (rv == -EAGAIN || rv == 0) + break; + return rv; + } + i++; + } + return sent; +} + +/* + * siw_0copy_tx() + * + * Pushes list of pages to TCP socket. If pages from multiple + * SGE's, all referenced pages of each SGE are pushed in one + * shot. + */ +static int siw_0copy_tx(struct socket *s, struct page **page, + struct siw_sge *sge, unsigned int offset, + unsigned int size) +{ + int i = 0, sent = 0, rv; + int sge_bytes = min(sge->length - offset, size); + + offset = (sge->laddr + offset) & ~PAGE_MASK; + + while (sent != size) { + rv = siw_tcp_sendpages(s, &page[i], offset, sge_bytes); + if (rv >= 0) { + sent += rv; + if (size == sent || sge_bytes > rv) + break; + + i += PAGE_ALIGN(sge_bytes + offset) >> PAGE_SHIFT; + sge++; + sge_bytes = min(sge->length, size - sent); + offset = sge->laddr & ~PAGE_MASK; + } else { + sent = rv; + break; + } + } + return sent; +} + +#define MAX_TRAILER (MPA_CRC_SIZE + 4) + +static void siw_unmap_pages(struct page **pages, int hdr_len, int num_maps) +{ + if (hdr_len) { + ++pages; + --num_maps; + } + while (num_maps-- > 0) { + kunmap(*pages); + pages++; + } +} + +/* + * siw_tx_hdt() tries to push a complete packet to TCP where all + * packet fragments are referenced by the elements of one iovec. + * For the data portion, each involved page must be referenced by + * one extra element. All sge's data can be non-aligned to page + * boundaries. Two more elements are referencing iWARP header + * and trailer: + * MAX_ARRAY = 64KB/PAGE_SIZE + 1 + (2 * (SIW_MAX_SGE - 1) + HDR + TRL + */ +#define MAX_ARRAY ((0xffff / PAGE_SIZE) + 1 + (2 * (SIW_MAX_SGE - 1) + 2)) + +/* + * Write out iov referencing hdr, data and trailer of current FPDU. 
+ * Update transmit state dependent on write return status + */ +static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) +{ + struct siw_wqe *wqe = &c_tx->wqe_active; + struct siw_sge *sge = &wqe->sqe.sge[c_tx->sge_idx]; + struct kvec iov[MAX_ARRAY]; + struct page *page_array[MAX_ARRAY]; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR }; + + int seg = 0, do_crc = c_tx->do_crc, is_kva = 0, rv; + unsigned int data_len = c_tx->bytes_unsent, hdr_len = 0, trl_len = 0, + sge_off = c_tx->sge_off, sge_idx = c_tx->sge_idx, + pbl_idx = c_tx->pbl_idx; + + if (c_tx->state == SIW_SEND_HDR) { + if (c_tx->use_sendpage) { + rv = siw_tx_ctrl(c_tx, s, MSG_DONTWAIT | MSG_MORE); + if (rv) + goto done; + + c_tx->state = SIW_SEND_DATA; + } else { + iov[0].iov_base = + (char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent; + iov[0].iov_len = hdr_len = + c_tx->ctrl_len - c_tx->ctrl_sent; + seg = 1; + } + } + + wqe->processed += data_len; + + while (data_len) { /* walk the list of SGE's */ + unsigned int sge_len = min(sge->length - sge_off, data_len); + unsigned int fp_off = (sge->laddr + sge_off) & ~PAGE_MASK; + struct siw_mem *mem; + + if (!(tx_flags(wqe) & SIW_WQE_INLINE)) { + mem = wqe->mem[sge_idx]; + if (!mem->mem_obj) + is_kva = 1; + } else { + is_kva = 1; + } + if (is_kva && !c_tx->use_sendpage) { + /* + * tx from kernel virtual address: either inline data + * or memory region with assigned kernel buffer + */ + iov[seg].iov_base = (void *)(sge->laddr + sge_off); + iov[seg].iov_len = sge_len; + + if (do_crc) + crypto_shash_update(c_tx->mpa_crc_hd, + iov[seg].iov_base, + sge_len); + sge_off += sge_len; + data_len -= sge_len; + seg++; + goto sge_done; + } + + while (sge_len) { + size_t plen = min((int)PAGE_SIZE - fp_off, sge_len); + + if (!is_kva) { + struct page *p; + + if (mem->is_pbl) + p = siw_get_pblpage( + mem, sge->laddr + sge_off, + &pbl_idx); + else + p = siw_get_upage(mem->umem, + sge->laddr + sge_off); + if (unlikely(!p)) { + if (hdr_len) + seg--; + if (!c_tx->use_sendpage && seg) { + siw_unmap_pages(page_array, + hdr_len, seg); + } + wqe->processed -= c_tx->bytes_unsent; + rv = -EFAULT; + goto done_crc; + } + page_array[seg] = p; + + if (!c_tx->use_sendpage) { + iov[seg].iov_base = kmap(p) + fp_off; + iov[seg].iov_len = plen; + if (do_crc) + crypto_shash_update( + c_tx->mpa_crc_hd, + iov[seg].iov_base, + plen); + } else if (do_crc) + crypto_shash_update( + c_tx->mpa_crc_hd, + page_address(p) + fp_off, + plen); + } else { + u64 pa = ((sge->laddr + sge_off) & PAGE_MASK); + + page_array[seg] = virt_to_page(pa); + if (do_crc) + crypto_shash_update( + c_tx->mpa_crc_hd, + (void *)(sge->laddr + sge_off), + plen); + } + + sge_len -= plen; + sge_off += plen; + data_len -= plen; + fp_off = 0; + + if (++seg > (int)MAX_ARRAY) { + siw_dbg_qp(tx_qp(c_tx), "to many fragments\n"); + if (!is_kva && !c_tx->use_sendpage) { + siw_unmap_pages(page_array, hdr_len, + seg - 1); + } + wqe->processed -= c_tx->bytes_unsent; + rv = -EMSGSIZE; + goto done_crc; + } + } +sge_done: + /* Update SGE variables at end of SGE */ + if (sge_off == sge->length && + (data_len != 0 || wqe->processed < wqe->bytes)) { + sge_idx++; + sge++; + sge_off = 0; + } + } + /* trailer */ + if (likely(c_tx->state != SIW_SEND_TRAILER)) { + iov[seg].iov_base = &c_tx->trailer.pad[4 - c_tx->pad]; + iov[seg].iov_len = trl_len = MAX_TRAILER - (4 - c_tx->pad); + } else { + iov[seg].iov_base = &c_tx->trailer.pad[c_tx->ctrl_sent]; + iov[seg].iov_len = trl_len = MAX_TRAILER - c_tx->ctrl_sent; + } + + if (c_tx->pad) { + *(u32 *)c_tx->trailer.pad = 0; 
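Editor's note (not part of the patch): the trailer assembled here is up to three pad bytes followed by the 4-byte CRC; the pad bytes are zeroed first because they go out on the wire and are fed into the CRC computation immediately below.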
+ if (do_crc) + crypto_shash_update(c_tx->mpa_crc_hd, + (u8 *)&c_tx->trailer.crc - c_tx->pad, + c_tx->pad); + } + if (!c_tx->mpa_crc_hd) + c_tx->trailer.crc = 0; + else if (do_crc) + crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)&c_tx->trailer.crc); + + data_len = c_tx->bytes_unsent; + + if (c_tx->use_sendpage) { + rv = siw_0copy_tx(s, page_array, &wqe->sqe.sge[c_tx->sge_idx], + c_tx->sge_off, data_len); + if (rv == data_len) { + rv = kernel_sendmsg(s, &msg, &iov[seg], 1, trl_len); + if (rv > 0) + rv += data_len; + else + rv = data_len; + } + } else { + rv = kernel_sendmsg(s, &msg, iov, seg + 1, + hdr_len + data_len + trl_len); + if (!is_kva) + siw_unmap_pages(page_array, hdr_len, seg); + } + if (rv < (int)hdr_len) { + /* Not even complete hdr pushed or negative rv */ + wqe->processed -= data_len; + if (rv >= 0) { + c_tx->ctrl_sent += rv; + rv = -EAGAIN; + } + goto done_crc; + } + rv -= hdr_len; + + if (rv >= (int)data_len) { + /* all user data pushed to TCP or no data to push */ + if (data_len > 0 && wqe->processed < wqe->bytes) { + /* Save the current state for next tx */ + c_tx->sge_idx = sge_idx; + c_tx->sge_off = sge_off; + c_tx->pbl_idx = pbl_idx; + } + rv -= data_len; + + if (rv == trl_len) /* all pushed */ + rv = 0; + else { + c_tx->state = SIW_SEND_TRAILER; + c_tx->ctrl_len = MAX_TRAILER; + c_tx->ctrl_sent = rv + 4 - c_tx->pad; + c_tx->bytes_unsent = 0; + rv = -EAGAIN; + } + + } else if (data_len > 0) { + /* Maybe some user data pushed to TCP */ + c_tx->state = SIW_SEND_DATA; + wqe->processed -= data_len - rv; + + if (rv) { + /* + * Some bytes out. Recompute tx state based + * on old state and bytes pushed + */ + unsigned int sge_unsent; + + c_tx->bytes_unsent -= rv; + sge = &wqe->sqe.sge[c_tx->sge_idx]; + sge_unsent = sge->length - c_tx->sge_off; + + while (sge_unsent <= rv) { + rv -= sge_unsent; + c_tx->sge_idx++; + c_tx->sge_off = 0; + sge++; + sge_unsent = sge->length; + } + c_tx->sge_off += rv; + } + rv = -EAGAIN; + } +done_crc: + c_tx->do_crc = 0; +done: + return rv; +} + +static void siw_update_tcpseg(struct siw_iwarp_tx *c_tx, + struct socket *s) +{ + struct tcp_sock *tp = tcp_sk(s->sk); + + if (tp->gso_segs) { + if (c_tx->gso_seg_limit == 0) + c_tx->tcp_seglen = tp->mss_cache * tp->gso_segs; + else + c_tx->tcp_seglen = + tp->mss_cache * + min_t(u16, c_tx->gso_seg_limit, tp->gso_segs); + } else { + c_tx->tcp_seglen = tp->mss_cache; + } + /* Loopback may give odd numbers */ + c_tx->tcp_seglen &= 0xfffffff8; +} + +/* + * siw_prepare_fpdu() + * + * Prepares transmit context to send out one FPDU if FPDU will contain + * user data and user data are not immediate data. + * Computes maximum FPDU length to fill up TCP MSS if possible. 
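Editor's note (worked example with assumed numbers, not from the patch): siw_update_tcpseg() above derives the FPDU budget from the TCP MSS, e.g. with mss_cache = 1460 and gso_seg_limit = 1 the value is rounded down to a multiple of 8, giving tcp_seglen = 1456. siw_prepare_fpdu() below then trims the DDP payload so that header + payload + CRC fits: assuming a 16-byte WRITE header and the 4-byte CRC, at most 1456 - 20 = 1436 payload bytes go into this FPDU, and DDP_FLAG_LAST stays cleared until the final segment of the message.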
+ * + * @qp: QP from which to transmit + * @wqe: Current WQE causing transmission + * + * TODO: Take into account real available sendspace on socket + * to avoid header misalignment due to send pausing within + * fpdu transmission + */ +static void siw_prepare_fpdu(struct siw_qp *qp, struct siw_wqe *wqe) +{ + struct siw_iwarp_tx *c_tx = &qp->tx_ctx; + int data_len; + + c_tx->ctrl_len = + iwarp_pktinfo[__rdmap_get_opcode(&c_tx->pkt.ctrl)].hdr_len; + c_tx->ctrl_sent = 0; + + /* + * Update target buffer offset if any + */ + if (!(c_tx->pkt.ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)) + /* Untagged message */ + c_tx->pkt.c_untagged.ddp_mo = cpu_to_be32(wqe->processed); + else /* Tagged message */ + c_tx->pkt.c_tagged.ddp_to = + cpu_to_be64(wqe->sqe.raddr + wqe->processed); + + data_len = wqe->bytes - wqe->processed; + if (data_len + c_tx->ctrl_len + MPA_CRC_SIZE > c_tx->tcp_seglen) { + /* Trim DDP payload to fit into current TCP segment */ + data_len = c_tx->tcp_seglen - (c_tx->ctrl_len + MPA_CRC_SIZE); + c_tx->pkt.ctrl.ddp_rdmap_ctrl &= ~DDP_FLAG_LAST; + c_tx->pad = 0; + } else { + c_tx->pkt.ctrl.ddp_rdmap_ctrl |= DDP_FLAG_LAST; + c_tx->pad = -data_len & 0x3; + } + c_tx->bytes_unsent = data_len; + + c_tx->pkt.ctrl.mpa_len = + htons(c_tx->ctrl_len + data_len - MPA_HDR_SIZE); + + /* + * Init MPA CRC computation + */ + if (c_tx->mpa_crc_hd) { + crypto_shash_init(c_tx->mpa_crc_hd); + crypto_shash_update(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt, + c_tx->ctrl_len); + c_tx->do_crc = 1; + } +} + +/* + * siw_check_sgl_tx() + * + * Check permissions for a list of SGE's (SGL). + * A successful check will have all memory referenced + * for transmission resolved and assigned to the WQE. + * + * @pd: Protection Domain SGL should belong to + * @wqe: WQE to be checked + * @perms: requested access permissions + * + */ + +static int siw_check_sgl_tx(struct ib_pd *pd, struct siw_wqe *wqe, + enum ib_access_flags perms) +{ + struct siw_sge *sge = &wqe->sqe.sge[0]; + int i, len, num_sge = wqe->sqe.num_sge; + + if (unlikely(num_sge > SIW_MAX_SGE)) + return -EINVAL; + + for (i = 0, len = 0; num_sge; num_sge--, i++, sge++) { + /* + * rdma verbs: do not check stag for a zero length sge + */ + if (sge->length) { + int rv = siw_check_sge(pd, sge, &wqe->mem[i], perms, 0, + sge->length); + + if (unlikely(rv != E_ACCESS_OK)) + return rv; + } + len += sge->length; + } + return len; +} + +/* + * siw_qp_sq_proc_tx() + * + * Process one WQE which needs transmission on the wire. + */ +static int siw_qp_sq_proc_tx(struct siw_qp *qp, struct siw_wqe *wqe) +{ + struct siw_iwarp_tx *c_tx = &qp->tx_ctx; + struct socket *s = qp->attrs.sk; + int rv = 0, burst_len = qp->tx_ctx.burst; + enum rdmap_ecode ecode = RDMAP_ECODE_CATASTROPHIC_STREAM; + + if (unlikely(wqe->wr_status == SIW_WR_IDLE)) + return 0; + + if (!burst_len) + burst_len = SQ_USER_MAXBURST; + + if (wqe->wr_status == SIW_WR_QUEUED) { + if (!(wqe->sqe.flags & SIW_WQE_INLINE)) { + if (tx_type(wqe) == SIW_OP_READ_RESPONSE) + wqe->sqe.num_sge = 1; + + if (tx_type(wqe) != SIW_OP_READ && + tx_type(wqe) != SIW_OP_READ_LOCAL_INV) { + /* + * Reference memory to be tx'd w/o checking + * access for LOCAL_READ permission, since + * not defined in RDMA core. 
+ */ + rv = siw_check_sgl_tx(qp->pd, wqe, 0); + if (rv < 0) { + if (tx_type(wqe) == + SIW_OP_READ_RESPONSE) + ecode = siw_rdmap_error(-rv); + rv = -EINVAL; + goto tx_error; + } + wqe->bytes = rv; + } else { + wqe->bytes = 0; + } + } else { + wqe->bytes = wqe->sqe.sge[0].length; + if (!qp->kernel_verbs) { + if (wqe->bytes > SIW_MAX_INLINE) { + rv = -EINVAL; + goto tx_error; + } + wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1]; + } + } + wqe->wr_status = SIW_WR_INPROGRESS; + wqe->processed = 0; + + siw_update_tcpseg(c_tx, s); + + rv = siw_qp_prepare_tx(c_tx); + if (rv == PKT_FRAGMENTED) { + c_tx->state = SIW_SEND_HDR; + siw_prepare_fpdu(qp, wqe); + } else if (rv == PKT_COMPLETE) { + c_tx->state = SIW_SEND_SHORT_FPDU; + } else { + goto tx_error; + } + } + +next_segment: + siw_dbg_qp(qp, "wr type %d, state %d, data %u, sent %u, id %llx\n", + tx_type(wqe), wqe->wr_status, wqe->bytes, wqe->processed, + wqe->sqe.id); + + if (--burst_len == 0) { + rv = -EINPROGRESS; + goto tx_done; + } + if (c_tx->state == SIW_SEND_SHORT_FPDU) { + enum siw_opcode tx_type = tx_type(wqe); + unsigned int msg_flags; + + if (siw_sq_empty(qp) || !siw_tcp_nagle || burst_len == 1) + /* + * End current TCP segment, if SQ runs empty, + * or siw_tcp_nagle is not set, or we bail out + * soon due to no burst credit left. + */ + msg_flags = MSG_DONTWAIT; + else + msg_flags = MSG_DONTWAIT | MSG_MORE; + + rv = siw_tx_ctrl(c_tx, s, msg_flags); + + if (!rv && tx_type != SIW_OP_READ && + tx_type != SIW_OP_READ_LOCAL_INV) + wqe->processed = wqe->bytes; + + goto tx_done; + + } else { + rv = siw_tx_hdt(c_tx, s); + } + if (!rv) { + /* + * One segment sent. Processing completed if last + * segment, Do next segment otherwise. + */ + if (unlikely(c_tx->tx_suspend)) { + /* + * Verbs, 6.4.: Try stopping sending after a full + * DDP segment if the connection goes down + * (== peer halfclose) + */ + rv = -ECONNABORTED; + goto tx_done; + } + if (c_tx->pkt.ctrl.ddp_rdmap_ctrl & DDP_FLAG_LAST) { + siw_dbg_qp(qp, "WQE completed\n"); + goto tx_done; + } + c_tx->state = SIW_SEND_HDR; + + siw_update_tcpseg(c_tx, s); + + siw_prepare_fpdu(qp, wqe); + goto next_segment; + } +tx_done: + qp->tx_ctx.burst = burst_len; + return rv; + +tx_error: + if (ecode != RDMAP_ECODE_CATASTROPHIC_STREAM) + siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, + RDMAP_ETYPE_REMOTE_PROTECTION, ecode, 1); + else + siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, + RDMAP_ETYPE_CATASTROPHIC, + RDMAP_ECODE_UNSPECIFIED, 1); + return rv; +} + +static int siw_fastreg_mr(struct ib_pd *pd, struct siw_sqe *sqe) +{ + struct ib_mr *base_mr = (struct ib_mr *)sqe->base_mr; + struct siw_device *sdev = to_siw_dev(pd->device); + struct siw_mem *mem = siw_mem_id2obj(sdev, sqe->rkey >> 8); + int rv = 0; + + siw_dbg_pd(pd, "STag 0x%08x\n", sqe->rkey); + + if (unlikely(!mem || !base_mr)) { + pr_warn("siw: fastreg: STag 0x%08x unknown\n", sqe->rkey); + return -EINVAL; + } + if (unlikely(base_mr->rkey >> 8 != sqe->rkey >> 8)) { + pr_warn("siw: fastreg: STag 0x%08x: bad MR\n", sqe->rkey); + rv = -EINVAL; + goto out; + } + if (unlikely(mem->pd != pd)) { + pr_warn("siw: fastreg: PD mismatch\n"); + rv = -EINVAL; + goto out; + } + if (unlikely(mem->stag_valid)) { + pr_warn("siw: fastreg: STag 0x%08x already valid\n", sqe->rkey); + rv = -EINVAL; + goto out; + } + /* Refresh STag since user may have changed key part */ + mem->stag = sqe->rkey; + mem->perms = sqe->access; + + siw_dbg_mem(mem, "STag now valid, MR va: 0x%016llx -> 0x%016llx\n", + mem->va, base_mr->iova); + mem->va = base_mr->iova; + 
mem->stag_valid = 1; +out: + siw_mem_put(mem); + return rv; +} + +static int siw_qp_sq_proc_local(struct siw_qp *qp, struct siw_wqe *wqe) +{ + int rv; + + switch (tx_type(wqe)) { + case SIW_OP_REG_MR: + rv = siw_fastreg_mr(qp->pd, &wqe->sqe); + break; + + case SIW_OP_INVAL_STAG: + rv = siw_invalidate_stag(qp->pd, wqe->sqe.rkey); + break; + + default: + rv = -EINVAL; + } + return rv; +} + +/* + * siw_qp_sq_process() + * + * Core TX path routine for RDMAP/DDP/MPA using a TCP kernel socket. + * Sends RDMAP payload for the current SQ WR @wqe of @qp in one or more + * MPA FPDUs, each containing a DDP segment. + * + * SQ processing may occur in user context as a result of posting + * new WQE's or from siw_sq_work_handler() context. Processing in + * user context is limited to non-kernel verbs users. + * + * SQ processing may get paused anytime, possibly in the middle of a WR + * or FPDU, if insufficient send space is available. SQ processing + * gets resumed from siw_sq_work_handler(), if send space becomes + * available again. + * + * Must be called with the QP state read-locked. + * + * Note: + * An outbound RREQ can be satisfied by the corresponding RRESP + * _before_ it gets assigned to the ORQ. This happens regularly + * in RDMA READ via loopback case. Since both outbound RREQ and + * inbound RRESP can be handled by the same CPU, locking the ORQ + * is dead-lock prone and thus not an option. With that, the + * RREQ gets assigned to the ORQ _before_ being sent - see + * siw_activate_tx() - and pulled back in case of send failure. + */ +int siw_qp_sq_process(struct siw_qp *qp) +{ + struct siw_wqe *wqe = tx_wqe(qp); + enum siw_opcode tx_type; + unsigned long flags; + int rv = 0; + + siw_dbg_qp(qp, "enter for type %d\n", tx_type(wqe)); + +next_wqe: + /* + * Stop QP processing if SQ state changed + */ + if (unlikely(qp->tx_ctx.tx_suspend)) { + siw_dbg_qp(qp, "tx suspended\n"); + goto done; + } + tx_type = tx_type(wqe); + + if (tx_type <= SIW_OP_READ_RESPONSE) + rv = siw_qp_sq_proc_tx(qp, wqe); + else + rv = siw_qp_sq_proc_local(qp, wqe); + + if (!rv) { + /* + * WQE processing done + */ + switch (tx_type) { + case SIW_OP_SEND: + case SIW_OP_SEND_REMOTE_INV: + case SIW_OP_WRITE: + siw_wqe_put_mem(wqe, tx_type); + /* Fall through */ + + case SIW_OP_INVAL_STAG: + case SIW_OP_REG_MR: + if (tx_flags(wqe) & SIW_WQE_SIGNALLED) + siw_sqe_complete(qp, &wqe->sqe, wqe->bytes, + SIW_WC_SUCCESS); + break; + + case SIW_OP_READ: + case SIW_OP_READ_LOCAL_INV: + /* + * already enqueued to ORQ queue + */ + break; + + case SIW_OP_READ_RESPONSE: + siw_wqe_put_mem(wqe, tx_type); + break; + + default: + WARN(1, "undefined WQE type %d\n", tx_type); + rv = -EINVAL; + goto done; + } + + spin_lock_irqsave(&qp->sq_lock, flags); + wqe->wr_status = SIW_WR_IDLE; + rv = siw_activate_tx(qp); + spin_unlock_irqrestore(&qp->sq_lock, flags); + + if (rv <= 0) + goto done; + + goto next_wqe; + + } else if (rv == -EAGAIN) { + siw_dbg_qp(qp, "sq paused: hd/tr %d of %d, data %d\n", + qp->tx_ctx.ctrl_sent, qp->tx_ctx.ctrl_len, + qp->tx_ctx.bytes_unsent); + rv = 0; + goto done; + } else if (rv == -EINPROGRESS) { + rv = siw_sq_start(qp); + goto done; + } else { + /* + * WQE processing failed. + * Verbs 8.3.2: + * o It turns any WQE into a signalled WQE. + * o Local catastrophic error must be surfaced + * o QP must be moved into Terminate state: done by code + * doing socket state change processing + * + * o TODO: Termination message must be sent. 
+ * o TODO: Implement more precise work completion errors, + * see enum ib_wc_status in ib_verbs.h + */ + siw_dbg_qp(qp, "wqe type %d processing failed: %d\n", + tx_type(wqe), rv); + + spin_lock_irqsave(&qp->sq_lock, flags); + /* + * RREQ may have already been completed by inbound RRESP! + */ + if (tx_type == SIW_OP_READ || + tx_type == SIW_OP_READ_LOCAL_INV) { + /* Cleanup pending entry in ORQ */ + qp->orq_put--; + qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0; + } + spin_unlock_irqrestore(&qp->sq_lock, flags); + /* + * immediately suspends further TX processing + */ + if (!qp->tx_ctx.tx_suspend) + siw_qp_cm_drop(qp, 0); + + switch (tx_type) { + case SIW_OP_SEND: + case SIW_OP_SEND_REMOTE_INV: + case SIW_OP_SEND_WITH_IMM: + case SIW_OP_WRITE: + case SIW_OP_READ: + case SIW_OP_READ_LOCAL_INV: + siw_wqe_put_mem(wqe, tx_type); + /* Fall through */ + + case SIW_OP_INVAL_STAG: + case SIW_OP_REG_MR: + siw_sqe_complete(qp, &wqe->sqe, wqe->bytes, + SIW_WC_LOC_QP_OP_ERR); + + siw_qp_event(qp, IB_EVENT_QP_FATAL); + + break; + + case SIW_OP_READ_RESPONSE: + siw_dbg_qp(qp, "proc. read.response failed: %d\n", rv); + + siw_qp_event(qp, IB_EVENT_QP_REQ_ERR); + + siw_wqe_put_mem(wqe, SIW_OP_READ_RESPONSE); + + break; + + default: + WARN(1, "undefined WQE type %d\n", tx_type); + rv = -EINVAL; + } + wqe->wr_status = SIW_WR_IDLE; + } +done: + return rv; +} + +static void siw_sq_resume(struct siw_qp *qp) +{ + if (down_read_trylock(&qp->state_lock)) { + if (likely(qp->attrs.state == SIW_QP_STATE_RTS && + !qp->tx_ctx.tx_suspend)) { + int rv = siw_qp_sq_process(qp); + + up_read(&qp->state_lock); + + if (unlikely(rv < 0)) { + siw_dbg_qp(qp, "SQ task failed: err %d\n", rv); + + if (!qp->tx_ctx.tx_suspend) + siw_qp_cm_drop(qp, 0); + } + } else { + up_read(&qp->state_lock); + } + } else { + siw_dbg_qp(qp, "Resume SQ while QP locked\n"); + } + siw_qp_put(qp); +} + +struct tx_task_t { + struct llist_head active; + wait_queue_head_t waiting; +}; + +static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g); + +void siw_stop_tx_thread(int nr_cpu) +{ + kthread_stop(siw_tx_thread[nr_cpu]); + wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting); +} + +int siw_run_sq(void *data) +{ + const int nr_cpu = (unsigned int)(long)data; + struct llist_node *active; + struct siw_qp *qp; + struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu); + + init_llist_head(&tx_task->active); + init_waitqueue_head(&tx_task->waiting); + + while (1) { + struct llist_node *fifo_list = NULL; + + wait_event_interruptible(tx_task->waiting, + !llist_empty(&tx_task->active) || + kthread_should_stop()); + + if (kthread_should_stop()) + break; + + active = llist_del_all(&tx_task->active); + /* + * llist_del_all returns a list with newest entry first. + * Re-order list for fairness among QP's. 
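Editor's note (illustration, not part of the patch): llist_del_all() returns the newest entry first, so the loop below is a plain in-place reversal to restore FIFO order; in isolation:

	static struct llist_node *to_fifo(struct llist_node *lifo)
	{
		struct llist_node *fifo = NULL;

		while (lifo) {
			struct llist_node *next = lifo->next;

			lifo->next = fifo;	/* prepend to the reversed list */
			fifo = lifo;
			lifo = next;
		}
		return fifo;
	}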
+ */ + while (active) { + struct llist_node *tmp = active; + + active = llist_next(active); + tmp->next = fifo_list; + fifo_list = tmp; + } + while (fifo_list) { + qp = container_of(fifo_list, struct siw_qp, tx_list); + fifo_list = llist_next(fifo_list); + qp->tx_list.next = NULL; + + siw_sq_resume(qp); + } + } + active = llist_del_all(&tx_task->active); + if (active) { + llist_for_each_entry(qp, active, tx_list) { + qp->tx_list.next = NULL; + siw_sq_resume(qp); + } + } + return 0; +} + +int siw_sq_start(struct siw_qp *qp) +{ + if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) + return 0; + + if (unlikely(!cpu_online(qp->tx_cpu))) { + siw_put_tx_cpu(qp->tx_cpu); + qp->tx_cpu = siw_get_tx_cpu(qp->sdev); + if (qp->tx_cpu < 0) { + pr_warn("siw: no tx cpu available\n"); + + return -EIO; + } + } + siw_qp_get(qp); + + llist_add(&qp->tx_list, &per_cpu(siw_tx_task_g, qp->tx_cpu).active); + + wake_up(&per_cpu(siw_tx_task_g, qp->tx_cpu).waiting); + + return 0; +} diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c new file mode 100644 index 000000000000..32dc79d0e898 --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -0,0 +1,1760 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/xarray.h> + +#include <rdma/iw_cm.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/uverbs_ioctl.h> + +#include "siw.h" +#include "siw_verbs.h" +#include "siw_mem.h" + +static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = SIW_QP_STATE_IDLE, + [IB_QPS_INIT] = SIW_QP_STATE_IDLE, + [IB_QPS_RTR] = SIW_QP_STATE_RTR, + [IB_QPS_RTS] = SIW_QP_STATE_RTS, + [IB_QPS_SQD] = SIW_QP_STATE_CLOSING, + [IB_QPS_SQE] = SIW_QP_STATE_TERMINATE, + [IB_QPS_ERR] = SIW_QP_STATE_ERROR +}; + +static char ib_qp_state_to_string[IB_QPS_ERR + 1][sizeof("RESET")] = { + [IB_QPS_RESET] = "RESET", [IB_QPS_INIT] = "INIT", [IB_QPS_RTR] = "RTR", + [IB_QPS_RTS] = "RTS", [IB_QPS_SQD] = "SQD", [IB_QPS_SQE] = "SQE", + [IB_QPS_ERR] = "ERR" +}; + +static u32 siw_create_uobj(struct siw_ucontext *uctx, void *vaddr, u32 size) +{ + struct siw_uobj *uobj; + struct xa_limit limit = XA_LIMIT(0, SIW_UOBJ_MAX_KEY); + u32 key; + + uobj = kzalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) + return SIW_INVAL_UOBJ_KEY; + + if (xa_alloc_cyclic(&uctx->xa, &key, uobj, limit, &uctx->uobj_nextkey, + GFP_KERNEL) < 0) { + kfree(uobj); + return SIW_INVAL_UOBJ_KEY; + } + uobj->size = PAGE_ALIGN(size); + uobj->addr = vaddr; + + return key; +} + +static struct siw_uobj *siw_get_uobj(struct siw_ucontext *uctx, + unsigned long off, u32 size) +{ + struct siw_uobj *uobj = xa_load(&uctx->xa, off); + + if (uobj && uobj->size == size) + return uobj; + + return NULL; +} + +int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) +{ + struct siw_ucontext *uctx = to_siw_ctx(ctx); + struct siw_uobj *uobj; + unsigned long off = vma->vm_pgoff; + int size = vma->vm_end - vma->vm_start; + int rv = -EINVAL; + + /* + * Must be page aligned + */ + if (vma->vm_start & (PAGE_SIZE - 1)) { + pr_warn("siw: mmap not page aligned\n"); + goto out; + } + uobj = siw_get_uobj(uctx, off, size); + if (!uobj) { + siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %u\n", + off, size); + goto out; + } + rv = remap_vmalloc_range(vma, uobj->addr, 0); + if (rv) + 
pr_warn("remap_vmalloc_range failed: %lu, %u\n", off, size); +out: + return rv; +} + +int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata) +{ + struct siw_device *sdev = to_siw_dev(base_ctx->device); + struct siw_ucontext *ctx = to_siw_ctx(base_ctx); + struct siw_uresp_alloc_ctx uresp = {}; + int rv; + + if (atomic_inc_return(&sdev->num_ctx) > SIW_MAX_CONTEXT) { + rv = -ENOMEM; + goto err_out; + } + xa_init_flags(&ctx->xa, XA_FLAGS_ALLOC); + ctx->uobj_nextkey = 0; + ctx->sdev = sdev; + + uresp.dev_id = sdev->vendor_part_id; + + if (udata->outlen < sizeof(uresp)) { + rv = -EINVAL; + goto err_out; + } + rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rv) + goto err_out; + + siw_dbg(base_ctx->device, "success. now %d context(s)\n", + atomic_read(&sdev->num_ctx)); + + return 0; + +err_out: + atomic_dec(&sdev->num_ctx); + siw_dbg(base_ctx->device, "failure %d. now %d context(s)\n", rv, + atomic_read(&sdev->num_ctx)); + + return rv; +} + +void siw_dealloc_ucontext(struct ib_ucontext *base_ctx) +{ + struct siw_ucontext *uctx = to_siw_ctx(base_ctx); + void *entry; + unsigned long index; + + /* + * Make sure all user mmap objects are gone. Since QP, CQ + * and SRQ destroy routines destroy related objects, nothing + * should be found here. + */ + xa_for_each(&uctx->xa, index, entry) { + kfree(xa_erase(&uctx->xa, index)); + pr_warn("siw: dropping orphaned uobj at %lu\n", index); + } + xa_destroy(&uctx->xa); + atomic_dec(&uctx->sdev->num_ctx); +} + +int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, + struct ib_udata *udata) +{ + struct siw_device *sdev = to_siw_dev(base_dev); + + if (udata->inlen || udata->outlen) + return -EINVAL; + + memset(attr, 0, sizeof(*attr)); + + /* Revisit atomic caps if RFC 7306 gets supported */ + attr->atomic_cap = 0; + attr->device_cap_flags = + IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_ALLOW_USER_UNREG; + attr->max_cq = sdev->attrs.max_cq; + attr->max_cqe = sdev->attrs.max_cqe; + attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL; + attr->max_fmr = sdev->attrs.max_fmr; + attr->max_mr = sdev->attrs.max_mr; + attr->max_mw = sdev->attrs.max_mw; + attr->max_mr_size = ~0ull; + attr->max_pd = sdev->attrs.max_pd; + attr->max_qp = sdev->attrs.max_qp; + attr->max_qp_init_rd_atom = sdev->attrs.max_ird; + attr->max_qp_rd_atom = sdev->attrs.max_ord; + attr->max_qp_wr = sdev->attrs.max_qp_wr; + attr->max_recv_sge = sdev->attrs.max_sge; + attr->max_res_rd_atom = sdev->attrs.max_qp * sdev->attrs.max_ird; + attr->max_send_sge = sdev->attrs.max_sge; + attr->max_sge_rd = sdev->attrs.max_sge_rd; + attr->max_srq = sdev->attrs.max_srq; + attr->max_srq_sge = sdev->attrs.max_srq_sge; + attr->max_srq_wr = sdev->attrs.max_srq_wr; + attr->page_size_cap = PAGE_SIZE; + attr->vendor_id = SIW_VENDOR_ID; + attr->vendor_part_id = sdev->vendor_part_id; + + memcpy(&attr->sys_image_guid, sdev->netdev->dev_addr, 6); + + return 0; +} + +int siw_query_port(struct ib_device *base_dev, u8 port, + struct ib_port_attr *attr) +{ + struct siw_device *sdev = to_siw_dev(base_dev); + + memset(attr, 0, sizeof(*attr)); + + attr->active_mtu = attr->max_mtu; + attr->active_speed = 2; + attr->active_width = 2; + attr->gid_tbl_len = 1; + attr->max_msg_sz = -1; + attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); + attr->phys_state = sdev->state == IB_PORT_ACTIVE ? 
5 : 3; + attr->pkey_tbl_len = 1; + attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; + attr->state = sdev->state; + /* + * All zero + * + * attr->lid = 0; + * attr->bad_pkey_cntr = 0; + * attr->qkey_viol_cntr = 0; + * attr->sm_lid = 0; + * attr->lmc = 0; + * attr->max_vl_num = 0; + * attr->sm_sl = 0; + * attr->subnet_timeout = 0; + * attr->init_type_repy = 0; + */ + return 0; +} + +int siw_get_port_immutable(struct ib_device *base_dev, u8 port, + struct ib_port_immutable *port_immutable) +{ + struct ib_port_attr attr; + int rv = siw_query_port(base_dev, port, &attr); + + if (rv) + return rv; + + port_immutable->pkey_tbl_len = attr.pkey_tbl_len; + port_immutable->gid_tbl_len = attr.gid_tbl_len; + port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} + +int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey) +{ + /* Report the default pkey */ + *pkey = 0xffff; + return 0; +} + +int siw_query_gid(struct ib_device *base_dev, u8 port, int idx, + union ib_gid *gid) +{ + struct siw_device *sdev = to_siw_dev(base_dev); + + /* subnet_prefix == interface_id == 0; */ + memset(gid, 0, sizeof(*gid)); + memcpy(&gid->raw[0], sdev->netdev->dev_addr, 6); + + return 0; +} + +int siw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) +{ + struct siw_device *sdev = to_siw_dev(pd->device); + + if (atomic_inc_return(&sdev->num_pd) > SIW_MAX_PD) { + atomic_dec(&sdev->num_pd); + return -ENOMEM; + } + siw_dbg_pd(pd, "now %d PD's(s)\n", atomic_read(&sdev->num_pd)); + + return 0; +} + +void siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +{ + struct siw_device *sdev = to_siw_dev(pd->device); + + siw_dbg_pd(pd, "free PD\n"); + atomic_dec(&sdev->num_pd); +} + +void siw_qp_get_ref(struct ib_qp *base_qp) +{ + siw_qp_get(to_siw_qp(base_qp)); +} + +void siw_qp_put_ref(struct ib_qp *base_qp) +{ + siw_qp_put(to_siw_qp(base_qp)); +} + +/* + * siw_create_qp() + * + * Create QP of requested size on given device. + * + * @pd: Protection Domain + * @attrs: Initial QP attributes. + * @udata: used to provide QP ID, SQ and RQ size back to user. 
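Editor's note (illustration, not part of the patch): siw_create_qp() below rounds SQ/RQ sizes up to a power of two so that free-running 'put'/'get' counters can index the rings safely across counter wrap-around; for a power-of-two size the modulo used in the driver is equivalent to a mask:

	static inline u32 ring_idx(u32 counter, u32 size_pow2)
	{
		return counter & (size_pow2 - 1);	/* same as counter % size_pow2 */
	}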
+ */ + +struct ib_qp *siw_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct siw_qp *qp = NULL; + struct siw_base_qp *siw_base_qp = NULL; + struct ib_device *base_dev = pd->device; + struct siw_device *sdev = to_siw_dev(base_dev); + struct siw_ucontext *uctx = + rdma_udata_to_drv_context(udata, struct siw_ucontext, + base_ucontext); + struct siw_cq *scq = NULL, *rcq = NULL; + unsigned long flags; + int num_sqe, num_rqe, rv = 0; + + siw_dbg(base_dev, "create new QP\n"); + + if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { + siw_dbg(base_dev, "too many QP's\n"); + rv = -ENOMEM; + goto err_out; + } + if (attrs->qp_type != IB_QPT_RC) { + siw_dbg(base_dev, "only RC QP's supported\n"); + rv = -EINVAL; + goto err_out; + } + if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) || + (attrs->cap.max_recv_wr > SIW_MAX_QP_WR) || + (attrs->cap.max_send_sge > SIW_MAX_SGE) || + (attrs->cap.max_recv_sge > SIW_MAX_SGE)) { + siw_dbg(base_dev, "QP size error\n"); + rv = -EINVAL; + goto err_out; + } + if (attrs->cap.max_inline_data > SIW_MAX_INLINE) { + siw_dbg(base_dev, "max inline send: %d > %d\n", + attrs->cap.max_inline_data, (int)SIW_MAX_INLINE); + rv = -EINVAL; + goto err_out; + } + /* + * NOTE: we allow for zero element SQ and RQ WQE's SGL's + * but not for a QP unable to hold any WQE (SQ + RQ) + */ + if (attrs->cap.max_send_wr + attrs->cap.max_recv_wr == 0) { + siw_dbg(base_dev, "QP must have send or receive queue\n"); + rv = -EINVAL; + goto err_out; + } + scq = to_siw_cq(attrs->send_cq); + rcq = to_siw_cq(attrs->recv_cq); + + if (!scq || (!rcq && !attrs->srq)) { + siw_dbg(base_dev, "send CQ or receive CQ invalid\n"); + rv = -EINVAL; + goto err_out; + } + siw_base_qp = kzalloc(sizeof(*siw_base_qp), GFP_KERNEL); + if (!siw_base_qp) { + rv = -ENOMEM; + goto err_out; + } + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + rv = -ENOMEM; + goto err_out; + } + siw_base_qp->qp = qp; + qp->ib_qp = &siw_base_qp->base_qp; + + init_rwsem(&qp->state_lock); + spin_lock_init(&qp->sq_lock); + spin_lock_init(&qp->rq_lock); + spin_lock_init(&qp->orq_lock); + + qp->kernel_verbs = !udata; + qp->xa_sq_index = SIW_INVAL_UOBJ_KEY; + qp->xa_rq_index = SIW_INVAL_UOBJ_KEY; + + rv = siw_qp_add(sdev, qp); + if (rv) + goto err_out; + + /* All queue indices are derived from modulo operations + * on a free running 'get' (consumer) and 'put' (producer) + * unsigned counter. Having queue sizes at power of two + * avoids handling counter wrap around. + */ + num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); + num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr); + + if (qp->kernel_verbs) + qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); + else + qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); + + if (qp->sendq == NULL) { + siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe); + rv = -ENOMEM; + goto err_out_xa; + } + if (attrs->sq_sig_type != IB_SIGNAL_REQ_WR) { + if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) + qp->attrs.flags |= SIW_SIGNAL_ALL_WR; + else { + rv = -EINVAL; + goto err_out_xa; + } + } + qp->pd = pd; + qp->scq = scq; + qp->rcq = rcq; + + if (attrs->srq) { + /* + * SRQ support. 
+ * Verbs 6.3.7: ignore RQ size, if SRQ present + * Verbs 6.3.5: do not check PD of SRQ against PD of QP + */ + qp->srq = to_siw_srq(attrs->srq); + qp->attrs.rq_size = 0; + siw_dbg(base_dev, "QP [%u]: [SRQ 0x%p] attached\n", + qp->qp_num, qp->srq); + } else if (num_rqe) { + if (qp->kernel_verbs) + qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); + else + qp->recvq = + vmalloc_user(num_rqe * sizeof(struct siw_rqe)); + + if (qp->recvq == NULL) { + siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe); + rv = -ENOMEM; + goto err_out_xa; + } + qp->attrs.rq_size = num_rqe; + } + qp->attrs.sq_size = num_sqe; + qp->attrs.sq_max_sges = attrs->cap.max_send_sge; + qp->attrs.rq_max_sges = attrs->cap.max_recv_sge; + + /* Make those two tunables fixed for now. */ + qp->tx_ctx.gso_seg_limit = 1; + qp->tx_ctx.zcopy_tx = zcopy_tx; + + qp->attrs.state = SIW_QP_STATE_IDLE; + + if (udata) { + struct siw_uresp_create_qp uresp = {}; + + uresp.num_sqe = num_sqe; + uresp.num_rqe = num_rqe; + uresp.qp_id = qp_id(qp); + + if (qp->sendq) { + qp->xa_sq_index = + siw_create_uobj(uctx, qp->sendq, + num_sqe * sizeof(struct siw_sqe)); + } + if (qp->recvq) { + qp->xa_rq_index = + siw_create_uobj(uctx, qp->recvq, + num_rqe * sizeof(struct siw_rqe)); + } + if (qp->xa_sq_index == SIW_INVAL_UOBJ_KEY || + qp->xa_rq_index == SIW_INVAL_UOBJ_KEY) { + rv = -ENOMEM; + goto err_out_xa; + } + uresp.sq_key = qp->xa_sq_index << PAGE_SHIFT; + uresp.rq_key = qp->xa_rq_index << PAGE_SHIFT; + + if (udata->outlen < sizeof(uresp)) { + rv = -EINVAL; + goto err_out_xa; + } + rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rv) + goto err_out_xa; + } + qp->tx_cpu = siw_get_tx_cpu(sdev); + if (qp->tx_cpu < 0) { + rv = -EINVAL; + goto err_out_xa; + } + INIT_LIST_HEAD(&qp->devq); + spin_lock_irqsave(&sdev->lock, flags); + list_add_tail(&qp->devq, &sdev->qp_list); + spin_unlock_irqrestore(&sdev->lock, flags); + + return qp->ib_qp; + +err_out_xa: + xa_erase(&sdev->qp_xa, qp_id(qp)); +err_out: + kfree(siw_base_qp); + + if (qp) { + if (qp->xa_sq_index != SIW_INVAL_UOBJ_KEY) + kfree(xa_erase(&uctx->xa, qp->xa_sq_index)); + if (qp->xa_rq_index != SIW_INVAL_UOBJ_KEY) + kfree(xa_erase(&uctx->xa, qp->xa_rq_index)); + + vfree(qp->sendq); + vfree(qp->recvq); + kfree(qp); + } + atomic_dec(&sdev->num_qp); + + return ERR_PTR(rv); +} + +/* + * Minimum siw_query_qp() verb interface. 
+ * + * @qp_attr_mask is not used but all available information is provided + */ +int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct siw_qp *qp; + struct siw_device *sdev; + + if (base_qp && qp_attr && qp_init_attr) { + qp = to_siw_qp(base_qp); + sdev = to_siw_dev(base_qp->device); + } else { + return -EINVAL; + } + qp_attr->cap.max_inline_data = SIW_MAX_INLINE; + qp_attr->cap.max_send_wr = qp->attrs.sq_size; + qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; + qp_attr->cap.max_recv_wr = qp->attrs.rq_size; + qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges; + qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); + qp_attr->max_rd_atomic = qp->attrs.irq_size; + qp_attr->max_dest_rd_atomic = qp->attrs.orq_size; + + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; + + qp_init_attr->qp_type = base_qp->qp_type; + qp_init_attr->send_cq = base_qp->send_cq; + qp_init_attr->recv_cq = base_qp->recv_cq; + qp_init_attr->srq = base_qp->srq; + + qp_init_attr->cap = qp_attr->cap; + + return 0; +} + +int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct siw_qp_attrs new_attrs; + enum siw_qp_attr_mask siw_attr_mask = 0; + struct siw_qp *qp = to_siw_qp(base_qp); + int rv = 0; + + if (!attr_mask) + return 0; + + memset(&new_attrs, 0, sizeof(new_attrs)); + + if (attr_mask & IB_QP_ACCESS_FLAGS) { + siw_attr_mask = SIW_QP_ATTR_ACCESS_FLAGS; + + if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) + new_attrs.flags |= SIW_RDMA_READ_ENABLED; + if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) + new_attrs.flags |= SIW_RDMA_WRITE_ENABLED; + if (attr->qp_access_flags & IB_ACCESS_MW_BIND) + new_attrs.flags |= SIW_RDMA_BIND_ENABLED; + } + if (attr_mask & IB_QP_STATE) { + siw_dbg_qp(qp, "desired IB QP state: %s\n", + ib_qp_state_to_string[attr->qp_state]); + + new_attrs.state = ib_qp_state_to_siw_qp_state[attr->qp_state]; + + if (new_attrs.state > SIW_QP_STATE_RTS) + qp->tx_ctx.tx_suspend = 1; + + siw_attr_mask |= SIW_QP_ATTR_STATE; + } + if (!siw_attr_mask) + goto out; + + down_write(&qp->state_lock); + + rv = siw_qp_modify(qp, &new_attrs, siw_attr_mask); + + up_write(&qp->state_lock); +out: + return rv; +} + +int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) +{ + struct siw_qp *qp = to_siw_qp(base_qp); + struct siw_base_qp *siw_base_qp = to_siw_base_qp(base_qp); + struct siw_ucontext *uctx = + rdma_udata_to_drv_context(udata, struct siw_ucontext, + base_ucontext); + struct siw_qp_attrs qp_attrs; + + siw_dbg_qp(qp, "state %d, cep 0x%p\n", qp->attrs.state, qp->cep); + + /* + * Mark QP as in process of destruction to prevent from + * any async callbacks to RDMA core + */ + qp->attrs.flags |= SIW_QP_IN_DESTROY; + qp->rx_stream.rx_suspend = 1; + + if (uctx && qp->xa_sq_index != SIW_INVAL_UOBJ_KEY) + kfree(xa_erase(&uctx->xa, qp->xa_sq_index)); + if (uctx && qp->xa_rq_index != SIW_INVAL_UOBJ_KEY) + kfree(xa_erase(&uctx->xa, qp->xa_rq_index)); + + down_write(&qp->state_lock); + + qp_attrs.state = SIW_QP_STATE_ERROR; + siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE); + + if (qp->cep) { + siw_cep_put(qp->cep); + qp->cep = NULL; + } + up_write(&qp->state_lock); + + kfree(qp->tx_ctx.mpa_crc_hd); + kfree(qp->rx_stream.mpa_crc_hd); + + qp->scq = qp->rcq = NULL; + + siw_qp_put(qp); + kfree(siw_base_qp); + + return 0; +} + +/* + * siw_copy_inline_sgl() + * + * Prepare sgl of inlined data for sending. 
For userland callers + * function checks if given buffer addresses and len's are within + * process context bounds. + * Data from all provided sge's are copied together into the wqe, + * referenced by a single sge. + */ +static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, + struct siw_sqe *sqe) +{ + struct ib_sge *core_sge = core_wr->sg_list; + void *kbuf = &sqe->sge[1]; + int num_sge = core_wr->num_sge, bytes = 0; + + sqe->sge[0].laddr = (u64)kbuf; + sqe->sge[0].lkey = 0; + + while (num_sge--) { + if (!core_sge->length) { + core_sge++; + continue; + } + bytes += core_sge->length; + if (bytes > SIW_MAX_INLINE) { + bytes = -EINVAL; + break; + } + memcpy(kbuf, (void *)(uintptr_t)core_sge->addr, + core_sge->length); + + kbuf += core_sge->length; + core_sge++; + } + sqe->sge[0].length = bytes > 0 ? bytes : 0; + sqe->num_sge = bytes > 0 ? 1 : 0; + + return bytes; +} + +/* + * siw_post_send() + * + * Post a list of S-WR's to a SQ. + * + * @base_qp: Base QP contained in siw QP + * @wr: Null terminated list of user WR's + * @bad_wr: Points to failing WR in case of synchronous failure. + */ +int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + struct siw_qp *qp = to_siw_qp(base_qp); + struct siw_wqe *wqe = tx_wqe(qp); + + unsigned long flags; + int rv = 0; + + /* + * Try to acquire QP state lock. Must be non-blocking + * to accommodate kernel clients needs. + */ + if (!down_read_trylock(&qp->state_lock)) { + *bad_wr = wr; + siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state); + return -ENOTCONN; + } + if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) { + up_read(&qp->state_lock); + *bad_wr = wr; + siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state); + return -ENOTCONN; + } + if (wr && !qp->kernel_verbs) { + siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); + up_read(&qp->state_lock); + *bad_wr = wr; + return -EINVAL; + } + spin_lock_irqsave(&qp->sq_lock, flags); + + while (wr) { + u32 idx = qp->sq_put % qp->attrs.sq_size; + struct siw_sqe *sqe = &qp->sendq[idx]; + + if (sqe->flags) { + siw_dbg_qp(qp, "sq full\n"); + rv = -ENOMEM; + break; + } + if (wr->num_sge > qp->attrs.sq_max_sges) { + siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge); + rv = -EINVAL; + break; + } + sqe->id = wr->wr_id; + + if ((wr->send_flags & IB_SEND_SIGNALED) || + (qp->attrs.flags & SIW_SIGNAL_ALL_WR)) + sqe->flags |= SIW_WQE_SIGNALLED; + + if (wr->send_flags & IB_SEND_FENCE) + sqe->flags |= SIW_WQE_READ_FENCE; + + switch (wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_SOLICITED) + sqe->flags |= SIW_WQE_SOLICITED; + + if (!(wr->send_flags & IB_SEND_INLINE)) { + siw_copy_sgl(wr->sg_list, sqe->sge, + wr->num_sge); + sqe->num_sge = wr->num_sge; + } else { + rv = siw_copy_inline_sgl(wr, sqe); + if (rv <= 0) { + rv = -EINVAL; + break; + } + sqe->flags |= SIW_WQE_INLINE; + sqe->num_sge = 1; + } + if (wr->opcode == IB_WR_SEND) + sqe->opcode = SIW_OP_SEND; + else { + sqe->opcode = SIW_OP_SEND_REMOTE_INV; + sqe->rkey = wr->ex.invalidate_rkey; + } + break; + + case IB_WR_RDMA_READ_WITH_INV: + case IB_WR_RDMA_READ: + /* + * iWarp restricts RREAD sink to SGL containing + * 1 SGE only. we could relax to SGL with multiple + * elements referring the SAME ltag or even sending + * a private per-rreq tag referring to a checked + * local sgl with MULTIPLE ltag's. 
+ */ + if (unlikely(wr->num_sge != 1)) { + rv = -EINVAL; + break; + } + siw_copy_sgl(wr->sg_list, &sqe->sge[0], 1); + /* + * NOTE: zero length RREAD is allowed! + */ + sqe->raddr = rdma_wr(wr)->remote_addr; + sqe->rkey = rdma_wr(wr)->rkey; + sqe->num_sge = 1; + + if (wr->opcode == IB_WR_RDMA_READ) + sqe->opcode = SIW_OP_READ; + else + sqe->opcode = SIW_OP_READ_LOCAL_INV; + break; + + case IB_WR_RDMA_WRITE: + if (!(wr->send_flags & IB_SEND_INLINE)) { + siw_copy_sgl(wr->sg_list, &sqe->sge[0], + wr->num_sge); + sqe->num_sge = wr->num_sge; + } else { + rv = siw_copy_inline_sgl(wr, sqe); + if (unlikely(rv < 0)) { + rv = -EINVAL; + break; + } + sqe->flags |= SIW_WQE_INLINE; + sqe->num_sge = 1; + } + sqe->raddr = rdma_wr(wr)->remote_addr; + sqe->rkey = rdma_wr(wr)->rkey; + sqe->opcode = SIW_OP_WRITE; + break; + + case IB_WR_REG_MR: + sqe->base_mr = (uint64_t)reg_wr(wr)->mr; + sqe->rkey = reg_wr(wr)->key; + sqe->access = reg_wr(wr)->access & IWARP_ACCESS_MASK; + sqe->opcode = SIW_OP_REG_MR; + break; + + case IB_WR_LOCAL_INV: + sqe->rkey = wr->ex.invalidate_rkey; + sqe->opcode = SIW_OP_INVAL_STAG; + break; + + default: + siw_dbg_qp(qp, "ib wr type %d unsupported\n", + wr->opcode); + rv = -EINVAL; + break; + } + siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%p\n", + sqe->opcode, sqe->flags, (void *)sqe->id); + + if (unlikely(rv < 0)) + break; + + /* make SQE only valid after completely written */ + smp_wmb(); + sqe->flags |= SIW_WQE_VALID; + + qp->sq_put++; + wr = wr->next; + } + + /* + * Send directly if SQ processing is not in progress. + * Eventual immediate errors (rv < 0) do not affect the involved + * RI resources (Verbs, 8.3.1) and thus do not prevent from SQ + * processing, if new work is already pending. But rv must be passed + * to caller. + */ + if (wqe->wr_status != SIW_WR_IDLE) { + spin_unlock_irqrestore(&qp->sq_lock, flags); + goto skip_direct_sending; + } + rv = siw_activate_tx(qp); + spin_unlock_irqrestore(&qp->sq_lock, flags); + + if (rv <= 0) + goto skip_direct_sending; + + if (qp->kernel_verbs) { + rv = siw_sq_start(qp); + } else { + qp->tx_ctx.in_syscall = 1; + + if (siw_qp_sq_process(qp) != 0 && !(qp->tx_ctx.tx_suspend)) + siw_qp_cm_drop(qp, 0); + + qp->tx_ctx.in_syscall = 0; + } +skip_direct_sending: + + up_read(&qp->state_lock); + + if (rv >= 0) + return 0; + /* + * Immediate error + */ + siw_dbg_qp(qp, "error %d\n", rv); + + *bad_wr = wr; + return rv; +} + +/* + * siw_post_receive() + * + * Post a list of R-WR's to a RQ. + * + * @base_qp: Base QP contained in siw QP + * @wr: Null terminated list of user WR's + * @bad_wr: Points to failing WR in case of synchronous failure. + */ +int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct siw_qp *qp = to_siw_qp(base_qp); + unsigned long flags; + int rv = 0; + + if (qp->srq) { + *bad_wr = wr; + return -EOPNOTSUPP; /* what else from errno.h? */ + } + /* + * Try to acquire QP state lock. Must be non-blocking + * to accommodate kernel clients needs. + */ + if (!down_read_trylock(&qp->state_lock)) { + *bad_wr = wr; + return -ENOTCONN; + } + if (!qp->kernel_verbs) { + siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n"); + up_read(&qp->state_lock); + *bad_wr = wr; + return -EINVAL; + } + if (qp->attrs.state > SIW_QP_STATE_RTS) { + up_read(&qp->state_lock); + *bad_wr = wr; + return -EINVAL; + } + /* + * Serialize potentially multiple producers. + * Not needed for single threaded consumer side. 
+ */ + spin_lock_irqsave(&qp->rq_lock, flags); + + while (wr) { + u32 idx = qp->rq_put % qp->attrs.rq_size; + struct siw_rqe *rqe = &qp->recvq[idx]; + + if (rqe->flags) { + siw_dbg_qp(qp, "RQ full\n"); + rv = -ENOMEM; + break; + } + if (wr->num_sge > qp->attrs.rq_max_sges) { + siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge); + rv = -EINVAL; + break; + } + rqe->id = wr->wr_id; + rqe->num_sge = wr->num_sge; + siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge); + + /* make sure RQE is completely written before valid */ + smp_wmb(); + + rqe->flags = SIW_WQE_VALID; + + qp->rq_put++; + wr = wr->next; + } + spin_unlock_irqrestore(&qp->rq_lock, flags); + + up_read(&qp->state_lock); + + if (rv < 0) { + siw_dbg_qp(qp, "error %d\n", rv); + *bad_wr = wr; + } + return rv > 0 ? 0 : rv; +} + +void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) +{ + struct siw_cq *cq = to_siw_cq(base_cq); + struct siw_device *sdev = to_siw_dev(base_cq->device); + struct siw_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct siw_ucontext, + base_ucontext); + + siw_dbg_cq(cq, "free CQ resources\n"); + + siw_cq_flush(cq); + + if (ctx && cq->xa_cq_index != SIW_INVAL_UOBJ_KEY) + kfree(xa_erase(&ctx->xa, cq->xa_cq_index)); + + atomic_dec(&sdev->num_cq); + + vfree(cq->queue); +} + +/* + * siw_create_cq() + * + * Populate CQ of requested size + * + * @base_cq: CQ as allocated by RDMA midlayer + * @attr: Initial CQ attributes + * @udata: relates to user context + */ + +int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) +{ + struct siw_device *sdev = to_siw_dev(base_cq->device); + struct siw_cq *cq = to_siw_cq(base_cq); + int rv, size = attr->cqe; + + if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) { + siw_dbg(base_cq->device, "too many CQ's\n"); + rv = -ENOMEM; + goto err_out; + } + if (size < 1 || size > sdev->attrs.max_cqe) { + siw_dbg(base_cq->device, "CQ size error: %d\n", size); + rv = -EINVAL; + goto err_out; + } + size = roundup_pow_of_two(size); + cq->base_cq.cqe = size; + cq->num_cqe = size; + cq->xa_cq_index = SIW_INVAL_UOBJ_KEY; + + if (!udata) { + cq->kernel_verbs = 1; + cq->queue = vzalloc(size * sizeof(struct siw_cqe) + + sizeof(struct siw_cq_ctrl)); + } else { + cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) + + sizeof(struct siw_cq_ctrl)); + } + if (cq->queue == NULL) { + rv = -ENOMEM; + goto err_out; + } + get_random_bytes(&cq->id, 4); + siw_dbg(base_cq->device, "new CQ [%u]\n", cq->id); + + spin_lock_init(&cq->lock); + + cq->notify = &((struct siw_cq_ctrl *)&cq->queue[size])->notify; + + if (udata) { + struct siw_uresp_create_cq uresp = {}; + struct siw_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct siw_ucontext, + base_ucontext); + + cq->xa_cq_index = + siw_create_uobj(ctx, cq->queue, + size * sizeof(struct siw_cqe) + + sizeof(struct siw_cq_ctrl)); + if (cq->xa_cq_index == SIW_INVAL_UOBJ_KEY) { + rv = -ENOMEM; + goto err_out; + } + uresp.cq_key = cq->xa_cq_index << PAGE_SHIFT; + uresp.cq_id = cq->id; + uresp.num_cqe = size; + + if (udata->outlen < sizeof(uresp)) { + rv = -EINVAL; + goto err_out; + } + rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rv) + goto err_out; + } + return 0; + +err_out: + siw_dbg(base_cq->device, "CQ creation failed: %d", rv); + + if (cq && cq->queue) { + struct siw_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct siw_ucontext, + base_ucontext); + if (cq->xa_cq_index != SIW_INVAL_UOBJ_KEY) + kfree(xa_erase(&ctx->xa, cq->xa_cq_index)); + vfree(cq->queue); + } + 
atomic_dec(&sdev->num_cq); + + return rv; +} + +/* + * siw_poll_cq() + * + * Reap CQ entries if available and copy work completion status into + * array of WC's provided by caller. Returns number of reaped CQE's. + * + * @base_cq: Base CQ contained in siw CQ. + * @num_cqe: Maximum number of CQE's to reap. + * @wc: Array of work completions to be filled by siw. + */ +int siw_poll_cq(struct ib_cq *base_cq, int num_cqe, struct ib_wc *wc) +{ + struct siw_cq *cq = to_siw_cq(base_cq); + int i; + + for (i = 0; i < num_cqe; i++) { + if (!siw_reap_cqe(cq, wc)) + break; + wc++; + } + return i; +} + +/* + * siw_req_notify_cq() + * + * Request notification for new CQE's added to that CQ. + * Defined flags: + * o SIW_CQ_NOTIFY_SOLICITED lets siw trigger a notification + * event if a WQE with notification flag set enters the CQ + * o SIW_CQ_NOTIFY_NEXT_COMP lets siw trigger a notification + * event if a WQE enters the CQ. + * o IB_CQ_REPORT_MISSED_EVENTS: return value will provide the + * number of not reaped CQE's regardless of its notification + * type and current or new CQ notification settings. + * + * @base_cq: Base CQ contained in siw CQ. + * @flags: Requested notification flags. + */ +int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags) +{ + struct siw_cq *cq = to_siw_cq(base_cq); + + siw_dbg_cq(cq, "flags: 0x%02x\n", flags); + + if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) + /* CQ event for next solicited completion */ + smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED); + else + /* CQ event for any signalled completion */ + smp_store_mb(*cq->notify, SIW_NOTIFY_ALL); + + if (flags & IB_CQ_REPORT_MISSED_EVENTS) + return cq->cq_put - cq->cq_get; + + return 0; +} + +/* + * siw_dereg_mr() + * + * Release Memory Region. + * + * @base_mr: Base MR contained in siw MR. + * @udata: points to user context, unused. + */ +int siw_dereg_mr(struct ib_mr *base_mr, struct ib_udata *udata) +{ + struct siw_mr *mr = to_siw_mr(base_mr); + struct siw_device *sdev = to_siw_dev(base_mr->device); + + siw_dbg_mem(mr->mem, "deregister MR\n"); + + atomic_dec(&sdev->num_mr); + + siw_mr_drop_mem(mr); + kfree_rcu(mr, rcu); + + return 0; +} + +/* + * siw_reg_user_mr() + * + * Register Memory Region. + * + * @pd: Protection Domain + * @start: starting address of MR (virtual address) + * @len: len of MR + * @rnic_va: not used by siw + * @rights: MR access rights + * @udata: user buffer to communicate STag and Key. 
+ */ +struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, + u64 rnic_va, int rights, struct ib_udata *udata) +{ + struct siw_mr *mr = NULL; + struct siw_umem *umem = NULL; + struct siw_ureq_reg_mr ureq; + struct siw_device *sdev = to_siw_dev(pd->device); + + unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK); + int rv; + + siw_dbg_pd(pd, "start: 0x%016llx, va: 0x%016llx, len: %llu\n", + (unsigned long long)start, (unsigned long long)rnic_va, + (unsigned long long)len); + + if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { + siw_dbg_pd(pd, "too many mr's\n"); + rv = -ENOMEM; + goto err_out; + } + if (!len) { + rv = -EINVAL; + goto err_out; + } + if (mem_limit != RLIM_INFINITY) { + unsigned long num_pages = + (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT; + mem_limit >>= PAGE_SHIFT; + + if (num_pages > mem_limit - current->mm->locked_vm) { + siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n", + num_pages, mem_limit, + current->mm->locked_vm); + rv = -ENOMEM; + goto err_out; + } + } + umem = siw_umem_get(start, len, ib_access_writable(rights)); + if (IS_ERR(umem)) { + rv = PTR_ERR(umem); + siw_dbg_pd(pd, "getting user memory failed: %d\n", rv); + umem = NULL; + goto err_out; + } + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + rv = -ENOMEM; + goto err_out; + } + rv = siw_mr_add_mem(mr, pd, umem, start, len, rights); + if (rv) + goto err_out; + + if (udata) { + struct siw_uresp_reg_mr uresp = {}; + struct siw_mem *mem = mr->mem; + + if (udata->inlen < sizeof(ureq)) { + rv = -EINVAL; + goto err_out; + } + rv = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); + if (rv) + goto err_out; + + mr->base_mr.lkey |= ureq.stag_key; + mr->base_mr.rkey |= ureq.stag_key; + mem->stag |= ureq.stag_key; + uresp.stag = mem->stag; + + if (udata->outlen < sizeof(uresp)) { + rv = -EINVAL; + goto err_out; + } + rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rv) + goto err_out; + } + mr->mem->stag_valid = 1; + + return &mr->base_mr; + +err_out: + atomic_dec(&sdev->num_mr); + if (mr) { + if (mr->mem) + siw_mr_drop_mem(mr); + kfree_rcu(mr, rcu); + } else { + if (umem) + siw_umem_release(umem, false); + } + return ERR_PTR(rv); +} + +struct ib_mr *siw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_sge, struct ib_udata *udata) +{ + struct siw_device *sdev = to_siw_dev(pd->device); + struct siw_mr *mr = NULL; + struct siw_pbl *pbl = NULL; + int rv; + + if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { + siw_dbg_pd(pd, "too many mr's\n"); + rv = -ENOMEM; + goto err_out; + } + if (mr_type != IB_MR_TYPE_MEM_REG) { + siw_dbg_pd(pd, "mr type %d unsupported\n", mr_type); + rv = -EOPNOTSUPP; + goto err_out; + } + if (max_sge > SIW_MAX_SGE_PBL) { + siw_dbg_pd(pd, "too many sge's: %d\n", max_sge); + rv = -ENOMEM; + goto err_out; + } + pbl = siw_pbl_alloc(max_sge); + if (IS_ERR(pbl)) { + rv = PTR_ERR(pbl); + siw_dbg_pd(pd, "pbl allocation failed: %d\n", rv); + pbl = NULL; + goto err_out; + } + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + rv = -ENOMEM; + goto err_out; + } + rv = siw_mr_add_mem(mr, pd, pbl, 0, max_sge * PAGE_SIZE, 0); + if (rv) + goto err_out; + + mr->mem->is_pbl = 1; + + siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag); + + return &mr->base_mr; + +err_out: + atomic_dec(&sdev->num_mr); + + if (!mr) { + kfree(pbl); + } else { + if (mr->mem) + siw_mr_drop_mem(mr); + kfree_rcu(mr, rcu); + } + siw_dbg_pd(pd, "failed: %d\n", rv); + + return ERR_PTR(rv); +} + +/* Just used to count number of pages being mapped */ +static int siw_set_pbl_page(struct 
ib_mr *base_mr, u64 buf_addr) +{ + return 0; +} + +int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, + unsigned int *sg_off) +{ + struct scatterlist *slp; + struct siw_mr *mr = to_siw_mr(base_mr); + struct siw_mem *mem = mr->mem; + struct siw_pbl *pbl = mem->pbl; + struct siw_pble *pble; + u64 pbl_size; + int i, rv; + + if (!pbl) { + siw_dbg_mem(mem, "no PBL allocated\n"); + return -EINVAL; + } + pble = pbl->pbe; + + if (pbl->max_buf < num_sle) { + siw_dbg_mem(mem, "too many SGE's: %d > %d\n", + mem->pbl->max_buf, num_sle); + return -ENOMEM; + } + for_each_sg(sl, slp, num_sle, i) { + if (sg_dma_len(slp) == 0) { + siw_dbg_mem(mem, "empty SGE\n"); + return -EINVAL; + } + if (i == 0) { + pble->addr = sg_dma_address(slp); + pble->size = sg_dma_len(slp); + pble->pbl_off = 0; + pbl_size = pble->size; + pbl->num_buf = 1; + } else { + /* Merge PBL entries if adjacent */ + if (pble->addr + pble->size == sg_dma_address(slp)) { + pble->size += sg_dma_len(slp); + } else { + pble++; + pbl->num_buf++; + pble->addr = sg_dma_address(slp); + pble->size = sg_dma_len(slp); + pble->pbl_off = pbl_size; + } + pbl_size += sg_dma_len(slp); + } + siw_dbg_mem(mem, + "sge[%d], size %llu, addr 0x%016llx, total %llu\n", + i, pble->size, pble->addr, pbl_size); + } + rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page); + if (rv > 0) { + mem->len = base_mr->length; + mem->va = base_mr->iova; + siw_dbg_mem(mem, + "%llu bytes, start 0x%016llx, %u SLE to %u entries\n", + mem->len, mem->va, num_sle, pbl->num_buf); + } + return rv; +} + +/* + * siw_get_dma_mr() + * + * Create a (empty) DMA memory region, where no umem is attached. + */ +struct ib_mr *siw_get_dma_mr(struct ib_pd *pd, int rights) +{ + struct siw_device *sdev = to_siw_dev(pd->device); + struct siw_mr *mr = NULL; + int rv; + + if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { + siw_dbg_pd(pd, "too many mr's\n"); + rv = -ENOMEM; + goto err_out; + } + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + rv = -ENOMEM; + goto err_out; + } + rv = siw_mr_add_mem(mr, pd, NULL, 0, ULONG_MAX, rights); + if (rv) + goto err_out; + + mr->mem->stag_valid = 1; + + siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag); + + return &mr->base_mr; + +err_out: + if (rv) + kfree(mr); + + atomic_dec(&sdev->num_mr); + + return ERR_PTR(rv); +} + +/* + * siw_create_srq() + * + * Create Shared Receive Queue of attributes @init_attrs + * within protection domain given by @pd. + * + * @base_srq: Base SRQ contained in siw SRQ. + * @init_attrs: SRQ init attributes. 
+ * @udata: points to user context + */ +int siw_create_srq(struct ib_srq *base_srq, + struct ib_srq_init_attr *init_attrs, struct ib_udata *udata) +{ + struct siw_srq *srq = to_siw_srq(base_srq); + struct ib_srq_attr *attrs = &init_attrs->attr; + struct siw_device *sdev = to_siw_dev(base_srq->device); + struct siw_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct siw_ucontext, + base_ucontext); + int rv; + + if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) { + siw_dbg_pd(base_srq->pd, "too many SRQ's\n"); + rv = -ENOMEM; + goto err_out; + } + if (attrs->max_wr == 0 || attrs->max_wr > SIW_MAX_SRQ_WR || + attrs->max_sge > SIW_MAX_SGE || attrs->srq_limit > attrs->max_wr) { + rv = -EINVAL; + goto err_out; + } + srq->max_sge = attrs->max_sge; + srq->num_rqe = roundup_pow_of_two(attrs->max_wr); + srq->xa_srq_index = SIW_INVAL_UOBJ_KEY; + srq->limit = attrs->srq_limit; + if (srq->limit) + srq->armed = 1; + + srq->kernel_verbs = !udata; + + if (udata) + srq->recvq = + vmalloc_user(srq->num_rqe * sizeof(struct siw_rqe)); + else + srq->recvq = vzalloc(srq->num_rqe * sizeof(struct siw_rqe)); + + if (srq->recvq == NULL) { + rv = -ENOMEM; + goto err_out; + } + if (udata) { + struct siw_uresp_create_srq uresp = {}; + + srq->xa_srq_index = siw_create_uobj( + ctx, srq->recvq, srq->num_rqe * sizeof(struct siw_rqe)); + + if (srq->xa_srq_index == SIW_INVAL_UOBJ_KEY) { + rv = -ENOMEM; + goto err_out; + } + uresp.srq_key = srq->xa_srq_index; + uresp.num_rqe = srq->num_rqe; + + if (udata->outlen < sizeof(uresp)) { + rv = -EINVAL; + goto err_out; + } + rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rv) + goto err_out; + } + spin_lock_init(&srq->lock); + + siw_dbg_pd(base_srq->pd, "[SRQ 0x%p]: success\n", srq); + + return 0; + +err_out: + if (srq->recvq) { + if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY) + kfree(xa_erase(&ctx->xa, srq->xa_srq_index)); + vfree(srq->recvq); + } + atomic_dec(&sdev->num_srq); + + return rv; +} + +/* + * siw_modify_srq() + * + * Modify SRQ. The caller may resize SRQ and/or set/reset notification + * limit and (re)arm IB_EVENT_SRQ_LIMIT_REACHED notification. + * + * NOTE: it is unclear if RDMA core allows for changing the MAX_SGE + * parameter. siw_modify_srq() does not check the attrs->max_sge param. + */ +int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct siw_srq *srq = to_siw_srq(base_srq); + unsigned long flags; + int rv = 0; + + spin_lock_irqsave(&srq->lock, flags); + + if (attr_mask & IB_SRQ_MAX_WR) { + /* resize request not yet supported */ + rv = -EOPNOTSUPP; + goto out; + } + if (attr_mask & IB_SRQ_LIMIT) { + if (attrs->srq_limit) { + if (unlikely(attrs->srq_limit > srq->num_rqe)) { + rv = -EINVAL; + goto out; + } + srq->armed = 1; + } else { + srq->armed = 0; + } + srq->limit = attrs->srq_limit; + } +out: + spin_unlock_irqrestore(&srq->lock, flags); + + return rv; +} + +/* + * siw_query_srq() + * + * Query SRQ attributes. + */ +int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs) +{ + struct siw_srq *srq = to_siw_srq(base_srq); + unsigned long flags; + + spin_lock_irqsave(&srq->lock, flags); + + attrs->max_wr = srq->num_rqe; + attrs->max_sge = srq->max_sge; + attrs->srq_limit = srq->limit; + + spin_unlock_irqrestore(&srq->lock, flags); + + return 0; +} + +/* + * siw_destroy_srq() + * + * Destroy SRQ. 
+ * It is assumed that the SRQ is not referenced by any + * QP anymore - the code trusts the RDMA core environment to keep track + * of QP references. + */ +void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) +{ + struct siw_srq *srq = to_siw_srq(base_srq); + struct siw_device *sdev = to_siw_dev(base_srq->device); + struct siw_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct siw_ucontext, + base_ucontext); + + if (ctx && srq->xa_srq_index != SIW_INVAL_UOBJ_KEY) + kfree(xa_erase(&ctx->xa, srq->xa_srq_index)); + + vfree(srq->recvq); + atomic_dec(&sdev->num_srq); +} + +/* + * siw_post_srq_recv() + * + * Post a list of receive queue elements to SRQ. + * NOTE: The function does not check or lock a certain SRQ state + * during the post operation. The code simply trusts the + * RDMA core environment. + * + * @base_srq: Base SRQ contained in siw SRQ + * @wr: List of R-WR's + * @bad_wr: Updated to failing WR if posting fails. + */ +int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct siw_srq *srq = to_siw_srq(base_srq); + unsigned long flags; + int rv = 0; + + if (unlikely(!srq->kernel_verbs)) { + siw_dbg_pd(base_srq->pd, + "[SRQ 0x%p]: no kernel post_recv for mapped srq\n", + srq); + rv = -EINVAL; + goto out; + } + /* + * Serialize potentially multiple producers. + * Also needed to serialize potentially multiple + * consumers. + */ + spin_lock_irqsave(&srq->lock, flags); + + while (wr) { + u32 idx = srq->rq_put % srq->num_rqe; + struct siw_rqe *rqe = &srq->recvq[idx]; + + if (rqe->flags) { + siw_dbg_pd(base_srq->pd, "SRQ full\n"); + rv = -ENOMEM; + break; + } + if (unlikely(wr->num_sge > srq->max_sge)) { + siw_dbg_pd(base_srq->pd, + "[SRQ 0x%p]: too many sge's: %d\n", srq, + wr->num_sge); + rv = -EINVAL; + break; + } + rqe->id = wr->wr_id; + rqe->num_sge = wr->num_sge; + siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge); + + /* Make sure S-RQE is completely written before valid */ + smp_wmb(); + + rqe->flags = SIW_WQE_VALID; + + srq->rq_put++; + wr = wr->next; + } + spin_unlock_irqrestore(&srq->lock, flags); +out: + if (unlikely(rv < 0)) { + siw_dbg_pd(base_srq->pd, "[SRQ 0x%p]: error %d\n", srq, rv); + *bad_wr = wr; + } + return rv; +} + +void siw_qp_event(struct siw_qp *qp, enum ib_event_type etype) +{ + struct ib_event event; + struct ib_qp *base_qp = qp->ib_qp; + + /* + * Do not report asynchronous errors on QP which gets + * destroyed via verbs interface (siw_destroy_qp()) + */ + if (qp->attrs.flags & SIW_QP_IN_DESTROY) + return; + + event.event = etype; + event.device = base_qp->device; + event.element.qp = base_qp; + + if (base_qp->event_handler) { + siw_dbg_qp(qp, "reporting event %d\n", etype); + base_qp->event_handler(&event, base_qp->qp_context); + } +} + +void siw_cq_event(struct siw_cq *cq, enum ib_event_type etype) +{ + struct ib_event event; + struct ib_cq *base_cq = &cq->base_cq; + + event.event = etype; + event.device = base_cq->device; + event.element.cq = base_cq; + + if (base_cq->event_handler) { + siw_dbg_cq(cq, "reporting CQ event %d\n", etype); + base_cq->event_handler(&event, base_cq->cq_context); + } +} + +void siw_srq_event(struct siw_srq *srq, enum ib_event_type etype) +{ + struct ib_event event; + struct ib_srq *base_srq = &srq->base_srq; + + event.event = etype; + event.device = base_srq->device; + event.element.srq = base_srq; + + if (base_srq->event_handler) { + siw_dbg_pd(srq->base_srq.pd, + "reporting SRQ event %d\n", etype); + base_srq->event_handler(&event, 
base_srq->srq_context); + } +} + +void siw_port_event(struct siw_device *sdev, u8 port, enum ib_event_type etype) +{ + struct ib_event event; + + event.event = etype; + event.device = &sdev->base_dev; + event.element.port_num = port; + + siw_dbg(&sdev->base_dev, "reporting port event %d\n", etype); + + ib_dispatch_event(&event); +} diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h new file mode 100644 index 000000000000..1910869281cb --- /dev/null +++ b/drivers/infiniband/sw/siw/siw_verbs.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#ifndef _SIW_VERBS_H +#define _SIW_VERBS_H + +#include <linux/errno.h> + +#include <rdma/iw_cm.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> + +#include "siw.h" +#include "siw_cm.h" + +/* + * siw_copy_sgl() + * + * Copy SGL from RDMA core representation to local + * representation. + */ +static inline void siw_copy_sgl(struct ib_sge *sge, struct siw_sge *siw_sge, + int num_sge) +{ + while (num_sge--) { + siw_sge->laddr = sge->addr; + siw_sge->length = sge->length; + siw_sge->lkey = sge->lkey; + + siw_sge++; + sge++; + } +} + +int siw_alloc_ucontext(struct ib_ucontext *base_ctx, struct ib_udata *udata); +void siw_dealloc_ucontext(struct ib_ucontext *base_ctx); +int siw_query_port(struct ib_device *base_dev, u8 port, + struct ib_port_attr *attr); +int siw_get_port_immutable(struct ib_device *base_dev, u8 port, + struct ib_port_immutable *port_immutable); +int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, + struct ib_udata *udata); +int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +int siw_query_port(struct ib_device *base_dev, u8 port, + struct ib_port_attr *attr); +int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey); +int siw_query_gid(struct ib_device *base_dev, u8 port, int idx, + union ib_gid *gid); +int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata); +void siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata); +struct ib_qp *siw_create_qp(struct ib_pd *base_pd, + struct ib_qp_init_attr *attr, + struct ib_udata *udata); +int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); +int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata); +int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata); +int siw_poll_cq(struct ib_cq *base_cq, int num_entries, struct ib_wc *wc); +int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags); +struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len, + u64 rnic_va, int rights, struct ib_udata *udata); +struct ib_mr *siw_alloc_mr(struct ib_pd *base_pd, enum ib_mr_type mr_type, + u32 max_sge, struct ib_udata *udata); +struct ib_mr *siw_get_dma_mr(struct ib_pd *base_pd, int rights); +int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, + unsigned int *sg_off); +int siw_dereg_mr(struct ib_mr *base_mr, struct ib_udata *udata); +int 
siw_create_srq(struct ib_srq *base_srq, struct ib_srq_init_attr *attr, + struct ib_udata *udata); +int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask mask, struct ib_udata *udata); +int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr); +void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata); +int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma); +void siw_qp_event(struct siw_qp *qp, enum ib_event_type type); +void siw_cq_event(struct siw_cq *cq, enum ib_event_type type); +void siw_srq_event(struct siw_srq *srq, enum ib_event_type type); +void siw_port_event(struct siw_device *dev, u8 port, enum ib_event_type type); + +#endif diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 4760ce465d89..7af68604af77 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -7,7 +7,7 @@ config INFINIBAND_IPOIB transports IP packets over InfiniBand so you can use your IB device as a fancy NIC. - See Documentation/infiniband/ipoib.txt for more information + See Documentation/infiniband/ipoib.rst for more information config INFINIBAND_IPOIB_CM bool "IP-over-InfiniBand Connected Mode support" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index aa9dcfc36cd3..c59e00a0881f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1153,7 +1153,6 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, ret = -ENOMEM; goto err_tx; } - memset(p->tx_ring, 0, ipoib_sendq_size * sizeof(*p->tx_ring)); p->qp = ipoib_cm_create_tx_qp(p->dev, p); memalloc_noio_restore(noio_flag); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 83429925dfc6..63e4f9d15fd9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -138,7 +138,6 @@ static void ipoib_get_strings(struct net_device __always_unused *dev, p += ETH_GSTRING_LEN; } break; - case ETH_SS_TEST: default: break; } @@ -149,7 +148,6 @@ static int ipoib_get_sset_count(struct net_device __always_unused *dev, switch (sset) { case ETH_SS_STATS: return IPOIB_GLOBAL_STATS_LEN; - case ETH_SS_TEST: default: break; } @@ -222,6 +220,7 @@ static const struct ethtool_ops ipoib_ethtool_ops = { .get_strings = ipoib_get_strings, .get_ethtool_stats = ipoib_get_ethtool_stats, .get_sset_count = ipoib_get_sset_count, + .get_link = ethtool_op_get_link, }; void ipoib_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 04ea7db08e87..ac0583ff280d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1893,12 +1893,6 @@ static void ipoib_child_init(struct net_device *ndev) struct ipoib_dev_priv *priv = ipoib_priv(ndev); struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); - dev_hold(priv->parent); - - down_write(&ppriv->vlan_rwsem); - list_add_tail(&priv->list, &ppriv->child_intfs); - up_write(&ppriv->vlan_rwsem); - priv->max_ib_mtu = ppriv->max_ib_mtu; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); @@ -1941,6 +1935,17 @@ static int ipoib_ndo_init(struct net_device *ndev) if (rc) { pr_warn("%s: 
failed to initialize device: %s port %d (ret = %d)\n", priv->ca->name, priv->dev->name, priv->port, rc); + return rc; + } + + if (priv->parent) { + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + + dev_hold(priv->parent); + + down_write(&ppriv->vlan_rwsem); + list_add_tail(&priv->list, &ppriv->child_intfs); + up_write(&ppriv->vlan_rwsem); } return 0; @@ -1958,6 +1963,14 @@ static void ipoib_ndo_uninit(struct net_device *dev) */ WARN_ON(!list_empty(&priv->child_intfs)); + if (priv->parent) { + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + + down_write(&ppriv->vlan_rwsem); + list_del(&priv->list); + up_write(&ppriv->vlan_rwsem); + } + ipoib_neigh_hash_uninit(dev); ipoib_ib_dev_cleanup(dev); @@ -1969,15 +1982,8 @@ static void ipoib_ndo_uninit(struct net_device *dev) priv->wq = NULL; } - if (priv->parent) { - struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); - - down_write(&ppriv->vlan_rwsem); - list_del(&priv->list); - up_write(&ppriv->vlan_rwsem); - + if (priv->parent) dev_put(priv->parent); - } } static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index ba09068f6200..b69304d28f06 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -260,11 +260,8 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) priv->qp = NULL; } - if (ib_destroy_cq(priv->send_cq)) - ipoib_warn(priv, "ib_cq_destroy (send) failed\n"); - - if (ib_destroy_cq(priv->recv_cq)) - ipoib_warn(priv, "ib_cq_destroy (recv) failed\n"); + ib_destroy_cq(priv->send_cq); + ib_destroy_cq(priv->recv_cq); } void ipoib_event(struct ib_event_handler *handler, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9c185a8dabd3..c7a3d75fb308 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -205,7 +205,8 @@ iser_initialize_task_headers(struct iscsi_task *task, goto out; } - tx_desc->wr_idx = 0; + tx_desc->inv_wr.next = NULL; + tx_desc->reg_wr.wr.next = NULL; tx_desc->mapped = true; tx_desc->dma_addr = dma_addr; tx_desc->tx_sg[0].addr = tx_desc->dma_addr; @@ -406,13 +407,10 @@ static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) { struct iscsi_iser_task *iser_task = task->dd_data; + enum iser_data_dir dir = iser_task->dir[ISER_DIR_IN] ? 
+ ISER_DIR_IN : ISER_DIR_OUT; - if (iser_task->dir[ISER_DIR_IN]) - return iser_check_task_pi_status(iser_task, ISER_DIR_IN, - sector); - else - return iser_check_task_pi_status(iser_task, ISER_DIR_OUT, - sector); + return iser_check_task_pi_status(iser_task, dir, sector); } /** diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 36d525110fd2..39bf213444cb 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -225,14 +225,6 @@ enum iser_desc_type { ISCSI_TX_DATAOUT }; -/* Maximum number of work requests per task: - * Data memory region local invalidate + fast registration - * Protection memory region local invalidate + fast registration - * Signature memory region local invalidate + fast registration - * PDU send - */ -#define ISER_MAX_WRS 7 - /** * struct iser_tx_desc - iSER TX descriptor * @@ -245,11 +237,9 @@ enum iser_desc_type { * unsolicited data-out or control * @num_sge: number sges used on this TX task * @mapped: Is the task header mapped - * @wr_idx: Current WR index - * @wrs: Array of WRs per task - * @data_reg: Data buffer registration details - * @prot_reg: Protection buffer registration details - * @sig_attrs: Signature attributes + * reg_wr: registration WR + * send_wr: send WR + * inv_wr: invalidate WR */ struct iser_tx_desc { struct iser_ctrl iser_header; @@ -260,15 +250,9 @@ struct iser_tx_desc { int num_sge; struct ib_cqe cqe; bool mapped; - u8 wr_idx; - union iser_wr { - struct ib_send_wr send; - struct ib_reg_wr fast_reg; - struct ib_sig_handover_wr sig; - } wrs[ISER_MAX_WRS]; - struct iser_mem_reg data_reg; - struct iser_mem_reg prot_reg; - struct ib_sig_attrs sig_attrs; + struct ib_reg_wr reg_wr; + struct ib_send_wr send_wr; + struct ib_send_wr inv_wr; }; #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ @@ -388,6 +372,7 @@ struct iser_device { * * @mr: memory region * @fmr_pool: pool of fmrs + * @sig_mr: signature memory region * @page_vec: fast reg page list used by fmr pool * @mr_valid: is mr valid indicator */ @@ -396,36 +381,22 @@ struct iser_reg_resources { struct ib_mr *mr; struct ib_fmr_pool *fmr_pool; }; + struct ib_mr *sig_mr; struct iser_page_vec *page_vec; u8 mr_valid:1; }; /** - * struct iser_pi_context - Protection information context - * - * @rsc: protection buffer registration resources - * @sig_mr: signature enable memory region - * @sig_mr_valid: is sig_mr valid indicator - * @sig_protected: is region protected indicator - */ -struct iser_pi_context { - struct iser_reg_resources rsc; - struct ib_mr *sig_mr; - u8 sig_mr_valid:1; - u8 sig_protected:1; -}; - -/** * struct iser_fr_desc - Fast registration descriptor * * @list: entry in connection fastreg pool * @rsc: data buffer registration resources - * @pi_ctx: protection information context + * @sig_protected: is region protected indicator */ struct iser_fr_desc { struct list_head list; struct iser_reg_resources rsc; - struct iser_pi_context *pi_ctx; + bool sig_protected; struct list_head all_list; }; @@ -674,21 +645,6 @@ void iser_reg_desc_put_fmr(struct ib_conn *ib_conn, struct iser_fr_desc *desc); -static inline struct ib_send_wr * -iser_tx_next_wr(struct iser_tx_desc *tx_desc) -{ - struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx].send; - struct ib_send_wr *last_wr; - - if (tx_desc->wr_idx) { - last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1].send; - last_wr->next = cur_wr; - } - tx_desc->wr_idx++; - - return cur_wr; -} - static inline struct iser_conn * to_iser_conn(struct ib_conn 
*ib_conn) { diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 96af06cfe0af..5cbb4b3a0566 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -592,15 +592,14 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) static inline int iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) { - if (likely(rkey == desc->rsc.mr->rkey)) { - desc->rsc.mr_valid = 0; - } else if (likely(desc->pi_ctx && rkey == desc->pi_ctx->sig_mr->rkey)) { - desc->pi_ctx->sig_mr_valid = 0; - } else { + if (unlikely((!desc->sig_protected && rkey != desc->rsc.mr->rkey) || + (desc->sig_protected && rkey != desc->rsc.sig_mr->rkey))) { iser_err("Bogus remote invalidation for rkey %#x\n", rkey); return -EINVAL; } + desc->rsc.mr_valid = 0; + return 0; } @@ -750,6 +749,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) iser_task->prot[ISER_DIR_IN].data_len = 0; iser_task->prot[ISER_DIR_OUT].data_len = 0; + iser_task->prot[ISER_DIR_IN].dma_nents = 0; + iser_task->prot[ISER_DIR_OUT].dma_nents = 0; + memset(&iser_task->rdma_reg[ISER_DIR_IN], 0, sizeof(struct iser_mem_reg)); memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0, diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 2ba70729d7b0..2cc89a9b9e9b 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -302,8 +302,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, } static void -iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs, - struct ib_sig_domain *domain) +iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain) { domain->sig_type = IB_SIG_TYPE_T10_DIF; domain->sig.dif.pi_interval = scsi_prot_interval(sc); @@ -326,21 +325,21 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) case SCSI_PROT_WRITE_INSERT: case SCSI_PROT_READ_STRIP: sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE; - iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire); + iser_set_dif_domain(sc, &sig_attrs->wire); sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; break; case SCSI_PROT_READ_INSERT: case SCSI_PROT_WRITE_STRIP: sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; - iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem); + iser_set_dif_domain(sc, &sig_attrs->mem); sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ? IB_T10DIF_CSUM : IB_T10DIF_CRC; break; case SCSI_PROT_READ_PASS: case SCSI_PROT_WRITE_PASS: - iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire); + iser_set_dif_domain(sc, &sig_attrs->wire); sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; - iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem); + iser_set_dif_domain(sc, &sig_attrs->mem); sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ? 
IB_T10DIF_CSUM : IB_T10DIF_CRC; break; @@ -366,27 +365,29 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) static inline void iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr, - struct ib_cqe *cqe) + struct ib_cqe *cqe, + struct ib_send_wr *next_wr) { inv_wr->opcode = IB_WR_LOCAL_INV; inv_wr->wr_cqe = cqe; inv_wr->ex.invalidate_rkey = mr->rkey; inv_wr->send_flags = 0; inv_wr->num_sge = 0; + inv_wr->next = next_wr; } static int iser_reg_sig_mr(struct iscsi_iser_task *iser_task, - struct iser_pi_context *pi_ctx, - struct iser_mem_reg *data_reg, - struct iser_mem_reg *prot_reg, + struct iser_data_buf *mem, + struct iser_data_buf *sig_mem, + struct iser_reg_resources *rsc, struct iser_mem_reg *sig_reg) { struct iser_tx_desc *tx_desc = &iser_task->desc; - struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs; struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe; - struct ib_sig_handover_wr *wr; - struct ib_mr *mr = pi_ctx->sig_mr; + struct ib_mr *mr = rsc->sig_mr; + struct ib_sig_attrs *sig_attrs = mr->sig_attrs; + struct ib_reg_wr *wr = &tx_desc->reg_wr; int ret; memset(sig_attrs, 0, sizeof(*sig_attrs)); @@ -396,33 +397,36 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); - if (pi_ctx->sig_mr_valid) - iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe); + if (rsc->mr_valid) + iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr); ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - wr = container_of(iser_tx_next_wr(tx_desc), struct ib_sig_handover_wr, - wr); - wr->wr.opcode = IB_WR_REG_SIG_MR; + ret = ib_map_mr_sg_pi(mr, mem->sg, mem->dma_nents, NULL, + sig_mem->sg, sig_mem->dma_nents, NULL, SZ_4K); + if (unlikely(ret)) { + iser_err("failed to map PI sg (%d)\n", + mem->dma_nents + sig_mem->dma_nents); + goto err; + } + + memset(wr, 0, sizeof(*wr)); + wr->wr.next = &tx_desc->send_wr; + wr->wr.opcode = IB_WR_REG_MR_INTEGRITY; wr->wr.wr_cqe = cqe; - wr->wr.sg_list = &data_reg->sge; - wr->wr.num_sge = 1; + wr->wr.num_sge = 0; wr->wr.send_flags = 0; - wr->sig_attrs = sig_attrs; - wr->sig_mr = mr; - if (scsi_prot_sg_count(iser_task->sc)) - wr->prot = &prot_reg->sge; - else - wr->prot = NULL; - wr->access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; - pi_ctx->sig_mr_valid = 1; + wr->mr = mr; + wr->key = mr->rkey; + wr->access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + rsc->mr_valid = 1; sig_reg->sge.lkey = mr->lkey; sig_reg->rkey = mr->rkey; - sig_reg->sge.addr = 0; - sig_reg->sge.length = scsi_transfer_length(iser_task->sc); + sig_reg->sge.addr = mr->iova; + sig_reg->sge.length = mr->length; iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n", sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr, @@ -439,11 +443,11 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_tx_desc *tx_desc = &iser_task->desc; struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe; struct ib_mr *mr = rsc->mr; - struct ib_reg_wr *wr; + struct ib_reg_wr *wr = &tx_desc->reg_wr; int n; if (rsc->mr_valid) - iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe); + iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr); ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); @@ -454,7 +458,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return n < 0 ? 
n : -EINVAL; } - wr = container_of(iser_tx_next_wr(tx_desc), struct ib_reg_wr, wr); + wr->wr.next = &tx_desc->send_wr; wr->wr.opcode = IB_WR_REG_MR; wr->wr.wr_cqe = cqe; wr->wr.send_flags = 0; @@ -479,21 +483,6 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, } static int -iser_reg_prot_sg(struct iscsi_iser_task *task, - struct iser_data_buf *mem, - struct iser_fr_desc *desc, - bool use_dma_key, - struct iser_mem_reg *reg) -{ - struct iser_device *device = task->iser_conn->ib_conn.device; - - if (use_dma_key) - return iser_reg_dma(device, mem, reg); - - return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg); -} - -static int iser_reg_data_sg(struct iscsi_iser_task *task, struct iser_data_buf *mem, struct iser_fr_desc *desc, @@ -516,7 +505,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, struct iser_device *device = ib_conn->device; struct iser_data_buf *mem = &task->data[dir]; struct iser_mem_reg *reg = &task->rdma_reg[dir]; - struct iser_mem_reg *data_reg; struct iser_fr_desc *desc = NULL; bool use_dma_key; int err; @@ -529,32 +517,17 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, reg->mem_h = desc; } - if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) - data_reg = reg; - else - data_reg = &task->desc.data_reg; - - err = iser_reg_data_sg(task, mem, desc, use_dma_key, data_reg); - if (unlikely(err)) - goto err_reg; - - if (scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) { - struct iser_mem_reg *prot_reg = &task->desc.prot_reg; - - if (scsi_prot_sg_count(task->sc)) { - mem = &task->prot[dir]; - err = iser_reg_prot_sg(task, mem, desc, - use_dma_key, prot_reg); - if (unlikely(err)) - goto err_reg; - } - - err = iser_reg_sig_mr(task, desc->pi_ctx, data_reg, - prot_reg, reg); + if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) { + err = iser_reg_data_sg(task, mem, desc, use_dma_key, reg); + if (unlikely(err)) + goto err_reg; + } else { + err = iser_reg_sig_mr(task, mem, &task->prot[dir], + &desc->rsc, reg); if (unlikely(err)) goto err_reg; - desc->pi_ctx->sig_protected = 1; + desc->sig_protected = 1; } return 0; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 4ff3d98fa6a4..a6548de0e218 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -233,116 +233,63 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn) kfree(desc); } -static int -iser_alloc_reg_res(struct iser_device *device, - struct ib_pd *pd, - struct iser_reg_resources *res, - unsigned int size) +static struct iser_fr_desc * +iser_create_fastreg_desc(struct iser_device *device, + struct ib_pd *pd, + bool pi_enable, + unsigned int size) { + struct iser_fr_desc *desc; struct ib_device *ib_dev = device->ib_device; enum ib_mr_type mr_type; int ret; + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + return ERR_PTR(-ENOMEM); + if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) mr_type = IB_MR_TYPE_SG_GAPS; else mr_type = IB_MR_TYPE_MEM_REG; - res->mr = ib_alloc_mr(pd, mr_type, size); - if (IS_ERR(res->mr)) { - ret = PTR_ERR(res->mr); + desc->rsc.mr = ib_alloc_mr(pd, mr_type, size); + if (IS_ERR(desc->rsc.mr)) { + ret = PTR_ERR(desc->rsc.mr); iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); - return ret; + goto err_alloc_mr; } - res->mr_valid = 0; - - return 0; -} -static void -iser_free_reg_res(struct iser_reg_resources *rsc) -{ - ib_dereg_mr(rsc->mr); -} - -static int -iser_alloc_pi_ctx(struct iser_device *device, - struct ib_pd *pd, - struct iser_fr_desc 
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 4ff3d98fa6a4..a6548de0e218 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -233,116 +233,63 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
 kfree(desc);
 }
-static int
-iser_alloc_reg_res(struct iser_device *device,
- struct ib_pd *pd,
- struct iser_reg_resources *res,
- unsigned int size)
+static struct iser_fr_desc *
+iser_create_fastreg_desc(struct iser_device *device,
+ struct ib_pd *pd,
+ bool pi_enable,
+ unsigned int size)
 {
+ struct iser_fr_desc *desc;
 struct ib_device *ib_dev = device->ib_device;
 enum ib_mr_type mr_type;
 int ret;
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ return ERR_PTR(-ENOMEM);
+
 if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
 mr_type = IB_MR_TYPE_SG_GAPS;
 else
 mr_type = IB_MR_TYPE_MEM_REG;
- res->mr = ib_alloc_mr(pd, mr_type, size);
- if (IS_ERR(res->mr)) {
- ret = PTR_ERR(res->mr);
+ desc->rsc.mr = ib_alloc_mr(pd, mr_type, size);
+ if (IS_ERR(desc->rsc.mr)) {
+ ret = PTR_ERR(desc->rsc.mr);
 iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
- return ret;
+ goto err_alloc_mr;
 }
- res->mr_valid = 0;
-
- return 0;
-}
-static void
-iser_free_reg_res(struct iser_reg_resources *rsc)
-{
- ib_dereg_mr(rsc->mr);
-}
-
-static int
-iser_alloc_pi_ctx(struct iser_device *device,
- struct ib_pd *pd,
- struct iser_fr_desc *desc,
- unsigned int size)
-{
- struct iser_pi_context *pi_ctx = NULL;
- int ret;
-
- desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
- if (!desc->pi_ctx)
- return -ENOMEM;
-
- pi_ctx = desc->pi_ctx;
-
- ret = iser_alloc_reg_res(device, pd, &pi_ctx->rsc, size);
- if (ret) {
- iser_err("failed to allocate reg_resources\n");
- goto alloc_reg_res_err;
- }
-
- pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2);
- if (IS_ERR(pi_ctx->sig_mr)) {
- ret = PTR_ERR(pi_ctx->sig_mr);
- goto sig_mr_failure;
+ if (pi_enable) {
+ desc->rsc.sig_mr = ib_alloc_mr_integrity(pd, size, size);
+ if (IS_ERR(desc->rsc.sig_mr)) {
+ ret = PTR_ERR(desc->rsc.sig_mr);
+ iser_err("Failed to allocate sig_mr err=%d\n", ret);
+ goto err_alloc_mr_integrity;
+ }
 }
- pi_ctx->sig_mr_valid = 0;
- desc->pi_ctx->sig_protected = 0;
-
- return 0;
+ desc->rsc.mr_valid = 0;
-sig_mr_failure:
- iser_free_reg_res(&pi_ctx->rsc);
-alloc_reg_res_err:
- kfree(desc->pi_ctx);
+ return desc;
- return ret;
-}
+err_alloc_mr_integrity:
+ ib_dereg_mr(desc->rsc.mr);
+err_alloc_mr:
+ kfree(desc);
-static void
-iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
-{
- iser_free_reg_res(&pi_ctx->rsc);
- ib_dereg_mr(pi_ctx->sig_mr);
- kfree(pi_ctx);
+ return ERR_PTR(ret);
 }
-static struct iser_fr_desc *
-iser_create_fastreg_desc(struct iser_device *device,
- struct ib_pd *pd,
- bool pi_enable,
- unsigned int size)
+static void iser_destroy_fastreg_desc(struct iser_fr_desc *desc)
 {
- struct iser_fr_desc *desc;
- int ret;
-
- desc = kzalloc(sizeof(*desc), GFP_KERNEL);
- if (!desc)
- return ERR_PTR(-ENOMEM);
-
- ret = iser_alloc_reg_res(device, pd, &desc->rsc, size);
- if (ret)
- goto reg_res_alloc_failure;
+ struct iser_reg_resources *res = &desc->rsc;
- if (pi_enable) {
- ret = iser_alloc_pi_ctx(device, pd, desc, size);
- if (ret)
- goto pi_ctx_alloc_failure;
+ ib_dereg_mr(res->mr);
+ if (res->sig_mr) {
+ ib_dereg_mr(res->sig_mr);
+ res->sig_mr = NULL;
 }
-
- return desc;
-
-pi_ctx_alloc_failure:
- iser_free_reg_res(&desc->rsc);
-reg_res_alloc_failure:
 kfree(desc);
-
- return ERR_PTR(ret);
 }
 /**
@@ -399,10 +346,7 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
 list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
 list_del(&desc->all_list);
- iser_free_reg_res(&desc->rsc);
- if (desc->pi_ctx)
- iser_free_pi_ctx(desc->pi_ctx);
- kfree(desc);
+ iser_destroy_fastreg_desc(desc);
 ++i;
 }
@@ -455,7 +399,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 init_attr.qp_type = IB_QPT_RC;
 if (ib_conn->pi_support) {
 init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
- init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
+ init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
 iser_conn->max_cmds =
 ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
 } else {
@@ -707,6 +651,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
 struct ib_device_attr *attr = &device->ib_device->attrs;
 unsigned short sg_tablesize, sup_sg_tablesize;
 unsigned short reserved_mr_pages;
+ u32 max_num_sg;
 /*
 * FRs without SG_GAPS or FMRs can only map up to a (device) page per
@@ -720,12 +665,17 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
 else
 reserved_mr_pages = 1;
+ if (iser_conn->ib_conn.pi_support)
+ max_num_sg = attr->max_pi_fast_reg_page_list_len;
+ else
+ max_num_sg = attr->max_fast_reg_page_list_len;
+
 sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
 if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)
 sup_sg_tablesize = min_t(
 uint, ISCSI_ISER_MAX_SG_TABLESIZE,
- attr->max_fast_reg_page_list_len - reserved_mr_pages);
+ max_num_sg - reserved_mr_pages);
 else
 sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE;
@@ -762,7 +712,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
 /* connection T10-PI support */
 if (iser_pi_enable) {
 if (!(device->ib_device->attrs.device_cap_flags &
- IB_DEVICE_SIGNATURE_HANDOVER)) {
+ IB_DEVICE_INTEGRITY_HANDOVER)) {
 iser_warn("T10-PI requested but not supported on %s, "
 "continue without T10-PI\n",
 dev_name(&ib_conn->device->ib_device->dev));
@@ -1087,7 +1037,8 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
 int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
 bool signal)
 {
- struct ib_send_wr *wr = iser_tx_next_wr(tx_desc);
+ struct ib_send_wr *wr = &tx_desc->send_wr;
+ struct ib_send_wr *first_wr;
 int ib_ret;
 ib_dma_sync_single_for_device(ib_conn->device->ib_device,
@@ -1101,7 +1052,14 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
 wr->opcode = IB_WR_SEND;
 wr->send_flags = signal ? IB_SEND_SIGNALED : 0;
- ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, NULL);
+ if (tx_desc->inv_wr.next)
+ first_wr = &tx_desc->inv_wr;
+ else if (tx_desc->reg_wr.wr.next)
+ first_wr = &tx_desc->reg_wr.wr;
+ else
+ first_wr = wr;
+
+ ib_ret = ib_post_send(ib_conn->qp, first_wr, NULL);
 if (ib_ret)
 iser_err("ib_post_send failed, ret:%d opcode:%d\n",
 ib_ret, wr->opcode);
@@ -1118,9 +1076,9 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
 struct ib_mr_status mr_status;
 int ret;
- if (desc && desc->pi_ctx->sig_protected) {
- desc->pi_ctx->sig_protected = 0;
- ret = ib_check_mr_status(desc->pi_ctx->sig_mr,
+ if (desc && desc->sig_protected) {
+ desc->sig_protected = 0;
+ ret = ib_check_mr_status(desc->rsc.sig_mr,
 IB_MR_CHECK_SIG_STATUS, &mr_status);
 if (ret) {
 pr_err("ib_check_mr_status failed, ret %d\n", ret);
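In iser_verbs.c the per-descriptor iser_pi_context (an extra fast-reg MR plus an IB_MR_TYPE_SIGNATURE MR) goes away: a single fastreg descriptor now optionally carries one integrity MR obtained from the new ib_alloc_mr_integrity() helper, as the rewritten iser_create_fastreg_desc() above shows. A stripped-down sketch of that allocation pattern, with invented names and the error labels collapsed, might look like this; only ib_alloc_mr(), ib_alloc_mr_integrity() and ib_dereg_mr() are the real APIs.

#include <linux/err.h>
#include <rdma/ib_verbs.h>

struct example_fr_desc {
	struct ib_mr *mr;	/* regular fast-registration MR */
	struct ib_mr *sig_mr;	/* integrity MR, only when pi_enable */
};

static int example_alloc_fr_desc(struct ib_pd *pd, bool pi_enable,
				 unsigned int size,
				 struct example_fr_desc *desc)
{
	int ret;

	desc->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size);
	if (IS_ERR(desc->mr))
		return PTR_ERR(desc->mr);

	if (pi_enable) {
		/* one limit for the data SG list, one for the metadata */
		desc->sig_mr = ib_alloc_mr_integrity(pd, size, size);
		if (IS_ERR(desc->sig_mr)) {
			ret = PTR_ERR(desc->sig_mr);
			ib_dereg_mr(desc->mr);
			return ret;
		}
	}
	return 0;
}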
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 0205cf142b2f..a1a035270cab 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -133,7 +133,7 @@ isert_create_qp(struct isert_conn *isert_conn,
 attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 attr.qp_type = IB_QPT_RC;
 if (device->pi_capable)
- attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
+ attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
 ret = rdma_create_qp(cma_id, device->pd, &attr);
 if (ret) {
@@ -309,7 +309,7 @@ isert_create_device_ib_res(struct isert_device *device)
 /* Check signature cap */
 device->pi_capable = ib_dev->attrs.device_cap_flags &
- IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
+ IB_DEVICE_INTEGRITY_HANDOVER ? true : false;
 return 0;
@@ -1669,7 +1669,7 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 isert_dbg("Cmd %p\n", isert_cmd);
- ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr);
+ ret = isert_check_pi_status(cmd, isert_cmd->rw.reg->mr);
 isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
 if (ret) {
@@ -1715,7 +1715,7 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
 iscsit_stop_dataout_timer(cmd);
 if (isert_prot_cmd(isert_conn, se_cmd))
- ret = isert_check_pi_status(se_cmd, isert_cmd->rw.sig->sig_mr);
+ ret = isert_check_pi_status(se_cmd, isert_cmd->rw.reg->mr);
 isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
 cmd->write_data_done = 0;
@@ -2059,8 +2059,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 }
 static inline void
-isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs,
- struct ib_sig_domain *domain)
+isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_domain *domain)
 {
 domain->sig_type = IB_SIG_TYPE_T10_DIF;
 domain->sig.dif.bg_type = IB_T10DIF_CRC;
@@ -2088,17 +2087,17 @@ isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
 case TARGET_PROT_DIN_INSERT:
 case TARGET_PROT_DOUT_STRIP:
 sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
- isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire);
+ isert_set_dif_domain(se_cmd, &sig_attrs->wire);
 break;
 case TARGET_PROT_DOUT_INSERT:
 case TARGET_PROT_DIN_STRIP:
 sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
- isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
+ isert_set_dif_domain(se_cmd, &sig_attrs->mem);
 break;
 case TARGET_PROT_DIN_PASS:
 case TARGET_PROT_DOUT_PASS:
- isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire);
- isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
+ isert_set_dif_domain(se_cmd, &sig_attrs->wire);
+ isert_set_dif_domain(se_cmd, &sig_attrs->mem);
 break;
 default:
 isert_err("Unsupported PI operation %d\n", se_cmd->prot_op);
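The ib_isert.c hunks are largely mechanical follow-ups to the API rename: IB_DEVICE_SIGNATURE_HANDOVER becomes IB_DEVICE_INTEGRITY_HANDOVER, IB_QP_CREATE_SIGNATURE_EN becomes IB_QP_CREATE_INTEGRITY_EN, PI status is now checked on the MR kept in the rdma-rw context (rw.reg->mr), and isert_set_dif_domain() drops its unused sig_attrs argument. For reference, a small sketch of the renamed capability/flag pair in use (illustrative only, with an invented helper name):

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>

/* Sketch, not from the patch: enable integrity offload on a new RC QP. */
static int example_create_pi_qp(struct rdma_cm_id *cm_id, struct ib_pd *pd,
				struct ib_qp_init_attr *attr)
{
	bool pi_capable = pd->device->attrs.device_cap_flags &
			  IB_DEVICE_INTEGRITY_HANDOVER;

	if (pi_capable)
		attr->create_flags |= IB_QP_CREATE_INTEGRITY_EN;

	return rdma_create_qp(cm_id, pd, attr);
}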
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index d5cbad2c61e4..c7bd96edce80 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3466,13 +3466,14 @@ static const match_table_t srp_opt_tokens = {
 * @net: [in] Network namespace.
 * @sa: [out] Address family, IP address and port number.
 * @addr_port_str: [in] IP address and port number.
+ * @has_port: [out] Whether or not @addr_port_str includes a port number.
 *
 * Parse the following address formats:
 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
 */
 static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
- const char *addr_port_str)
+ const char *addr_port_str, bool *has_port)
 {
 char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
 char *port_str;
@@ -3481,9 +3482,12 @@ static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
 if (!addr)
 return -ENOMEM;
 port_str = strrchr(addr, ':');
- if (!port_str)
- return -EINVAL;
- *port_str++ = '\0';
+ if (port_str && strchr(port_str, ']'))
+ port_str = NULL;
+ if (port_str)
+ *port_str++ = '\0';
+ if (has_port)
+ *has_port = port_str != NULL;
 ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
 if (ret && addr[0]) {
 addr_end = addr + strlen(addr) - 1;
@@ -3505,6 +3509,7 @@ static int srp_parse_options(struct net *net, const char *buf,
 char *p;
 substring_t args[MAX_OPT_ARGS];
 unsigned long long ull;
+ bool has_port;
 int opt_mask = 0;
 int token;
 int ret = -EINVAL;
@@ -3603,7 +3608,8 @@ static int srp_parse_options(struct net *net, const char *buf,
 ret = -ENOMEM;
 goto out;
 }
- ret = srp_parse_in(net, &target->rdma_cm.src.ss, p);
+ ret = srp_parse_in(net, &target->rdma_cm.src.ss, p,
+ NULL);
 if (ret < 0) {
 pr_warn("bad source parameter '%s'\n", p);
 kfree(p);
@@ -3619,7 +3625,10 @@ static int srp_parse_options(struct net *net, const char *buf,
 ret = -ENOMEM;
 goto out;
 }
- ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p);
+ ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p,
+ &has_port);
+ if (!has_port)
+ ret = -EINVAL;
 if (ret < 0) {
 pr_warn("bad dest parameter '%s'\n", p);
 kfree(p);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 676619c1454a..a249db528d54 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -482,7 +482,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
 queue->queue_size,
 IB_MR_TYPE_MEM_REG,
- nvme_rdma_get_max_fr_pages(ibdev));
+ nvme_rdma_get_max_fr_pages(ibdev), 0);
 if (ret) {
 dev_err(queue->ctrl->ctrl.device,
 "failed to initialize MR pool sized %d for QID %d\n",
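The nvme-rdma hunk reflects the widened ib_mr_pool_init() signature from this series: the helper gained a trailing metadata-SG limit so that integrity MR pools can be built through the same code path, and callers that only need plain IB_MR_TYPE_MEM_REG pools, like nvme-rdma here, simply pass 0. A hedged sketch of the updated call, with an invented wrapper name and the surrounding queue setup omitted:

#include <rdma/ib_verbs.h>
#include <rdma/mr_pool.h>

/* Sketch, not from the patch: fill a QP's MR pool with plain fast-reg MRs. */
static int example_init_mr_pool(struct ib_qp *qp, int queue_size,
				u32 max_fr_pages)
{
	/* last argument is the metadata SG limit; 0 means no integrity MRs */
	return ib_mr_pool_init(qp, &qp->rdma_mrs, queue_size,
			       IB_MR_TYPE_MEM_REG, max_fr_pages, 0);
}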