summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/core_priv.h5
-rw-r--r--drivers/infiniband/core/counters.c17
-rw-r--r--drivers/infiniband/core/device.c102
-rw-r--r--drivers/infiniband/core/mad.c20
-rw-r--r--drivers/infiniband/core/nldev.c8
-rw-r--r--drivers/infiniband/core/umem_odp.c4
-rw-r--r--drivers/infiniband/core/user_mad.c6
7 files changed, 106 insertions, 56 deletions
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 888d89ce81df..beee7b7e0d9a 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -302,7 +302,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
struct ib_udata *udata,
struct ib_uobject *uobj)
{
+ enum ib_qp_type qp_type = attr->qp_type;
struct ib_qp *qp;
+ bool is_xrc;
if (!dev->ops.create_qp)
return ERR_PTR(-EOPNOTSUPP);
@@ -320,7 +322,8 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
* and more importantly they are created internaly by driver,
* see mlx5 create_dev_resources() as an example.
*/
- if (attr->qp_type < IB_QPT_XRC_INI) {
+ is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT;
+ if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) {
qp->res.type = RDMA_RESTRACK_QP;
if (uobj)
rdma_restrack_uadd(&qp->res);
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 01faef7bc061..b79890739a2c 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -38,6 +38,9 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
int ret;
port_counter = &dev->port_data[port].port_counter;
+ if (!port_counter->hstats)
+ return -EOPNOTSUPP;
+
mutex_lock(&port_counter->lock);
if (on) {
ret = __counter_set_mode(&port_counter->mode,
@@ -393,6 +396,9 @@ u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index)
u64 sum;
port_counter = &dev->port_data[port].port_counter;
+ if (!port_counter->hstats)
+ return 0;
+
sum = get_running_counters_hwstat_sum(dev, port, index);
sum += port_counter->hstats->value[index];
@@ -506,6 +512,9 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
if (!rdma_is_port_valid(dev, port))
return -EINVAL;
+ if (!dev->port_data[port].port_counter.hstats)
+ return -EOPNOTSUPP;
+
qp = rdma_counter_get_qp(dev, qp_num);
if (!qp)
return -ENOENT;
@@ -594,7 +603,7 @@ void rdma_counter_init(struct ib_device *dev)
struct rdma_port_counter *port_counter;
u32 port;
- if (!dev->ops.alloc_hw_stats || !dev->port_data)
+ if (!dev->port_data)
return;
rdma_for_each_port(dev, port) {
@@ -602,6 +611,9 @@ void rdma_counter_init(struct ib_device *dev)
port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
mutex_init(&port_counter->lock);
+ if (!dev->ops.alloc_hw_stats)
+ continue;
+
port_counter->hstats = dev->ops.alloc_hw_stats(dev, port);
if (!port_counter->hstats)
goto fail;
@@ -624,9 +636,6 @@ void rdma_counter_release(struct ib_device *dev)
struct rdma_port_counter *port_counter;
u32 port;
- if (!dev->ops.alloc_hw_stats)
- return;
-
rdma_for_each_port(dev, port) {
port_counter = &dev->port_data[port].port_counter;
kfree(port_counter->hstats);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 9773145dee09..ea8661a00651 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -94,11 +94,17 @@ static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(devices_rwsem);
#define DEVICE_REGISTERED XA_MARK_1
-static LIST_HEAD(client_list);
+static u32 highest_client_id;
#define CLIENT_REGISTERED XA_MARK_1
static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(clients_rwsem);
+static void ib_client_put(struct ib_client *client)
+{
+ if (refcount_dec_and_test(&client->uses))
+ complete(&client->uses_zero);
+}
+
/*
* If client_data is registered then the corresponding client must also still
* be registered.
@@ -661,6 +667,14 @@ static int add_client_context(struct ib_device *device,
down_write(&device->client_data_rwsem);
/*
+ * So long as the client is registered hold both the client and device
+ * unregistration locks.
+ */
+ if (!refcount_inc_not_zero(&client->uses))
+ goto out_unlock;
+ refcount_inc(&device->refcount);
+
+ /*
* Another caller to add_client_context got here first and has already
* completely initialized context.
*/
@@ -683,6 +697,9 @@ static int add_client_context(struct ib_device *device,
return 0;
out:
+ ib_device_put(device);
+ ib_client_put(client);
+out_unlock:
up_write(&device->client_data_rwsem);
return ret;
}
@@ -702,7 +719,7 @@ static void remove_client_context(struct ib_device *device,
client_data = xa_load(&device->client_data, client_id);
xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
client = xa_load(&clients, client_id);
- downgrade_write(&device->client_data_rwsem);
+ up_write(&device->client_data_rwsem);
/*
* Notice we cannot be holding any exclusive locks when calling the
@@ -712,17 +729,13 @@ static void remove_client_context(struct ib_device *device,
*
* For this reason clients and drivers should not call the
* unregistration functions will holdling any locks.
- *
- * It tempting to drop the client_data_rwsem too, but this is required
- * to ensure that unregister_client does not return until all clients
- * are completely unregistered, which is required to avoid module
- * unloading races.
*/
if (client->remove)
client->remove(device, client_data);
xa_erase(&device->client_data, client_id);
- up_read(&device->client_data_rwsem);
+ ib_device_put(device);
+ ib_client_put(client);
}
static int alloc_port_data(struct ib_device *device)
@@ -1224,7 +1237,7 @@ static int setup_device(struct ib_device *device)
static void disable_device(struct ib_device *device)
{
- struct ib_client *client;
+ u32 cid;
WARN_ON(!refcount_read(&device->refcount));
@@ -1232,10 +1245,19 @@ static void disable_device(struct ib_device *device)
xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
up_write(&devices_rwsem);
+ /*
+ * Remove clients in LIFO order, see assign_client_id. This could be
+ * more efficient if xarray learns to reverse iterate. Since no new
+ * clients can be added to this ib_device past this point we only need
+ * the maximum possible client_id value here.
+ */
down_read(&clients_rwsem);
- list_for_each_entry_reverse(client, &client_list, list)
- remove_client_context(device, client->client_id);
+ cid = highest_client_id;
up_read(&clients_rwsem);
+ while (cid) {
+ cid--;
+ remove_client_context(device, cid);
+ }
/* Pairs with refcount_set in enable_device */
ib_device_put(device);
@@ -1662,30 +1684,31 @@ static int assign_client_id(struct ib_client *client)
/*
* The add/remove callbacks must be called in FIFO/LIFO order. To
* achieve this we assign client_ids so they are sorted in
- * registration order, and retain a linked list we can reverse iterate
- * to get the LIFO order. The extra linked list can go away if xarray
- * learns to reverse iterate.
+ * registration order.
*/
- if (list_empty(&client_list)) {
- client->client_id = 0;
- } else {
- struct ib_client *last;
-
- last = list_last_entry(&client_list, struct ib_client, list);
- client->client_id = last->client_id + 1;
- }
+ client->client_id = highest_client_id;
ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
if (ret)
goto out;
+ highest_client_id++;
xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
- list_add_tail(&client->list, &client_list);
out:
up_write(&clients_rwsem);
return ret;
}
+static void remove_client_id(struct ib_client *client)
+{
+ down_write(&clients_rwsem);
+ xa_erase(&clients, client->client_id);
+ for (; highest_client_id; highest_client_id--)
+ if (xa_load(&clients, highest_client_id - 1))
+ break;
+ up_write(&clients_rwsem);
+}
+
/**
* ib_register_client - Register an IB client
* @client:Client to register
@@ -1705,6 +1728,8 @@ int ib_register_client(struct ib_client *client)
unsigned long index;
int ret;
+ refcount_set(&client->uses, 1);
+ init_completion(&client->uses_zero);
ret = assign_client_id(client);
if (ret)
return ret;
@@ -1740,21 +1765,30 @@ void ib_unregister_client(struct ib_client *client)
unsigned long index;
down_write(&clients_rwsem);
+ ib_client_put(client);
xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
up_write(&clients_rwsem);
- /*
- * Every device still known must be serialized to make sure we are
- * done with the client callbacks before we return.
- */
- down_read(&devices_rwsem);
- xa_for_each (&devices, index, device)
+
+ /* We do not want to have locks while calling client->remove() */
+ rcu_read_lock();
+ xa_for_each (&devices, index, device) {
+ if (!ib_device_try_get(device))
+ continue;
+ rcu_read_unlock();
+
remove_client_context(device, client->client_id);
- up_read(&devices_rwsem);
- down_write(&clients_rwsem);
- list_del(&client->list);
- xa_erase(&clients, client->client_id);
- up_write(&clients_rwsem);
+ ib_device_put(device);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+
+ /*
+ * remove_client_context() is not a fence, it can return even though a
+ * removal is ongoing. Wait until all removals are completed.
+ */
+ wait_for_completion(&client->uses_zero);
+ remove_client_id(client);
}
EXPORT_SYMBOL(ib_unregister_client);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index cc99479b2c09..9947d16edef2 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -3224,18 +3224,18 @@ static int ib_mad_port_open(struct ib_device *device,
if (has_smi)
cq_size *= 2;
+ port_priv->pd = ib_alloc_pd(device, 0);
+ if (IS_ERR(port_priv->pd)) {
+ dev_err(&device->dev, "Couldn't create ib_mad PD\n");
+ ret = PTR_ERR(port_priv->pd);
+ goto error3;
+ }
+
port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
IB_POLL_UNBOUND_WORKQUEUE);
if (IS_ERR(port_priv->cq)) {
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
- goto error3;
- }
-
- port_priv->pd = ib_alloc_pd(device, 0);
- if (IS_ERR(port_priv->pd)) {
- dev_err(&device->dev, "Couldn't create ib_mad PD\n");
- ret = PTR_ERR(port_priv->pd);
goto error4;
}
@@ -3278,11 +3278,11 @@ error8:
error7:
destroy_mad_qp(&port_priv->qp_info[0]);
error6:
- ib_dealloc_pd(port_priv->pd);
-error4:
ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
+error4:
+ ib_dealloc_pd(port_priv->pd);
error3:
kfree(port_priv);
@@ -3312,8 +3312,8 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
destroy_workqueue(port_priv->wq);
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
- ib_dealloc_pd(port_priv->pd);
ib_free_cq(port_priv->cq);
+ ib_dealloc_pd(port_priv->pd);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
/* XXX: Handle deallocation of MAD registration tables */
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 783e465e7c41..87d40d1ecdde 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1952,12 +1952,16 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
if (fill_nldev_handle(msg, device) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
- nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode))
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
+ ret = -EMSGSIZE;
goto err_msg;
+ }
if ((mode == RDMA_COUNTER_MODE_AUTO) &&
- nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask))
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
+ ret = -EMSGSIZE;
goto err_msg;
+ }
nlmsg_end(msg, nlh);
ib_device_put(device);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 2a75c6f8d827..c0e15db34680 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -112,10 +112,6 @@ static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
* prevent any further fault handling on this MR.
*/
ib_umem_notifier_start_account(umem_odp);
- umem_odp->dying = 1;
- /* Make sure that the fact the umem is dying is out before we release
- * all pending page faults. */
- smp_wmb();
complete_all(&umem_odp->notifier_completion);
umem_odp->umem.context->invalidate_range(
umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp));
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 9f8a48016b41..ffdeaf6e0b68 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -49,6 +49,7 @@
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/slab.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
@@ -884,11 +885,14 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
if (get_user(id, arg))
return -EFAULT;
+ if (id >= IB_UMAD_MAX_AGENTS)
+ return -EINVAL;
mutex_lock(&file->port->file_mutex);
mutex_lock(&file->mutex);
- if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
+ id = array_index_nospec(id, IB_UMAD_MAX_AGENTS);
+ if (!__get_agent(file, id)) {
ret = -EINVAL;
goto out;
}