summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/Makefile4
-rw-r--r--drivers/infiniband/core/addr.c13
-rw-r--r--drivers/infiniband/core/cache.c533
-rw-r--r--drivers/infiniband/core/cm.c67
-rw-r--r--drivers/infiniband/core/cma.c160
-rw-r--r--drivers/infiniband/core/cma_priv.h97
-rw-r--r--drivers/infiniband/core/core_priv.h11
-rw-r--r--drivers/infiniband/core/device.c18
-rw-r--r--drivers/infiniband/core/iwpm_util.c5
-rw-r--r--drivers/infiniband/core/multicast.c26
-rw-r--r--drivers/infiniband/core/nldev.c364
-rw-r--r--drivers/infiniband/core/rdma_core.c13
-rw-r--r--drivers/infiniband/core/restrack.c121
-rw-r--r--drivers/infiniband/core/sa_query.c202
-rw-r--r--drivers/infiniband/core/sysfs.c53
-rw-r--r--drivers/infiniband/core/ucm.c18
-rw-r--r--drivers/infiniband/core/ucma.c40
-rw-r--r--drivers/infiniband/core/uverbs.h41
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c279
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c60
-rw-r--r--drivers/infiniband/core/uverbs_ioctl_merge.c2
-rw-r--r--drivers/infiniband/core/uverbs_main.c218
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c326
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c210
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dm.c108
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c435
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c147
-rw-r--r--drivers/infiniband/core/verbs.c87
28 files changed, 2579 insertions, 1079 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index f69833db0a32..dda9e856e3fa 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -34,4 +34,6 @@ ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
- uverbs_ioctl_merge.o
+ uverbs_ioctl_merge.o uverbs_std_types_cq.o \
+ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
+ uverbs_std_types_mr.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index cb1d2ab13c66..88a7542d8c7b 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -329,7 +329,8 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
-static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int ib_nl_fetch_ha(const struct dst_entry *dst,
+ struct rdma_dev_addr *dev_addr,
const void *daddr, u32 seq, u16 family)
{
if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
@@ -340,7 +341,8 @@ static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}
-static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int dst_fetch_ha(const struct dst_entry *dst,
+ struct rdma_dev_addr *dev_addr,
const void *daddr)
{
struct neighbour *n;
@@ -364,7 +366,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
return ret;
}
-static bool has_gateway(struct dst_entry *dst, sa_family_t family)
+static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
{
struct rtable *rt;
struct rt6_info *rt6;
@@ -378,7 +380,7 @@ static bool has_gateway(struct dst_entry *dst, sa_family_t family)
return rt6->rt6i_flags & RTF_GATEWAY;
}
-static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
const struct sockaddr *dst_in, u32 seq)
{
const struct sockaddr_in *dst_in4 =
@@ -482,7 +484,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
}
#endif
-static int addr_resolve_neigh(struct dst_entry *dst,
+static int addr_resolve_neigh(const struct dst_entry *dst,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
u32 seq)
@@ -736,7 +738,6 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
return addr_resolve(src_in, dst_addr, addr, false, 0);
}
-EXPORT_SYMBOL(rdma_resolve_ip_route);
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index e9a409d7f4e2..e337b08de2ff 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -59,8 +59,6 @@ struct ib_update_work {
union ib_gid zgid;
EXPORT_SYMBOL(zgid);
-static const struct ib_gid_attr zattr;
-
enum gid_attr_find_mask {
GID_ATTR_FIND_MASK_GID = 1UL << 0,
GID_ATTR_FIND_MASK_NETDEV = 1UL << 1,
@@ -73,15 +71,6 @@ enum gid_table_entry_props {
GID_TABLE_ENTRY_DEFAULT = 1UL << 1,
};
-enum gid_table_write_action {
- GID_TABLE_WRITE_ACTION_ADD,
- GID_TABLE_WRITE_ACTION_DEL,
- /* MODIFY only updates the GID table. Currently only used by
- * ib_cache_update.
- */
- GID_TABLE_WRITE_ACTION_MODIFY
-};
-
struct ib_gid_table_entry {
unsigned long props;
union ib_gid gid;
@@ -100,31 +89,26 @@ struct ib_gid_table {
* (a) Find the GID
* (b) Delete it.
*
- * Add/delete should be carried out atomically.
- * This is done by locking this mutex from multiple
- * writers. We don't need this lock for IB, as the MAD
- * layer replaces all entries. All data_vec entries
- * are locked by this lock.
**/
- struct mutex lock;
- /* This lock protects the table entries from being
- * read and written simultaneously.
+ /* Any writer to data_vec must hold this lock and the write side of
+ * rwlock. readers must hold only rwlock. All writers must be in a
+ * sleepable context.
*/
+ struct mutex lock;
+ /* rwlock protects data_vec[ix]->props. */
rwlock_t rwlock;
struct ib_gid_table_entry *data_vec;
};
static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
{
- if (rdma_cap_roce_gid_table(ib_dev, port)) {
- struct ib_event event;
+ struct ib_event event;
- event.device = ib_dev;
- event.element.port_num = port;
- event.event = IB_EVENT_GID_CHANGE;
+ event.device = ib_dev;
+ event.element.port_num = port;
+ event.event = IB_EVENT_GID_CHANGE;
- ib_dispatch_event(&event);
- }
+ ib_dispatch_event(&event);
}
static const char * const gid_type_str[] = {
@@ -165,94 +149,127 @@ int ib_cache_gid_parse_type_str(const char *buf)
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
-/* This function expects that rwlock will be write locked in all
- * scenarios and that lock will be locked in sleep-able (RoCE)
- * scenarios.
- */
-static int write_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- enum gid_table_write_action action,
- bool default_gid)
- __releases(&table->rwlock) __acquires(&table->rwlock)
+static void del_roce_gid(struct ib_device *device, u8 port_num,
+ struct ib_gid_table *table, int ix)
{
- int ret = 0;
- struct net_device *old_net_dev;
- enum ib_gid_type old_gid_type;
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ device->name, port_num, ix,
+ table->data_vec[ix].gid.raw);
+
+ if (rdma_cap_roce_gid_table(device, port_num))
+ device->del_gid(&table->data_vec[ix].attr,
+ &table->data_vec[ix].context);
+ dev_put(table->data_vec[ix].attr.ndev);
+}
- /* in rdma_cap_roce_gid_table, this funciton should be protected by a
- * sleep-able lock.
- */
+static int add_roce_gid(struct ib_gid_table *table,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry;
+ int ix = attr->index;
+ int ret = 0;
- if (rdma_cap_roce_gid_table(ib_dev, port)) {
- table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
- write_unlock_irq(&table->rwlock);
- /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
- * RoCE providers and thus only updates the cache.
- */
- if (action == GID_TABLE_WRITE_ACTION_ADD)
- ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
- &table->data_vec[ix].context);
- else if (action == GID_TABLE_WRITE_ACTION_DEL)
- ret = ib_dev->del_gid(ib_dev, port, ix,
- &table->data_vec[ix].context);
- write_lock_irq(&table->rwlock);
+ if (!attr->ndev) {
+ pr_err("%s NULL netdev device=%s port=%d index=%d\n",
+ __func__, attr->device->name, attr->port_num,
+ attr->index);
+ return -EINVAL;
}
- old_net_dev = table->data_vec[ix].attr.ndev;
- old_gid_type = table->data_vec[ix].attr.gid_type;
- if (old_net_dev && old_net_dev != attr->ndev)
- dev_put(old_net_dev);
- /* if modify_gid failed, just delete the old gid */
- if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
- gid = &zgid;
- attr = &zattr;
- table->data_vec[ix].context = NULL;
+ entry = &table->data_vec[ix];
+ if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
+ WARN(1, "GID table corruption device=%s port=%d index=%d\n",
+ attr->device->name, attr->port_num,
+ attr->index);
+ return -EINVAL;
}
- memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
- memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
- if (default_gid) {
- table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
- if (action == GID_TABLE_WRITE_ACTION_DEL)
- table->data_vec[ix].attr.gid_type = old_gid_type;
+ if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
+ ret = attr->device->add_gid(gid, attr, &entry->context);
+ if (ret) {
+ pr_err("%s GID add failed device=%s port=%d index=%d\n",
+ __func__, attr->device->name, attr->port_num,
+ attr->index);
+ goto add_err;
+ }
}
- if (table->data_vec[ix].attr.ndev &&
- table->data_vec[ix].attr.ndev != old_net_dev)
- dev_hold(table->data_vec[ix].attr.ndev);
-
- table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
+ dev_hold(attr->ndev);
+add_err:
+ if (!ret)
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ attr->device->name, attr->port_num, ix, gid->raw);
return ret;
}
-static int add_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, gid, attr,
- GID_TABLE_WRITE_ACTION_ADD, default_gid);
-}
+/**
+ * add_modify_gid - Add or modify GID table entry
+ *
+ * @table: GID table in which GID to be added or modified
+ * @gid: GID content
+ * @attr: Attributes of the GID
+ *
+ * Returns 0 on success or appropriate error code. It accepts zero
+ * GID addition for non RoCE ports for HCA's who report them as valid
+ * GID. However such zero GIDs are not added to the cache.
+ */
+static int add_modify_gid(struct ib_gid_table *table,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ int ret;
+
+ if (rdma_protocol_roce(attr->device, attr->port_num)) {
+ ret = add_roce_gid(table, gid, attr);
+ if (ret)
+ return ret;
+ } else {
+ /*
+ * Some HCA's report multiple GID entries with only one
+ * valid GID, but remaining as zero GID.
+ * So ignore such behavior for IB link layer and don't
+ * fail the call, but don't add such entry to GID cache.
+ */
+ if (!memcmp(gid, &zgid, sizeof(*gid)))
+ return 0;
+ }
+
+ lockdep_assert_held(&table->lock);
+ memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
+ memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
-static int modify_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, gid, attr,
- GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
+ write_lock_irq(&table->rwlock);
+ table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
+ write_unlock_irq(&table->rwlock);
+ return 0;
}
-static int del_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
- GID_TABLE_WRITE_ACTION_DEL, default_gid);
+/**
+ * del_gid - Delete GID table entry
+ *
+ * @ib_dev: IB device whose GID entry to be deleted
+ * @port: Port number of the IB device
+ * @table: GID table of the IB device for a port
+ * @ix: GID entry index to delete
+ *
+ */
+static void del_gid(struct ib_device *ib_dev, u8 port,
+ struct ib_gid_table *table, int ix)
+{
+ lockdep_assert_held(&table->lock);
+ write_lock_irq(&table->rwlock);
+ table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
+ write_unlock_irq(&table->rwlock);
+
+ if (rdma_protocol_roce(ib_dev, port))
+ del_roce_gid(ib_dev, port, table, ix);
+ memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid));
+ memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
+ table->data_vec[ix].context = NULL;
}
-/* rwlock should be read locked */
+/* rwlock should be read locked, or lock should be held */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
const struct ib_gid_attr *val, bool default_gid,
unsigned long mask, int *pempty)
@@ -268,15 +285,32 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
i++;
+ /* find_gid() is used during GID addition where it is expected
+ * to return a free entry slot which is not duplicate.
+ * Free entry slot is requested and returned if pempty is set,
+ * so lookup free slot only if requested.
+ */
+ if (pempty && empty < 0) {
+ if (data->props & GID_TABLE_ENTRY_INVALID) {
+ /* Found an invalid (free) entry; allocate it */
+ if (data->props & GID_TABLE_ENTRY_DEFAULT) {
+ if (default_gid)
+ empty = curr_index;
+ } else {
+ empty = curr_index;
+ }
+ }
+ }
+
+ /*
+ * Additionally find_gid() is used to find valid entry during
+ * lookup operation, where validity needs to be checked. So
+ * find the empty entry first to continue to search for a free
+ * slot and ignore its INVALID flag.
+ */
if (data->props & GID_TABLE_ENTRY_INVALID)
continue;
- if (empty < 0)
- if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
- !memcmp(attr, &zattr, sizeof(*attr)) &&
- !data->props)
- empty = curr_index;
-
if (found >= 0)
continue;
@@ -312,20 +346,56 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
addrconf_ifid_eui48(&gid->raw[8], dev);
}
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
- union ib_gid *gid, struct ib_gid_attr *attr)
+static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr,
+ unsigned long mask, bool default_gid)
{
struct ib_gid_table *table;
- int ix;
int ret = 0;
- struct net_device *idev;
int empty;
+ int ix;
- table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
-
+ /* Do not allow adding zero GID in support of
+ * IB spec version 1.3 section 4.1.1 point (6) and
+ * section 12.7.10 and section 12.7.20
+ */
if (!memcmp(gid, &zgid, sizeof(*gid)))
return -EINVAL;
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+
+ mutex_lock(&table->lock);
+
+ ix = find_gid(table, gid, attr, default_gid, mask, &empty);
+ if (ix >= 0)
+ goto out_unlock;
+
+ if (empty < 0) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+ attr->device = ib_dev;
+ attr->index = empty;
+ attr->port_num = port;
+ ret = add_modify_gid(table, gid, attr);
+ if (!ret)
+ dispatch_gid_change_event(ib_dev, port);
+
+out_unlock:
+ mutex_unlock(&table->lock);
+ if (ret)
+ pr_warn("%s: unable to add gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
+ return ret;
+}
+
+int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr)
+{
+ struct net_device *idev;
+ unsigned long mask;
+ int ret;
+
if (ib_dev->get_netdev) {
idev = ib_dev->get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
@@ -342,27 +412,11 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
dev_put(idev);
}
- mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
-
- ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_NETDEV, &empty);
- if (ix >= 0)
- goto out_unlock;
-
- if (empty < 0) {
- ret = -ENOSPC;
- goto out_unlock;
- }
-
- ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
- if (!ret)
- dispatch_gid_change_event(ib_dev, port);
+ mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_NETDEV;
-out_unlock:
- write_unlock_irq(&table->rwlock);
- mutex_unlock(&table->lock);
+ ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
return ret;
}
@@ -370,29 +424,32 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
struct ib_gid_table *table;
+ int ret = 0;
int ix;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
ix = find_gid(table, gid, attr, false,
GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_NETDEV |
- GID_ATTR_FIND_MASK_DEFAULT,
+ GID_ATTR_FIND_MASK_NETDEV,
NULL);
- if (ix < 0)
+ if (ix < 0) {
+ ret = -EINVAL;
goto out_unlock;
+ }
- if (!del_gid(ib_dev, port, table, ix, false))
- dispatch_gid_change_event(ib_dev, port);
+ del_gid(ib_dev, port, table, ix);
+ dispatch_gid_change_event(ib_dev, port);
out_unlock:
- write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
- return 0;
+ if (ret)
+ pr_debug("%s: can't delete gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
+ return ret;
}
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
@@ -405,16 +462,14 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- for (ix = 0; ix < table->sz; ix++)
- if (table->data_vec[ix].attr.ndev == ndev)
- if (!del_gid(ib_dev, port, table, ix,
- !!(table->data_vec[ix].props &
- GID_TABLE_ENTRY_DEFAULT)))
- deleted = true;
+ for (ix = 0; ix < table->sz; ix++) {
+ if (table->data_vec[ix].attr.ndev == ndev) {
+ del_gid(ib_dev, port, table, ix);
+ deleted = true;
+ }
+ }
- write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
if (deleted)
@@ -492,6 +547,19 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
mask, port, index);
}
+/**
+ * ib_find_cached_gid_by_port - Returns the GID table index where a specified
+ * GID value occurs. It searches for the specified GID value in the local
+ * software cache.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
+ * @port_num: The port number of the device where the GID value should be
+ * searched.
+ * @ndev: In RoCE, the net device of the device. Null means ignore.
+ * @index: The index into the cached GID table where the GID was found. This
+ * parameter may be NULL.
+ */
int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
enum ib_gid_type gid_type,
@@ -528,7 +596,7 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
EXPORT_SYMBOL(ib_find_cached_gid_by_port);
/**
- * ib_find_gid_by_filter - Returns the GID table index where a specified
+ * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
* GID value occurs
* @device: The device to query.
* @gid: The GID value to search for.
@@ -539,7 +607,7 @@ EXPORT_SYMBOL(ib_find_cached_gid_by_port);
* otherwise, we continue searching the GID table. It's guaranteed that
* while filter is executed, ndev field is valid and the structure won't
* change. filter is executed in an atomic context. filter must not be NULL.
- * @index: The index into the cached GID table where the GID was found. This
+ * @index: The index into the cached GID table where the GID was found. This
* parameter may be NULL.
*
* ib_cache_gid_find_by_filter() searches for the specified GID value
@@ -598,6 +666,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
{
struct ib_gid_table *table =
kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+ int i;
if (!table)
return NULL;
@@ -611,6 +680,11 @@ static struct ib_gid_table *alloc_gid_table(int sz)
table->sz = sz;
rwlock_init(&table->rwlock);
+ /* Mark all entries as invalid so that allocator can allocate
+ * one of the invalid (free) entry.
+ */
+ for (i = 0; i < sz; i++)
+ table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
return table;
err_free_table:
@@ -635,16 +709,15 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
if (!table)
return;
- write_lock_irq(&table->rwlock);
+ mutex_lock(&table->lock);
for (i = 0; i < table->sz; ++i) {
if (memcmp(&table->data_vec[i].gid, &zgid,
- sizeof(table->data_vec[i].gid)))
- if (!del_gid(ib_dev, port, table, i,
- table->data_vec[i].props &
- GID_ATTR_FIND_MASK_DEFAULT))
- deleted = true;
+ sizeof(table->data_vec[i].gid))) {
+ del_gid(ib_dev, port, table, i);
+ deleted = true;
+ }
}
- write_unlock_irq(&table->rwlock);
+ mutex_unlock(&table->lock);
if (deleted)
dispatch_gid_change_event(ib_dev, port);
@@ -657,9 +730,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
{
union ib_gid gid;
struct ib_gid_attr gid_attr;
- struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
unsigned int gid_type;
+ unsigned long mask;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
@@ -668,60 +741,19 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
gid_attr.ndev = ndev;
for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
- int ix;
- union ib_gid current_gid;
- struct ib_gid_attr current_gid_attr = {};
-
if (1UL << gid_type & ~gid_type_mask)
continue;
gid_attr.gid_type = gid_type;
- mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- ix = find_gid(table, NULL, &gid_attr, true,
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_DEFAULT,
- NULL);
-
- /* Coudn't find default GID location */
- if (WARN_ON(ix < 0))
- goto release;
-
- zattr_type.gid_type = gid_type;
-
- if (!__ib_cache_gid_get(ib_dev, port, ix,
- &current_gid, &current_gid_attr) &&
- mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
- !memcmp(&gid, &current_gid, sizeof(gid)) &&
- !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
- goto release;
-
- if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
- memcmp(&current_gid_attr, &zattr_type,
- sizeof(current_gid_attr))) {
- if (del_gid(ib_dev, port, table, ix, true)) {
- pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
- ix, gid.raw);
- goto release;
- } else {
- dispatch_gid_change_event(ib_dev, port);
- }
- }
-
if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
- if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
- pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
- gid.raw);
- else
- dispatch_gid_change_event(ib_dev, port);
+ mask = GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT;
+ __ib_cache_gid_add(ib_dev, port, &gid,
+ &gid_attr, mask, true);
+ } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
+ ib_cache_gid_del(ib_dev, port, &gid, &gid_attr);
}
-
-release:
- if (current_gid_attr.ndev)
- dev_put(current_gid_attr.ndev);
- write_unlock_irq(&table->rwlock);
- mutex_unlock(&table->lock);
}
}
@@ -848,6 +880,20 @@ int ib_get_cached_gid(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_gid);
+/**
+ * ib_find_cached_gid - Returns the port number and GID table index where
+ * a specified GID value occurs.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ * @port_num: The port number of the device where the GID value was found.
+ * @index: The index into the cached GID table where the GID was found. This
+ * parameter may be NULL.
+ *
+ * ib_find_cached_gid() searches for the specified GID value in
+ * the local software cache.
+ */
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
enum ib_gid_type gid_type,
@@ -868,7 +914,7 @@ int ib_find_gid_by_filter(struct ib_device *device,
void *context, u16 *index)
{
/* Only RoCE GID table supports filter function */
- if (!rdma_cap_roce_gid_table(device, port_num) && filter)
+ if (!rdma_protocol_roce(device, port_num) && filter)
return -EPROTONOSUPPORT;
return ib_cache_gid_find_by_filter(device, gid,
@@ -910,8 +956,7 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
unsigned long flags;
int p;
- if (port_num < rdma_start_port(device) ||
- port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
p = port_num - rdma_start_port(device);
@@ -1021,7 +1066,7 @@ int ib_get_cached_port_state(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
@@ -1033,21 +1078,46 @@ int ib_get_cached_port_state(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_port_state);
+static int config_non_roce_gid_cache(struct ib_device *device,
+ u8 port, int gid_tbl_len)
+{
+ struct ib_gid_attr gid_attr = {};
+ struct ib_gid_table *table;
+ union ib_gid gid;
+ int ret = 0;
+ int i;
+
+ gid_attr.device = device;
+ gid_attr.port_num = port;
+ table = device->cache.ports[port - rdma_start_port(device)].gid;
+
+ mutex_lock(&table->lock);
+ for (i = 0; i < gid_tbl_len; ++i) {
+ if (!device->query_gid)
+ continue;
+ ret = device->query_gid(device, port, i, &gid);
+ if (ret) {
+ pr_warn("query_gid failed (%d) for %s (index %d)\n",
+ ret, device->name, i);
+ goto err;
+ }
+ gid_attr.index = i;
+ add_modify_gid(table, &gid, &gid_attr);
+ }
+err:
+ mutex_unlock(&table->lock);
+ return ret;
+}
+
static void ib_cache_update(struct ib_device *device,
u8 port,
bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
- struct ib_gid_cache {
- int table_len;
- union ib_gid table[0];
- } *gid_cache = NULL;
int i;
int ret;
struct ib_gid_table *table;
- bool use_roce_gid_table =
- rdma_cap_roce_gid_table(device, port);
if (!rdma_is_port_valid(device, port))
return;
@@ -1065,6 +1135,13 @@ static void ib_cache_update(struct ib_device *device,
goto err;
}
+ if (!rdma_protocol_roce(device, port)) {
+ ret = config_non_roce_gid_cache(device, port,
+ tprops->gid_tbl_len);
+ if (ret)
+ goto err;
+ }
+
pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
sizeof *pkey_cache->table, GFP_KERNEL);
if (!pkey_cache)
@@ -1072,15 +1149,6 @@ static void ib_cache_update(struct ib_device *device,
pkey_cache->table_len = tprops->pkey_tbl_len;
- if (!use_roce_gid_table) {
- gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
- sizeof(*gid_cache->table), GFP_KERNEL);
- if (!gid_cache)
- goto err;
-
- gid_cache->table_len = tprops->gid_tbl_len;
- }
-
for (i = 0; i < pkey_cache->table_len; ++i) {
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
if (ret) {
@@ -1090,33 +1158,12 @@ static void ib_cache_update(struct ib_device *device,
}
}
- if (!use_roce_gid_table) {
- for (i = 0; i < gid_cache->table_len; ++i) {
- ret = ib_query_gid(device, port, i,
- gid_cache->table + i, NULL);
- if (ret) {
- pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
- ret, device->name, i);
- goto err;
- }
- }
- }
-
write_lock_irq(&device->cache.lock);
old_pkey_cache = device->cache.ports[port -
rdma_start_port(device)].pkey;
device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
- if (!use_roce_gid_table) {
- write_lock(&table->rwlock);
- for (i = 0; i < gid_cache->table_len; i++) {
- modify_gid(device, port, table, i, gid_cache->table + i,
- &zattr, false);
- }
- write_unlock(&table->rwlock);
- }
-
device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
device->cache.ports[port - rdma_start_port(device)].port_state =
tprops->state;
@@ -1130,14 +1177,12 @@ static void ib_cache_update(struct ib_device *device,
port,
tprops->subnet_prefix);
- kfree(gid_cache);
kfree(old_pkey_cache);
kfree(tprops);
return;
err:
kfree(pkey_cache);
- kfree(gid_cache);
kfree(tprops);
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index e6749157fd86..a92e1a5c202b 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -462,13 +462,31 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
-static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
- struct cm_id_private *cm_id_priv)
+static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
+ struct cm_av *av,
+ struct cm_port *port)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&cm.lock, flags);
+
+ if (&cm_id_priv->av == av)
+ list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
+ else if (&cm_id_priv->alt_av == av)
+ list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
+ else
+ ret = -EINVAL;
+
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return ret;
+}
+
+static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
unsigned long flags;
- int ret;
u8 p;
struct net_device *ndev = ib_get_ndev_from_path(path);
@@ -477,7 +495,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
sa_conv_pathrec_to_gid_type(path),
ndev, &p, NULL)) {
- port = cm_dev->port[p-1];
+ port = cm_dev->port[p - 1];
break;
}
}
@@ -485,9 +503,20 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
if (ndev)
dev_put(ndev);
+ return port;
+}
+static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
+ struct cm_id_private *cm_id_priv)
+{
+ struct cm_device *cm_dev;
+ struct cm_port *port;
+ int ret;
+
+ port = get_cm_port_from_path(path);
if (!port)
return -EINVAL;
+ cm_dev = port->cm_dev;
ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
be16_to_cpu(path->pkey), &av->pkey_index);
@@ -502,16 +531,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
av->timeout = path->packet_life_time + 1;
- spin_lock_irqsave(&cm.lock, flags);
- if (&cm_id_priv->av == av)
- list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
- else if (&cm_id_priv->alt_av == av)
- list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
- else
- ret = -EINVAL;
-
- spin_unlock_irqrestore(&cm.lock, flags);
-
+ ret = add_cm_id_to_port_list(cm_id_priv, av, port);
return ret;
}
@@ -1523,6 +1543,8 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
cm_req_get_primary_local_ack_timeout(req_msg);
primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
primary_path->service_id = req_msg->service_id;
+ if (sa_path_is_roce(primary_path))
+ primary_path->roce.route_resolved = false;
if (cm_req_has_alt_path(req_msg)) {
alt_path->dgid = req_msg->alt_local_gid;
@@ -1542,6 +1564,9 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
cm_req_get_alt_local_ack_timeout(req_msg);
alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
alt_path->service_id = req_msg->service_id;
+
+ if (sa_path_is_roce(alt_path))
+ alt_path->roce.route_resolved = false;
}
cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
}
@@ -3150,6 +3175,13 @@ static int cm_lap_handler(struct cm_work *work)
struct ib_mad_send_buf *msg = NULL;
int ret;
+ /* Currently Alternate path messages are not supported for
+ * RoCE link layer.
+ */
+ if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+ work->port->port_num))
+ return -EINVAL;
+
/* todo: verify LAP request and send reject APR if invalid. */
lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
@@ -3299,6 +3331,13 @@ static int cm_apr_handler(struct cm_work *work)
struct cm_apr_msg *apr_msg;
int ret;
+ /* Currently Alternate path messages are not supported for
+ * RoCE link layer.
+ */
+ if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+ work->port->port_num))
+ return -EINVAL;
+
apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
apr_msg->local_comm_id);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6ab1059fed66..51a641002e10 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -62,6 +62,7 @@
#include <rdma/iw_cm.h>
#include "core_priv.h"
+#include "cma_priv.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -174,7 +175,7 @@ static struct cma_pernet *cma_pernet(struct net *net)
return net_generic(net, cma_pernet_id);
}
-static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
+static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps)
{
struct cma_pernet *pernet = cma_pernet(net);
@@ -203,7 +204,7 @@ struct cma_device {
};
struct rdma_bind_list {
- enum rdma_port_space ps;
+ enum rdma_ucm_port_space ps;
struct hlist_head owners;
unsigned short port;
};
@@ -216,7 +217,7 @@ struct class_port_info_context {
u8 port_num;
};
-static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
+static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
@@ -225,14 +226,15 @@ static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
}
static struct rdma_bind_list *cma_ps_find(struct net *net,
- enum rdma_port_space ps, int snum)
+ enum rdma_ucm_port_space ps, int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
return idr_find(idr, snum);
}
-static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
+static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
+ int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
@@ -327,46 +329,6 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
* We do this by disabling removal notification while a callback is in process,
* and reporting it after the callback completes.
*/
-struct rdma_id_private {
- struct rdma_cm_id id;
-
- struct rdma_bind_list *bind_list;
- struct hlist_node node;
- struct list_head list; /* listen_any_list or cma_device.list */
- struct list_head listen_list; /* per device listens */
- struct cma_device *cma_dev;
- struct list_head mc_list;
-
- int internal_id;
- enum rdma_cm_state state;
- spinlock_t lock;
- struct mutex qp_mutex;
-
- struct completion comp;
- atomic_t refcount;
- struct mutex handler_mutex;
-
- int backlog;
- int timeout_ms;
- struct ib_sa_query *query;
- int query_id;
- union {
- struct ib_cm_id *ib;
- struct iw_cm_id *iw;
- } cm_id;
-
- u32 seq_num;
- u32 qkey;
- u32 qp_num;
- pid_t owner;
- u32 options;
- u8 srq;
- u8 tos;
- bool tos_set;
- u8 reuseaddr;
- u8 afonly;
- enum ib_gid_type gid_type;
-};
struct cma_multicast {
struct rdma_id_private *id_priv;
@@ -505,6 +467,8 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
+ id_priv->res.type = RDMA_RESTRACK_CM_ID;
+ rdma_restrack_add(&id_priv->res);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -777,10 +741,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
complete(&id_priv->comp);
}
-struct rdma_cm_id *rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_port_space ps,
- enum ib_qp_type qp_type)
+struct rdma_cm_id *__rdma_create_id(struct net *net,
+ rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller)
{
struct rdma_id_private *id_priv;
@@ -788,7 +752,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- id_priv->owner = task_pid_nr(current);
+ if (caller)
+ id_priv->res.kern_name = caller;
+ else
+ rdma_restrack_set_task(&id_priv->res, current);
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
@@ -808,7 +775,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
return &id_priv->id;
}
-EXPORT_SYMBOL(rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_id);
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
@@ -1400,7 +1367,7 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
return net_dev;
}
-static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
+static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id)
{
return (be64_to_cpu(service_id) >> 16) & 0xffff;
}
@@ -1441,21 +1408,12 @@ static bool cma_match_private_data(struct rdma_id_private *id_priv,
return true;
}
-static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
-{
- enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
- enum rdma_transport_type transport =
- rdma_node_get_transport(device->node_type);
-
- return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
-}
-
static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
struct ib_device *device = id->device;
const int port_num = id->port_num ?: rdma_start_port(device);
- return cma_protocol_roce_dev_port(device, port_num);
+ return rdma_protocol_roce(device, port_num);
}
static bool cma_match_net_dev(const struct rdma_cm_id *id,
@@ -1468,7 +1426,7 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id,
/* This request is an AF_IB request or a RoCE request */
return (!id->port_num || id->port_num == port_num) &&
(addr->src_addr.ss_family == AF_IB ||
- cma_protocol_roce_dev_port(id->device, port_num));
+ rdma_protocol_roce(id->device, port_num));
return !addr->dev_addr.bound_dev_if ||
(net_eq(dev_net(net_dev), addr->dev_addr.net) &&
@@ -1523,7 +1481,7 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
/* Assuming the protocol is AF_IB */
*net_dev = NULL;
- } else if (cma_protocol_roce_dev_port(req.device, req.port)) {
+ } else if (rdma_protocol_roce(req.device, req.port)) {
/* TODO find the net dev matching the request parameters
* through the RoCE GID table */
*net_dev = NULL;
@@ -1668,6 +1626,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
mutex_unlock(&id_priv->handler_mutex);
if (id_priv->cma_dev) {
+ rdma_restrack_del(&id_priv->res);
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1817,6 +1776,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event,
struct net_device *net_dev)
{
+ struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
struct rdma_route *rt;
@@ -1826,9 +1786,11 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
ib_event->param.req_rcvd.primary_path->service_id;
int ret;
- id = rdma_create_id(listen_id->route.addr.dev_addr.net,
+ listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
+ id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
listen_id->event_handler, listen_id->context,
- listen_id->ps, ib_event->param.req_rcvd.qp_type);
+ listen_id->ps, ib_event->param.req_rcvd.qp_type,
+ listen_id_priv->res.kern_name);
if (IS_ERR(id))
return NULL;
@@ -1877,14 +1839,17 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event,
struct net_device *net_dev)
{
+ struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
struct net *net = listen_id->route.addr.dev_addr.net;
int ret;
- id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
- listen_id->ps, IB_QPT_UD);
+ listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
+ id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
+ listen_id->ps, IB_QPT_UD,
+ listen_id_priv->res.kern_name);
if (IS_ERR(id))
return NULL;
@@ -2150,10 +2115,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
- listen_id->id.event_handler,
- listen_id->id.context,
- RDMA_PS_TCP, IB_QPT_RC);
+ new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ listen_id->id.event_handler,
+ listen_id->id.context,
+ RDMA_PS_TCP, IB_QPT_RC,
+ listen_id->res.kern_name);
if (IS_ERR(new_cm_id)) {
ret = -ENOMEM;
goto out;
@@ -2278,8 +2244,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
return;
- id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
- id_priv->id.qp_type);
+ id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
+ id_priv->id.qp_type, id_priv->res.kern_name);
if (IS_ERR(id))
return;
@@ -2541,6 +2507,7 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
gid_type = ib_network_to_gid_type(addr->dev_addr.network);
route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
+ route->path_rec->roce.route_resolved = true;
sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
sa_path_set_ifindex(route->path_rec, ndev->ifindex);
sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
@@ -3028,7 +2995,7 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
hlist_add_head(&id_priv->node, &bind_list->owners);
}
-static int cma_alloc_port(enum rdma_port_space ps,
+static int cma_alloc_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv, unsigned short snum)
{
struct rdma_bind_list *bind_list;
@@ -3091,7 +3058,7 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
return 0;
}
-static int cma_alloc_any_port(enum rdma_port_space ps,
+static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv)
{
static unsigned int last_used_port;
@@ -3169,7 +3136,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
return 0;
}
-static int cma_use_port(enum rdma_port_space ps,
+static int cma_use_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv)
{
struct rdma_bind_list *bind_list;
@@ -3203,8 +3170,8 @@ static int cma_bind_listen(struct rdma_id_private *id_priv)
return ret;
}
-static enum rdma_port_space cma_select_inet_ps(
- struct rdma_id_private *id_priv)
+static enum rdma_ucm_port_space
+cma_select_inet_ps(struct rdma_id_private *id_priv)
{
switch (id_priv->id.ps) {
case RDMA_PS_TCP:
@@ -3218,9 +3185,10 @@ static enum rdma_port_space cma_select_inet_ps(
}
}
-static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
+static enum rdma_ucm_port_space
+cma_select_ib_ps(struct rdma_id_private *id_priv)
{
- enum rdma_port_space ps = 0;
+ enum rdma_ucm_port_space ps = 0;
struct sockaddr_ib *sib;
u64 sid_ps, mask, sid;
@@ -3251,7 +3219,7 @@ static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
static int cma_get_port(struct rdma_id_private *id_priv)
{
- enum rdma_port_space ps;
+ enum rdma_ucm_port_space ps;
int ret;
if (cma_family(id_priv) != AF_IB)
@@ -3389,8 +3357,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
- if (id_priv->cma_dev)
+ if (id_priv->cma_dev) {
+ rdma_restrack_del(&id_priv->res);
cma_release_dev(id_priv);
+ }
err1:
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
@@ -3773,14 +3743,18 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
-int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ const char *caller)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- id_priv->owner = task_pid_nr(current);
+ if (caller)
+ id_priv->res.kern_name = caller;
+ else
+ rdma_restrack_set_task(&id_priv->res, current);
if (!cma_comp(id_priv, RDMA_CM_CONNECT))
return -EINVAL;
@@ -3820,7 +3794,7 @@ reject:
rdma_reject(id, NULL, 0);
return ret;
}
-EXPORT_SYMBOL(rdma_accept);
+EXPORT_SYMBOL(__rdma_accept);
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
@@ -3938,10 +3912,14 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
rdma_start_port(id_priv->cma_dev->device)];
event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
- ib_init_ah_from_mcmember(id_priv->id.device,
- id_priv->id.port_num, &multicast->rec,
- ndev, gid_type,
- &event.param.ud.ah_attr);
+ ret = ib_init_ah_from_mcmember(id_priv->id.device,
+ id_priv->id.port_num,
+ &multicast->rec,
+ ndev, gid_type,
+ &event.param.ud.ah_attr);
+ if (ret)
+ event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
if (ndev)
@@ -4501,7 +4479,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
goto out;
- id_stats->pid = id_priv->owner;
+ id_stats->pid = task_pid_vnr(id_priv->res.task);
id_stats->port_space = id->ps;
id_stats->cm_state = id_priv->state;
id_stats->qp_num = id_priv->qp_num;
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
new file mode 100644
index 000000000000..194cfe78c447
--- /dev/null
+++ b/drivers/infiniband/core/cma_priv.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _CMA_PRIV_H
+#define _CMA_PRIV_H
+
+enum rdma_cm_state {
+ RDMA_CM_IDLE,
+ RDMA_CM_ADDR_QUERY,
+ RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_QUERY,
+ RDMA_CM_ROUTE_RESOLVED,
+ RDMA_CM_CONNECT,
+ RDMA_CM_DISCONNECT,
+ RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN,
+ RDMA_CM_DEVICE_REMOVAL,
+ RDMA_CM_DESTROYING
+};
+
+struct rdma_id_private {
+ struct rdma_cm_id id;
+
+ struct rdma_bind_list *bind_list;
+ struct hlist_node node;
+ struct list_head list; /* listen_any_list or cma_device.list */
+ struct list_head listen_list; /* per device listens */
+ struct cma_device *cma_dev;
+ struct list_head mc_list;
+
+ int internal_id;
+ enum rdma_cm_state state;
+ spinlock_t lock;
+ struct mutex qp_mutex;
+
+ struct completion comp;
+ atomic_t refcount;
+ struct mutex handler_mutex;
+
+ int backlog;
+ int timeout_ms;
+ struct ib_sa_query *query;
+ int query_id;
+ union {
+ struct ib_cm_id *ib;
+ struct iw_cm_id *iw;
+ } cm_id;
+
+ u32 seq_num;
+ u32 qkey;
+ u32 qp_num;
+ u32 options;
+ u8 srq;
+ u8 tos;
+ bool tos_set;
+ u8 reuseaddr;
+ u8 afonly;
+ enum ib_gid_type gid_type;
+
+ /*
+ * Internal to RDMA/core, don't use in the drivers
+ */
+ struct rdma_restrack_entry res;
+};
+#endif /* _CMA_PRIV_H */
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 25bb178f6074..54163a6e4067 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -333,4 +333,15 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
return qp;
}
+
+struct rdma_dev_addr;
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr,
+ struct rdma_dev_addr *addr);
+
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+ const union ib_gid *dgid,
+ u8 *dmac, const struct net_device *ndev,
+ int *hoplimit);
+
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b7459cf524e4..ea9fbcfb21bd 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -103,7 +103,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
IB_MANDATORY_FUNC(query_pkey),
- IB_MANDATORY_FUNC(query_gid),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
IB_MANDATORY_FUNC(create_ah),
@@ -853,7 +852,7 @@ int ib_query_port(struct ib_device *device,
if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
return 0;
- err = ib_query_gid(device, port_num, 0, &gid, NULL);
+ err = device->query_gid(device, port_num, 0, &gid);
if (err)
return err;
@@ -871,19 +870,13 @@ EXPORT_SYMBOL(ib_query_port);
* @attr: Returned GID attributes related to this GID index (only in RoCE).
* NULL means ignore.
*
- * ib_query_gid() fetches the specified GID table entry.
+ * ib_query_gid() fetches the specified GID table entry from the cache.
*/
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid,
struct ib_gid_attr *attr)
{
- if (rdma_cap_roce_gid_table(device, port_num))
- return ib_get_cached_gid(device, port_num, index, gid, attr);
-
- if (attr)
- return -EINVAL;
-
- return device->query_gid(device, port_num, index, gid);
+ return ib_get_cached_gid(device, port_num, index, gid, attr);
}
EXPORT_SYMBOL(ib_query_gid);
@@ -1049,19 +1042,18 @@ EXPORT_SYMBOL(ib_modify_port);
* a specified GID value occurs. Its searches only for IB link layer.
* @device: The device to query.
* @gid: The GID value to search for.
- * @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- struct net_device *ndev, u8 *port_num, u16 *index)
+ u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
- if (rdma_cap_roce_gid_table(device, port))
+ if (!rdma_protocol_ib(device, port))
continue;
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 81528f64061a..9821ae900f6d 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -439,10 +439,9 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
struct sk_buff *skb = NULL;
skb = dev_alloc_skb(IWPM_MSG_SIZE);
- if (!skb) {
- pr_err("%s Unable to allocate skb\n", __func__);
+ if (!skb)
goto create_nlmsg_exit;
- }
+
if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op,
NLM_F_REQUEST))) {
pr_warn("%s: Unable to put the nlmsg header\n", __func__);
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 45f2f095f793..4eb72ff539fc 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -724,21 +724,19 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
{
int ret;
u16 gid_index;
- u8 p;
-
- if (rdma_protocol_roce(device, port_num)) {
- ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
- gid_type, port_num,
- ndev,
- &gid_index);
- } else if (rdma_protocol_ib(device, port_num)) {
- ret = ib_find_cached_gid(device, &rec->port_gid,
- IB_GID_TYPE_IB, NULL, &p,
- &gid_index);
- } else {
- ret = -EINVAL;
- }
+ /* GID table is not based on the netdevice for IB link layer,
+ * so ignore ndev during search.
+ */
+ if (rdma_protocol_ib(device, port_num))
+ ndev = NULL;
+ else if (!rdma_protocol_roce(device, port_num))
+ return -EINVAL;
+
+ ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+ gid_type, port_num,
+ ndev,
+ &gid_index);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 5326a684555f..eb567765f45c 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -34,9 +34,11 @@
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <net/netlink.h>
+#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
+#include "cma_priv.h"
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
@@ -71,6 +73,31 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
.len = TASK_COMM_LEN },
+ [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
+ [RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
+ [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ },
};
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -99,7 +126,7 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
return -EMSGSIZE;
ib_get_device_fw_str(device, fw);
- /* Device without FW has strlen(fw) */
+ /* Device without FW has strlen(fw) = 0 */
if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
return -EMSGSIZE;
@@ -115,8 +142,10 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
}
static int fill_port_info(struct sk_buff *msg,
- struct ib_device *device, u32 port)
+ struct ib_device *device, u32 port,
+ const struct net *net)
{
+ struct net_device *netdev = NULL;
struct ib_port_attr attr;
int ret;
@@ -150,7 +179,23 @@ static int fill_port_info(struct sk_buff *msg,
return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
return -EMSGSIZE;
- return 0;
+
+ if (device->get_netdev)
+ netdev = device->get_netdev(device, port);
+
+ if (netdev && net_eq(dev_net(netdev), net)) {
+ ret = nla_put_u32(msg,
+ RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
+ if (ret)
+ goto out;
+ ret = nla_put_string(msg,
+ RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
+ }
+
+out:
+ if (netdev)
+ dev_put(netdev);
+ return ret;
}
static int fill_res_info_entry(struct sk_buff *msg,
@@ -182,6 +227,8 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_PD] = "pd",
[RDMA_RESTRACK_CQ] = "cq",
[RDMA_RESTRACK_QP] = "qp",
+ [RDMA_RESTRACK_CM_ID] = "cm_id",
+ [RDMA_RESTRACK_MR] = "mr",
};
struct rdma_restrack_root *res = &device->res;
@@ -212,10 +259,29 @@ err:
return ret;
}
-static int fill_res_qp_entry(struct sk_buff *msg,
- struct ib_qp *qp, uint32_t port)
+static int fill_res_name_pid(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ /*
+ * For user resources, user is should read /proc/PID/comm to get the
+ * name of the task file.
+ */
+ if (rdma_is_kernel_res(res)) {
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+ res->kern_name))
+ return -EMSGSIZE;
+ } else {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
+ task_pid_vnr(res->task)))
+ return -EMSGSIZE;
+ }
+ return 0;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
{
- struct rdma_restrack_entry *res = &qp->res;
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
struct ib_qp_init_attr qp_init_attr;
struct nlattr *entry_attr;
struct ib_qp_attr qp_attr;
@@ -262,19 +328,172 @@ static int fill_res_qp_entry(struct sk_buff *msg,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
goto err;
- /*
- * Existence of task means that it is user QP and netlink
- * user is invited to go and read /proc/PID/comm to get name
- * of the task file and res->task_com should be NULL.
- */
- if (rdma_is_kernel_res(res)) {
- if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name))
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_cm_id_entry(struct sk_buff *msg,
+ struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct rdma_id_private *id_priv =
+ container_of(res, struct rdma_id_private, res);
+ struct rdma_cm_id *cm_id = &id_priv->id;
+ struct nlattr *entry_attr;
+
+ if (port && port != cm_id->port_num)
+ return 0;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (cm_id->port_num &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
+ goto err;
+
+ if (id_priv->qp_num) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
goto err;
- } else {
- if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task)))
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
goto err;
}
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
+ goto err;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
+ goto err;
+
+ if (cm_id->route.addr.src_addr.ss_family &&
+ nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
+ sizeof(cm_id->route.addr.src_addr),
+ &cm_id->route.addr.src_addr))
+ goto err;
+ if (cm_id->route.addr.dst_addr.ss_family &&
+ nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
+ sizeof(cm_id->route.addr.dst_addr),
+ &cm_id->route.addr.dst_addr))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_cq *cq = container_of(res, struct ib_cq, res);
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+ atomic_read(&cq->usecnt), 0))
+ goto err;
+
+ /* Poll context is only valid for kernel CQs */
+ if (rdma_is_kernel_res(res) &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
+ mr->iova, 0))
+ goto err;
+ }
+
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_pd *pd = container_of(res, struct ib_pd, res);
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
+ pd->local_dma_lkey))
+ goto err;
+ if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
+ pd->unsafe_global_rkey))
+ goto err;
+ }
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+ atomic_read(&pd->usecnt), 0))
+ goto err;
+ if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
+ pd->unsafe_global_rkey))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
nla_nest_end(msg, entry_attr);
return 0;
@@ -405,7 +624,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
- err = fill_port_info(msg, device, port);
+ err = fill_port_info(msg, device, port, sock_net(skb->sk));
if (err)
goto err_free;
@@ -465,7 +684,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
RDMA_NLDEV_CMD_PORT_GET),
0, NLM_F_MULTI);
- if (fill_port_info(skb, device, p)) {
+ if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
nlmsg_cancel(skb, nlh);
goto out;
}
@@ -558,23 +777,60 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}
-static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
- struct netlink_callback *cb)
+struct nldev_fill_res_entry {
+ int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, u32 port);
+ enum rdma_nldev_attr nldev_attr;
+ enum rdma_nldev_command nldev_cmd;
+};
+
+static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
+ [RDMA_RESTRACK_QP] = {
+ .fill_res_func = fill_res_qp_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
+ },
+ [RDMA_RESTRACK_CM_ID] = {
+ .fill_res_func = fill_res_cm_id_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
+ },
+ [RDMA_RESTRACK_CQ] = {
+ .fill_res_func = fill_res_cq_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
+ },
+ [RDMA_RESTRACK_MR] = {
+ .fill_res_func = fill_res_mr_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
+ },
+ [RDMA_RESTRACK_PD] = {
+ .fill_res_func = fill_res_pd_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
+ },
+};
+
+static int res_get_common_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ enum rdma_restrack_type res_type)
{
+ const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct rdma_restrack_entry *res;
int err, ret = 0, idx = 0;
struct nlattr *table_attr;
struct ib_device *device;
int start = cb->args[0];
- struct ib_qp *qp = NULL;
struct nlmsghdr *nlh;
u32 index, port = 0;
+ bool filled = false;
err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, NULL);
/*
- * Right now, we are expecting the device index to get QP information,
+ * Right now, we are expecting the device index to get res information,
* but it is possible to extend this code to return all devices in
* one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
* if it doesn't exist, we will iterate over all devices.
@@ -601,7 +857,7 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
}
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
0, NLM_F_MULTI);
if (fill_nldev_handle(skb, device)) {
@@ -609,24 +865,26 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
goto err;
}
- table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
+ table_attr = nla_nest_start(skb, fe->nldev_attr);
if (!table_attr) {
ret = -EMSGSIZE;
goto err;
}
down_read(&device->res.rwsem);
- hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
+ hash_for_each_possible(device->res.hash, res, node, res_type) {
if (idx < start)
goto next;
if ((rdma_is_kernel_res(res) &&
task_active_pid_ns(current) != &init_pid_ns) ||
- (!rdma_is_kernel_res(res) &&
- task_active_pid_ns(current) != task_active_pid_ns(res->task)))
+ (!rdma_is_kernel_res(res) && task_active_pid_ns(current) !=
+ task_active_pid_ns(res->task)))
/*
- * 1. Kernel QPs should be visible in init namspace only
- * 2. Present only QPs visible in the current namespace
+ * 1. Kern resources should be visible in init
+ * namspace only
+ * 2. Present only resources visible in the current
+ * namespace
*/
goto next;
@@ -638,10 +896,10 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
*/
goto next;
- qp = container_of(res, struct ib_qp, res);
+ filled = true;
up_read(&device->res.rwsem);
- ret = fill_res_qp_entry(skb, qp, port);
+ ret = fe->fill_res_func(skb, cb, res, port);
down_read(&device->res.rwsem);
/*
* Return resource back, but it won't be released till
@@ -667,10 +925,10 @@ next: idx++;
cb->args[0] = idx;
/*
- * No more QPs to fill, cancel the message and
+ * No more entries to fill, cancel the message and
* return 0 to mark end of dumpit.
*/
- if (!qp)
+ if (!filled)
goto err;
put_device(&device->dev);
@@ -688,6 +946,36 @@ err_index:
return ret;
}
+static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP);
+}
+
+static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID);
+}
+
+static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ);
+}
+
+static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR);
+}
+
+static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
+}
+
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -714,6 +1002,18 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
* too.
*/
},
+ [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
+ .dump = nldev_res_get_cm_id_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_CQ_GET] = {
+ .dump = nldev_res_get_cq_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_MR_GET] = {
+ .dump = nldev_res_get_mr_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_PD_GET] = {
+ .dump = nldev_res_get_pd_dumpit,
+ },
};
void __init nldev_init(void)
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index d8eead5d106d..a6e904973ba8 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -350,13 +350,6 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
return type->type_class->alloc_begin(type, ucontext);
}
-static void uverbs_uobject_add(struct ib_uobject *uobject)
-{
- mutex_lock(&uobject->context->uobjects_lock);
- list_add(&uobject->list, &uobject->context->uobjects);
- mutex_unlock(&uobject->context->uobjects_lock);
-}
-
static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
@@ -502,7 +495,6 @@ out:
static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
- uverbs_uobject_add(uobj);
spin_lock(&uobj->context->ufile->idr_lock);
/*
* We already allocated this IDR with a NULL object, so
@@ -518,7 +510,6 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
struct ib_uobject_file *uobj_file =
container_of(uobj, struct ib_uobject_file, uobj);
- uverbs_uobject_add(&uobj_file->uobj);
fd_install(uobj_file->uobj.id, uobj->object);
/* This shouldn't be used anymore. Use the file object instead */
uobj_file->uobj.id = 0;
@@ -545,6 +536,10 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
assert_uverbs_usecnt(uobj, true);
atomic_set(&uobj->usecnt, 0);
+ mutex_lock(&uobj->context->uobjects_lock);
+ list_add(&uobj->list, &uobj->context->uobjects);
+ mutex_unlock(&uobj->context->uobjects_lock);
+
uobj->type->type_class->alloc_commit(uobj);
up_read(&uobj->context->cleanup_rwsem);
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 3dbc4e4cca41..efddd13e3edb 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -3,20 +3,66 @@
* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
*/
+#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>
#include <linux/pid_namespace.h>
+#include "cma_priv.h"
+
void rdma_restrack_init(struct rdma_restrack_root *res)
{
init_rwsem(&res->rwsem);
}
+static const char *type2str(enum rdma_restrack_type type)
+{
+ static const char * const names[RDMA_RESTRACK_MAX] = {
+ [RDMA_RESTRACK_PD] = "PD",
+ [RDMA_RESTRACK_CQ] = "CQ",
+ [RDMA_RESTRACK_QP] = "QP",
+ [RDMA_RESTRACK_CM_ID] = "CM_ID",
+ [RDMA_RESTRACK_MR] = "MR",
+ };
+
+ return names[type];
+};
+
void rdma_restrack_clean(struct rdma_restrack_root *res)
{
- WARN_ON_ONCE(!hash_empty(res->hash));
+ struct rdma_restrack_entry *e;
+ char buf[TASK_COMM_LEN];
+ struct ib_device *dev;
+ const char *owner;
+ int bkt;
+
+ if (hash_empty(res->hash))
+ return;
+
+ dev = container_of(res, struct ib_device, res);
+ pr_err("restrack: %s", CUT_HERE);
+ pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n",
+ dev->name);
+ hash_for_each(res->hash, bkt, e, node) {
+ if (rdma_is_kernel_res(e)) {
+ owner = e->kern_name;
+ } else {
+ /*
+ * There is no need to call get_task_struct here,
+ * because we can be here only if there are more
+ * get_task_struct() call than put_task_struct().
+ */
+ get_task_comm(buf, e->task);
+ owner = buf;
+ }
+
+ pr_err("restrack: %s %s object allocated by %s is not freed\n",
+ rdma_is_kernel_res(e) ? "Kernel" : "User",
+ type2str(e->type), owner);
+ }
+ pr_err("restrack: %s", CUT_HERE);
}
int rdma_restrack_count(struct rdma_restrack_root *res,
@@ -40,51 +86,48 @@ EXPORT_SYMBOL(rdma_restrack_count);
static void set_kern_name(struct rdma_restrack_entry *res)
{
- enum rdma_restrack_type type = res->type;
- struct ib_qp *qp;
-
- if (type != RDMA_RESTRACK_QP)
- /* PD and CQ types already have this name embedded in */
- return;
+ struct ib_pd *pd;
- qp = container_of(res, struct ib_qp, res);
- if (!qp->pd) {
- WARN_ONCE(true, "XRC QPs are not supported\n");
- /* Survive, despite the programmer's error */
- res->kern_name = " ";
- return;
+ switch (res->type) {
+ case RDMA_RESTRACK_QP:
+ pd = container_of(res, struct ib_qp, res)->pd;
+ if (!pd) {
+ WARN_ONCE(true, "XRC QPs are not supported\n");
+ /* Survive, despite the programmer's error */
+ res->kern_name = " ";
+ }
+ break;
+ case RDMA_RESTRACK_MR:
+ pd = container_of(res, struct ib_mr, res)->pd;
+ break;
+ default:
+ /* Other types set kern_name directly */
+ pd = NULL;
+ break;
}
- res->kern_name = qp->pd->res.kern_name;
+ if (pd)
+ res->kern_name = pd->res.kern_name;
}
static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
{
- enum rdma_restrack_type type = res->type;
- struct ib_device *dev;
- struct ib_pd *pd;
- struct ib_cq *cq;
- struct ib_qp *qp;
-
- switch (type) {
+ switch (res->type) {
case RDMA_RESTRACK_PD:
- pd = container_of(res, struct ib_pd, res);
- dev = pd->device;
- break;
+ return container_of(res, struct ib_pd, res)->device;
case RDMA_RESTRACK_CQ:
- cq = container_of(res, struct ib_cq, res);
- dev = cq->device;
- break;
+ return container_of(res, struct ib_cq, res)->device;
case RDMA_RESTRACK_QP:
- qp = container_of(res, struct ib_qp, res);
- dev = qp->device;
- break;
+ return container_of(res, struct ib_qp, res)->device;
+ case RDMA_RESTRACK_CM_ID:
+ return container_of(res, struct rdma_id_private,
+ res)->id.device;
+ case RDMA_RESTRACK_MR:
+ return container_of(res, struct ib_mr, res)->device;
default:
- WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
+ WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;
}
-
- return dev;
}
static bool res_is_user(struct rdma_restrack_entry *res)
@@ -96,6 +139,10 @@ static bool res_is_user(struct rdma_restrack_entry *res)
return container_of(res, struct ib_cq, res)->uobject;
case RDMA_RESTRACK_QP:
return container_of(res, struct ib_qp, res)->uobject;
+ case RDMA_RESTRACK_CM_ID:
+ return !res->kern_name;
+ case RDMA_RESTRACK_MR:
+ return container_of(res, struct ib_mr, res)->pd->uobject;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return false;
@@ -109,13 +156,15 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
if (!dev)
return;
+ if (res->type != RDMA_RESTRACK_CM_ID || !res_is_user(res))
+ res->task = NULL;
+
if (res_is_user(res)) {
- get_task_struct(current);
- res->task = current;
+ if (!res->task)
+ rdma_restrack_set_task(res, current);
res->kern_name = NULL;
} else {
set_kern_name(res);
- res->task = NULL;
}
kref_init(&res->kref);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 9f029a1ca5ea..a61ec7e33613 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1227,118 +1227,130 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
-int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
- struct sa_path_rec *rec,
- struct rdma_ah_attr *ah_attr)
+static int
+roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec)
{
+ struct net_device *resolved_dev;
+ struct net_device *ndev;
+ struct net_device *idev;
+ struct rdma_dev_addr dev_addr = {
+ .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
+ sa_path_get_ifindex(rec) : 0),
+ .net = sa_path_get_ndev(rec) ?
+ sa_path_get_ndev(rec) :
+ &init_net
+ };
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
int ret;
- u16 gid_index;
- int use_roce;
- struct net_device *ndev = NULL;
- memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->type = rdma_ah_find_type(device, port_num);
+ if (rec->roce.route_resolved)
+ return 0;
- rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
- if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
- (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)))
- rdma_ah_set_make_grd(ah_attr, true);
+ rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
+ rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
- rdma_ah_set_sl(ah_attr, rec->sl);
- rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) &
- get_src_path_mask(device, port_num));
- rdma_ah_set_port_num(ah_attr, port_num);
- rdma_ah_set_static_rate(ah_attr, rec->rate);
- use_roce = rdma_cap_eth_ah(device, port_num);
-
- if (use_roce) {
- struct net_device *idev;
- struct net_device *resolved_dev;
- struct rdma_dev_addr dev_addr = {
- .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
- sa_path_get_ifindex(rec) : 0),
- .net = sa_path_get_ndev(rec) ?
- sa_path_get_ndev(rec) :
- &init_net
- };
- union {
- struct sockaddr _sockaddr;
- struct sockaddr_in _sockaddr_in;
- struct sockaddr_in6 _sockaddr_in6;
- } sgid_addr, dgid_addr;
-
- if (!device->get_netdev)
- return -EOPNOTSUPP;
-
- rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
- rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
-
- /* validate the route */
- ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
- &dgid_addr._sockaddr, &dev_addr);
- if (ret)
- return ret;
+ /* validate the route */
+ ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
+ &dgid_addr._sockaddr, &dev_addr);
+ if (ret)
+ return ret;
- if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
- dev_addr.network == RDMA_NETWORK_IPV6) &&
- rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
- return -EINVAL;
+ if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+ dev_addr.network == RDMA_NETWORK_IPV6) &&
+ rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
+ return -EINVAL;
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
+ idev = device->get_netdev(device, port_num);
+ if (!idev)
+ return -ENODEV;
- resolved_dev = dev_get_by_index(dev_addr.net,
- dev_addr.bound_dev_if);
- if (!resolved_dev) {
- dev_put(idev);
- return -ENODEV;
- }
- ndev = ib_get_ndev_from_path(rec);
- rcu_read_lock();
- if ((ndev && ndev != resolved_dev) ||
- (resolved_dev != idev &&
- !rdma_is_upper_dev_rcu(idev, resolved_dev)))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(idev);
- dev_put(resolved_dev);
- if (ret) {
- if (ndev)
- dev_put(ndev);
- return ret;
- }
+ resolved_dev = dev_get_by_index(dev_addr.net,
+ dev_addr.bound_dev_if);
+ if (!resolved_dev) {
+ ret = -ENODEV;
+ goto done;
}
+ ndev = ib_get_ndev_from_path(rec);
+ rcu_read_lock();
+ if ((ndev && ndev != resolved_dev) ||
+ (resolved_dev != idev &&
+ !rdma_is_upper_dev_rcu(idev, resolved_dev)))
+ ret = -EHOSTUNREACH;
+ rcu_read_unlock();
+ dev_put(resolved_dev);
+ if (ndev)
+ dev_put(ndev);
+done:
+ dev_put(idev);
+ if (!ret)
+ rec->roce.route_resolved = true;
+ return ret;
+}
- if (rec->hop_limit > 0 || use_roce) {
- enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
+static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
+{
+ enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
+ struct net_device *ndev;
+ u16 gid_index;
+ int ret;
- ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
- port_num, ndev, &gid_index);
- if (ret) {
- if (ndev)
- dev_put(ndev);
- return ret;
- }
+ ndev = ib_get_ndev_from_path(rec);
+ ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
+ port_num, ndev, &gid_index);
+ if (ndev)
+ dev_put(ndev);
+ if (ret)
+ return ret;
- rdma_ah_set_grh(ah_attr, &rec->dgid,
- be32_to_cpu(rec->flow_label),
- gid_index, rec->hop_limit,
- rec->traffic_class);
- if (ndev)
- dev_put(ndev);
- }
+ rdma_ah_set_grh(ah_attr, &rec->dgid,
+ be32_to_cpu(rec->flow_label),
+ gid_index, rec->hop_limit,
+ rec->traffic_class);
+ return 0;
+}
- if (use_roce) {
- u8 *dmac = sa_path_get_dmac(rec);
+int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
+{
+ int ret = 0;
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
- if (!dmac)
- return -EINVAL;
- memcpy(ah_attr->roce.dmac, dmac, ETH_ALEN);
+ if (sa_path_is_roce(rec)) {
+ ret = roce_resolve_route_from_path(device, port_num, rec);
+ if (ret)
+ return ret;
+
+ memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
+ } else {
+ rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+ if (sa_path_is_opa(rec) &&
+ rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
+ rdma_ah_set_make_grd(ah_attr, true);
+
+ rdma_ah_set_path_bits(ah_attr,
+ be32_to_cpu(sa_path_get_slid(rec)) &
+ get_src_path_mask(device, port_num));
}
- return 0;
+ if (rec->hop_limit > 0 || sa_path_is_roce(rec))
+ ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr);
+ return ret;
}
EXPORT_SYMBOL(ib_init_ah_attr_from_path);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 8ae1308eecc7..31c7efaf8e7a 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -273,6 +273,7 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
+ speed = " SDR";
rate = 25;
break;
}
@@ -388,14 +389,26 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
+ union ib_gid *pgid;
union ib_gid gid;
ssize_t ret;
ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
- if (ret)
- return ret;
- return sprintf(buf, "%pI6\n", gid.raw);
+ /* If reading GID fails, it is likely due to GID entry being empty
+ * (invalid) or reserved GID in the table.
+ * User space expects to read GID table entries as long as it given
+ * index is within GID table size.
+ * Administrative/debugging tool fails to query rest of the GID entries
+ * if it hits error while querying a GID of the given index.
+ * To avoid user space throwing such error on fail to read gid, return
+ * zero GID as before. This maintains backward compatibility.
+ */
+ if (ret)
+ pgid = &zgid;
+ else
+ pgid = &gid;
+ return sprintf(buf, "%pI6\n", pgid->raw);
}
static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
@@ -810,10 +823,15 @@ static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
dev = port->ibdev;
stats = port->hw_stats;
}
+ mutex_lock(&stats->lock);
ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
if (ret)
- return ret;
- return print_hw_stat(stats, hsa->index, buf);
+ goto unlock;
+ ret = print_hw_stat(stats, hsa->index, buf);
+unlock:
+ mutex_unlock(&stats->lock);
+
+ return ret;
}
static ssize_t show_stats_lifespan(struct kobject *kobj,
@@ -821,17 +839,25 @@ static ssize_t show_stats_lifespan(struct kobject *kobj,
char *buf)
{
struct hw_stats_attribute *hsa;
+ struct rdma_hw_stats *stats;
int msecs;
hsa = container_of(attr, struct hw_stats_attribute, attr);
if (!hsa->port_num) {
struct ib_device *dev = container_of((struct device *)kobj,
struct ib_device, dev);
- msecs = jiffies_to_msecs(dev->hw_stats->lifespan);
+
+ stats = dev->hw_stats;
} else {
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
- msecs = jiffies_to_msecs(p->hw_stats->lifespan);
+
+ stats = p->hw_stats;
}
+
+ mutex_lock(&stats->lock);
+ msecs = jiffies_to_msecs(stats->lifespan);
+ mutex_unlock(&stats->lock);
+
return sprintf(buf, "%d\n", msecs);
}
@@ -840,6 +866,7 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
const char *buf, size_t count)
{
struct hw_stats_attribute *hsa;
+ struct rdma_hw_stats *stats;
int msecs;
int jiffies;
int ret;
@@ -854,11 +881,18 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
if (!hsa->port_num) {
struct ib_device *dev = container_of((struct device *)kobj,
struct ib_device, dev);
- dev->hw_stats->lifespan = jiffies;
+
+ stats = dev->hw_stats;
} else {
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
- p->hw_stats->lifespan = jiffies;
+
+ stats = p->hw_stats;
}
+
+ mutex_lock(&stats->lock);
+ stats->lifespan = jiffies;
+ mutex_unlock(&stats->lock);
+
return count;
}
@@ -951,6 +985,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
sysfs_attr_init(hsag->attrs[i]);
}
+ mutex_init(&stats->lock);
/* treat an error here as non-fatal */
hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
if (hsag->attrs[i])
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 01702265c1e1..9eef96dacbd7 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -430,7 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
uevent->resp.id = ctx->id;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&uevent->resp, sizeof(uevent->resp))) {
result = -EFAULT;
goto done;
@@ -441,7 +441,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
result = -ENOMEM;
goto done;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.data,
+ if (copy_to_user(u64_to_user_ptr(cmd.data),
uevent->data, uevent->data_len)) {
result = -EFAULT;
goto done;
@@ -453,7 +453,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
result = -ENOMEM;
goto done;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.info,
+ if (copy_to_user(u64_to_user_ptr(cmd.info),
uevent->info, uevent->info_len)) {
result = -EFAULT;
goto done;
@@ -502,7 +502,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
}
resp.id = ctx->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
result = -EFAULT;
goto err2;
@@ -556,7 +556,7 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
ib_ucm_cleanup_events(ctx);
resp.events_reported = ctx->events_reported;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
@@ -588,7 +588,7 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
resp.local_id = ctx->cm_id->local_id;
resp.remote_id = ctx->cm_id->remote_id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
@@ -625,7 +625,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
@@ -699,7 +699,7 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
if (!len)
return 0;
- data = memdup_user((void __user *)(unsigned long)src, len);
+ data = memdup_user(u64_to_user_ptr(src), len);
if (IS_ERR(data))
return PTR_ERR(data);
@@ -721,7 +721,7 @@ static int ib_ucm_path_get(struct sa_path_rec **path, u64 src)
if (!sa_path)
return -ENOMEM;
- if (copy_from_user(&upath, (void __user *)(unsigned long)src,
+ if (copy_from_user(&upath, u64_to_user_ptr(src),
sizeof(upath))) {
kfree(sa_path);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index d933336d7e01..74329483af6d 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -382,7 +382,11 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
struct ucma_event *uevent;
int ret = 0;
- if (out_len < sizeof uevent->resp)
+ /*
+ * Old 32 bit user space does not send the 4 byte padding in the
+ * reserved field. We don't care, allow it to keep working.
+ */
+ if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -416,8 +420,9 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
uevent->resp.id = ctx->id;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &uevent->resp, sizeof uevent->resp)) {
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
+ &uevent->resp,
+ min_t(size_t, out_len, sizeof(uevent->resp)))) {
ret = -EFAULT;
goto done;
}
@@ -477,15 +482,15 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
return -ENOMEM;
ctx->uid = cmd.uid;
- cm_id = rdma_create_id(current->nsproxy->net_ns,
- ucma_event_handler, ctx, cmd.ps, qp_type);
+ cm_id = __rdma_create_id(current->nsproxy->net_ns,
+ ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
goto err1;
}
resp.id = ctx->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
ret = -EFAULT;
goto err2;
@@ -615,7 +620,7 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
}
resp.events_reported = ucma_free_ctx(ctx);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -845,7 +850,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
ucma_copy_iw_route(&resp, &ctx->cm_id->route);
out:
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -991,7 +996,7 @@ static ssize_t ucma_query(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- response = (void __user *)(unsigned long) cmd.response;
+ response = u64_to_user_ptr(cmd.response);
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1094,12 +1099,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
mutex_lock(&file->mut);
- ret = rdma_accept(ctx->cm_id, &conn_param);
+ ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
if (!ret)
ctx->uid = cmd.uid;
mutex_unlock(&file->mut);
} else
- ret = rdma_accept(ctx->cm_id, NULL);
+ ret = __rdma_accept(ctx->cm_id, NULL, NULL);
ucma_put_ctx(ctx);
return ret;
@@ -1179,7 +1184,7 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
goto out;
ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -1241,6 +1246,9 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
if (!optlen)
return -EINVAL;
+ if (!ctx->cm_id->device)
+ return -EINVAL;
+
memset(&sa_path, 0, sizeof(sa_path));
sa_path.rec_type = SA_PATH_REC_TYPE_IB;
@@ -1315,7 +1323,7 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
return -EINVAL;
- optval = memdup_user((void __user *) (unsigned long) cmd.optval,
+ optval = memdup_user(u64_to_user_ptr(cmd.optval),
cmd.optlen);
if (IS_ERR(optval)) {
ret = PTR_ERR(optval);
@@ -1395,7 +1403,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
goto err2;
resp.id = mc->id;
- if (copy_to_user((void __user *)(unsigned long) cmd->response,
+ if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof(resp))) {
ret = -EFAULT;
goto err3;
@@ -1500,7 +1508,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
resp.events_reported = mc->events_reported;
kfree(mc);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
out:
@@ -1587,7 +1595,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
ucma_unlock_files(cur_file, new_file);
response:
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index deccefb71a6b..cfb51618ab7a 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -46,6 +46,10 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_std_types.h>
+
+#define UVERBS_MODULE_NAME ib_uverbs
+#include <rdma/uverbs_named_ioctl.h>
static inline void
ib_uverbs_init_udata(struct ib_udata *udata,
@@ -199,11 +203,18 @@ struct ib_ucq_object {
u32 async_events_reported;
};
+struct ib_uflow_resources;
+struct ib_uflow_object {
+ struct ib_uobject uobject;
+ struct ib_uflow_resources *resources;
+};
+
extern const struct file_operations uverbs_event_fops;
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_device *ib_dev);
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
struct ib_uverbs_completion_event_file *ev_file,
@@ -226,7 +237,13 @@ int uverbs_dealloc_mw(struct ib_mw *mw);
void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj);
+void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
+extern const struct uverbs_attr_def uverbs_uhw_compat_in;
+extern const struct uverbs_attr_def uverbs_uhw_compat_out;
long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+int uverbs_destroy_def_handler(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs);
struct ib_uverbs_flow_spec {
union {
@@ -240,13 +257,37 @@ struct ib_uverbs_flow_spec {
};
struct ib_uverbs_flow_spec_eth eth;
struct ib_uverbs_flow_spec_ipv4 ipv4;
+ struct ib_uverbs_flow_spec_esp esp;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
struct ib_uverbs_flow_spec_action_tag flow_tag;
struct ib_uverbs_flow_spec_action_drop drop;
+ struct ib_uverbs_flow_spec_action_handle action;
};
};
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+ const void *kern_spec_mask,
+ const void *kern_spec_val,
+ size_t kern_filter_sz,
+ union ib_flow_spec *ib_spec);
+
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DEVICE);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_PD);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MR);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_CQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_QP);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_AH);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MW);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_SRQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_WQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
+
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
struct ib_device *ib_dev, \
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a148de35df8d..13cb5e4deb86 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -50,7 +50,7 @@
static struct ib_uverbs_completion_event_file *
ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
{
- struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
+ struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL,
fd, context);
struct ib_uobject_file *uobj_file;
@@ -322,7 +322,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- uobj = uobj_alloc(uobj_get_type(pd), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -372,7 +372,7 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(pd), cmd.pd_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -517,7 +517,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd),
+ obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD,
file->ucontext);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
@@ -602,7 +602,7 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -663,11 +663,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = uobj_alloc(uobj_get_type(mr), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -693,6 +693,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->pd = pd;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_add(&mr->res);
uobj->object = mr;
@@ -756,7 +758,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -770,7 +772,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
if (cmd.flags & IB_MR_REREG_PD) {
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -822,7 +824,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -851,11 +853,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = uobj_alloc(uobj_get_type(mw), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -914,7 +916,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(mw), cmd.mw_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -939,7 +941,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -984,7 +986,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq),
+ obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ,
file->ucontext);
if (IS_ERR(obj))
return obj;
@@ -1173,7 +1175,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1238,7 +1240,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1285,7 +1287,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1312,7 +1314,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -1371,7 +1373,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -1382,7 +1384,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
sizeof(cmd->rwq_ind_tbl_handle) &&
(cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL,
cmd->rwq_ind_tbl_handle,
file->ucontext);
if (!ind_tbl) {
@@ -1409,7 +1411,7 @@ static int create_qp(struct ib_uverbs_file *file,
has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
- xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle,
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
file->ucontext);
if (IS_ERR(xrcd_uobj)) {
@@ -1429,7 +1431,7 @@ static int create_qp(struct ib_uverbs_file *file,
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = uobj_get_obj_read(srq, cmd->srq_handle,
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle,
file->ucontext);
if (!srq || srq->srq_type == IB_SRQT_XRC) {
ret = -EINVAL;
@@ -1439,7 +1441,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle,
+ rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle,
file->ucontext);
if (!rcq) {
ret = -EINVAL;
@@ -1450,11 +1452,11 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (has_sq)
- scq = uobj_get_obj_read(cq, cmd->send_cq_handle,
+ scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle,
file->ucontext);
if (!ind_tbl)
rcq = rcq ?: scq;
- pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1751,12 +1753,12 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
- xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle,
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle,
file->ucontext);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
@@ -1859,7 +1861,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -1964,7 +1966,7 @@ static int modify_qp(struct ib_uverbs_file *file,
if (!attr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -1989,6 +1991,13 @@ static int modify_qp(struct ib_uverbs_file *file,
goto release_qp;
}
+ if ((cmd->base.attr_mask & IB_QP_CUR_STATE &&
+ cmd->base.cur_qp_state > IB_QPS_ERR) ||
+ cmd->base.qp_state > IB_QPS_ERR) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
attr->qp_state = cmd->base.qp_state;
attr->cur_qp_state = cmd->base.cur_qp_state;
attr->path_mtu = cmd->base.path_mtu;
@@ -2112,7 +2121,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
- uobj = uobj_get_write(uobj_get_type(qp), cmd.qp_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2178,7 +2187,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (!user_wr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
@@ -2214,7 +2223,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah,
+ ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah,
file->ucontext);
if (!ud->ah) {
kfree(ud);
@@ -2449,7 +2458,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
@@ -2498,7 +2507,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
if (!srq)
goto out;
@@ -2555,11 +2564,11 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- uobj = uobj_alloc(uobj_get_type(ah), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
@@ -2627,7 +2636,7 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(ah), cmd.ah_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2650,7 +2659,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
@@ -2701,7 +2710,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
@@ -2730,8 +2739,52 @@ out_put:
return ret ? ret : in_len;
}
-static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+struct ib_uflow_resources {
+ size_t max;
+ size_t num;
+ struct ib_flow_action *collection[0];
+};
+
+static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
+{
+ struct ib_uflow_resources *resources;
+
+ resources =
+ kmalloc(sizeof(*resources) +
+ num_specs * sizeof(*resources->collection), GFP_KERNEL);
+
+ if (!resources)
+ return NULL;
+
+ resources->num = 0;
+ resources->max = num_specs;
+
+ return resources;
+}
+
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
+{
+ unsigned int i;
+
+ for (i = 0; i < uflow_res->num; i++)
+ atomic_dec(&uflow_res->collection[i]->usecnt);
+
+ kfree(uflow_res);
+}
+
+static void flow_resources_add(struct ib_uflow_resources *uflow_res,
+ struct ib_flow_action *action)
+{
+ WARN_ON(uflow_res->num >= uflow_res->max);
+
+ atomic_inc(&action->usecnt);
+ uflow_res->collection[uflow_res->num++] = action;
+}
+
+static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
+ struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec,
+ struct ib_uflow_resources *uflow_res)
{
ib_spec->type = kern_spec->type;
switch (ib_spec->type) {
@@ -2750,19 +2803,34 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop);
break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ if (kern_spec->action.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_handle))
+ return -EOPNOTSUPP;
+ ib_spec->action.act = uobj_get_obj_read(flow_action,
+ UVERBS_OBJECT_FLOW_ACTION,
+ kern_spec->action.handle,
+ ucontext);
+ if (!ib_spec->action.act)
+ return -EINVAL;
+ ib_spec->action.size =
+ sizeof(struct ib_flow_spec_action_handle);
+ flow_resources_add(uflow_res, ib_spec->action.act);
+ uobj_put_obj_read(ib_spec->action.act);
+ break;
default:
return -EINVAL;
}
return 0;
}
-static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
+static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec)
{
/* Returns user space filter size, includes padding */
return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
}
-static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
+static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size,
u16 ib_real_filter_sz)
{
/*
@@ -2780,28 +2848,21 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
return kern_filter_size;
}
-static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+ const void *kern_spec_mask,
+ const void *kern_spec_val,
+ size_t kern_filter_sz,
+ union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
- ssize_t kern_filter_sz;
ssize_t ib_filter_sz;
- void *kern_spec_mask;
- void *kern_spec_val;
- if (kern_spec->reserved)
- return -EINVAL;
-
- ib_spec->type = kern_spec->type;
-
- kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
/* User flow spec size must be aligned to 4 bytes */
if (kern_filter_sz != ALIGN(kern_filter_sz, 4))
return -EINVAL;
- kern_spec_val = (void *)kern_spec +
- sizeof(struct ib_uverbs_flow_spec_hdr);
- kern_spec_mask = kern_spec_val + kern_filter_sz;
+ ib_spec->type = type;
+
if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
return -EINVAL;
@@ -2870,20 +2931,56 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
(ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
return -EINVAL;
break;
+ case IB_FLOW_SPEC_ESP:
+ ib_filter_sz = offsetof(struct ib_flow_esp_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->esp.size = sizeof(struct ib_flow_spec_esp);
+ memcpy(&ib_spec->esp.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz);
+ break;
default:
return -EINVAL;
}
return 0;
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ssize_t kern_filter_sz;
+ void *kern_spec_mask;
+ void *kern_spec_val;
+
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
+
+ kern_spec_val = (void *)kern_spec +
+ sizeof(struct ib_uverbs_flow_spec_hdr);
+ kern_spec_mask = kern_spec_val + kern_filter_sz;
+
+ return ib_uverbs_kern_spec_to_ib_spec_filter(kern_spec->type,
+ kern_spec_mask,
+ kern_spec_val,
+ kern_filter_sz, ib_spec);
+}
+
+static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
+ struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec,
+ struct ib_uflow_resources *uflow_res)
{
if (kern_spec->reserved)
return -EINVAL;
if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
- return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+ return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec,
+ uflow_res);
else
return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
}
@@ -2925,18 +3022,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq),
+ obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3040,7 +3137,7 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
resp.response_length = required_resp_len;
- uobj = uobj_get_write(uobj_get_type(wq), cmd.wq_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3091,7 +3188,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext);
if (!wq)
return -EINVAL;
@@ -3185,7 +3282,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs],
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs],
file->ucontext);
if (!wq) {
err = -EINVAL;
@@ -3195,7 +3292,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto put_wqs;
@@ -3282,7 +3379,7 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3298,10 +3395,12 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp;
struct ib_uobject *uobj;
+ struct ib_uflow_object *uflow;
struct ib_flow *flow_id;
struct ib_uverbs_flow_attr *kern_flow_attr;
struct ib_flow_attr *flow_attr;
struct ib_qp *qp;
+ struct ib_uflow_resources *uflow_res;
int err = 0;
void *kern_spec;
void *ib_spec;
@@ -3361,13 +3460,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kern_flow_attr = &cmd.flow_attr;
}
- uobj = uobj_alloc(uobj_get_type(flow), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto err_free_attr;
}
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp) {
err = -EINVAL;
goto err_uobj;
@@ -3379,6 +3478,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -ENOMEM;
goto err_put;
}
+ uflow_res = flow_resources_alloc(cmd.flow_attr.num_of_specs);
+ if (!uflow_res) {
+ err = -ENOMEM;
+ goto err_free_flow_attr;
+ }
flow_attr->type = kern_flow_attr->type;
flow_attr->priority = kern_flow_attr->priority;
@@ -3393,7 +3497,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
cmd.flow_attr.size >=
((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
- err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+ err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec,
+ uflow_res);
if (err)
goto err_free;
flow_attr->size +=
@@ -3415,6 +3520,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
}
flow_id->uobject = uobj;
uobj->object = flow_id;
+ uflow = container_of(uobj, typeof(*uflow), uobject);
+ uflow->resources = uflow_res;
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
@@ -3433,6 +3540,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err_copy:
ib_destroy_flow(flow_id);
err_free:
+ ib_uverbs_flow_resources_free(uflow_res);
+err_free_flow_attr:
kfree(flow_attr);
err_put:
uobj_put_obj_read(qp);
@@ -3463,7 +3572,7 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EINVAL;
- uobj = uobj_get_write(uobj_get_type(flow), cmd.flow_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3485,7 +3594,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_srq_init_attr attr;
int ret;
- obj = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq),
+ obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -3494,7 +3603,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
attr.ext.tag_matching.max_num_tags = cmd->max_num_tags;
if (cmd->srq_type == IB_SRQT_XRC) {
- xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle,
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
file->ucontext);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
@@ -3512,7 +3621,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
if (ib_srq_has_cq(cmd->srq_type)) {
- attr.ext.cq = uobj_get_obj_read(cq, cmd->cq_handle,
+ attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle,
file->ucontext);
if (!attr.ext.cq) {
ret = -EINVAL;
@@ -3520,7 +3629,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
}
- pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
@@ -3572,7 +3681,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (cmd->srq_type == IB_SRQT_XRC)
resp.srqn = srq->ext.xrc.srq_num;
- if (copy_to_user((void __user *) (unsigned long) cmd->response,
+ if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
@@ -3692,7 +3801,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
@@ -3723,7 +3832,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
@@ -3760,7 +3869,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(srq), cmd.srq_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3897,6 +4006,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.cq_moderation_caps.max_cq_moderation_period =
attr.cq_caps.max_cq_moderation_period;
resp.response_length += sizeof(resp.cq_moderation_caps);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.max_dm_size))
+ goto end;
+
+ resp.max_dm_size = attr.max_dm_size;
+ resp.response_length += sizeof(resp.max_dm_size);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
@@ -3933,7 +4048,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
if (cmd.attr_mask > IB_CQ_MODERATE)
return -EOPNOTSUPP;
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 339b85145044..8c93970dc8f1 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -35,6 +35,17 @@
#include "rdma_core.h"
#include "uverbs.h"
+static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
+ u16 len)
+{
+ if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data))
+ return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len,
+ uattr->len - len);
+
+ return !memchr_inv((const void *)&uattr->data + len,
+ 0, uattr->len - len);
+}
+
static int uverbs_process_attr(struct ib_device *ibdev,
struct ib_ucontext *ucontext,
const struct ib_uverbs_attr *uattr,
@@ -44,14 +55,12 @@ static int uverbs_process_attr(struct ib_device *ibdev,
struct ib_uverbs_attr __user *uattr_ptr)
{
const struct uverbs_attr_spec *spec;
+ const struct uverbs_attr_spec *val_spec;
struct uverbs_attr *e;
const struct uverbs_object_spec *object;
struct uverbs_obj_attr *o_attr;
struct uverbs_attr *elements = attr_bundle_h->attrs;
- if (uattr->reserved)
- return -EINVAL;
-
if (attr_id >= attr_spec_bucket->num_attrs) {
if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
return -EINVAL;
@@ -63,15 +72,46 @@ static int uverbs_process_attr(struct ib_device *ibdev,
return -EINVAL;
spec = &attr_spec_bucket->attrs[attr_id];
+ val_spec = spec;
e = &elements[attr_id];
e->uattr = uattr_ptr;
switch (spec->type) {
+ case UVERBS_ATTR_TYPE_ENUM_IN:
+ if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems)
+ return -EOPNOTSUPP;
+
+ if (uattr->attr_data.enum_data.reserved)
+ return -EINVAL;
+
+ val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id];
+
+ /* Currently we only support PTR_IN based enums */
+ if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
+ return -EOPNOTSUPP;
+
+ e->ptr_attr.enum_id = uattr->attr_data.enum_data.elem_id;
+ /* fall through */
case UVERBS_ATTR_TYPE_PTR_IN:
+ /* Ensure that any data provided by userspace beyond the known
+ * struct is zero. Userspace that knows how to use some future
+ * longer struct will fail here if used with an old kernel and
+ * non-zero content, making ABI compat/discovery simpler.
+ */
+ if (uattr->len > val_spec->ptr.len &&
+ val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO &&
+ !uverbs_is_attr_cleared(uattr, val_spec->ptr.len))
+ return -EOPNOTSUPP;
+
+ /* fall through */
case UVERBS_ATTR_TYPE_PTR_OUT:
- if (uattr->len < spec->len ||
- (!(spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ) &&
- uattr->len > spec->len))
+ if (uattr->len < val_spec->ptr.min_len ||
+ (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) &&
+ uattr->len > val_spec->ptr.len))
+ return -EINVAL;
+
+ if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
+ uattr->attr_data.reserved)
return -EINVAL;
e->ptr_attr.data = uattr->data;
@@ -84,6 +124,9 @@ static int uverbs_process_attr(struct ib_device *ibdev,
return -EINVAL;
/* fall through */
case UVERBS_ATTR_TYPE_FD:
+ if (uattr->attr_data.reserved)
+ return -EINVAL;
+
if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX)
return -EINVAL;
@@ -246,6 +289,9 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
size_t ctx_size;
uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
+ if (hdr->driver_id != ib_dev->driver_id)
+ return -EINVAL;
+
object_spec = uverbs_get_object(ib_dev, hdr->object_id);
if (!object_spec)
return -EPROTONOSUPPORT;
@@ -350,7 +396,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto out;
}
- if (hdr.reserved) {
+ if (hdr.reserved1 || hdr.reserved2) {
err = -EPROTONOSUPPORT;
goto out;
}
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
index 62e1eb1d2a28..0f88a1919d51 100644
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ b/drivers/infiniband/core/uverbs_ioctl_merge.c
@@ -379,7 +379,7 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me
"ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n",
min_id) ||
WARN(IS_ATTR_OBJECT(attr) &&
- attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ,
+ attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
"ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
min_id)) {
res = -EINVAL;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index b1ca223aa380..4445d8ee9314 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -468,7 +468,7 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
return;
}
- entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
@@ -501,7 +501,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
return;
}
- entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
@@ -635,39 +635,87 @@ err_put_refs:
return filp;
}
-static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
+static bool verify_command_mask(struct ib_device *ib_dev,
+ u32 command, bool extended)
{
- u64 mask;
+ if (!extended)
+ return ib_dev->uverbs_cmd_mask & BIT_ULL(command);
- if (command <= IB_USER_VERBS_CMD_OPEN_QP)
- mask = ib_dev->uverbs_cmd_mask;
- else
- mask = ib_dev->uverbs_ex_cmd_mask;
-
- if (mask & ((u64)1 << command))
- return 0;
-
- return -1;
+ return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command);
}
static bool verify_command_idx(u32 command, bool extended)
{
if (extended)
- return command < ARRAY_SIZE(uverbs_ex_cmd_table);
+ return command < ARRAY_SIZE(uverbs_ex_cmd_table) &&
+ uverbs_ex_cmd_table[command];
+
+ return command < ARRAY_SIZE(uverbs_cmd_table) &&
+ uverbs_cmd_table[command];
+}
+
+static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr,
+ u32 *command, bool *extended)
+{
+ if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return -EINVAL;
+
+ *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK;
+ *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED;
+
+ if (!verify_command_idx(*command, *extended))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
+ struct ib_uverbs_ex_cmd_hdr *ex_hdr,
+ size_t count, bool extended)
+{
+ if (extended) {
+ count -= sizeof(*hdr) + sizeof(*ex_hdr);
+
+ if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
+ return -EINVAL;
+
+ if (ex_hdr->cmd_hdr_reserved)
+ return -EINVAL;
- return command < ARRAY_SIZE(uverbs_cmd_table);
+ if (ex_hdr->response) {
+ if (!hdr->out_words && !ex_hdr->provider_out_words)
+ return -EINVAL;
+
+ if (!access_ok(VERIFY_WRITE,
+ u64_to_user_ptr(ex_hdr->response),
+ (hdr->out_words + ex_hdr->provider_out_words) * 8))
+ return -EFAULT;
+ } else {
+ if (hdr->out_words || ex_hdr->provider_out_words)
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ /* not extended command */
+ if (hdr->in_words * 4 != count)
+ return -EINVAL;
+
+ return 0;
}
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
+ struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_device *ib_dev;
struct ib_uverbs_cmd_hdr hdr;
- bool extended_command;
- __u32 command;
- __u32 flags;
+ bool extended;
int srcu_key;
+ u32 command;
ssize_t ret;
if (!ib_safe_file_access(filp)) {
@@ -676,12 +724,31 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
return -EACCES;
}
- if (count < sizeof hdr)
+ if (count < sizeof(hdr))
return -EINVAL;
- if (copy_from_user(&hdr, buf, sizeof hdr))
+ if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
+ ret = process_hdr(&hdr, &command, &extended);
+ if (ret)
+ return ret;
+
+ if (!file->ucontext &&
+ (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended))
+ return -EINVAL;
+
+ if (extended) {
+ if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+ return -EINVAL;
+ if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+ return -EFAULT;
+ }
+
+ ret = verify_hdr(&hdr, &ex_hdr, count, extended);
+ if (ret)
+ return ret;
+
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
@@ -690,106 +757,22 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
goto out;
}
- if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
- IB_USER_VERBS_CMD_COMMAND_MASK)) {
- ret = -EINVAL;
- goto out;
- }
-
- command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
- flags = (hdr.command &
- IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
-
- extended_command = flags & IB_USER_VERBS_CMD_FLAG_EXTENDED;
- if (!verify_command_idx(command, extended_command)) {
- ret = -EINVAL;
- goto out;
- }
-
- if (verify_command_mask(ib_dev, command)) {
+ if (!verify_command_mask(ib_dev, command, extended)) {
ret = -EOPNOTSUPP;
goto out;
}
- if (!file->ucontext &&
- command != IB_USER_VERBS_CMD_GET_CONTEXT) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!flags) {
- if (!uverbs_cmd_table[command]) {
- ret = -EINVAL;
- goto out;
- }
-
- if (hdr.in_words * 4 != count) {
- ret = -EINVAL;
- goto out;
- }
+ buf += sizeof(hdr);
- ret = uverbs_cmd_table[command](file, ib_dev,
- buf + sizeof(hdr),
- hdr.in_words * 4,
- hdr.out_words * 4);
-
- } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
- struct ib_uverbs_ex_cmd_hdr ex_hdr;
+ if (!extended) {
+ ret = uverbs_cmd_table[command](file, ib_dev, buf,
+ hdr.in_words * 4,
+ hdr.out_words * 4);
+ } else {
struct ib_udata ucore;
struct ib_udata uhw;
- size_t written_count = count;
- if (!uverbs_ex_cmd_table[command]) {
- ret = -ENOSYS;
- goto out;
- }
-
- if (!file->ucontext) {
- ret = -EINVAL;
- goto out;
- }
-
- if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
- ret = -EINVAL;
- goto out;
- }
-
- if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
- ret = -EFAULT;
- goto out;
- }
-
- count -= sizeof(hdr) + sizeof(ex_hdr);
- buf += sizeof(hdr) + sizeof(ex_hdr);
-
- if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
- ret = -EINVAL;
- goto out;
- }
-
- if (ex_hdr.cmd_hdr_reserved) {
- ret = -EINVAL;
- goto out;
- }
-
- if (ex_hdr.response) {
- if (!hdr.out_words && !ex_hdr.provider_out_words) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!access_ok(VERIFY_WRITE,
- u64_to_user_ptr(ex_hdr.response),
- (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
- ret = -EFAULT;
- goto out;
- }
- } else {
- if (hdr.out_words || ex_hdr.provider_out_words) {
- ret = -EINVAL;
- goto out;
- }
- }
+ buf += sizeof(ex_hdr);
ib_uverbs_init_udata_buf_or_null(&ucore, buf,
u64_to_user_ptr(ex_hdr.response),
@@ -802,10 +785,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
ex_hdr.provider_out_words * 8);
ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw);
- if (!ret)
- ret = written_count;
- } else {
- ret = -ENOSYS;
+ ret = (ret) ? : count;
}
out:
@@ -953,10 +933,8 @@ static const struct file_operations uverbs_fops = {
.open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
-#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
.unlocked_ioctl = ib_uverbs_ioctl,
.compat_ioctl = ib_uverbs_ioctl,
-#endif
};
static const struct file_operations uverbs_mmap_fops = {
@@ -966,10 +944,8 @@ static const struct file_operations uverbs_mmap_fops = {
.open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
-#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
.unlocked_ioctl = ib_uverbs_ioctl,
.compat_ioctl = ib_uverbs_ioctl,
-#endif
};
static struct ib_client uverbs_client = {
@@ -1032,7 +1008,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (!device->alloc_ucontext)
return;
- uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
+ uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
if (!uverbs_dev)
return;
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index df1360e6774f..569f48bd821e 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -48,7 +48,16 @@ static int uverbs_free_ah(struct ib_uobject *uobject,
static int uverbs_free_flow(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
- return ib_destroy_flow((struct ib_flow *)uobject->object);
+ int ret;
+ struct ib_flow *flow = (struct ib_flow *)uobject->object;
+ struct ib_uflow_object *uflow =
+ container_of(uobject, struct ib_uflow_object, uobject);
+
+ ret = ib_destroy_flow(flow);
+ if (!ret)
+ ib_uverbs_flow_resources_free(uflow->resources);
+
+ return ret;
}
static int uverbs_free_mw(struct ib_uobject *uobject,
@@ -135,31 +144,6 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
return ret;
}
-static int uverbs_free_cq(struct ib_uobject *uobject,
- enum rdma_remove_reason why)
-{
- struct ib_cq *cq = uobject->object;
- struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
- struct ib_ucq_object *ucq =
- container_of(uobject, struct ib_ucq_object, uobject);
- int ret;
-
- ret = ib_destroy_cq(cq);
- if (!ret || why != RDMA_REMOVE_DESTROY)
- ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
- container_of(ev_queue,
- struct ib_uverbs_completion_event_file,
- ev_queue) : NULL,
- ucq);
- return ret;
-}
-
-static int uverbs_free_mr(struct ib_uobject *uobject,
- enum rdma_remove_reason why)
-{
- return ib_dereg_mr((struct ib_mr *)uobject->object);
-}
-
static int uverbs_free_xrcd(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
@@ -210,18 +194,26 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_
return 0;
};
+int uverbs_destroy_def_handler(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ return 0;
+}
+
/*
* This spec is used in order to pass information to the hardware driver in a
* legacy way. Every verb that could get driver specific data should get this
* spec.
*/
-static const struct uverbs_attr_def uverbs_uhw_compat_in =
- UVERBS_ATTR_PTR_IN_SZ(UVERBS_UHW_IN, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ));
-static const struct uverbs_attr_def uverbs_uhw_compat_out =
- UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ));
-
-static void create_udata(struct uverbs_attr_bundle *ctx,
- struct ib_udata *udata)
+const struct uverbs_attr_def uverbs_uhw_compat_in =
+ UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+const struct uverbs_attr_def uverbs_uhw_compat_out =
+ UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+
+void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
{
/*
* This is for ease of conversion. The purpose is to convert all drivers
@@ -229,9 +221,9 @@ static void create_udata(struct uverbs_attr_bundle *ctx,
* Assume attr == 0 is input and attr == 1 is output.
*/
const struct uverbs_attr *uhw_in =
- uverbs_attr_get(ctx, UVERBS_UHW_IN);
+ uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN);
const struct uverbs_attr *uhw_out =
- uverbs_attr_get(ctx, UVERBS_UHW_OUT);
+ uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT);
if (!IS_ERR(uhw_in)) {
udata->inlen = uhw_in->ptr_attr.len;
@@ -253,207 +245,67 @@ static void create_udata(struct uverbs_attr_bundle *ctx,
}
}
-static int uverbs_create_cq_handler(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL,
+ &UVERBS_TYPE_ALLOC_FD(0,
+ sizeof(struct ib_uverbs_completion_event_file),
+ uverbs_hot_unplug_completion_event_file,
+ &uverbs_event_fops,
+ "[infinibandevent]", O_RDONLY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
+ uverbs_free_qp));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
+ uverbs_free_srq));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
+ 0, uverbs_free_flow));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
+ uverbs_free_wq));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
+ uverbs_free_xrcd));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
+ /* 2 is used in order to free the PD after MRs */
+ &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL);
+
+static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
+ &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
+ &UVERBS_OBJECT(UVERBS_OBJECT_PD),
+ &UVERBS_OBJECT(UVERBS_OBJECT_MR),
+ &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
+ &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_QP),
+ &UVERBS_OBJECT(UVERBS_OBJECT_AH),
+ &UVERBS_OBJECT(UVERBS_OBJECT_MW),
+ &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
+ &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
+ &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
+ &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
+ &UVERBS_OBJECT(UVERBS_OBJECT_DM));
+
+const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
{
- struct ib_ucontext *ucontext = file->ucontext;
- struct ib_ucq_object *obj;
- struct ib_udata uhw;
- int ret;
- u64 user_handle;
- struct ib_cq_init_attr attr = {};
- struct ib_cq *cq;
- struct ib_uverbs_completion_event_file *ev_file = NULL;
- const struct uverbs_attr *ev_file_attr;
- struct ib_uobject *ev_file_uobj;
-
- if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
- return -EOPNOTSUPP;
-
- ret = uverbs_copy_from(&attr.comp_vector, attrs, CREATE_CQ_COMP_VECTOR);
- if (!ret)
- ret = uverbs_copy_from(&attr.cqe, attrs, CREATE_CQ_CQE);
- if (!ret)
- ret = uverbs_copy_from(&user_handle, attrs, CREATE_CQ_USER_HANDLE);
- if (ret)
- return ret;
-
- /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
- if (uverbs_copy_from(&attr.flags, attrs, CREATE_CQ_FLAGS) == -EFAULT)
- return -EFAULT;
-
- ev_file_attr = uverbs_attr_get(attrs, CREATE_CQ_COMP_CHANNEL);
- if (!IS_ERR(ev_file_attr)) {
- ev_file_uobj = ev_file_attr->obj_attr.uobject;
-
- ev_file = container_of(ev_file_uobj,
- struct ib_uverbs_completion_event_file,
- uobj_file.uobj);
- uverbs_uobject_get(ev_file_uobj);
- }
-
- if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
- ret = -EINVAL;
- goto err_event_file;
- }
-
- obj = container_of(uverbs_attr_get(attrs, CREATE_CQ_HANDLE)->obj_attr.uobject,
- typeof(*obj), uobject);
- obj->uverbs_file = ucontext->ufile;
- obj->comp_events_reported = 0;
- obj->async_events_reported = 0;
- INIT_LIST_HEAD(&obj->comp_list);
- INIT_LIST_HEAD(&obj->async_list);
-
- /* Temporary, only until drivers get the new uverbs_attr_bundle */
- create_udata(attrs, &uhw);
-
- cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
- if (IS_ERR(cq)) {
- ret = PTR_ERR(cq);
- goto err_event_file;
- }
-
- cq->device = ib_dev;
- cq->uobject = &obj->uobject;
- cq->comp_handler = ib_uverbs_comp_handler;
- cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
- obj->uobject.object = cq;
- obj->uobject.user_handle = user_handle;
- atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_add(&cq->res);
-
- ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe,
- sizeof(cq->cqe));
- if (ret)
- goto err_cq;
-
- return 0;
-err_cq:
- ib_destroy_cq(cq);
-
-err_event_file:
- if (ev_file)
- uverbs_uobject_put(ev_file_uobj);
- return ret;
-};
-
-static DECLARE_UVERBS_METHOD(
- uverbs_method_cq_create, UVERBS_CQ_CREATE, uverbs_create_cq_handler,
- &UVERBS_ATTR_IDR(CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_CQE, u32,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_USER_HANDLE, u64,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_FD(CREATE_CQ_COMP_CHANNEL, UVERBS_OBJECT_COMP_CHANNEL,
- UVERBS_ACCESS_READ),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_COMP_VECTOR, u32,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_FLAGS, u32),
- &UVERBS_ATTR_PTR_OUT(CREATE_CQ_RESP_CQE, u32,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
-
-static int uverbs_destroy_cq_handler(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uverbs_destroy_cq_resp resp;
- struct ib_uobject *uobj =
- uverbs_attr_get(attrs, DESTROY_CQ_HANDLE)->obj_attr.uobject;
- struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
- uobject);
- int ret;
-
- if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
- return -EOPNOTSUPP;
-
- ret = rdma_explicit_destroy(uobj);
- if (ret)
- return ret;
-
- resp.comp_events_reported = obj->comp_events_reported;
- resp.async_events_reported = obj->async_events_reported;
-
- return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp, sizeof(resp));
+ return &uverbs_default_objects;
}
-
-static DECLARE_UVERBS_METHOD(
- uverbs_method_cq_destroy, UVERBS_CQ_DESTROY, uverbs_destroy_cq_handler,
- &UVERBS_ATTR_IDR(DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
- UVERBS_ACCESS_DESTROY,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(DESTROY_CQ_RESP, struct ib_uverbs_destroy_cq_resp,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel,
- UVERBS_OBJECT_COMP_CHANNEL,
- &UVERBS_TYPE_ALLOC_FD(0,
- sizeof(struct ib_uverbs_completion_event_file),
- uverbs_hot_unplug_completion_event_file,
- &uverbs_event_fops,
- "[infinibandevent]", O_RDONLY));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_cq, UVERBS_OBJECT_CQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
- uverbs_free_cq),
- &uverbs_method_cq_create,
- &uverbs_method_cq_destroy);
-
-DECLARE_UVERBS_OBJECT(uverbs_object_qp, UVERBS_OBJECT_QP,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
- uverbs_free_qp));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_mw, UVERBS_OBJECT_MW,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_mr, UVERBS_OBJECT_MR,
- /* 1 is used in order to free the MR after all the MWs */
- &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_srq, UVERBS_OBJECT_SRQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
- uverbs_free_srq));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_ah, UVERBS_OBJECT_AH,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_flow, UVERBS_OBJECT_FLOW,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_wq, UVERBS_OBJECT_WQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
- uverbs_free_wq));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_rwq_ind_table,
- UVERBS_OBJECT_RWQ_IND_TBL,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_xrcd, UVERBS_OBJECT_XRCD,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
- uverbs_free_xrcd));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_pd, UVERBS_OBJECT_PD,
- /* 2 is used in order to free the PD after MRs */
- &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_device, UVERBS_OBJECT_DEVICE, NULL);
-
-DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
- &uverbs_object_device,
- &uverbs_object_pd,
- &uverbs_object_mr,
- &uverbs_object_comp_channel,
- &uverbs_object_cq,
- &uverbs_object_qp,
- &uverbs_object_ah,
- &uverbs_object_mw,
- &uverbs_object_srq,
- &uverbs_object_flow,
- &uverbs_object_wq,
- &uverbs_object_rwq_ind_table,
- &uverbs_object_xrcd);
+EXPORT_SYMBOL_GPL(uverbs_default_get_objects);
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
new file mode 100644
index 000000000000..b0dbae9dd0d7
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_cq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_cq *cq = uobject->object;
+ struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobject, struct ib_ucq_object, uobject);
+ int ret;
+
+ ret = ib_destroy_cq(cq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
+ container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) : NULL,
+ ucq);
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_ucontext *ucontext = file->ucontext;
+ struct ib_ucq_object *obj;
+ struct ib_udata uhw;
+ int ret;
+ u64 user_handle;
+ struct ib_cq_init_attr attr = {};
+ struct ib_cq *cq;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
+ const struct uverbs_attr *ev_file_attr;
+ struct ib_uobject *ev_file_uobj;
+
+ if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.comp_vector, attrs,
+ UVERBS_ATTR_CREATE_CQ_COMP_VECTOR);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.cqe, attrs,
+ UVERBS_ATTR_CREATE_CQ_CQE);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_CQ_USER_HANDLE);
+ if (ret)
+ return ret;
+
+ /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
+ if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs,
+ UVERBS_ATTR_CREATE_CQ_FLAGS)))
+ return -EFAULT;
+
+ ev_file_attr = uverbs_attr_get(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
+ if (!IS_ERR(ev_file_attr)) {
+ ev_file_uobj = ev_file_attr->obj_attr.uobject;
+
+ ev_file = container_of(ev_file_uobj,
+ struct ib_uverbs_completion_event_file,
+ uobj_file.uobj);
+ uverbs_uobject_get(ev_file_uobj);
+ }
+
+ if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
+
+ obj = container_of(uverbs_attr_get(attrs,
+ UVERBS_ATTR_CREATE_CQ_HANDLE)->obj_attr.uobject,
+ typeof(*obj), uobject);
+ obj->uverbs_file = ucontext->ufile;
+ obj->comp_events_reported = 0;
+ obj->async_events_reported = 0;
+ INIT_LIST_HEAD(&obj->comp_list);
+ INIT_LIST_HEAD(&obj->async_list);
+
+ /* Temporary, only until drivers get the new uverbs_attr_bundle */
+ create_udata(attrs, &uhw);
+
+ cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto err_event_file;
+ }
+
+ cq->device = ib_dev;
+ cq->uobject = &obj->uobject;
+ cq->comp_handler = ib_uverbs_comp_handler;
+ cq->event_handler = ib_uverbs_cq_event_handler;
+ cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
+ obj->uobject.object = cq;
+ obj->uobject.user_handle = user_handle;
+ atomic_set(&cq->usecnt, 0);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
+ sizeof(cq->cqe));
+ if (ret)
+ goto err_cq;
+
+ return 0;
+err_cq:
+ ib_destroy_cq(cq);
+
+err_event_file:
+ if (ev_file)
+ uverbs_uobject_put(ev_file_uobj);
+ return ret;
+};
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_ACCESS_READ),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_destroy_cq_resp resp;
+ struct ib_uobject *uobj =
+ uverbs_attr_get(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE)->obj_attr.uobject;
+ struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
+ uobject);
+ int ret;
+
+ if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
+ return -EOPNOTSUPP;
+
+ ret = rdma_explicit_destroy(uobj);
+ if (ret)
+ return ret;
+
+ resp.comp_events_reported = obj->comp_events_reported;
+ resp.async_events_reported = obj->async_events_reported;
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
+ sizeof(resp));
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
+ uverbs_free_cq),
+#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
+#endif
+ );
+
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
new file mode 100644
index 000000000000..8b681575b615
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_dm(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_dm *dm = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt))
+ return -EBUSY;
+
+ return dm->device->dealloc_dm(dm);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_ucontext *ucontext = file->ucontext;
+ struct ib_dm_alloc_attr attr = {};
+ struct ib_uobject *uobj;
+ struct ib_dm *dm;
+ int ret;
+
+ if (!ib_dev->alloc_dm)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.length, attrs,
+ UVERBS_ATTR_ALLOC_DM_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.alignment, attrs,
+ UVERBS_ATTR_ALLOC_DM_ALIGNMENT);
+ if (ret)
+ return ret;
+
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject;
+
+ dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs);
+ if (IS_ERR(dm))
+ return PTR_ERR(dm);
+
+ dm->device = ib_dev;
+ dm->length = attr.length;
+ dm->uobject = uobj;
+ atomic_set(&dm->usecnt, 0);
+
+ uobj->object = dm;
+
+ return 0;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE,
+ uverbs_destroy_def_handler,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
+ /* 1 is used in order to free the DM after MRs */
+ &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
new file mode 100644
index 000000000000..cbcec3da12f6
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_flow_action(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_flow_action *action = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY &&
+ atomic_read(&action->usecnt))
+ return -EBUSY;
+
+ return action->device->destroy_flow_action(action);
+}
+
+static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs,
+ u32 flags, bool is_modify)
+{
+ u64 verbs_flags = flags;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN))
+ verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED;
+
+ if (is_modify && uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS))
+ verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS;
+
+ return verbs_flags;
+};
+
+static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat)
+{
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm =
+ &keymat->keymat.aes_gcm;
+
+ if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
+ return -EOPNOTSUPP;
+
+ if (aes_gcm->key_len != 32 &&
+ aes_gcm->key_len != 24 &&
+ aes_gcm->key_len != 16)
+ return -EINVAL;
+
+ if (aes_gcm->icv_len != 16 &&
+ aes_gcm->icv_len != 8 &&
+ aes_gcm->icv_len != 12)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = {
+ [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm,
+};
+
+static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify)
+{
+ /* This is used in order to modify an esp flow action with an enabled
+ * replay protection to a disabled one. This is only supported via
+ * modify, as in create verb we can simply drop the REPLAY attribute and
+ * achieve the same thing.
+ */
+ return is_modify ? 0 : -EINVAL;
+}
+
+static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify)
+{
+ /* Some replay protections could always be enabled without validating
+ * anything.
+ */
+ return 0;
+}
+
+static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify) = {
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none,
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok,
+};
+
+static int parse_esp_ip(enum ib_flow_spec_type proto,
+ const void __user *val_ptr,
+ size_t len, union ib_flow_spec *out)
+{
+ int ret;
+ const struct ib_uverbs_flow_ipv4_filter ipv4 = {
+ .src_ip = cpu_to_be32(0xffffffffUL),
+ .dst_ip = cpu_to_be32(0xffffffffUL),
+ .proto = 0xff,
+ .tos = 0xff,
+ .ttl = 0xff,
+ .flags = 0xff,
+ };
+ const struct ib_uverbs_flow_ipv6_filter ipv6 = {
+ .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ .flow_label = cpu_to_be32(0xffffffffUL),
+ .next_hdr = 0xff,
+ .traffic_class = 0xff,
+ .hop_limit = 0xff,
+ };
+ union {
+ struct ib_uverbs_flow_ipv4_filter ipv4;
+ struct ib_uverbs_flow_ipv6_filter ipv6;
+ } user_val = {};
+ const void *user_pmask;
+ size_t val_len;
+
+ /* If the flow IPv4/IPv6 flow specifications are extended, the mask
+ * should be changed as well.
+ */
+ BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) +
+ sizeof(ipv4.flags) != sizeof(ipv4));
+ BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) +
+ sizeof(ipv6.reserved) != sizeof(ipv6));
+
+ switch (proto) {
+ case IB_FLOW_SPEC_IPV4:
+ if (len > sizeof(user_val.ipv4) &&
+ !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4),
+ len - sizeof(user_val.ipv4)))
+ return -EOPNOTSUPP;
+
+ val_len = min_t(size_t, len, sizeof(user_val.ipv4));
+ ret = copy_from_user(&user_val.ipv4, val_ptr,
+ val_len);
+ if (ret)
+ return -EFAULT;
+
+ user_pmask = &ipv4;
+ break;
+ case IB_FLOW_SPEC_IPV6:
+ if (len > sizeof(user_val.ipv6) &&
+ !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6),
+ len - sizeof(user_val.ipv6)))
+ return -EOPNOTSUPP;
+
+ val_len = min_t(size_t, len, sizeof(user_val.ipv6));
+ ret = copy_from_user(&user_val.ipv6, val_ptr,
+ val_len);
+ if (ret)
+ return -EFAULT;
+
+ user_pmask = &ipv6;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask,
+ &user_val,
+ val_len, out);
+}
+
+static int flow_action_esp_get_encap(struct ib_flow_spec_list *out,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_flow_action_esp_encap uverbs_encap;
+ int ret;
+
+ ret = uverbs_copy_from(&uverbs_encap, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP);
+ if (ret)
+ return ret;
+
+ /* We currently support only one encap */
+ if (uverbs_encap.next_ptr)
+ return -EOPNOTSUPP;
+
+ if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 &&
+ uverbs_encap.type != IB_FLOW_SPEC_IPV6)
+ return -EOPNOTSUPP;
+
+ return parse_esp_ip(uverbs_encap.type,
+ u64_to_user_ptr(uverbs_encap.val_ptr),
+ uverbs_encap.len,
+ &out->spec);
+}
+
+struct ib_flow_action_esp_attr {
+ struct ib_flow_action_attrs_esp hdr;
+ struct ib_flow_action_attrs_esp_keymats keymat;
+ struct ib_flow_action_attrs_esp_replays replay;
+ /* We currently support only one spec */
+ struct ib_flow_spec_list encap;
+};
+
+#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
+static int parse_flow_action_esp(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs,
+ struct ib_flow_action_esp_attr *esp_attr,
+ bool is_modify)
+{
+ struct ib_uverbs_flow_action_esp uverbs_esp = {};
+ int ret;
+
+ /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
+ ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ESN);
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ /* This can be called from FLOW_ACTION_ESP_MODIFY where
+ * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional
+ */
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) {
+ ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS);
+ if (ret)
+ return ret;
+
+ if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1))
+ return -EOPNOTSUPP;
+
+ esp_attr->hdr.spi = uverbs_esp.spi;
+ esp_attr->hdr.seq = uverbs_esp.seq;
+ esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad;
+ esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts;
+ }
+ esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags,
+ is_modify);
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) {
+ esp_attr->keymat.protocol =
+ uverbs_attr_get_enum_id(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
+ ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat,
+ attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
+ if (ret)
+ return ret;
+
+ ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.keymat = &esp_attr->keymat;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) {
+ esp_attr->replay.protocol =
+ uverbs_attr_get_enum_id(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
+
+ ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay,
+ attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
+ if (ret)
+ return ret;
+
+ ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay,
+ is_modify);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.replay = &esp_attr->replay;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) {
+ ret = flow_action_esp_get_encap(&esp_attr->encap, attrs);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.encap = &esp_attr->encap;
+ }
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ int ret;
+ struct ib_uobject *uobj;
+ struct ib_flow_action *action;
+ struct ib_flow_action_esp_attr esp_attr = {};
+
+ if (!ib_dev->create_flow_action_esp)
+ return -EOPNOTSUPP;
+
+ ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, false);
+ if (ret)
+ return ret;
+
+ /* No need to check as this attribute is marked as MANDATORY */
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+ action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
+ if (IS_ERR(action))
+ return PTR_ERR(action);
+
+ atomic_set(&action->usecnt, 0);
+ action->device = ib_dev;
+ action->type = IB_FLOW_ACTION_ESP;
+ action->uobject = uobj;
+ uobj->object = action;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ int ret;
+ struct ib_uobject *uobj;
+ struct ib_flow_action *action;
+ struct ib_flow_action_esp_attr esp_attr = {};
+
+ if (!ib_dev->modify_flow_action_esp)
+ return -EOPNOTSUPP;
+
+ ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true);
+ if (ret)
+ return ret;
+
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+ action = uobj->object;
+
+ if (action->type != IB_FLOW_ACTION_ESP)
+ return -EINVAL;
+
+ return ib_dev->modify_flow_action_esp(action,
+ &esp_attr.hdr,
+ attrs);
+}
+
+static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
+ [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
+ .ptr = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
+ .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
+ },
+ },
+};
+
+static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
+ .ptr = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ /* No need to specify any data */
+ .len = 0,
+ }
+ },
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
+ .ptr = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
+ .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
+ }
+ },
+};
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+ UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_WRITE,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
+
+static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY,
+ uverbs_destroy_def_handler,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
new file mode 100644
index 000000000000..68f7cadf088f
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_mr(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_dereg_mr((struct ib_mr *)uobject->object);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm_mr_attr attr = {};
+ struct ib_uobject *uobj;
+ struct ib_dm *dm;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ int ret;
+
+ if (!ib_dev->reg_dm_mr)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.length, attrs,
+ UVERBS_ATTR_REG_DM_MR_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.access_flags, attrs,
+ UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS);
+ if (ret)
+ return ret;
+
+ if (!(attr.access_flags & IB_ZERO_BASED))
+ return -EINVAL;
+
+ ret = ib_check_mr_access(attr.access_flags);
+ if (ret)
+ return ret;
+
+ pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
+
+ dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
+
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject;
+
+ if (attr.offset > dm->length || attr.length > dm->length ||
+ attr.length > dm->length - attr.offset)
+ return -EINVAL;
+
+ mr = pd->device->reg_dm_mr(pd, dm, &attr, attrs);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = dm;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&dm->usecnt);
+
+ uobj->object = mr;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey,
+ sizeof(mr->lkey));
+ if (ret)
+ goto err_dereg;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+ if (ret)
+ goto err_dereg;
+
+ return 0;
+
+err_dereg:
+ ib_dereg_mr(mr);
+
+ return ret;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR,
+ /* 1 is used in order to free the MR after all the MWs */
+ &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 93025d2009b8..7eff3aeffe01 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -655,7 +655,7 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
return ah->device->modify_ah ?
ah->device->modify_ah(ah, ah_attr) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_modify_ah);
@@ -663,7 +663,7 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
return ah->device->query_ah ?
ah->device->query_ah(ah, ah_attr) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_query_ah);
@@ -689,7 +689,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
struct ib_srq *srq;
if (!pd->device->create_srq)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
srq = pd->device->create_srq(pd, srq_init_attr, NULL);
@@ -722,7 +722,7 @@ int ib_modify_srq(struct ib_srq *srq,
{
return srq->device->modify_srq ?
srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_modify_srq);
@@ -730,7 +730,7 @@ int ib_query_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr)
{
return srq->device->query_srq ?
- srq->device->query_srq(srq, srq_attr) : -ENOSYS;
+ srq->device->query_srq(srq, srq_attr) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_srq);
@@ -1263,34 +1263,30 @@ static const struct {
}
};
-int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
- enum rdma_link_layer ll)
+bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+ enum ib_qp_type type, enum ib_qp_attr_mask mask,
+ enum rdma_link_layer ll)
{
enum ib_qp_attr_mask req_param, opt_param;
- if (cur_state < 0 || cur_state > IB_QPS_ERR ||
- next_state < 0 || next_state > IB_QPS_ERR)
- return 0;
-
if (mask & IB_QP_CUR_STATE &&
cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
- return 0;
+ return false;
if (!qp_state_table[cur_state][next_state].valid)
- return 0;
+ return false;
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
if ((mask & req_param) != req_param)
- return 0;
+ return false;
if (mask & ~(req_param | opt_param | IB_QP_STATE))
- return 0;
+ return false;
- return 1;
+ return true;
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
@@ -1457,7 +1453,7 @@ int ib_query_qp(struct ib_qp *qp,
{
return qp->device->query_qp ?
qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_qp);
@@ -1594,7 +1590,7 @@ EXPORT_SYMBOL(ib_create_cq);
int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
return cq->device->modify_cq ?
- cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
+ cq->device->modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_set_cq_moderation);
@@ -1611,7 +1607,7 @@ EXPORT_SYMBOL(ib_destroy_cq);
int ib_resize_cq(struct ib_cq *cq, int cqe)
{
return cq->device->resize_cq ?
- cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
+ cq->device->resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_resize_cq);
@@ -1620,11 +1616,16 @@ EXPORT_SYMBOL(ib_resize_cq);
int ib_dereg_mr(struct ib_mr *mr)
{
struct ib_pd *pd = mr->pd;
+ struct ib_dm *dm = mr->dm;
int ret;
+ rdma_restrack_del(&mr->res);
ret = mr->device->dereg_mr(mr);
- if (!ret)
+ if (!ret) {
atomic_dec(&pd->usecnt);
+ if (dm)
+ atomic_dec(&dm->usecnt);
+ }
return ret;
}
@@ -1649,7 +1650,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
struct ib_mr *mr;
if (!pd->device->alloc_mr)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
if (!IS_ERR(mr)) {
@@ -1658,6 +1659,8 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
mr->need_inval = false;
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_add(&mr->res);
}
return mr;
@@ -1673,7 +1676,7 @@ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
struct ib_fmr *fmr;
if (!pd->device->alloc_fmr)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
if (!IS_ERR(fmr)) {
@@ -1757,7 +1760,7 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
int ret;
if (!qp->device->attach_mcast)
- return -ENOSYS;
+ return -EOPNOTSUPP;
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
@@ -1775,7 +1778,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
int ret;
if (!qp->device->detach_mcast)
- return -ENOSYS;
+ return -EOPNOTSUPP;
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
@@ -1793,7 +1796,7 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
struct ib_xrcd *xrcd;
if (!device->alloc_xrcd)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
xrcd = device->alloc_xrcd(device, NULL, NULL);
if (!IS_ERR(xrcd)) {
@@ -1847,7 +1850,7 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq *wq;
if (!pd->device->create_wq)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
wq = pd->device->create_wq(pd, wq_attr, NULL);
if (!IS_ERR(wq)) {
@@ -1902,7 +1905,7 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
int err;
if (!wq->device->modify_wq)
- return -ENOSYS;
+ return -EOPNOTSUPP;
err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL);
return err;
@@ -1927,7 +1930,7 @@ struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
u32 table_size;
if (!device->create_rwq_ind_table)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
table_size = (1 << init_attr->log_ind_tbl_size);
rwq_ind_table = device->create_rwq_ind_table(device,
@@ -1977,7 +1980,7 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
{
struct ib_flow *flow_id;
if (!qp->device->create_flow)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
flow_id = qp->device->create_flow(qp, flow_attr, domain);
if (!IS_ERR(flow_id)) {
@@ -2004,7 +2007,7 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
return mr->device->check_mr_status ?
- mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
+ mr->device->check_mr_status(mr, check_mask, mr_status) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_check_mr_status);
@@ -2012,7 +2015,7 @@ int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
int state)
{
if (!device->set_vf_link_state)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->set_vf_link_state(device, vf, port, state);
}
@@ -2022,7 +2025,7 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
struct ifla_vf_info *info)
{
if (!device->get_vf_config)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->get_vf_config(device, vf, port, info);
}
@@ -2032,7 +2035,7 @@ int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
struct ifla_vf_stats *stats)
{
if (!device->get_vf_stats)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->get_vf_stats(device, vf, port, stats);
}
@@ -2042,7 +2045,7 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
int type)
{
if (!device->set_vf_guid)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->set_vf_guid(device, vf, port, guid, type);
}
@@ -2077,7 +2080,7 @@ int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size)
{
if (unlikely(!mr->device->map_mr_sg))
- return -ENOSYS;
+ return -EOPNOTSUPP;
mr->page_size = page_size;
@@ -2194,7 +2197,14 @@ static void __ib_drain_sq(struct ib_qp *qp)
struct ib_cq *cq = qp->send_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe sdrain;
- struct ib_send_wr swr = {}, *bad_swr;
+ struct ib_send_wr *bad_swr;
+ struct ib_rdma_wr swr = {
+ .wr = {
+ .next = NULL,
+ { .wr_cqe = &sdrain.cqe, },
+ .opcode = IB_WR_RDMA_WRITE,
+ },
+ };
int ret;
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
@@ -2203,11 +2213,10 @@ static void __ib_drain_sq(struct ib_qp *qp)
return;
}
- swr.wr_cqe = &sdrain.cqe;
sdrain.cqe.done = ib_drain_qp_done;
init_completion(&sdrain.done);
- ret = ib_post_send(qp, &swr, &bad_swr);
+ ret = ib_post_send(qp, &swr.wr, &bad_swr);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;