summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.mailmap1
-rw-r--r--MAINTAINERS2
-rw-r--r--drivers/infiniband/core/cm.c9
-rw-r--r--drivers/infiniband/core/cma.c195
-rw-r--r--drivers/infiniband/core/cma_configfs.c4
-rw-r--r--drivers/infiniband/core/core_priv.h28
-rw-r--r--drivers/infiniband/core/counters.c138
-rw-r--r--drivers/infiniband/core/cq.c16
-rw-r--r--drivers/infiniband/core/device.c92
-rw-r--r--drivers/infiniband/core/iwpm_util.h2
-rw-r--r--drivers/infiniband/core/rdma_core.c101
-rw-r--r--drivers/infiniband/core/restrack.c23
-rw-r--r--drivers/infiniband/core/rw.c5
-rw-r--r--drivers/infiniband/core/sa_query.c3
-rw-r--r--drivers/infiniband/core/sysfs.c166
-rw-r--r--drivers/infiniband/core/ucma.c2
-rw-r--r--drivers/infiniband/core/umem.c17
-rw-r--r--drivers/infiniband/core/user_mad.c6
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c149
-rw-r--r--drivers/infiniband/core/uverbs_main.c4
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c18
-rw-r--r--drivers/infiniband/core/uverbs_std_types_async_fd.c5
-rw-r--r--drivers/infiniband/core/uverbs_std_types_counters.c5
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c4
-rw-r--r--drivers/infiniband/core/uverbs_std_types_device.c14
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dm.c6
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c6
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c6
-rw-r--r--drivers/infiniband/core/uverbs_std_types_qp.c8
-rw-r--r--drivers/infiniband/core/uverbs_std_types_srq.c4
-rw-r--r--drivers/infiniband/core/uverbs_std_types_wq.c4
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c5
-rw-r--r--drivers/infiniband/core/verbs.c27
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c15
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c34
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h2
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c84
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c35
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c8
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c34
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c6
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c1
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c62
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c5
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c55
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c132
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.c37
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h6
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h26
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c46
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_db.c8
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h178
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c59
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c50
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c554
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h265
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c82
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c79
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_pd.c14
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c300
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c53
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw.h1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c6
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ctrl.c72
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_d.h35
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_status.h1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_type.h38
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_uk.c41
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_user.h8
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c121
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c1
-rw-r--r--drivers/infiniband/hw/mlx4/main.c64
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c82
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h8
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c16
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c14
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c4
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c66
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c77
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c90
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c6
-rw-r--r--drivers/infiniband/hw/mlx5/main.c105
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c192
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h102
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c960
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c56
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c197
-rw-r--r--drivers/infiniband/hw/mlx5/restrack.c2
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c34
-rw-r--r--drivers/infiniband/hw/mlx5/srq.h1
-rw-r--r--drivers/infiniband/hw/mlx5/srq_cmd.c80
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c61
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c42
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c11
-rw-r--r--drivers/infiniband/hw/qedr/main.c39
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c13
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c11
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c96
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c19
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c100
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c7
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c34
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c5
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/Kconfig1
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c3
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/mcast.c12
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c6
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c18
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c36
-rw-r--r--drivers/infiniband/sw/rxe/Kconfig1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c18
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.h94
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c67
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h2
-rw-r--r--drivers/infiniband/sw/siw/Kconfig1
-rw-r--r--drivers/infiniband/sw/siw/siw.h1
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.c2
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c52
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c12
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c2
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c24
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c29
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h6
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h2
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c2
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c62
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.c74
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.h1
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-pri.h3
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c21
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.c119
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.h2
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs.c61
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c48
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c14
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h2
-rw-r--r--drivers/nvme/host/rdma.c2
-rw-r--r--drivers/nvme/target/rdma.c3
-rw-r--r--drivers/pci/p2pdma.c25
-rw-r--r--include/linux/dma-mapping.h2
-rw-r--r--include/rdma/ib_umem.h42
-rw-r--r--include/rdma/ib_verbs.h198
-rw-r--r--include/rdma/restrack.h24
-rw-r--r--include/rdma/uverbs_ioctl.h25
-rw-r--r--include/rdma/uverbs_types.h9
-rw-r--r--include/uapi/rdma/hns-abi.h10
-rw-r--r--include/uapi/rdma/ib_user_verbs.h14
-rw-r--r--include/uapi/rdma/rdma_user_rxe.h21
-rw-r--r--kernel/dma/Kconfig5
-rw-r--r--kernel/dma/Makefile1
-rw-r--r--kernel/dma/virt.c61
-rw-r--r--net/rds/ib.c10
-rw-r--r--net/rds/ib.h13
-rw-r--r--net/rds/ib_cm.c128
-rw-r--r--net/rds/ib_recv.c18
-rw-r--r--net/rds/ib_send.c8
-rw-r--r--tools/testing/scatterlist/main.c64
175 files changed, 3845 insertions, 3915 deletions
diff --git a/.mailmap b/.mailmap
index 60a60d10ba7c..632700cee55c 100644
--- a/.mailmap
+++ b/.mailmap
@@ -345,3 +345,4 @@ Wolfram Sang <wsa@kernel.org> <w.sang@pengutronix.de>
Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de>
Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
Yusuke Goda <goda.yusuke@renesas.com>
+Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index a14cad29c031..8c1c5f9830c3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16399,7 +16399,7 @@ F: drivers/infiniband/sw/siw/
F: include/uapi/rdma/siw-abi.h
SOFT-ROCE DRIVER (rxe)
-M: Zhu Yanjun <yanjunz@nvidia.com>
+M: Zhu Yanjun <zyjzyj2000@gmail.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/sw/rxe/
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 5afd142fe8c7..98165589c8ab 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1251,7 +1251,8 @@ out:
EXPORT_SYMBOL(ib_cm_listen);
/**
- * Create a new listening ib_cm_id and listen on the given service ID.
+ * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on
+ * the given service ID.
*
* If there's an existing ID listening on that same device and service ID,
* return it.
@@ -1765,7 +1766,7 @@ static u16 cm_get_bth_pkey(struct cm_work *work)
}
/**
- * Convert OPA SGID to IB SGID
+ * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
* ULPs (such as IPoIB) do not understand OPA GIDs and will
* reject them as the local_gid will not match the sgid. Therefore,
* change the pathrec's SGID to an IB SGID.
@@ -4273,8 +4274,8 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
group = container_of(obj, struct cm_counter_group, obj);
cm_attr = container_of(attr, struct cm_counter_attribute, attr);
- return sprintf(buf, "%ld\n",
- atomic_long_read(&group->counter[cm_attr->index]));
+ return sysfs_emit(buf, "%ld\n",
+ atomic_long_read(&group->counter[cm_attr->index]));
}
static const struct sysfs_ops cm_counter_ops = {
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index a77750b8954d..c51b84b2d2f3 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -477,6 +477,10 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
list_del(&id_priv->list);
cma_dev_put(id_priv->cma_dev);
id_priv->cma_dev = NULL;
+ if (id_priv->id.route.addr.dev_addr.sgid_attr) {
+ rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+ id_priv->id.route.addr.dev_addr.sgid_attr = NULL;
+ }
mutex_unlock(&lock);
}
@@ -1861,9 +1865,6 @@ static void _destroy_id(struct rdma_id_private *id_priv,
kfree(id_priv->id.route.path_rec);
- if (id_priv->id.route.addr.dev_addr.sgid_attr)
- rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
-
put_net(id_priv->id.route.addr.dev_addr.net);
rdma_restrack_del(&id_priv->res);
kfree(id_priv);
@@ -2495,8 +2496,9 @@ static int cma_listen_handler(struct rdma_cm_id *id,
return id_priv->id.event_handler(id, event);
}
-static void cma_listen_on_dev(struct rdma_id_private *id_priv,
- struct cma_device *cma_dev)
+static int cma_listen_on_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev,
+ struct rdma_id_private **to_destroy)
{
struct rdma_id_private *dev_id_priv;
struct net *net = id_priv->id.route.addr.dev_addr.net;
@@ -2504,21 +2506,21 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
lockdep_assert_held(&lock);
+ *to_destroy = NULL;
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
- return;
+ return 0;
dev_id_priv =
__rdma_create_id(net, cma_listen_handler, id_priv,
id_priv->id.ps, id_priv->id.qp_type, id_priv);
if (IS_ERR(dev_id_priv))
- return;
+ return PTR_ERR(dev_id_priv);
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
_cma_attach_to_dev(dev_id_priv, cma_dev);
- list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
cma_id_get(id_priv);
dev_id_priv->internal_id = 1;
dev_id_priv->afonly = id_priv->afonly;
@@ -2527,19 +2529,42 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
if (ret)
- dev_warn(&cma_dev->device->dev,
- "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
+ goto err_listen;
+ list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
+ return 0;
+err_listen:
+ /* Caller must destroy this after releasing lock */
+ *to_destroy = dev_id_priv;
+ dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret);
+ return ret;
}
-static void cma_listen_on_all(struct rdma_id_private *id_priv)
+static int cma_listen_on_all(struct rdma_id_private *id_priv)
{
+ struct rdma_id_private *to_destroy;
struct cma_device *cma_dev;
+ int ret;
mutex_lock(&lock);
list_add_tail(&id_priv->list, &listen_any_list);
- list_for_each_entry(cma_dev, &dev_list, list)
- cma_listen_on_dev(id_priv, cma_dev);
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
+ if (ret) {
+ /* Prevent racing with cma_process_remove() */
+ if (to_destroy)
+ list_del_init(&to_destroy->list);
+ goto err_listen;
+ }
+ }
mutex_unlock(&lock);
+ return 0;
+
+err_listen:
+ list_del(&id_priv->list);
+ mutex_unlock(&lock);
+ if (to_destroy)
+ rdma_destroy_id(&to_destroy->id);
+ return ret;
}
void rdma_set_service_type(struct rdma_cm_id *id, int tos)
@@ -3692,8 +3717,11 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
ret = -ENOSYS;
goto err;
}
- } else
- cma_listen_on_all(id_priv);
+ } else {
+ ret = cma_listen_on_all(id_priv);
+ if (ret)
+ goto err;
+ }
return 0;
err:
@@ -4773,69 +4801,6 @@ static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
-static int cma_add_one(struct ib_device *device)
-{
- struct cma_device *cma_dev;
- struct rdma_id_private *id_priv;
- unsigned int i;
- unsigned long supported_gids = 0;
- int ret;
-
- cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
- if (!cma_dev)
- return -ENOMEM;
-
- cma_dev->device = device;
- cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
- sizeof(*cma_dev->default_gid_type),
- GFP_KERNEL);
- if (!cma_dev->default_gid_type) {
- ret = -ENOMEM;
- goto free_cma_dev;
- }
-
- cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
- sizeof(*cma_dev->default_roce_tos),
- GFP_KERNEL);
- if (!cma_dev->default_roce_tos) {
- ret = -ENOMEM;
- goto free_gid_type;
- }
-
- rdma_for_each_port (device, i) {
- supported_gids = roce_gid_type_mask_support(device, i);
- WARN_ON(!supported_gids);
- if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
- cma_dev->default_gid_type[i - rdma_start_port(device)] =
- CMA_PREFERRED_ROCE_GID_TYPE;
- else
- cma_dev->default_gid_type[i - rdma_start_port(device)] =
- find_first_bit(&supported_gids, BITS_PER_LONG);
- cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
- }
-
- init_completion(&cma_dev->comp);
- refcount_set(&cma_dev->refcount, 1);
- INIT_LIST_HEAD(&cma_dev->id_list);
- ib_set_client_data(device, &cma_client, cma_dev);
-
- mutex_lock(&lock);
- list_add_tail(&cma_dev->list, &dev_list);
- list_for_each_entry(id_priv, &listen_any_list, list)
- cma_listen_on_dev(id_priv, cma_dev);
- mutex_unlock(&lock);
-
- trace_cm_add_one(device);
- return 0;
-
-free_gid_type:
- kfree(cma_dev->default_gid_type);
-
-free_cma_dev:
- kfree(cma_dev);
- return ret;
-}
-
static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
{
struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
@@ -4898,6 +4863,80 @@ static void cma_process_remove(struct cma_device *cma_dev)
wait_for_completion(&cma_dev->comp);
}
+static int cma_add_one(struct ib_device *device)
+{
+ struct rdma_id_private *to_destroy;
+ struct cma_device *cma_dev;
+ struct rdma_id_private *id_priv;
+ unsigned int i;
+ unsigned long supported_gids = 0;
+ int ret;
+
+ cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL);
+ if (!cma_dev)
+ return -ENOMEM;
+
+ cma_dev->device = device;
+ cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_gid_type),
+ GFP_KERNEL);
+ if (!cma_dev->default_gid_type) {
+ ret = -ENOMEM;
+ goto free_cma_dev;
+ }
+
+ cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_roce_tos),
+ GFP_KERNEL);
+ if (!cma_dev->default_roce_tos) {
+ ret = -ENOMEM;
+ goto free_gid_type;
+ }
+
+ rdma_for_each_port (device, i) {
+ supported_gids = roce_gid_type_mask_support(device, i);
+ WARN_ON(!supported_gids);
+ if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
+ cma_dev->default_gid_type[i - rdma_start_port(device)] =
+ CMA_PREFERRED_ROCE_GID_TYPE;
+ else
+ cma_dev->default_gid_type[i - rdma_start_port(device)] =
+ find_first_bit(&supported_gids, BITS_PER_LONG);
+ cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
+ }
+
+ init_completion(&cma_dev->comp);
+ refcount_set(&cma_dev->refcount, 1);
+ INIT_LIST_HEAD(&cma_dev->id_list);
+ ib_set_client_data(device, &cma_client, cma_dev);
+
+ mutex_lock(&lock);
+ list_add_tail(&cma_dev->list, &dev_list);
+ list_for_each_entry(id_priv, &listen_any_list, list) {
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
+ if (ret)
+ goto free_listen;
+ }
+ mutex_unlock(&lock);
+
+ trace_cm_add_one(device);
+ return 0;
+
+free_listen:
+ list_del(&cma_dev->list);
+ mutex_unlock(&lock);
+
+ /* cma_process_remove() will delete to_destroy */
+ cma_process_remove(cma_dev);
+ kfree(cma_dev->default_roce_tos);
+free_gid_type:
+ kfree(cma_dev->default_gid_type);
+
+free_cma_dev:
+ kfree(cma_dev);
+ return ret;
+}
+
static void cma_remove_one(struct ib_device *device, void *client_data)
{
struct cma_device *cma_dev = client_data;
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 7ec4af2ed87a..7f70e5a7de10 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -115,7 +115,7 @@ static ssize_t default_roce_mode_show(struct config_item *item,
if (gid_type < 0)
return gid_type;
- return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
+ return sysfs_emit(buf, "%s\n", ib_cache_gid_type_str(gid_type));
}
static ssize_t default_roce_mode_store(struct config_item *item,
@@ -157,7 +157,7 @@ static ssize_t default_roce_tos_show(struct config_item *item, char *buf)
tos = cma_get_default_roce_tos(cma_dev, group->port_num);
cma_configfs_params_put(cma_dev);
- return sprintf(buf, "%u\n", tos);
+ return sysfs_emit(buf, "%u\n", tos);
}
static ssize_t default_roce_tos_store(struct config_item *item,
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index e84b0fedaacb..315f7a297eee 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -318,15 +318,12 @@ struct ib_device *ib_device_get_by_index(const struct net *net, u32 index);
void nldev_init(void);
void nldev_exit(void);
-static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
- struct ib_pd *pd,
- struct ib_qp_init_attr *attr,
- struct ib_udata *udata,
- struct ib_uqp_object *uobj)
+static inline struct ib_qp *
+_ib_create_qp(struct ib_device *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata,
+ struct ib_uqp_object *uobj, const char *caller)
{
- enum ib_qp_type qp_type = attr->qp_type;
struct ib_qp *qp;
- bool is_xrc;
if (!dev->ops.create_qp)
return ERR_PTR(-EOPNOTSUPP);
@@ -347,6 +344,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
qp->srq = attr->srq;
qp->rwq_ind_tbl = attr->rwq_ind_tbl;
qp->event_handler = attr->event_handler;
+ qp->port = attr->port_num;
atomic_set(&qp->usecnt, 0);
spin_lock_init(&qp->mr_lock);
@@ -354,16 +352,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
INIT_LIST_HEAD(&qp->sig_mrs);
rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
- /*
- * We don't track XRC QPs for now, because they don't have PD
- * and more importantly they are created internaly by driver,
- * see mlx5 create_dev_resources() as an example.
- */
- is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT;
- if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) {
- rdma_restrack_parent_name(&qp->res, &pd->res);
- rdma_restrack_add(&qp->res);
- }
+ WARN_ONCE(!udata && !caller, "Missing kernel QP owner");
+ rdma_restrack_set_name(&qp->res, udata ? NULL : caller);
+ rdma_restrack_add(&qp->res);
return qp;
}
@@ -411,7 +402,6 @@ void rdma_umap_priv_init(struct rdma_umap_priv *priv,
struct vm_area_struct *vma,
struct rdma_user_mmap_entry *entry);
-void ib_cq_pool_init(struct ib_device *dev);
-void ib_cq_pool_destroy(struct ib_device *dev);
+void ib_cq_pool_cleanup(struct ib_device *dev);
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index e4ff0d3328b6..92745522250e 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -64,8 +64,40 @@ out:
return ret;
}
-static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
- enum rdma_nl_counter_mode mode)
+static void auto_mode_init_counter(struct rdma_counter *counter,
+ const struct ib_qp *qp,
+ enum rdma_nl_counter_mask new_mask)
+{
+ struct auto_mode_param *param = &counter->mode.param;
+
+ counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
+ counter->mode.mask = new_mask;
+
+ if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
+ param->qp_type = qp->qp_type;
+}
+
+static int __rdma_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ int ret;
+
+ if (qp->counter)
+ return -EINVAL;
+
+ if (!qp->device->ops.counter_bind_qp)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&counter->lock);
+ ret = qp->device->ops.counter_bind_qp(counter, qp);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port,
+ struct ib_qp *qp,
+ enum rdma_nl_counter_mode mode)
{
struct rdma_port_counter *port_counter;
struct rdma_counter *counter;
@@ -88,11 +120,22 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
port_counter = &dev->port_data[port].port_counter;
mutex_lock(&port_counter->lock);
- if (mode == RDMA_COUNTER_MODE_MANUAL) {
+ switch (mode) {
+ case RDMA_COUNTER_MODE_MANUAL:
ret = __counter_set_mode(&port_counter->mode,
RDMA_COUNTER_MODE_MANUAL, 0);
- if (ret)
+ if (ret) {
+ mutex_unlock(&port_counter->lock);
goto err_mode;
+ }
+ break;
+ case RDMA_COUNTER_MODE_AUTO:
+ auto_mode_init_counter(counter, qp, port_counter->mode.mask);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ mutex_unlock(&port_counter->lock);
+ goto err_mode;
}
port_counter->num_counters++;
@@ -102,10 +145,15 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port,
kref_init(&counter->kref);
mutex_init(&counter->lock);
+ ret = __rdma_counter_bind_qp(counter, qp);
+ if (ret)
+ goto err_mode;
+
+ rdma_restrack_parent_name(&counter->res, &qp->res);
+ rdma_restrack_add(&counter->res);
return counter;
err_mode:
- mutex_unlock(&port_counter->lock);
kfree(counter->stats);
err_stats:
rdma_restrack_put(&counter->res);
@@ -132,19 +180,6 @@ static void rdma_counter_free(struct rdma_counter *counter)
kfree(counter);
}
-static void auto_mode_init_counter(struct rdma_counter *counter,
- const struct ib_qp *qp,
- enum rdma_nl_counter_mask new_mask)
-{
- struct auto_mode_param *param = &counter->mode.param;
-
- counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
- counter->mode.mask = new_mask;
-
- if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
- param->qp_type = qp->qp_type;
-}
-
static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
enum rdma_nl_counter_mask auto_mask)
{
@@ -161,24 +196,6 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
return match;
}
-static int __rdma_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- int ret;
-
- if (qp->counter)
- return -EINVAL;
-
- if (!qp->device->ops.counter_bind_qp)
- return -EOPNOTSUPP;
-
- mutex_lock(&counter->lock);
- ret = qp->device->ops.counter_bind_qp(counter, qp);
- mutex_unlock(&counter->lock);
-
- return ret;
-}
-
static int __rdma_counter_unbind_qp(struct ib_qp *qp)
{
struct rdma_counter *counter = qp->counter;
@@ -247,13 +264,6 @@ next:
return counter;
}
-static void rdma_counter_res_add(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- rdma_restrack_parent_name(&counter->res, &qp->res);
- rdma_restrack_add(&counter->res);
-}
-
static void counter_release(struct kref *kref)
{
struct rdma_counter *counter;
@@ -275,7 +285,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
struct rdma_counter *counter;
int ret;
- if (!qp->res.valid || rdma_is_kernel_res(&qp->res))
+ if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res))
return 0;
if (!rdma_is_port_valid(dev, port))
@@ -293,19 +303,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
return ret;
}
} else {
- counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO);
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO);
if (!counter)
return -ENOMEM;
-
- auto_mode_init_counter(counter, qp, port_counter->mode.mask);
-
- ret = __rdma_counter_bind_qp(counter, qp);
- if (ret) {
- rdma_counter_free(counter);
- return ret;
- }
-
- rdma_counter_res_add(counter, qp);
}
return 0;
@@ -419,15 +419,6 @@ err:
return NULL;
}
-static int rdma_counter_bind_qp_manual(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- if ((counter->device != qp->device) || (counter->port != qp->port))
- return -EINVAL;
-
- return __rdma_counter_bind_qp(counter, qp);
-}
-
static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
u32 counter_id)
{
@@ -475,7 +466,12 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
goto err_task;
}
- ret = rdma_counter_bind_qp_manual(counter, qp);
+ if ((counter->device != qp->device) || (counter->port != qp->port)) {
+ ret = -EINVAL;
+ goto err_task;
+ }
+
+ ret = __rdma_counter_bind_qp(counter, qp);
if (ret)
goto err_task;
@@ -520,26 +516,18 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
goto err;
}
- counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL);
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL);
if (!counter) {
ret = -ENOMEM;
goto err;
}
- ret = rdma_counter_bind_qp_manual(counter, qp);
- if (ret)
- goto err_bind;
-
if (counter_id)
*counter_id = counter->id;
- rdma_counter_res_add(counter, qp);
-
rdma_restrack_put(&qp->res);
- return ret;
+ return 0;
-err_bind:
- rdma_counter_free(counter);
err:
rdma_restrack_put(&qp->res);
return ret;
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index 12ebacf52958..433b426729d4 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -123,7 +123,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
}
/**
- * ib_process_direct_cq - process a CQ in caller context
+ * ib_process_cq_direct - process a CQ in caller context
* @cq: CQ to process
* @budget: number of CQEs to poll for
*
@@ -197,7 +197,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
}
/**
- * __ib_alloc_cq allocate a completion queue
+ * __ib_alloc_cq - allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
@@ -349,16 +349,7 @@ void ib_free_cq(struct ib_cq *cq)
}
EXPORT_SYMBOL(ib_free_cq);
-void ib_cq_pool_init(struct ib_device *dev)
-{
- unsigned int i;
-
- spin_lock_init(&dev->cq_pools_lock);
- for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++)
- INIT_LIST_HEAD(&dev->cq_pools[i]);
-}
-
-void ib_cq_pool_destroy(struct ib_device *dev)
+void ib_cq_pool_cleanup(struct ib_device *dev)
{
struct ib_cq *cq, *n;
unsigned int i;
@@ -367,6 +358,7 @@ void ib_cq_pool_destroy(struct ib_device *dev)
list_for_each_entry_safe(cq, n, &dev->cq_pools[i],
pool_entry) {
WARN_ON(cq->cqe_used);
+ list_del(&cq->pool_entry);
cq->shared = false;
ib_free_cq(cq);
}
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index a3b1fc84cdca..e96f979e6d52 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -284,6 +284,7 @@ static void ib_device_check_mandatory(struct ib_device *device)
IB_MANDATORY_FUNC(poll_cq),
IB_MANDATORY_FUNC(req_notify_cq),
IB_MANDATORY_FUNC(get_dma_mr),
+ IB_MANDATORY_FUNC(reg_user_mr),
IB_MANDATORY_FUNC(dereg_mr),
IB_MANDATORY_FUNC(get_port_immutable)
};
@@ -569,6 +570,7 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
struct ib_device *_ib_alloc_device(size_t size)
{
struct ib_device *device;
+ unsigned int i;
if (WARN_ON(size < sizeof(struct ib_device)))
return NULL;
@@ -600,6 +602,41 @@ struct ib_device *_ib_alloc_device(size_t size)
init_completion(&device->unreg_completion);
INIT_WORK(&device->unregistration_work, ib_unregister_work);
+ spin_lock_init(&device->cq_pools_lock);
+ for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++)
+ INIT_LIST_HEAD(&device->cq_pools[i]);
+
+ device->uverbs_cmd_mask =
+ BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) |
+ BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) |
+ BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ);
return device;
}
EXPORT_SYMBOL(_ib_alloc_device);
@@ -1177,25 +1214,6 @@ out:
return ret;
}
-static void setup_dma_device(struct ib_device *device,
- struct device *dma_device)
-{
- /*
- * If the caller does not provide a DMA capable device then the IB
- * device will be used. In this case the caller should fully setup the
- * ibdev for DMA. This usually means using dma_virt_ops.
- */
-#ifdef CONFIG_DMA_VIRT_OPS
- if (!dma_device) {
- device->dev.dma_ops = &dma_virt_ops;
- dma_device = &device->dev;
- }
-#endif
- WARN_ON(!dma_device);
- device->dma_device = dma_device;
- WARN_ON(!device->dma_device->dma_parms);
-}
-
/*
* setup_device() allocates memory and sets up data that requires calling the
* device ops, this is the only reason these actions are not done during
@@ -1249,7 +1267,7 @@ static void disable_device(struct ib_device *device)
remove_client_context(device, cid);
}
- ib_cq_pool_destroy(device);
+ ib_cq_pool_cleanup(device);
/* Pairs with refcount_set in enable_device */
ib_device_put(device);
@@ -1294,8 +1312,6 @@ static int enable_device_and_get(struct ib_device *device)
goto out;
}
- ib_cq_pool_init(device);
-
down_read(&clients_rwsem);
xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
ret = add_client_context(device, client);
@@ -1341,7 +1357,14 @@ int ib_register_device(struct ib_device *device, const char *name,
if (ret)
return ret;
- setup_dma_device(device, dma_device);
+ /*
+ * If the caller does not provide a DMA capable device then the IB core
+ * will set up ib_sge and scatterlist structures that stash the kernel
+ * virtual address into the address field.
+ */
+ WARN_ON(dma_device && !dma_device->dma_parms);
+ device->dma_device = dma_device;
+
ret = setup_device(device);
if (ret)
return ret;
@@ -1374,9 +1397,6 @@ int ib_register_device(struct ib_device *device, const char *name,
}
ret = enable_device_and_get(device);
- dev_set_uevent_suppress(&device->dev, false);
- /* Mark for userspace that device is ready */
- kobject_uevent(&device->dev.kobj, KOBJ_ADD);
if (ret) {
void (*dealloc_fn)(struct ib_device *);
@@ -1396,8 +1416,12 @@ int ib_register_device(struct ib_device *device, const char *name,
ib_device_put(device);
__ib_unregister_device(device);
device->ops.dealloc_driver = dealloc_fn;
+ dev_set_uevent_suppress(&device->dev, false);
return ret;
}
+ dev_set_uevent_suppress(&device->dev, false);
+ /* Mark for userspace that device is ready */
+ kobject_uevent(&device->dev.kobj, KOBJ_ADD);
ib_device_put(device);
return 0;
@@ -2576,6 +2600,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, create_qp);
SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
SET_DEVICE_OP(dev_ops, create_srq);
+ SET_DEVICE_OP(dev_ops, create_user_ah);
SET_DEVICE_OP(dev_ops, create_wq);
SET_DEVICE_OP(dev_ops, dealloc_dm);
SET_DEVICE_OP(dev_ops, dealloc_driver);
@@ -2675,6 +2700,21 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
}
EXPORT_SYMBOL(ib_set_device_ops);
+#ifdef CONFIG_INFINIBAND_VIRT_DMA
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ sg_dma_address(s) = (uintptr_t)sg_virt(s);
+ sg_dma_len(s) = s->length;
+ }
+ return nents;
+}
+EXPORT_SYMBOL(ib_dma_virt_map_sg);
+#endif /* CONFIG_INFINIBAND_VIRT_DMA */
+
static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.doit = ib_nl_handle_resolve_resp,
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index 1bf87d9fd0bd..eeb8e6010907 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -141,7 +141,7 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
int iwpm_get_nlmsg_seq(void);
/**
- * iwpm_add_reminfo - Add remote address info of the connecting peer
+ * iwpm_add_remote_info - Add remote address info of the connecting peer
* to the remote info hash table
* @reminfo: The remote info to be added
*/
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index ffe11b03724c..75eafd9208aa 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -137,15 +137,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
} else if (uobj->object) {
ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
attrs);
- if (ret) {
- if (ib_is_destroy_retryable(ret, reason, uobj))
- return ret;
-
- /* Nothing to be done, dangle the memory and move on */
- WARN(true,
- "ib_uverbs: failed to remove uobject id %d, driver err=%d",
- uobj->id, ret);
- }
+ if (ret)
+ /* Nothing to be done, wait till ucontext will clean it */
+ return ret;
uobj->object = NULL;
}
@@ -543,12 +537,7 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
struct uverbs_obj_idr_type, type);
int ret = idr_type->destroy_object(uobj, why, attrs);
- /*
- * We can only fail gracefully if the user requested to destroy the
- * object or when a retry may be called upon an error.
- * In the rest of the cases, just remove whatever you can.
- */
- if (ib_is_destroy_retryable(ret, why, uobj))
+ if (ret)
return ret;
if (why == RDMA_REMOVE_ABORT)
@@ -581,11 +570,8 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
{
const struct uverbs_obj_fd_type *fd_type = container_of(
uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
- int ret = fd_type->destroy_object(uobj, why);
-
- if (ib_is_destroy_retryable(ret, why, uobj))
- return ret;
+ fd_type->destroy_object(uobj, why);
return 0;
}
@@ -609,6 +595,27 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
WARN_ON(old != NULL);
}
+static void swap_idr_uobjects(struct ib_uobject *obj_old,
+ struct ib_uobject *obj_new)
+{
+ struct ib_uverbs_file *ufile = obj_old->ufile;
+ void *old;
+
+ /*
+ * New must be an object that been allocated but not yet committed, this
+ * moves the pre-committed state to obj_old, new still must be comitted.
+ */
+ old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ if (WARN_ON(old != obj_old))
+ return;
+
+ swap(obj_old->id, obj_new->id);
+
+ old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL);
+ WARN_ON(old != NULL);
+}
+
static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
int fd = uobj->id;
@@ -655,6 +662,35 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
}
/*
+ * new_uobj will be assigned to the handle currently used by to_uobj, and
+ * to_uobj will be destroyed.
+ *
+ * Upon return the caller must do:
+ * rdma_alloc_commit_uobject(new_uobj)
+ * uobj_put_destroy(to_uobj)
+ *
+ * to_uobj must have a write get but the put mode switches to destroy once
+ * this is called.
+ */
+void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj,
+ struct uverbs_attr_bundle *attrs)
+{
+ assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE);
+
+ if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object ||
+ !to_uobj->uapi_object->type_class->swap_uobjects))
+ return;
+
+ to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj);
+
+ /*
+ * If this fails then the uobject is still completely valid (though with
+ * a new ID) and we leak it until context close.
+ */
+ uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs);
+}
+
+/*
* This consumes the kref for uobj. It is up to the caller to unwind the HW
* object and anything else connected to uobj before calling this.
*/
@@ -761,6 +797,7 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
.lookup_put = lookup_put_idr_uobject,
.destroy_hw = destroy_hw_idr_uobject,
.remove_handle = remove_handle_idr_uobject,
+ .swap_uobjects = swap_idr_uobjects,
};
EXPORT_SYMBOL(uverbs_idr_class);
@@ -863,11 +900,18 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
* racing with a lookup_get.
*/
WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
+ if (reason == RDMA_REMOVE_DRIVER_FAILURE)
+ obj->object = NULL;
if (!uverbs_destroy_uobject(obj, reason, &attrs))
ret = 0;
else
atomic_set(&obj->usecnt, 0);
}
+
+ if (reason == RDMA_REMOVE_DRIVER_FAILURE) {
+ WARN_ON(!list_empty(&ufile->uobjects));
+ return 0;
+ }
return ret;
}
@@ -889,21 +933,12 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
if (!ufile->ucontext)
goto done;
- ufile->ucontext->cleanup_retryable = true;
- while (!list_empty(&ufile->uobjects))
- if (__uverbs_cleanup_ufile(ufile, reason)) {
- /*
- * No entry was cleaned-up successfully during this
- * iteration. It is a driver bug to fail destruction.
- */
- WARN_ON(!list_empty(&ufile->uobjects));
- break;
- }
-
- ufile->ucontext->cleanup_retryable = false;
- if (!list_empty(&ufile->uobjects))
- __uverbs_cleanup_ufile(ufile, reason);
+ while (!list_empty(&ufile->uobjects) &&
+ !__uverbs_cleanup_ufile(ufile, reason)) {
+ }
+ if (WARN_ON(!list_empty(&ufile->uobjects)))
+ __uverbs_cleanup_ufile(ufile, RDMA_REMOVE_DRIVER_FAILURE);
ufile_destroy_ucontext(ufile, reason);
done:
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 4aeeaaed0f17..e0a41c867002 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -221,19 +221,29 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
struct rdma_restrack_root *rt;
- int ret;
+ int ret = 0;
if (!dev)
return;
+ if (res->no_track)
+ goto out;
+
rt = &dev->res[res->type];
if (res->type == RDMA_RESTRACK_QP) {
/* Special case to ensure that LQPN points to right QP */
struct ib_qp *qp = container_of(res, struct ib_qp, res);
- ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL);
- res->id = ret ? 0 : qp->qp_num;
+ WARN_ONCE(qp->qp_num >> 24 || qp->port >> 8,
+ "QP number 0x%0X and port 0x%0X", qp->qp_num,
+ qp->port);
+ res->id = qp->qp_num;
+ if (qp->qp_type == IB_QPT_SMI || qp->qp_type == IB_QPT_GSI)
+ res->id |= qp->port << 24;
+ ret = xa_insert(&rt->xa, res->id, res, GFP_KERNEL);
+ if (ret)
+ res->id = 0;
} else if (res->type == RDMA_RESTRACK_COUNTER) {
/* Special case to ensure that cntn points to right counter */
struct rdma_counter *counter;
@@ -246,6 +256,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
&rt->next_id, GFP_KERNEL);
}
+out:
if (!ret)
res->valid = true;
}
@@ -318,6 +329,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
return;
}
+ if (res->no_track)
+ goto out;
+
dev = res_to_dev(res);
if (WARN_ON(!dev))
return;
@@ -328,8 +342,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP)
return;
WARN_ON(old != res);
- res->valid = false;
+out:
+ res->valid = false;
rdma_restrack_put(res);
wait_for_completion(&res->comp);
}
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 13f43ab7220b..a96030b784eb 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -285,8 +285,11 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
u32 sg_cnt, enum dma_data_direction dir)
{
- if (is_pci_p2pdma_page(sg_page(sg)))
+ if (is_pci_p2pdma_page(sg_page(sg))) {
+ if (WARN_ON_ONCE(ib_uses_virt_dma(dev)))
+ return 0;
return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
+ }
return ib_dma_map_sg(dev, sg, sg_cnt, dir);
}
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8c930bf1df89..89a831fa1885 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1435,7 +1435,8 @@ enum opa_pr_supported {
};
/**
- * Check if current PR query can be an OPA query.
+ * opa_pr_query_possible - Check if current PR query can be an OPA query.
+ *
* Retuns PR_NOT_SUPPORTED if a path record query is not
* possible, PR_OPA_SUPPORTED if an OPA path record query
* is possible and PR_IB_SUPPORTED if an IB path record
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 914cddea525d..b8abb30f80df 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -165,9 +165,11 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
if (ret)
return ret;
- return sprintf(buf, "%d: %s\n", attr.state,
- attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
- state_name[attr.state] : "UNKNOWN");
+ return sysfs_emit(buf, "%d: %s\n", attr.state,
+ attr.state >= 0 &&
+ attr.state < ARRAY_SIZE(state_name) ?
+ state_name[attr.state] :
+ "UNKNOWN");
}
static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
@@ -180,7 +182,7 @@ static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
if (ret)
return ret;
- return sprintf(buf, "0x%x\n", attr.lid);
+ return sysfs_emit(buf, "0x%x\n", attr.lid);
}
static ssize_t lid_mask_count_show(struct ib_port *p,
@@ -194,7 +196,7 @@ static ssize_t lid_mask_count_show(struct ib_port *p,
if (ret)
return ret;
- return sprintf(buf, "%d\n", attr.lmc);
+ return sysfs_emit(buf, "%d\n", attr.lmc);
}
static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
@@ -207,7 +209,7 @@ static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
if (ret)
return ret;
- return sprintf(buf, "0x%x\n", attr.sm_lid);
+ return sysfs_emit(buf, "0x%x\n", attr.sm_lid);
}
static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
@@ -220,7 +222,7 @@ static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
if (ret)
return ret;
- return sprintf(buf, "%d\n", attr.sm_sl);
+ return sysfs_emit(buf, "%d\n", attr.sm_sl);
}
static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
@@ -233,7 +235,7 @@ static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
if (ret)
return ret;
- return sprintf(buf, "0x%08x\n", attr.port_cap_flags);
+ return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags);
}
static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
@@ -273,6 +275,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
speed = " HDR";
rate = 500;
break;
+ case IB_SPEED_NDR:
+ speed = " NDR";
+ rate = 1000;
+ break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
speed = " SDR";
@@ -284,9 +290,9 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
if (rate < 0)
return -EINVAL;
- return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
- rate / 10, rate % 10 ? ".5" : "",
- ib_width_enum_to_int(attr.active_width), speed);
+ return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10,
+ rate % 10 ? ".5" : "",
+ ib_width_enum_to_int(attr.active_width), speed);
}
static const char *phys_state_to_str(enum ib_port_phys_state phys_state)
@@ -318,21 +324,28 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
if (ret)
return ret;
- return sprintf(buf, "%d: %s\n", attr.phys_state,
- phys_state_to_str(attr.phys_state));
+ return sysfs_emit(buf, "%d: %s\n", attr.phys_state,
+ phys_state_to_str(attr.phys_state));
}
static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
+ const char *output;
+
switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) {
case IB_LINK_LAYER_INFINIBAND:
- return sprintf(buf, "%s\n", "InfiniBand");
+ output = "InfiniBand";
+ break;
case IB_LINK_LAYER_ETHERNET:
- return sprintf(buf, "%s\n", "Ethernet");
+ output = "Ethernet";
+ break;
default:
- return sprintf(buf, "%s\n", "Unknown");
+ output = "Unknown";
+ break;
}
+
+ return sysfs_emit(buf, "%s\n", output);
}
static PORT_ATTR_RO(state);
@@ -358,27 +371,28 @@ static struct attribute *port_default_attrs[] = {
NULL
};
-static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
+static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
{
struct net_device *ndev;
- size_t ret = -EINVAL;
+ int ret = -EINVAL;
rcu_read_lock();
ndev = rcu_dereference(gid_attr->ndev);
if (ndev)
- ret = sprintf(buf, "%s\n", ndev->name);
+ ret = sysfs_emit(buf, "%s\n", ndev->name);
rcu_read_unlock();
return ret;
}
-static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
+static ssize_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
{
- return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
+ return sysfs_emit(buf, "%s\n",
+ ib_cache_gid_type_str(gid_attr->gid_type));
}
static ssize_t _show_port_gid_attr(
struct ib_port *p, struct port_attribute *attr, char *buf,
- size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
+ ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
@@ -401,7 +415,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
const struct ib_gid_attr *gid_attr;
- ssize_t ret;
+ int len;
gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
if (IS_ERR(gid_attr)) {
@@ -416,12 +430,12 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
* space throwing such error on fail to read gid, return zero
* GID as before. This maintains backward compatibility.
*/
- return sprintf(buf, "%pI6\n", zgid.raw);
+ return sysfs_emit(buf, "%pI6\n", zgid.raw);
}
- ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw);
+ len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw);
rdma_put_gid_attr(gid_attr);
- return ret;
+ return len;
}
static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
@@ -443,13 +457,13 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
u16 pkey;
- ssize_t ret;
+ int ret;
ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey);
if (ret)
return ret;
- return sprintf(buf, "0x%04x\n", pkey);
+ return sysfs_emit(buf, "0x%04x\n", pkey);
}
#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
@@ -521,8 +535,9 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
container_of(attr, struct port_table_attribute, attr);
int offset = tab_attr->index & 0xffff;
int width = (tab_attr->index >> 16) & 0xff;
- ssize_t ret;
+ int ret;
u8 data[8];
+ int len;
ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
40 + offset / 8, sizeof(data));
@@ -531,30 +546,27 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
switch (width) {
case 4:
- ret = sprintf(buf, "%u\n", (*data >>
- (4 - (offset % 8))) & 0xf);
+ len = sysfs_emit(buf, "%u\n",
+ (*data >> (4 - (offset % 8))) & 0xf);
break;
case 8:
- ret = sprintf(buf, "%u\n", *data);
+ len = sysfs_emit(buf, "%u\n", *data);
break;
case 16:
- ret = sprintf(buf, "%u\n",
- be16_to_cpup((__be16 *)data));
+ len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data));
break;
case 32:
- ret = sprintf(buf, "%u\n",
- be32_to_cpup((__be32 *)data));
+ len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data));
break;
case 64:
- ret = sprintf(buf, "%llu\n",
- be64_to_cpup((__be64 *)data));
+ len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data));
break;
-
default:
- ret = 0;
+ len = 0;
+ break;
}
- return ret;
+ return len;
}
static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
@@ -815,12 +827,12 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
return 0;
}
-static ssize_t print_hw_stat(struct ib_device *dev, int port_num,
- struct rdma_hw_stats *stats, int index, char *buf)
+static int print_hw_stat(struct ib_device *dev, int port_num,
+ struct rdma_hw_stats *stats, int index, char *buf)
{
u64 v = rdma_counter_get_hwstat_value(dev, port_num, index);
- return sprintf(buf, "%llu\n", stats->value[index] + v);
+ return sysfs_emit(buf, "%llu\n", stats->value[index] + v);
}
static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
@@ -877,7 +889,7 @@ static ssize_t show_stats_lifespan(struct kobject *kobj,
msecs = jiffies_to_msecs(stats->lifespan);
mutex_unlock(&stats->lock);
- return sprintf(buf, "%d\n", msecs);
+ return sysfs_emit(buf, "%d\n", msecs);
}
static ssize_t set_stats_lifespan(struct kobject *kobj,
@@ -1224,21 +1236,34 @@ err_put:
return ret;
}
+static const char *node_type_string(int node_type)
+{
+ switch (node_type) {
+ case RDMA_NODE_IB_CA:
+ return "CA";
+ case RDMA_NODE_IB_SWITCH:
+ return "switch";
+ case RDMA_NODE_IB_ROUTER:
+ return "router";
+ case RDMA_NODE_RNIC:
+ return "RNIC";
+ case RDMA_NODE_USNIC:
+ return "usNIC";
+ case RDMA_NODE_USNIC_UDP:
+ return "usNIC UDP";
+ case RDMA_NODE_UNSPECIFIED:
+ return "unspecified";
+ }
+ return "<unknown>";
+}
+
static ssize_t node_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = rdma_device_to_ibdev(device);
- switch (dev->node_type) {
- case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
- case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
- case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type);
- case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);
- case RDMA_NODE_UNSPECIFIED: return sprintf(buf, "%d: unspecified\n", dev->node_type);
- case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
- case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
- default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
- }
+ return sysfs_emit(buf, "%d: %s\n", dev->node_type,
+ node_type_string(dev->node_type));
}
static DEVICE_ATTR_RO(node_type);
@@ -1246,12 +1271,13 @@ static ssize_t sys_image_guid_show(struct device *device,
struct device_attribute *dev_attr, char *buf)
{
struct ib_device *dev = rdma_device_to_ibdev(device);
+ __be16 *guid = (__be16 *)&dev->attrs.sys_image_guid;
- return sprintf(buf, "%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
+ return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(guid[0]),
+ be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]),
+ be16_to_cpu(guid[3]));
}
static DEVICE_ATTR_RO(sys_image_guid);
@@ -1259,12 +1285,13 @@ static ssize_t node_guid_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = rdma_device_to_ibdev(device);
+ __be16 *node_guid = (__be16 *)&dev->node_guid;
- return sprintf(buf, "%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) &dev->node_guid)[0]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[1]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
+ return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(node_guid[0]),
+ be16_to_cpu(node_guid[1]),
+ be16_to_cpu(node_guid[2]),
+ be16_to_cpu(node_guid[3]));
}
static DEVICE_ATTR_RO(node_guid);
@@ -1273,7 +1300,7 @@ static ssize_t node_desc_show(struct device *device,
{
struct ib_device *dev = rdma_device_to_ibdev(device);
- return sprintf(buf, "%.64s\n", dev->node_desc);
+ return sysfs_emit(buf, "%.64s\n", dev->node_desc);
}
static ssize_t node_desc_store(struct device *device,
@@ -1300,10 +1327,11 @@ static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct ib_device *dev = rdma_device_to_ibdev(device);
+ char version[IB_FW_VERSION_NAME_MAX] = {};
+
+ ib_get_device_fw_str(dev, version);
- ib_get_device_fw_str(dev, buf);
- strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
- return strlen(buf);
+ return sysfs_emit(buf, "%s\n", version);
}
static DEVICE_ATTR_RO(fw_ver);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index ffe2563ad345..7dab9a27a145 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1825,7 +1825,7 @@ static ssize_t show_abi_version(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
+ return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index e9fecbdf391b..7ca4112e3e8f 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -84,6 +84,15 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
dma_addr_t mask;
int i;
+ if (umem->is_odp) {
+ unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
+
+ /* ODP must always be self consistent. */
+ if (!(pgsz_bitmap & page_size))
+ return 0;
+ return page_size;
+ }
+
/* rdma_for_each_block() has a bug if the page size is smaller than the
* page size used to build the umem. For now prevent smaller page sizes
* from being returned.
@@ -220,10 +229,10 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
cur_base += ret * PAGE_SIZE;
npages -= ret;
- sg = __sg_alloc_table_from_pages(
- &umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
- dma_get_max_seg_size(device->dma_device), sg, npages,
- GFP_KERNEL);
+ sg = __sg_alloc_table_from_pages(&umem->sg_head, page_list, ret,
+ 0, ret << PAGE_SHIFT,
+ ib_dma_max_seg_size(device), sg, npages,
+ GFP_KERNEL);
umem->sg_nents = umem->sg_head.nents;
if (IS_ERR(sg)) {
unpin_user_pages_dirty_lock(page_list, ret, 0);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index b0d0b522cc76..19104a675691 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1191,7 +1191,7 @@ static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr,
if (!port)
return -ENODEV;
- return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
+ return sysfs_emit(buf, "%s\n", dev_name(&port->ib_dev->dev));
}
static DEVICE_ATTR_RO(ibdev);
@@ -1203,7 +1203,7 @@ static ssize_t port_show(struct device *dev, struct device_attribute *attr,
if (!port)
return -ENODEV;
- return sprintf(buf, "%d\n", port->port_num);
+ return sysfs_emit(buf, "%d\n", port->port_num);
}
static DEVICE_ATTR_RO(port);
@@ -1222,7 +1222,7 @@ static char *umad_devnode(struct device *dev, umode_t *mode)
static ssize_t abi_version_show(struct class *class,
struct class_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
+ return sysfs_emit(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
}
static CLASS_ATTR_RO(abi_version);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 418d133a8fb0..98a5d36813ff 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -681,8 +681,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
return 0;
ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata);
-
- if (ib_is_destroy_retryable(ret, why, uobject)) {
+ if (ret) {
atomic_inc(&xrcd->usecnt);
return ret;
}
@@ -690,7 +689,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
if (inode)
xrcd_table_delete(dev, inode);
- return ret;
+ return 0;
}
static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
@@ -710,29 +709,20 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
- ret = ib_check_mr_access(cmd.access_flags);
- if (ret)
- return ret;
-
uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
+ ret = ib_check_mr_access(ib_dev, cmd.access_flags);
+ if (ret)
+ goto err_free;
+
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_free;
}
- if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
- if (!(pd->device->attrs.device_cap_flags &
- IB_DEVICE_ON_DEMAND_PAGING)) {
- pr_debug("ODP support not available\n");
- ret = -EINVAL;
- goto err_put;
- }
- }
-
mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags,
&attrs->driver_udata);
@@ -774,23 +764,28 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_rereg_mr cmd;
struct ib_uverbs_rereg_mr_resp resp;
- struct ib_pd *pd = NULL;
struct ib_mr *mr;
- struct ib_pd *old_pd;
int ret;
struct ib_uobject *uobj;
+ struct ib_uobject *new_uobj;
+ struct ib_device *ib_dev;
+ struct ib_pd *orig_pd;
+ struct ib_pd *new_pd;
+ struct ib_mr *new_mr;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+ if (!cmd.flags)
return -EINVAL;
+ if (cmd.flags & ~IB_MR_REREG_SUPPORTED)
+ return -EOPNOTSUPP;
+
if ((cmd.flags & IB_MR_REREG_TRANS) &&
- (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
- (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
- return -EINVAL;
+ (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
+ return -EINVAL;
uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
if (IS_ERR(uobj))
@@ -804,36 +799,74 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
}
if (cmd.flags & IB_MR_REREG_ACCESS) {
- ret = ib_check_mr_access(cmd.access_flags);
+ ret = ib_check_mr_access(mr->device, cmd.access_flags);
if (ret)
goto put_uobjs;
}
+ orig_pd = mr->pd;
if (cmd.flags & IB_MR_REREG_PD) {
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
- attrs);
- if (!pd) {
+ new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
+ attrs);
+ if (!new_pd) {
ret = -EINVAL;
goto put_uobjs;
}
+ } else {
+ new_pd = mr->pd;
}
- old_pd = mr->pd;
- ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start,
- cmd.length, cmd.hca_va,
- cmd.access_flags, pd,
- &attrs->driver_udata);
- if (ret)
+ /*
+ * The driver might create a new HW object as part of the rereg, we need
+ * to have a uobject ready to hold it.
+ */
+ new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
+ if (IS_ERR(new_uobj)) {
+ ret = PTR_ERR(new_uobj);
goto put_uobj_pd;
-
- if (cmd.flags & IB_MR_REREG_PD) {
- atomic_inc(&pd->usecnt);
- mr->pd = pd;
- atomic_dec(&old_pd->usecnt);
}
- if (cmd.flags & IB_MR_REREG_TRANS)
- mr->iova = cmd.hca_va;
+ new_mr = ib_dev->ops.rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length,
+ cmd.hca_va, cmd.access_flags, new_pd,
+ &attrs->driver_udata);
+ if (IS_ERR(new_mr)) {
+ ret = PTR_ERR(new_mr);
+ goto put_new_uobj;
+ }
+ if (new_mr) {
+ new_mr->device = new_pd->device;
+ new_mr->pd = new_pd;
+ new_mr->type = IB_MR_TYPE_USER;
+ new_mr->dm = NULL;
+ new_mr->sig_attrs = NULL;
+ new_mr->uobject = uobj;
+ atomic_inc(&new_pd->usecnt);
+ new_mr->iova = cmd.hca_va;
+ new_uobj->object = new_mr;
+
+ rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&new_mr->res, NULL);
+ rdma_restrack_add(&new_mr->res);
+
+ /*
+ * The new uobj for the new HW object is put into the same spot
+ * in the IDR and the old uobj & HW object is deleted.
+ */
+ rdma_assign_uobject(uobj, new_uobj, attrs);
+ rdma_alloc_commit_uobject(new_uobj, attrs);
+ uobj_put_destroy(uobj);
+ new_uobj = NULL;
+ uobj = NULL;
+ mr = new_mr;
+ } else {
+ if (cmd.flags & IB_MR_REREG_PD) {
+ atomic_dec(&orig_pd->usecnt);
+ mr->pd = new_pd;
+ atomic_inc(&new_pd->usecnt);
+ }
+ if (cmd.flags & IB_MR_REREG_TRANS)
+ mr->iova = cmd.hca_va;
+ }
memset(&resp, 0, sizeof(resp));
resp.lkey = mr->lkey;
@@ -841,12 +874,16 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
ret = uverbs_response(attrs, &resp, sizeof(resp));
+put_new_uobj:
+ if (new_uobj)
+ uobj_alloc_abort(new_uobj, attrs);
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
- uobj_put_obj_read(pd);
+ uobj_put_obj_read(new_pd);
put_uobjs:
- uobj_put_write(uobj);
+ if (uobj)
+ uobj_put_write(uobj);
return ret;
}
@@ -1401,8 +1438,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
- obj);
+ qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj,
+ NULL);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
@@ -1906,8 +1943,7 @@ static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs)
if (ret)
return ret;
- if (cmd.base.attr_mask &
- ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
+ if (cmd.base.attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
return -EOPNOTSUPP;
return modify_qp(attrs, &cmd);
@@ -1929,10 +1965,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
* Last bit is reserved for extending the attr_mask by
* using another field.
*/
- BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31));
-
- if (cmd.base.attr_mask &
- ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
+ if (cmd.base.attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
return -EOPNOTSUPP;
ret = modify_qp(attrs, &cmd);
@@ -3693,13 +3726,13 @@ const struct uapi_definition uverbs_def_write_intf[] = {
ib_uverbs_create_ah,
UAPI_DEF_WRITE_UDATA_IO(
struct ib_uverbs_create_ah,
- struct ib_uverbs_create_ah_resp),
- UAPI_DEF_METHOD_NEEDS_FN(create_ah)),
+ struct ib_uverbs_create_ah_resp)),
DECLARE_UVERBS_WRITE(
IB_USER_VERBS_CMD_DESTROY_AH,
ib_uverbs_destroy_ah,
- UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah),
- UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))),
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah)),
+ UAPI_DEF_OBJ_NEEDS_FN(create_user_ah),
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
DECLARE_UVERBS_OBJECT(
UVERBS_OBJECT_COMP_CHANNEL,
@@ -3753,7 +3786,7 @@ const struct uapi_definition uverbs_def_write_intf[] = {
IB_USER_VERBS_EX_CMD_MODIFY_CQ,
ib_uverbs_ex_modify_cq,
UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq),
- UAPI_DEF_METHOD_NEEDS_FN(create_cq))),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_cq))),
DECLARE_UVERBS_OBJECT(
UVERBS_OBJECT_DEVICE,
@@ -3999,8 +4032,7 @@ const struct uapi_definition uverbs_def_write_intf[] = {
DECLARE_UVERBS_WRITE(
IB_USER_VERBS_CMD_CLOSE_XRCD,
ib_uverbs_close_xrcd,
- UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd),
- UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)),
+ UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd)),
DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP,
ib_uverbs_open_qp,
UAPI_DEF_WRITE_UDATA_IO(
@@ -4010,8 +4042,9 @@ const struct uapi_definition uverbs_def_write_intf[] = {
ib_uverbs_open_xrcd,
UAPI_DEF_WRITE_UDATA_IO(
struct ib_uverbs_open_xrcd,
- struct ib_uverbs_open_xrcd_resp),
- UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))),
+ struct ib_uverbs_open_xrcd_resp)),
+ UAPI_DEF_OBJ_NEEDS_FN(alloc_xrcd),
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)),
{},
};
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 4bb7c642f80c..f173ecd102dc 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -1046,7 +1046,7 @@ static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
- ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
+ ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev));
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
@@ -1065,7 +1065,7 @@ static ssize_t abi_version_show(struct device *device,
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
- ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
+ ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 0658101fca00..13776a66e2e4 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -88,7 +88,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
return -EBUSY;
ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl);
- if (ib_is_destroy_retryable(ret, why, uobject))
+ if (ret)
return ret;
for (i = 0; i < table_size; i++)
@@ -96,7 +96,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
kfree(rwq_ind_tbl);
kfree(ind_tbl);
- return ret;
+ return 0;
}
static int uverbs_free_xrcd(struct ib_uobject *uobject,
@@ -108,9 +108,8 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject,
container_of(uobject, struct ib_uxrcd_object, uobject);
int ret;
- ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&uxrcd->refcnt))
+ return -EBUSY;
mutex_lock(&attrs->ufile->device->xrcd_tree_mutex);
ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs);
@@ -124,11 +123,9 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
struct ib_pd *pd = uobject->object;
- int ret;
- ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&pd->usecnt))
+ return -EBUSY;
return ib_dealloc_pd_user(pd, &attrs->driver_udata);
}
@@ -157,7 +154,7 @@ void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
spin_unlock_irq(&event_queue->lock);
}
-static int
+static void
uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
@@ -166,7 +163,6 @@ uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj,
uobj);
ib_uverbs_free_event_queue(&file->ev_queue);
- return 0;
}
int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c
index 61899eaf1f91..cc24cfdf7aee 100644
--- a/drivers/infiniband/core/uverbs_std_types_async_fd.c
+++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c
@@ -19,8 +19,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)(
return 0;
}
-static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+static void uverbs_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
{
struct ib_uverbs_async_event_file *event_file =
container_of(uobj, struct ib_uverbs_async_event_file, uobj);
@@ -30,7 +30,6 @@ static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj,
if (why == RDMA_REMOVE_DRIVER_REMOVE)
ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL,
NULL, NULL);
- return 0;
}
int uverbs_async_event_release(struct inode *inode, struct file *filp)
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index b3c6c066b601..999da9c79866 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -42,9 +42,8 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
struct ib_counters *counters = uobject->object;
int ret;
- ret = ib_destroy_usecnt(&counters->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&counters->usecnt))
+ return -EBUSY;
ret = counters->device->ops.destroy_counters(counters);
if (ret)
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 8dabd05988b2..370ad7c83f88 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -46,7 +46,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_cq_user(cq, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
+ if (ret)
return ret;
ib_uverbs_release_ucq(
@@ -55,7 +55,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
ev_queue) :
NULL,
ucq);
- return ret;
+ return 0;
}
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index 302f898c5833..9ec6971056fa 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -317,8 +317,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
struct ib_device *ib_dev;
size_t user_entry_size;
ssize_t num_entries;
- size_t max_entries;
- size_t num_bytes;
+ int max_entries;
u32 flags;
int ret;
@@ -336,19 +335,16 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
user_entry_size);
if (max_entries <= 0)
- return -EINVAL;
+ return max_entries ?: -EINVAL;
ucontext = ib_uverbs_get_ucontext(attrs);
if (IS_ERR(ucontext))
return PTR_ERR(ucontext);
ib_dev = ucontext->device;
- if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes))
- return -EINVAL;
-
- entries = uverbs_zalloc(attrs, num_bytes);
- if (!entries)
- return -ENOMEM;
+ entries = uverbs_kcalloc(attrs, max_entries, sizeof(*entries));
+ if (IS_ERR(entries))
+ return PTR_ERR(entries);
num_entries = rdma_query_gid_table(ib_dev, entries, max_entries);
if (num_entries < 0)
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index d5a1de33c2c9..98c522cf86d6 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -39,11 +39,9 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
struct ib_dm *dm = uobject->object;
- int ret;
- ret = ib_destroy_usecnt(&dm->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&dm->usecnt))
+ return -EBUSY;
return dm->device->ops.dealloc_dm(dm, attrs);
}
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index 459cf165b231..d42ed7ff223e 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -39,11 +39,9 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
struct ib_flow_action *action = uobject->object;
- int ret;
- ret = ib_destroy_usecnt(&action->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&action->usecnt))
+ return -EBUSY;
return action->device->ops.destroy_flow_action(action);
}
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 9b22bb553e8b..dd4e76b26c74 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -33,6 +33,7 @@
#include "rdma_core.h"
#include "uverbs.h"
#include <rdma/uverbs_std_types.h>
+#include "restrack.h"
static int uverbs_free_mr(struct ib_uobject *uobject,
enum rdma_remove_reason why,
@@ -114,7 +115,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
if (!(attr.access_flags & IB_ZERO_BASED))
return -EINVAL;
- ret = ib_check_mr_access(attr.access_flags);
+ ret = ib_check_mr_access(ib_dev, attr.access_flags);
if (ret)
return ret;
@@ -134,6 +135,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
atomic_inc(&pd->usecnt);
atomic_inc(&dm->usecnt);
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
uobj->object = mr;
uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c
index 3bf8dcdfe7eb..c00cfb5ed387 100644
--- a/drivers/infiniband/core/uverbs_std_types_qp.c
+++ b/drivers/infiniband/core/uverbs_std_types_qp.c
@@ -32,14 +32,14 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
}
ret = ib_destroy_qp_user(qp, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
+ if (ret)
return ret;
if (uqp->uxrcd)
atomic_dec(&uqp->uxrcd->refcnt);
ib_uverbs_release_uevent(&uqp->uevent);
- return ret;
+ return 0;
}
static int check_creation_flags(enum ib_qp_type qp_type,
@@ -251,8 +251,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)(
if (attr.qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
- obj);
+ qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj,
+ NULL);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
diff --git a/drivers/infiniband/core/uverbs_std_types_srq.c b/drivers/infiniband/core/uverbs_std_types_srq.c
index c0ecbba26bf4..e5513f828bdc 100644
--- a/drivers/infiniband/core/uverbs_std_types_srq.c
+++ b/drivers/infiniband/core/uverbs_std_types_srq.c
@@ -18,7 +18,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_srq_user(srq, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
+ if (ret)
return ret;
if (srq_type == IB_SRQT_XRC) {
@@ -30,7 +30,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
}
ib_uverbs_release_uevent(uevent);
- return ret;
+ return 0;
}
static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)(
diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c
index f2e6a625724a..7ded8339346f 100644
--- a/drivers/infiniband/core/uverbs_std_types_wq.c
+++ b/drivers/infiniband/core/uverbs_std_types_wq.c
@@ -17,11 +17,11 @@ static int uverbs_free_wq(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_wq_user(wq, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
+ if (ret)
return ret;
ib_uverbs_release_uevent(&uwq->uevent);
- return ret;
+ return 0;
}
static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)(
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 5addc8fae3f3..62f5bcb712cf 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -79,10 +79,7 @@ static int uapi_create_write(struct uverbs_api *uapi,
method_elm->is_ex = def->write.is_ex;
method_elm->handler = def->func_write;
- if (def->write.is_ex)
- method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask &
- BIT_ULL(def->write.command_num));
- else
+ if (!def->write.is_ex)
method_elm->disabled = !(ibdev->uverbs_cmd_mask &
BIT_ULL(def->write.command_num));
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 740f8454b6b4..9137a25bb521 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -244,7 +244,7 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
/* Protection domains */
/**
- * ib_alloc_pd - Allocates an unused protection domain.
+ * __ib_alloc_pd - Allocates an unused protection domain.
* @device: The device on which to allocate the protection domain.
* @flags: protection domain flags
* @caller: caller's build-time module name
@@ -516,7 +516,7 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);
- if (!device->ops.create_ah)
+ if (!udata && !device->ops.create_ah)
return ERR_PTR(-EOPNOTSUPP);
ah = rdma_zalloc_drv_obj_gfp(
@@ -533,7 +533,10 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
init_attr.flags = flags;
init_attr.xmit_slave = xmit_slave;
- ret = device->ops.create_ah(ah, &init_attr, udata);
+ if (udata)
+ ret = device->ops.create_user_ah(ah, &init_attr, udata);
+ else
+ ret = device->ops.create_ah(ah, &init_attr, NULL);
if (ret) {
kfree(ah);
return ERR_PTR(ret);
@@ -1188,17 +1191,19 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
}
/**
- * ib_create_qp - Creates a kernel QP associated with the specified protection
+ * ib_create_named_qp - Creates a kernel QP associated with the specified protection
* domain.
* @pd: The protection domain associated with the QP.
* @qp_init_attr: A list of initial attributes required to create the
* QP. If QP creation succeeds, then the attributes are updated to
* the actual capabilities of the created QP.
+ * @caller: caller's build-time module name
*
* NOTE: for user qp use ib_create_qp_user with valid udata!
*/
-struct ib_qp *ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr)
+struct ib_qp *ib_create_named_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ const char *caller)
{
struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
struct ib_qp *qp;
@@ -1223,7 +1228,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
if (qp_init_attr->cap.max_rdma_ctxs)
rdma_rw_init_qp(device, qp_init_attr);
- qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL);
+ qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL, caller);
if (IS_ERR(qp))
return qp;
@@ -1289,7 +1294,7 @@ err:
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(ib_create_qp);
+EXPORT_SYMBOL(ib_create_named_qp);
static const struct {
int valid;
@@ -1662,7 +1667,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp)
qp->qp_type == IB_QPT_XRC_TGT);
}
-/**
+/*
* IB core internal function to perform QP attributes modification.
*/
static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
@@ -1698,8 +1703,10 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
slave = rdma_lag_get_ah_roce_slave(qp->device,
&attr->ah_attr,
GFP_KERNEL);
- if (IS_ERR(slave))
+ if (IS_ERR(slave)) {
+ ret = PTR_ERR(slave);
goto out_av;
+ }
attr->xmit_slave = slave;
}
}
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index cf3db9628397..401bdc9e931e 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1271,10 +1271,12 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
}
qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */
- if (init_attr->create_flags)
+ if (init_attr->create_flags) {
ibdev_dbg(&rdev->ibdev,
"QP create flags 0x%x not supported",
init_attr->create_flags);
+ return -EOPNOTSUPP;
+ }
/* Setup CQs */
if (init_attr->send_cq) {
@@ -1657,8 +1659,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
srq->qplib_srq.max_wqe = entries;
srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge;
- srq->qplib_srq.wqe_size =
- bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge);
+ /* 128 byte wqe size for SRQ . So use max sges */
+ srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges);
srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
srq->srq_limit = srq_init_attr->attr.srq_limit;
srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id;
@@ -1829,6 +1831,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
unsigned int flags;
u8 nw_type;
+ if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
qp->qplib_qp.modify_flags = 0;
if (qp_attr_mask & IB_QP_STATE) {
curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
@@ -2078,6 +2083,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
goto out;
}
qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state);
+ qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state);
qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0;
qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access);
qp_attr->pkey_index = qplib_qp->pkey_index;
@@ -2827,6 +2833,9 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct bnxt_qplib_nq *nq = NULL;
unsigned int nq_alloc_cnt;
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
/* Validate CQ fields */
if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded");
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 04621ba8fa76..fdb8c2478258 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -608,7 +608,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
struct bnxt_re_dev *rdev =
rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
+ return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -618,7 +618,7 @@ static ssize_t hca_type_show(struct device *device,
struct bnxt_re_dev *rdev =
rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
- return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
+ return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc);
}
static DEVICE_ATTR_RO(hca_type);
@@ -646,6 +646,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.create_cq = bnxt_re_create_cq,
.create_qp = bnxt_re_create_qp,
.create_srq = bnxt_re_create_srq,
+ .create_user_ah = bnxt_re_create_ah,
.dealloc_driver = bnxt_re_dealloc_driver,
.dealloc_pd = bnxt_re_dealloc_pd,
.dealloc_ucontext = bnxt_re_dealloc_ucontext,
@@ -701,35 +702,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->dev.parent = &rdev->en_dev->pdev->dev;
ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
- /* User space */
- ibdev->uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_REREG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
- (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
- /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */
-
-
rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 64d44f51db4b..6316179583a6 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -118,7 +118,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
* 128 WQEs needs to be reserved for the HW (8916). Prevent
* reporting the max number
*/
- attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS;
+ attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1;
attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ?
6 : sb->max_sge;
attr->max_cq = le32_to_cpu(sb->max_cq);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 28349ed50885..44c2416588d4 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -1006,6 +1006,9 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
pr_debug("ib_dev %p entries %d\n", ibdev, entries);
if (attr->flags)
+ return -EOPNOTSUPP;
+
+ if (entries < 1 || entries > ibdev->attrs.max_cqe)
return -EINVAL;
if (vector >= rhp->rdev.lldi.nciq)
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index a27899402f59..f85477f3b037 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -983,9 +983,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
-int c4iw_dealloc_mw(struct ib_mw *mw);
void c4iw_dealloc(struct uld_ctx *ctx);
-int c4iw_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
u64 length, u64 virt, int acc,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 42234df896fb..a2c71a1d93d5 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -365,22 +365,6 @@ static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size,
pbl_size, pbl_addr, skb, wr_waitp);
}
-static int allocate_window(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
- struct c4iw_wr_wait *wr_waitp)
-{
- *stag = T4_STAG_UNSET;
- return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0,
- 0UL, 0, 0, 0, 0, NULL, wr_waitp);
-}
-
-static int deallocate_window(struct c4iw_rdev *rdev, u32 stag,
- struct sk_buff *skb,
- struct c4iw_wr_wait *wr_waitp)
-{
- return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0,
- 0, skb, wr_waitp);
-}
-
static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
u32 pbl_size, u32 pbl_addr,
struct c4iw_wr_wait *wr_waitp)
@@ -611,74 +595,6 @@ err_free_mhp:
return ERR_PTR(err);
}
-int c4iw_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
-{
- struct c4iw_mw *mhp = to_c4iw_mw(ibmw);
- struct c4iw_dev *rhp;
- struct c4iw_pd *php;
- u32 mmid;
- u32 stag = 0;
- int ret;
-
- if (ibmw->type != IB_MW_TYPE_1)
- return -EINVAL;
-
- php = to_c4iw_pd(ibmw->pd);
- rhp = php->rhp;
- mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
- if (!mhp->wr_waitp)
- return -ENOMEM;
-
- mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
- if (!mhp->dereg_skb) {
- ret = -ENOMEM;
- goto free_wr_wait;
- }
-
- ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp);
- if (ret)
- goto free_skb;
-
- mhp->rhp = rhp;
- mhp->attr.pdid = php->pdid;
- mhp->attr.type = FW_RI_STAG_MW;
- mhp->attr.stag = stag;
- mmid = (stag) >> 8;
- ibmw->rkey = stag;
- if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
- ret = -ENOMEM;
- goto dealloc_win;
- }
- pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag);
- return 0;
-
-dealloc_win:
- deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb,
- mhp->wr_waitp);
-free_skb:
- kfree_skb(mhp->dereg_skb);
-free_wr_wait:
- c4iw_put_wr_wait(mhp->wr_waitp);
- return ret;
-}
-
-int c4iw_dealloc_mw(struct ib_mw *mw)
-{
- struct c4iw_dev *rhp;
- struct c4iw_mw *mhp;
- u32 mmid;
-
- mhp = to_c4iw_mw(mw);
- rhp = mhp->rhp;
- mmid = (mw->rkey) >> 8;
- xa_erase_irq(&rhp->mrs, mmid);
- deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb,
- mhp->wr_waitp);
- kfree_skb(mhp->dereg_skb);
- c4iw_put_wr_wait(mhp->wr_waitp);
- return 0;
-}
-
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg)
{
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 8138c57a1e43..1f1f856f8715 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -322,8 +322,9 @@ static ssize_t hw_rev_show(struct device *dev,
rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
pr_debug("dev 0x%p\n", dev);
- return sprintf(buf, "%d\n",
- CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
+ return sysfs_emit(
+ buf, "%d\n",
+ CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
static DEVICE_ATTR_RO(hw_rev);
@@ -337,7 +338,7 @@ static ssize_t hca_type_show(struct device *dev,
pr_debug("dev 0x%p\n", dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
- return sprintf(buf, "%s\n", info.driver);
+ return sysfs_emit(buf, "%s\n", info.driver);
}
static DEVICE_ATTR_RO(hca_type);
@@ -348,8 +349,8 @@ static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
pr_debug("dev 0x%p\n", dev);
- return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
- c4iw_dev->rdev.lldi.pdev->device);
+ return sysfs_emit(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
+ c4iw_dev->rdev.lldi.pdev->device);
}
static DEVICE_ATTR_RO(board_id);
@@ -456,13 +457,11 @@ static const struct ib_device_ops c4iw_dev_ops = {
.alloc_hw_stats = c4iw_alloc_stats,
.alloc_mr = c4iw_alloc_mr,
- .alloc_mw = c4iw_alloc_mw,
.alloc_pd = c4iw_allocate_pd,
.alloc_ucontext = c4iw_alloc_ucontext,
.create_cq = c4iw_create_cq,
.create_qp = c4iw_create_qp,
.create_srq = c4iw_create_srq,
- .dealloc_mw = c4iw_dealloc_mw,
.dealloc_pd = c4iw_deallocate_pd,
.dealloc_ucontext = c4iw_dealloc_ucontext,
.dereg_mr = c4iw_dereg_mr,
@@ -533,28 +532,6 @@ void c4iw_register_device(struct work_struct *work)
if (fastreg_support)
dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
dev->ibdev.local_dma_lkey = 0;
- dev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
dev->ibdev.node_type = RDMA_NODE_RNIC;
BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index f20379e4e2ec..a7401398cb34 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2126,7 +2126,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
pr_debug("ib_pd %p\n", pd);
- if (attrs->qp_type != IB_QPT_RC)
+ if (attrs->qp_type != IB_QPT_RC || attrs->create_flags)
return ERR_PTR(-EOPNOTSUPP);
php = to_c4iw_pd(pd);
@@ -2374,6 +2374,9 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
pr_debug("ib_qp %p\n", ibqp);
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
/* iwarp does not support the RTR state */
if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
attr_mask &= ~IB_QP_STATE;
@@ -2680,6 +2683,9 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
int ret;
int wr_len;
+ if (attrs->srq_type != IB_SRQT_BASIC)
+ return -EOPNOTSUPP;
+
pr_debug("%s ib_pd %p\n", __func__, pd);
php = to_c4iw_pd(pd);
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 6faed3a81e08..0f578734bddb 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -245,9 +245,9 @@ static const struct ib_device_ops efa_dev_ops = {
.alloc_hw_stats = efa_alloc_hw_stats,
.alloc_pd = efa_alloc_pd,
.alloc_ucontext = efa_alloc_ucontext,
- .create_ah = efa_create_ah,
.create_cq = efa_create_cq,
.create_qp = efa_create_qp,
+ .create_user_ah = efa_create_ah,
.dealloc_pd = efa_dealloc_pd,
.dealloc_ucontext = efa_dealloc_ucontext,
.dereg_mr = efa_dereg_mr,
@@ -308,27 +308,6 @@ static int efa_ib_device_add(struct efa_dev *dev)
dev->ibdev.num_comp_vectors = 1;
dev->ibdev.dev.parent = &pdev->dev;
- dev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
-
- dev->ibdev.uverbs_ex_cmd_mask =
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
-
ib_set_device_ops(&dev->ibdev, &efa_dev_ops);
err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev);
@@ -405,19 +384,12 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev)
return err;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_width));
if (err) {
- dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err);
+ dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", err);
return err;
}
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
- if (err) {
- dev_err(&pdev->dev,
- "err_pci_set_consistent_dma_mask failed %d\n",
- err);
- return err;
- }
dma_set_max_seg_size(&pdev->dev, UINT_MAX);
return 0;
}
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 4e940fc50bba..479b604e533a 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -917,6 +917,9 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
enum ib_qp_state new_state;
int err;
+ if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
if (udata->inlen &&
!ib_is_udata_cleared(udata, 0, udata->inlen)) {
ibdev_dbg(&dev->ibdev,
@@ -1029,6 +1032,9 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
ibdev_dbg(ibdev,
"cq: requested entries[%u] non-positive or greater than max[%u]\n",
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 356518e17fa6..681bb4e918c9 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -339,6 +339,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
return -EINVAL;
if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
return -EINVAL;
+ break;
default:
break;
}
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 074ec71772d2..5650130e68d4 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -151,7 +151,7 @@ struct hfi1_port_attr {
static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf)
{
- return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off");
+ return sysfs_emit(buf, "%s\n", ppd->cc_prescan ? "on" : "off");
}
static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf,
@@ -296,7 +296,7 @@ static ssize_t sc2vl_attr_show(struct kobject *kobj, struct attribute *attr,
container_of(kobj, struct hfi1_pportdata, sc2vl_kobj);
struct hfi1_devdata *dd = ppd->dd;
- return sprintf(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc));
+ return sysfs_emit(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc));
}
static const struct sysfs_ops hfi1_sc2vl_ops = {
@@ -401,7 +401,7 @@ static ssize_t sl2sc_attr_show(struct kobject *kobj, struct attribute *attr,
container_of(kobj, struct hfi1_pportdata, sl2sc_kobj);
struct hfi1_ibport *ibp = &ppd->ibport_data;
- return sprintf(buf, "%u\n", ibp->sl_to_sc[sattr->sl]);
+ return sysfs_emit(buf, "%u\n", ibp->sl_to_sc[sattr->sl]);
}
static const struct sysfs_ops hfi1_sl2sc_ops = {
@@ -475,7 +475,7 @@ static ssize_t vl2mtu_attr_show(struct kobject *kobj, struct attribute *attr,
container_of(kobj, struct hfi1_pportdata, vl2mtu_kobj);
struct hfi1_devdata *dd = ppd->dd;
- return sprintf(buf, "%u\n", dd->vld[vlattr->vl].mtu);
+ return sysfs_emit(buf, "%u\n", dd->vld[vlattr->vl].mtu);
}
static const struct sysfs_ops hfi1_vl2mtu_ops = {
@@ -500,7 +500,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
struct hfi1_ibdev *dev =
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
- return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
+ return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -510,13 +510,11 @@ static ssize_t board_id_show(struct device *device,
struct hfi1_ibdev *dev =
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
- int ret;
if (!dd->boardname)
- ret = -EINVAL;
- else
- ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
- return ret;
+ return -EINVAL;
+
+ return sysfs_emit(buf, "%s\n", dd->boardname);
}
static DEVICE_ATTR_RO(board_id);
@@ -528,7 +526,7 @@ static ssize_t boardversion_show(struct device *device,
struct hfi1_devdata *dd = dd_from_dev(dev);
/* The string printed here is already newline-terminated. */
- return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
+ return sysfs_emit(buf, "%s", dd->boardversion);
}
static DEVICE_ATTR_RO(boardversion);
@@ -545,9 +543,9 @@ static ssize_t nctxts_show(struct device *device,
* and a receive context, so returning the smaller of the two counts
* give a more accurate picture of total contexts available.
*/
- return scnprintf(buf, PAGE_SIZE, "%u\n",
- min(dd->num_user_contexts,
- (u32)dd->sc_sizes[SC_USER].count));
+ return sysfs_emit(buf, "%u\n",
+ min(dd->num_user_contexts,
+ (u32)dd->sc_sizes[SC_USER].count));
}
static DEVICE_ATTR_RO(nctxts);
@@ -559,7 +557,7 @@ static ssize_t nfreectxts_show(struct device *device,
struct hfi1_devdata *dd = dd_from_dev(dev);
/* Return the number of free user ports (contexts) available. */
- return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
+ return sysfs_emit(buf, "%u\n", dd->freectxts);
}
static DEVICE_ATTR_RO(nfreectxts);
@@ -570,7 +568,8 @@ static ssize_t serial_show(struct device *device,
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
- return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
+ /* dd->serial is already newline terminated in chip.c */
+ return sysfs_emit(buf, "%s", dd->serial);
}
static DEVICE_ATTR_RO(serial);
@@ -598,9 +597,8 @@ static DEVICE_ATTR_WO(chip_reset);
* Convert the reported temperature from an integer (reported in
* units of 0.25C) to a floating point number.
*/
-#define temp2str(temp, buf, size, idx) \
- scnprintf((buf) + (idx), (size) - (idx), "%u.%02u ", \
- ((temp) >> 2), ((temp) & 0x3) * 25)
+#define temp_d(t) ((t) >> 2)
+#define temp_f(t) (((t)&0x3) * 25u)
/*
* Dump tempsense values, in decimal, to ease shell-scripts.
@@ -615,19 +613,17 @@ static ssize_t tempsense_show(struct device *device,
int ret;
ret = hfi1_tempsense_rd(dd, &temp);
- if (!ret) {
- int idx = 0;
-
- idx += temp2str(temp.curr, buf, PAGE_SIZE, idx);
- idx += temp2str(temp.lo_lim, buf, PAGE_SIZE, idx);
- idx += temp2str(temp.hi_lim, buf, PAGE_SIZE, idx);
- idx += temp2str(temp.crit_lim, buf, PAGE_SIZE, idx);
- idx += scnprintf(buf + idx, PAGE_SIZE - idx,
- "%u %u %u\n", temp.triggers & 0x1,
- temp.triggers & 0x2, temp.triggers & 0x4);
- ret = idx;
- }
- return ret;
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%u.%02u %u.%02u %u.%02u %u.%02u %u %u %u\n",
+ temp_d(temp.curr), temp_f(temp.curr),
+ temp_d(temp.lo_lim), temp_f(temp.lo_lim),
+ temp_d(temp.hi_lim), temp_f(temp.hi_lim),
+ temp_d(temp.crit_lim), temp_f(temp.crit_lim),
+ temp.triggers & 0x1,
+ temp.triggers & 0x2,
+ temp.triggers & 0x4);
}
static DEVICE_ATTR_RO(tempsense);
@@ -817,7 +813,7 @@ static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf)
if (vl < 0)
return vl;
- return snprintf(buf, PAGE_SIZE, "%d\n", vl);
+ return sysfs_emit(buf, "%d\n", vl);
}
static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO,
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 73d197e21730..92aa2a9b3b5a 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -2826,6 +2826,7 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
default:
break;
}
+ break;
default:
break;
}
@@ -3005,6 +3006,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
default:
break;
}
+ break;
default:
break;
}
@@ -3221,6 +3223,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
req = wqe_to_tid_req(prev);
if (req->ack_seg != req->total_segs)
goto interlock;
+ break;
default:
break;
}
@@ -3239,9 +3242,11 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
req = wqe_to_tid_req(prev);
if (req->ack_seg != req->total_segs)
goto interlock;
+ break;
default:
break;
}
+ break;
default:
break;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 75b06db60f7c..cc258edec331 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -31,14 +31,11 @@
*/
#include <linux/platform_device.h>
+#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include "hns_roce_device.h"
-#define HNS_ROCE_PORT_NUM_SHIFT 24
-#define HNS_ROCE_VLAN_SL_BIT_MASK 7
-#define HNS_ROCE_VLAN_SL_SHIFT 13
-
static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr)
{
u32 fl = ah_attr->grh.flow_label;
@@ -58,47 +55,41 @@ static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr)
int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device);
- const struct ib_gid_attr *gid_attr;
- struct device *dev = hr_dev->dev;
- struct hns_roce_ah *ah = to_hr_ah(ibah);
struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
- u16 vlan_id = 0xffff;
- bool vlan_en = false;
- int ret;
-
- gid_attr = ah_attr->grh.sgid_attr;
- ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
- if (ret)
- return ret;
-
- /* Get mac address */
- memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device);
+ struct hns_roce_ah *ah = to_hr_ah(ibah);
+ int ret = 0;
- if (vlan_id < VLAN_N_VID) {
- vlan_en = true;
- vlan_id |= (rdma_ah_get_sl(ah_attr) &
- HNS_ROCE_VLAN_SL_BIT_MASK) <<
- HNS_ROCE_VLAN_SL_SHIFT;
- }
+ if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && udata)
+ return -EOPNOTSUPP;
ah->av.port = rdma_ah_get_port_num(ah_attr);
ah->av.gid_index = grh->sgid_index;
- ah->av.vlan_id = vlan_id;
- ah->av.vlan_en = vlan_en;
- dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index,
- ah->av.vlan_id);
if (rdma_ah_get_static_rate(ah_attr))
ah->av.stat_rate = IB_RATE_10_GBPS;
- memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
- ah->av.sl = rdma_ah_get_sl(ah_attr);
+ ah->av.hop_limit = grh->hop_limit;
ah->av.flowlabel = grh->flow_label;
ah->av.udp_sport = get_ah_udp_sport(ah_attr);
+ ah->av.sl = rdma_ah_get_sl(ah_attr);
+ ah->av.tclass = get_tclass(grh);
- return 0;
+ memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
+ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
+
+ /* HIP08 needs to record vlan info in Address Vector */
+ if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) {
+ ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr,
+ &ah->av.vlan_id, NULL);
+ if (ret)
+ return ret;
+
+ ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID;
+ }
+
+ return ret;
}
int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index a6b23dec1adc..4bcaaa0524b1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -159,76 +159,96 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap)
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf)
{
- struct device *dev = hr_dev->dev;
- u32 size = buf->size;
- int i;
+ struct hns_roce_buf_list *trunks;
+ u32 i;
- if (size == 0)
+ if (!buf)
return;
- buf->size = 0;
+ trunks = buf->trunk_list;
+ if (trunks) {
+ buf->trunk_list = NULL;
+ for (i = 0; i < buf->ntrunks; i++)
+ dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift,
+ trunks[i].buf, trunks[i].map);
- if (hns_roce_buf_is_direct(buf)) {
- dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map);
- } else {
- for (i = 0; i < buf->npages; ++i)
- if (buf->page_list[i].buf)
- dma_free_coherent(dev, 1 << buf->page_shift,
- buf->page_list[i].buf,
- buf->page_list[i].map);
- kfree(buf->page_list);
- buf->page_list = NULL;
+ kfree(trunks);
}
+
+ kfree(buf);
}
-int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
- struct hns_roce_buf *buf, u32 page_shift)
+/*
+ * Allocate the dma buffer for storing ROCEE table entries
+ *
+ * @size: required size
+ * @page_shift: the unit size in a continuous dma address range
+ * @flags: HNS_ROCE_BUF_ flags to control the allocation flow.
+ */
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+ u32 page_shift, u32 flags)
{
- struct hns_roce_buf_list *buf_list;
- struct device *dev = hr_dev->dev;
- u32 page_size;
- int i;
+ u32 trunk_size, page_size, alloced_size;
+ struct hns_roce_buf_list *trunks;
+ struct hns_roce_buf *buf;
+ gfp_t gfp_flags;
+ u32 ntrunk, i;
/* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */
- buf->page_shift = max_t(int, HNS_HW_PAGE_SHIFT, page_shift);
+ if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT))
+ return ERR_PTR(-EINVAL);
+
+ gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? GFP_ATOMIC : GFP_KERNEL;
+ buf = kzalloc(sizeof(*buf), gfp_flags);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+ buf->page_shift = page_shift;
page_size = 1 << buf->page_shift;
- buf->npages = DIV_ROUND_UP(size, page_size);
-
- /* required size is not bigger than one trunk size */
- if (size <= max_direct) {
- buf->page_list = NULL;
- buf->direct.buf = dma_alloc_coherent(dev, size,
- &buf->direct.map,
- GFP_KERNEL);
- if (!buf->direct.buf)
- return -ENOMEM;
+
+ /* Calc the trunk size and num by required size and page_shift */
+ if (flags & HNS_ROCE_BUF_DIRECT) {
+ buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE));
+ ntrunk = 1;
} else {
- buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL);
- if (!buf_list)
- return -ENOMEM;
-
- for (i = 0; i < buf->npages; i++) {
- buf_list[i].buf = dma_alloc_coherent(dev, page_size,
- &buf_list[i].map,
- GFP_KERNEL);
- if (!buf_list[i].buf)
- break;
- }
+ buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE));
+ ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift);
+ }
- if (i != buf->npages && i > 0) {
- while (i-- > 0)
- dma_free_coherent(dev, page_size,
- buf_list[i].buf,
- buf_list[i].map);
- kfree(buf_list);
- return -ENOMEM;
- }
- buf->page_list = buf_list;
+ trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags);
+ if (!trunks) {
+ kfree(buf);
+ return ERR_PTR(-ENOMEM);
}
- buf->size = size;
- return 0;
+ trunk_size = 1 << buf->trunk_shift;
+ alloced_size = 0;
+ for (i = 0; i < ntrunk; i++) {
+ trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size,
+ &trunks[i].map, gfp_flags);
+ if (!trunks[i].buf)
+ break;
+
+ alloced_size += trunk_size;
+ }
+
+ buf->ntrunks = i;
+
+ /* In nofail mode, it's only failed when the alloced size is 0 */
+ if ((flags & HNS_ROCE_BUF_NOFAIL) ? i == 0 : i != ntrunk) {
+ for (i = 0; i < buf->ntrunks; i++)
+ dma_free_coherent(hr_dev->dev, trunk_size,
+ trunks[i].buf, trunks[i].map);
+
+ kfree(trunks);
+ kfree(buf);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ buf->npages = DIV_ROUND_UP(alloced_size, page_size);
+ buf->trunk_list = trunks;
+
+ return buf;
}
int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
@@ -240,7 +260,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
end = start + buf_cnt;
if (end > buf->npages) {
dev_err(hr_dev->dev,
- "Failed to check kmem bufs, end %d + %d total %d!\n",
+ "failed to check kmem bufs, end %d + %d total %u!\n",
start, buf_cnt, buf->npages);
return -EINVAL;
}
@@ -262,7 +282,7 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
u64 addr;
if (page_shift < HNS_HW_PAGE_SHIFT) {
- dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n",
+ dev_err(hr_dev->dev, "failed to check umem page shift %u!\n",
page_shift);
return -EINVAL;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 455d533dd7c4..339e3fd98b0b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -36,9 +36,9 @@
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
-#define CMD_POLL_TOKEN 0xffff
-#define CMD_MAX_NUM 32
-#define CMD_TOKEN_MASK 0x1f
+#define CMD_POLL_TOKEN 0xffff
+#define CMD_MAX_NUM 32
+#define CMD_TOKEN_MASK 0x1f
static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param,
u64 out_param, u32 in_modifier,
@@ -60,7 +60,7 @@ static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param,
static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
u64 out_param, unsigned long in_modifier,
u8 op_modifier, u16 op,
- unsigned long timeout)
+ unsigned int timeout)
{
struct device *dev = hr_dev->dev;
int ret;
@@ -78,7 +78,7 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op, unsigned long timeout)
+ u8 op_modifier, u16 op, unsigned int timeout)
{
int ret;
@@ -93,8 +93,8 @@ static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
u64 out_param)
{
- struct hns_roce_cmd_context
- *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask];
+ struct hns_roce_cmd_context *context =
+ &hr_dev->cmd.context[token % hr_dev->cmd.max_cmds];
if (token != context->token)
return;
@@ -108,7 +108,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
u64 out_param, unsigned long in_modifier,
u8 op_modifier, u16 op,
- unsigned long timeout)
+ unsigned int timeout)
{
struct hns_roce_cmdq *cmd = &hr_dev->cmd;
struct hns_roce_cmd_context *context;
@@ -159,13 +159,13 @@ out:
static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op, unsigned long timeout)
+ u8 op_modifier, u16 op, unsigned int timeout)
{
int ret;
down(&hr_dev->cmd.event_sem);
- ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op, timeout);
+ ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, in_modifier,
+ op_modifier, op, timeout);
up(&hr_dev->cmd.event_sem);
return ret;
@@ -173,7 +173,7 @@ static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
unsigned long in_modifier, u8 op_modifier, u16 op,
- unsigned long timeout)
+ unsigned int timeout)
{
int ret;
@@ -231,9 +231,8 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev)
struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
int i;
- hr_cmd->context = kmalloc_array(hr_cmd->max_cmds,
- sizeof(*hr_cmd->context),
- GFP_KERNEL);
+ hr_cmd->context =
+ kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL);
if (!hr_cmd->context)
return -ENOMEM;
@@ -262,8 +261,8 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev)
hr_cmd->use_events = 0;
}
-struct hns_roce_cmd_mailbox
- *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev)
+struct hns_roce_cmd_mailbox *
+hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmd_mailbox *mailbox;
@@ -271,8 +270,8 @@ struct hns_roce_cmd_mailbox
if (!mailbox)
return ERR_PTR(-ENOMEM);
- mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL,
- &mailbox->dma);
+ mailbox->buf =
+ dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, &mailbox->dma);
if (!mailbox->buf) {
kfree(mailbox);
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 1915bacaded0..8025e7f657fa 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -141,10 +141,10 @@ enum {
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
unsigned long in_modifier, u8 op_modifier, u16 op,
- unsigned long timeout);
+ unsigned int timeout);
-struct hns_roce_cmd_mailbox
- *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev);
+struct hns_roce_cmd_mailbox *
+hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev);
void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox);
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index f5669ff8cfeb..5afee04fb02c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -38,21 +38,33 @@
#define roce_raw_write(value, addr) \
__raw_writel((__force u32)cpu_to_le32(value), (addr))
-#define roce_get_field(origin, mask, shift) \
- (((le32_to_cpu(origin)) & (mask)) >> (shift))
+#define roce_get_field(origin, mask, shift) \
+ ((le32_to_cpu(origin) & (mask)) >> (u32)(shift))
#define roce_get_bit(origin, shift) \
roce_get_field((origin), (1ul << (shift)), (shift))
-#define roce_set_field(origin, mask, shift, val) \
- do { \
- (origin) &= ~cpu_to_le32(mask); \
- (origin) |= cpu_to_le32(((u32)(val) << (shift)) & (mask)); \
+#define roce_set_field(origin, mask, shift, val) \
+ do { \
+ (origin) &= ~cpu_to_le32(mask); \
+ (origin) |= cpu_to_le32(((u32)(val) << (u32)(shift)) & (mask)); \
} while (0)
-#define roce_set_bit(origin, shift, val) \
+#define roce_set_bit(origin, shift, val) \
roce_set_field((origin), (1ul << (shift)), (shift), (val))
+#define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l
+
+#define _hr_reg_enable(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ *((__le32 *)_ptr + (field_h) / 32) |= \
+ cpu_to_le32(BIT((field_l) % 32)) + \
+ BUILD_BUG_ON_ZERO((field_h) != (field_l)); \
+ })
+
+#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field)
+
#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 809b22aa5056..8533fc2d8df2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -36,43 +36,42 @@
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
-#include <rdma/hns-abi.h>
#include "hns_roce_common.h"
static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_cmd_mailbox *mailbox;
struct hns_roce_cq_table *cq_table;
- struct ib_device *ibdev = &hr_dev->ib_dev;
u64 mtts[MTT_MIN_COUNT] = { 0 };
dma_addr_t dma_handle;
int ret;
ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts),
&dma_handle);
- if (ret < 1) {
- ibdev_err(ibdev, "Failed to find CQ mtr\n");
+ if (!ret) {
+ ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret);
return -EINVAL;
}
cq_table = &hr_dev->cq_table;
ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc CQ bitmap, err %d\n", ret);
+ ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret);
return ret;
}
/* Get CQC memory HEM(Hardware Entry Memory) table */
ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
if (ret) {
- ibdev_err(ibdev, "Failed to get CQ(0x%lx) context, err %d\n",
+ ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n",
hr_cq->cqn, ret);
goto err_out;
}
ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
if (ret) {
- ibdev_err(ibdev, "Failed to xa_store CQ\n");
+ ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret);
goto err_put;
}
@@ -91,7 +90,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret) {
ibdev_err(ibdev,
- "Failed to send create cmd for CQ(0x%lx), err %d\n",
+ "failed to send create cmd for CQ(0x%lx), ret = %d.\n",
hr_cq->cqn, ret);
goto err_xa;
}
@@ -147,7 +146,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
{
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_buf_attr buf_attr = {};
- int err;
+ int ret;
buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
@@ -155,13 +154,13 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
buf_attr.region_count = 1;
buf_attr.fixed_page = true;
- err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
+ ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT,
udata, addr);
- if (err)
- ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err);
+ if (ret)
+ ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret);
- return err;
+ return ret;
}
static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
@@ -251,14 +250,17 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
u32 cq_entries = attr->cqe;
int ret;
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) {
- ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n",
+ ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n",
cq_entries, hr_dev->caps.max_cqes);
return -EINVAL;
}
if (vector >= hr_dev->caps.num_comp_vectors) {
- ibdev_err(ibdev, "Failed to check CQ vector=%d max=%d\n",
+ ibdev_err(ibdev, "failed to check CQ vector = %d, max = %d.\n",
vector, hr_dev->caps.num_comp_vectors);
return -EINVAL;
}
@@ -274,9 +276,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
if (udata) {
ret = ib_copy_from_udata(&ucmd, udata,
- min(sizeof(ucmd), udata->inlen));
+ min(udata->inlen, sizeof(ucmd)));
if (ret) {
- ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n",
+ ibdev_err(ibdev, "failed to copy CQ udata, ret = %d.\n",
ret);
return ret;
}
@@ -286,19 +288,20 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret);
+ ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
return ret;
}
ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc CQ db, err %d\n", ret);
+ ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret);
goto err_cq_buf;
}
ret = alloc_cqc(hr_dev, hr_cq);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc CQ context, err %d\n", ret);
+ ibdev_err(ibdev,
+ "failed to alloc CQ context, ret = %d.\n", ret);
goto err_cq_db;
}
@@ -313,7 +316,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
if (udata) {
resp.cqn = hr_cq->cqn;
- ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
if (ret)
goto err_cqc;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index bff6abdccfb0..5cb7376ce978 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -95,8 +95,8 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir(
static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir,
struct hns_roce_db *db, int order)
{
- int o;
- int i;
+ unsigned long o;
+ unsigned long i;
for (o = order; o <= 1; ++o) {
i = find_first_bit(pgdir->bits[o], HNS_ROCE_DB_PER_PAGE >> o);
@@ -154,8 +154,8 @@ out:
void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
{
- int o;
- int i;
+ unsigned long o;
+ unsigned long i;
mutex_lock(&hr_dev->pgdir_mutex);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 6d2acff69f98..55d538625e36 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -34,6 +34,7 @@
#define _HNS_ROCE_DEVICE_H
#include <rdma/ib_verbs.h>
+#include <rdma/hns-abi.h>
#define DRV_NAME "hns_roce"
@@ -117,6 +118,8 @@
#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
#define SRQ_DB_REG 0x230
+#define HNS_ROCE_QP_BANK_NUM 8
+
/* The chip implementation of the consumer index is calculated
* according to twice the actual EQ depth
*/
@@ -129,15 +132,6 @@ enum {
SERV_TYPE_UD,
};
-enum {
- HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0),
- HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1),
-};
-
-enum hns_roce_cq_flags {
- HNS_ROCE_CQ_FLAG_RECORD_DB = BIT(0),
-};
-
enum hns_roce_qp_state {
HNS_ROCE_QP_STATE_RST,
HNS_ROCE_QP_STATE_INIT,
@@ -166,7 +160,6 @@ enum hns_roce_event {
/* 0x10 and 0x11 is unused in currently application case */
HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12,
HNS_ROCE_EVENT_TYPE_MB = 0x13,
- HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14,
HNS_ROCE_EVENT_TYPE_FLR = 0x15,
};
@@ -221,6 +214,8 @@ enum {
HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
+ HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
+ HNS_ROCE_CAP_FLAG_STASH = BIT(17),
};
#define HNS_ROCE_DB_TYPE_COUNT 2
@@ -265,9 +260,6 @@ enum {
#define HNS_HW_PAGE_SHIFT 12
#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
-/* The minimum page count for hardware access page directly. */
-#define HNS_HW_DIRECT_PAGE_COUNT 2
-
struct hns_roce_uar {
u64 pfn;
unsigned long index;
@@ -318,7 +310,7 @@ struct hns_roce_hem_table {
};
struct hns_roce_buf_region {
- int offset; /* page offset */
+ u32 offset; /* page offset */
u32 count; /* page count */
int hopnum; /* addressing hop num */
};
@@ -338,10 +330,10 @@ struct hns_roce_buf_attr {
size_t size; /* region size */
int hopnum; /* multi-hop addressing hop num */
} region[HNS_ROCE_MAX_BT_REGION];
- int region_count; /* valid region count */
+ unsigned int region_count; /* valid region count */
unsigned int page_shift; /* buffer page shift */
bool fixed_page; /* decide page shift is fixed-size or maximum size */
- int user_access; /* umem access flag */
+ unsigned int user_access; /* umem access flag */
bool mtt_only; /* only alloc buffer-required MTT memory */
};
@@ -352,7 +344,7 @@ struct hns_roce_hem_cfg {
unsigned int buf_pg_shift; /* buffer page shift */
unsigned int buf_pg_count; /* buffer page count */
struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION];
- int region_count;
+ unsigned int region_count;
};
/* memory translate region */
@@ -400,7 +392,7 @@ struct hns_roce_wq {
u64 *wrid; /* Work request ID */
spinlock_t lock;
u32 wqe_cnt; /* WQE num */
- int max_gs;
+ u32 max_gs;
int offset;
int wqe_shift; /* WQE size */
u32 head;
@@ -419,11 +411,26 @@ struct hns_roce_buf_list {
dma_addr_t map;
};
+/*
+ * %HNS_ROCE_BUF_DIRECT indicates that the all memory must be in a continuous
+ * dma address range.
+ *
+ * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep.
+ *
+ * %HNS_ROCE_BUF_NOFAIL allocation only failed when allocated size is zero, even
+ * the allocated size is smaller than the required size.
+ */
+enum {
+ HNS_ROCE_BUF_DIRECT = BIT(0),
+ HNS_ROCE_BUF_NOSLEEP = BIT(1),
+ HNS_ROCE_BUF_NOFAIL = BIT(2),
+};
+
struct hns_roce_buf {
- struct hns_roce_buf_list direct;
- struct hns_roce_buf_list *page_list;
+ struct hns_roce_buf_list *trunk_list;
+ u32 ntrunks;
u32 npages;
- u32 size;
+ unsigned int trunk_shift;
unsigned int page_shift;
};
@@ -451,8 +458,8 @@ struct hns_roce_db {
} u;
dma_addr_t dma;
void *virt_addr;
- int index;
- int order;
+ unsigned long index;
+ unsigned long order;
};
struct hns_roce_cq {
@@ -500,8 +507,8 @@ struct hns_roce_srq {
u64 *wrid;
struct hns_roce_idx_que idx_que;
spinlock_t lock;
- int head;
- int tail;
+ u16 head;
+ u16 tail;
struct mutex mutex;
void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
};
@@ -510,13 +517,22 @@ struct hns_roce_uar_table {
struct hns_roce_bitmap bitmap;
};
+struct hns_roce_bank {
+ struct ida ida;
+ u32 inuse; /* Number of IDs allocated */
+ u32 min; /* Lowest ID to allocate. */
+ u32 max; /* Highest ID to allocate. */
+ u32 next; /* Next ID to allocate. */
+};
+
struct hns_roce_qp_table {
- struct hns_roce_bitmap bitmap;
struct hns_roce_hem_table qp_table;
struct hns_roce_hem_table irrl_table;
struct hns_roce_hem_table trrl_table;
struct hns_roce_hem_table sccc_table;
struct mutex scc_mutex;
+ struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM];
+ spinlock_t bank_lock;
};
struct hns_roce_cq_table {
@@ -547,7 +563,7 @@ struct hns_roce_av {
u8 dgid[HNS_ROCE_GID_SIZE];
u8 mac[ETH_ALEN];
u16 vlan_id;
- bool vlan_en;
+ u8 vlan_en;
};
struct hns_roce_ah {
@@ -619,10 +635,9 @@ enum {
struct hns_roce_work {
struct hns_roce_dev *hr_dev;
struct work_struct work;
- u32 qpn;
- u32 cqn;
int event_type;
int sub_type;
+ u32 queue_num;
};
struct hns_roce_qp {
@@ -690,28 +705,10 @@ struct hns_roce_aeqe {
__le32 asyn;
union {
struct {
- __le32 qp;
- u32 rsv0;
- u32 rsv1;
- } qp_event;
-
- struct {
- __le32 srq;
- u32 rsv0;
- u32 rsv1;
- } srq_event;
-
- struct {
- __le32 cq;
- u32 rsv0;
- u32 rsv1;
- } cq_event;
-
- struct {
- __le32 ceqe;
+ __le32 num;
u32 rsv0;
u32 rsv1;
- } ce_event;
+ } queue_event;
struct {
__le64 out_param;
@@ -730,11 +727,11 @@ struct hns_roce_eq {
int type_flag; /* Aeq:1 ceq:0 */
int eqn;
u32 entries;
- int log_entries;
+ u32 log_entries;
int eqe_size;
int irq;
int log_page_size;
- int cons_index;
+ u32 cons_index;
struct hns_roce_buf_list *buf_list;
int over_ignore;
int coalesce;
@@ -742,7 +739,7 @@ struct hns_roce_eq {
int hop_num;
struct hns_roce_mtr mtr;
u16 eq_max_cnt;
- int eq_period;
+ u32 eq_period;
int shift;
int event_type;
int sub_type;
@@ -765,8 +762,8 @@ struct hns_roce_caps {
u32 max_sq_inline;
u32 max_rq_sg;
u32 max_extend_sg;
- int num_qps;
- int reserved_qps;
+ u32 num_qps;
+ u32 reserved_qps;
int num_qpc_timer;
int num_cqc_timer;
int num_srqs;
@@ -778,7 +775,7 @@ struct hns_roce_caps {
u32 max_srq_desc_sz;
int max_qp_init_rdma;
int max_qp_dest_rdma;
- int num_cqs;
+ u32 num_cqs;
u32 max_cqes;
u32 min_cqes;
u32 min_wqes;
@@ -787,7 +784,7 @@ struct hns_roce_caps {
int num_aeq_vectors;
int num_comp_vectors;
int num_other_vectors;
- int num_mtpts;
+ u32 num_mtpts;
u32 num_mtt_segs;
u32 num_cqe_segs;
u32 num_srqwqe_segs;
@@ -825,6 +822,7 @@ struct hns_roce_caps {
u32 cqc_timer_bt_num;
u32 mpt_bt_num;
u32 sccc_bt_num;
+ u32 gmv_bt_num;
u32 qpc_ba_pg_sz;
u32 qpc_buf_pg_sz;
u32 qpc_hop_num;
@@ -864,6 +862,11 @@ struct hns_roce_caps {
u32 eqe_ba_pg_sz;
u32 eqe_buf_pg_sz;
u32 eqe_hop_num;
+ u32 gmv_entry_num;
+ u32 gmv_entry_sz;
+ u32 gmv_ba_pg_sz;
+ u32 gmv_buf_pg_sz;
+ u32 gmv_hop_num;
u32 sl_num;
u32 tsq_buf_pg_sz;
u32 tpq_buf_pg_sz;
@@ -898,7 +901,7 @@ struct hns_roce_hw {
int (*post_mbox)(struct hns_roce_dev *hr_dev, u64 in_param,
u64 out_param, u32 in_modifier, u8 op_modifier, u16 op,
u16 token, int event);
- int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout);
+ int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned int timeout);
int (*rst_prc_mbox)(struct hns_roce_dev *hr_dev);
int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
const union ib_gid *gid, const struct ib_gid_attr *attr);
@@ -999,6 +1002,10 @@ struct hns_roce_dev {
struct hns_roce_eq_table eq_table;
struct hns_roce_hem_table qpc_timer_table;
struct hns_roce_hem_table cqc_timer_table;
+ /* GMV is the memory area that the driver allocates for the hardware
+ * to store SGID, SMAC and VLAN information.
+ */
+ struct hns_roce_hem_table gmv_table;
int cmd_mod;
int loop_idc;
@@ -1069,29 +1076,19 @@ static inline struct hns_roce_qp
return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1));
}
-static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf)
+static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf,
+ unsigned int offset)
{
- if (buf->page_list)
- return false;
-
- return true;
+ return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) +
+ (offset & ((1 << buf->trunk_shift) - 1));
}
-static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset)
+static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx)
{
- if (hns_roce_buf_is_direct(buf))
- return (char *)(buf->direct.buf) + (offset & (buf->size - 1));
-
- return (char *)(buf->page_list[offset >> buf->page_shift].buf) +
- (offset & ((1 << buf->page_shift) - 1));
-}
+ unsigned int offset = idx << buf->page_shift;
-static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx)
-{
- if (hns_roce_buf_is_direct(buf))
- return buf->direct.map + ((dma_addr_t)idx << buf->page_shift);
- else
- return buf->page_list[idx].map;
+ return buf->trunk_list[offset >> buf->trunk_shift].map +
+ (offset & ((1 << buf->trunk_shift) - 1));
}
#define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT)
@@ -1132,6 +1129,14 @@ static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift)
return ilog2(to_hr_hem_entries_count(count, buf_shift));
}
+#define DSCP_SHIFT 2
+
+static inline u8 get_tclass(const struct ib_global_route *grh)
+{
+ return grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP ?
+ grh->traffic_class >> DSCP_SHIFT : grh->traffic_class;
+}
+
int hns_roce_init_uar_table(struct hns_roce_dev *dev);
int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
@@ -1155,7 +1160,7 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev,
struct hns_roce_mtr *mtr);
int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- dma_addr_t *pages, int page_cnt);
+ dma_addr_t *pages, unsigned int page_cnt);
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
@@ -1198,9 +1203,10 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
-int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
- struct ib_udata *udata);
+struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
@@ -1215,8 +1221,8 @@ int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int hns_roce_dealloc_mw(struct ib_mw *ibmw);
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf);
-int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
- struct hns_roce_buf *buf, u32 page_shift);
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+ u32 page_shift, u32 flags);
int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
int buf_cnt, int start, struct hns_roce_buf *buf);
@@ -1238,10 +1244,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
-void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n);
-void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n);
-void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n);
-bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n);
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n);
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n);
+bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
struct ib_cq *ib_cq);
enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state);
void hns_roce_lock_cqs(struct hns_roce_cq *send_cq,
@@ -1271,7 +1277,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
-int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
+u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 7487cf3d2c37..edc9d6b98d95 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -75,6 +75,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
case HEM_TYPE_CQC_TIMER:
hop_num = hr_dev->caps.cqc_timer_hop_num;
break;
+ case HEM_TYPE_GMV:
+ hop_num = hr_dev->caps.gmv_hop_num;
+ break;
default:
return false;
}
@@ -183,8 +186,16 @@ static int get_hem_table_config(struct hns_roce_dev *hr_dev,
mhop->ba_l0_num = hr_dev->caps.srqc_bt_num;
mhop->hop_num = hr_dev->caps.srqc_hop_num;
break;
+ case HEM_TYPE_GMV:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.gmv_buf_pg_sz +
+ PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.gmv_ba_pg_sz +
+ PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.gmv_bt_num;
+ mhop->hop_num = hr_dev->caps.gmv_hop_num;
+ break;
default:
- dev_err(dev, "Table %d not support multi-hop addressing!\n",
+ dev_err(dev, "table %u not support multi-hop addressing!\n",
type);
return -EINVAL;
}
@@ -198,9 +209,9 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
{
struct device *dev = hr_dev->dev;
u32 chunk_ba_num;
+ u32 chunk_size;
u32 table_idx;
u32 bt_num;
- u32 chunk_size;
if (get_hem_table_config(hr_dev, mhop, table->type))
return -EINVAL;
@@ -232,8 +243,8 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
mhop->l0_idx = table_idx;
break;
default:
- dev_err(dev, "Table %d not support hop_num = %d!\n",
- table->type, mhop->hop_num);
+ dev_err(dev, "table %u not support hop_num = %u!\n",
+ table->type, mhop->hop_num);
return -EINVAL;
}
if (mhop->l0_idx >= mhop->ba_l0_num)
@@ -332,15 +343,15 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
{
spinlock_t *lock = &hr_dev->bt_cmd_lock;
struct device *dev = hr_dev->dev;
- long end;
- unsigned long flags;
struct hns_roce_hem_iter iter;
void __iomem *bt_cmd;
__le32 bt_cmd_val[2];
__le32 bt_cmd_h = 0;
+ unsigned long flags;
__le32 bt_cmd_l;
- u64 bt_ba;
int ret = 0;
+ u64 bt_ba;
+ long end;
/* Find the HEM(Hardware Entry Memory) entry */
unsigned long i = (obj & (table->num_obj - 1)) /
@@ -438,13 +449,13 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev,
index->buf = l0_idx;
break;
default:
- ibdev_err(ibdev, "Table %d not support mhop.hop_num = %d!\n",
+ ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n",
table->type, mhop->hop_num);
return -EINVAL;
}
if (unlikely(index->buf >= table->num_hem)) {
- ibdev_err(ibdev, "Table %d exceed hem limt idx %llu,max %lu!\n",
+ ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n",
table->type, index->buf, table->num_hem);
return -EINVAL;
}
@@ -640,8 +651,8 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, unsigned long obj)
{
struct device *dev = hr_dev->dev;
- int ret = 0;
unsigned long i;
+ int ret = 0;
if (hns_roce_check_whether_mhop(hr_dev, table->type))
return hns_roce_table_mhop_get(hr_dev, table, obj);
@@ -714,15 +725,15 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
step_idx = hop_num;
if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx))
- ibdev_warn(ibdev, "Clear hop%d HEM failed.\n", hop_num);
+ ibdev_warn(ibdev, "failed to clear hop%u HEM.\n", hop_num);
if (index->inited & HEM_INDEX_L1)
if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1))
- ibdev_warn(ibdev, "Clear HEM step 1 failed.\n");
+ ibdev_warn(ibdev, "failed to clear HEM step 1.\n");
if (index->inited & HEM_INDEX_L0)
if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
- ibdev_warn(ibdev, "Clear HEM step 0 failed.\n");
+ ibdev_warn(ibdev, "failed to clear HEM step 0.\n");
}
}
@@ -789,14 +800,14 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_chunk *chunk;
struct hns_roce_hem_mhop mhop;
struct hns_roce_hem *hem;
- void *addr = NULL;
unsigned long mhop_obj = obj;
unsigned long obj_per_chunk;
unsigned long idx_offset;
int offset, dma_offset;
+ void *addr = NULL;
+ u32 hem_idx = 0;
int length;
int i, j;
- u32 hem_idx = 0;
if (!table->lowmem)
return NULL;
@@ -876,7 +887,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
unsigned long buf_chunk_size;
unsigned long bt_chunk_size;
unsigned long bt_chunk_num;
- unsigned long num_bt_l0 = 0;
+ unsigned long num_bt_l0;
u32 hop_num;
if (get_hem_table_config(hr_dev, &mhop, type))
@@ -966,8 +977,8 @@ static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev,
{
struct hns_roce_hem_mhop mhop;
u32 buf_chunk_size;
- int i;
u64 obj;
+ int i;
if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop))
return;
@@ -1017,7 +1028,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
{
- if (hr_dev->caps.srqc_entry_sz)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->srq_table.table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
@@ -1027,12 +1038,16 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
if (hr_dev->caps.cqc_timer_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->cqc_timer_table);
- if (hr_dev->caps.sccc_sz)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table);
if (hr_dev->caps.trrl_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.trrl_table);
+
+ if (hr_dev->caps.gmv_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->gmv_table);
+
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
@@ -1234,7 +1249,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
}
if (offset < r->offset) {
- dev_err(hr_dev->dev, "invalid offset %d,min %d!\n",
+ dev_err(hr_dev->dev, "invalid offset %d, min %u!\n",
offset, r->offset);
return -EINVAL;
}
@@ -1298,8 +1313,8 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
const struct hns_roce_buf_region *regions,
int region_cnt)
{
- struct roce_hem_item *hem, *temp_hem, *root_hem;
struct list_head temp_list[HNS_ROCE_MAX_BT_REGION];
+ struct roce_hem_item *hem, *temp_hem, *root_hem;
const struct hns_roce_buf_region *r;
struct list_head temp_root;
struct list_head temp_btm;
@@ -1404,8 +1419,8 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
{
const struct hns_roce_buf_region *r;
int ofs, end;
- int ret;
int unit;
+ int ret;
int i;
if (region_cnt > HNS_ROCE_MAX_BT_REGION) {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index b34c940077bb..13fdeb3274e7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -47,6 +47,7 @@ enum {
HEM_TYPE_SCCC,
HEM_TYPE_QPC_TIMER,
HEM_TYPE_CQC_TIMER,
+ HEM_TYPE_GMV,
/* UNMAP HEM */
HEM_TYPE_MTT,
@@ -174,4 +175,4 @@ static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter)
return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
}
-#endif /*_HNS_ROCE_HEM_H*/
+#endif /* _HNS_ROCE_HEM_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 5f4d8a32ed6d..f68585ff8e8a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -239,7 +239,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
break;
}
- /*Ctrl field, ctrl set type: sig, solic, imm, fence */
+ /* Ctrl field, ctrl set type: sig, solic, imm, fence */
/* SO wait for conforming application scenarios */
ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ?
cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) |
@@ -288,7 +288,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
ret = -EINVAL;
*bad_wr = wr;
dev_err(dev, "inline len(1-%d)=%d, illegal",
- ctrl->msg_length,
+ le32_to_cpu(ctrl->msg_length),
hr_dev->caps.max_sq_inline);
goto out;
}
@@ -300,7 +300,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
}
ctrl->flag |= cpu_to_le32(HNS_ROCE_WQE_INLINE);
} else {
- /*sqe num is two */
+ /* sqe num is two */
for (i = 0; i < wr->num_sge; i++)
set_data_seg(dseg + i, wr->sg_list + i);
@@ -353,8 +353,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
unsigned long flags = 0;
unsigned int wqe_idx;
int ret = 0;
- int nreq = 0;
- int i = 0;
+ int nreq;
+ int i;
u32 reg_val;
spin_lock_irqsave(&hr_qp->rq.lock, flags);
@@ -1165,7 +1165,7 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
}
raq->e_raq_buf->map = addr;
- /* Configure raq extended address. 48bit 4K align*/
+ /* Configure raq extended address. 48bit 4K align */
roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12);
/* Configure raq_shift */
@@ -1639,7 +1639,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
}
static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
- unsigned long timeout)
+ unsigned int timeout)
{
u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG;
unsigned long end;
@@ -2062,11 +2062,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0);
}
-static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
-{
- return -EOPNOTSUPP;
-}
-
static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq,
enum ib_cq_notify_flags flags)
{
@@ -2305,7 +2300,7 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
struct hns_roce_qp *cur_qp = NULL;
unsigned long flags;
int npolled;
- int ret = 0;
+ int ret;
spin_lock_irqsave(&hr_cq->lock, flags);
@@ -2765,7 +2760,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
roce_set_field(context->qpc_bytes_16,
QP_CONTEXT_QPC_BYTES_16_QP_NUM_M,
QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn);
-
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
roce_set_field(context->qpc_bytes_4,
QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
@@ -3261,6 +3255,8 @@ static int hns_roce_v1_modify_qp(struct ib_qp *ibqp,
enum ib_qp_state cur_state,
enum ib_qp_state new_state)
{
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state,
@@ -3604,10 +3600,10 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
return 0;
}
-static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not)
+static void set_eq_cons_index_v1(struct hns_roce_eq *eq, u32 req_not)
{
roce_raw_write((eq->cons_index & HNS_ROCE_V1_CONS_IDX_M) |
- (req_not << eq->log_entries), eq->doorbell);
+ (req_not << eq->log_entries), eq->doorbell);
}
static void hns_roce_v1_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
@@ -3687,10 +3683,10 @@ static void hns_roce_v1_qp_err_handle(struct hns_roce_dev *hr_dev,
int phy_port;
int qpn;
- qpn = roce_get_field(aeqe->event.qp_event.qp,
+ qpn = roce_get_field(aeqe->event.queue_event.num,
HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S);
- phy_port = roce_get_field(aeqe->event.qp_event.qp,
+ phy_port = roce_get_field(aeqe->event.queue_event.num,
HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M,
HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S);
if (qpn <= 1)
@@ -3721,9 +3717,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev,
struct device *dev = &hr_dev->pdev->dev;
u32 cqn;
- cqn = roce_get_field(aeqe->event.cq_event.cq,
- HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
- HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S);
+ cqn = roce_get_field(aeqe->event.queue_event.num,
+ HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
+ HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S);
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
@@ -3798,7 +3794,6 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev,
int event_type;
while ((aeqe = next_aeqe_sw_v1(eq))) {
-
/* Make sure we read the AEQ entry after we have checked the
* ownership bit
*/
@@ -3853,12 +3848,6 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev,
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
hns_roce_v1_db_overflow_handle(hr_dev, aeqe);
break;
- case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
- dev_warn(dev, "CEQ 0x%lx overflow.\n",
- roce_get_field(aeqe->event.ce_event.ceqe,
- HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M,
- HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S));
- break;
default:
dev_warn(dev, "Unhandled event %d on EQ %d at idx %u.\n",
event_type, eq->eqn, eq->cons_index);
@@ -3903,7 +3892,6 @@ static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev,
u32 cqn;
while ((ceqe = next_ceqe_sw_v1(eq))) {
-
/* Make sure we read CEQ entry after we have checked the
* ownership bit
*/
@@ -4129,7 +4117,7 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn];
struct device *dev = &hr_dev->pdev->dev;
dma_addr_t tmp_dma_addr;
- u32 eqcuridx_val = 0;
+ u32 eqcuridx_val;
u32 eqconsindx_val;
u32 eqshift_val;
__le32 tmp2 = 0;
@@ -4347,7 +4335,6 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev)
static const struct ib_device_ops hns_roce_v1_dev_ops = {
.destroy_qp = hns_roce_v1_destroy_qp,
- .modify_cq = hns_roce_v1_modify_cq,
.poll_cq = hns_roce_v1_poll_cq,
.post_recv = hns_roce_v1_post_recv,
.post_send = hns_roce_v1_post_send,
@@ -4367,7 +4354,6 @@ static const struct hns_roce_hw hns_roce_hw_v1 = {
.set_mtu = hns_roce_v1_set_mtu,
.write_mtpt = hns_roce_v1_write_mtpt,
.write_cqc = hns_roce_v1_write_cqc,
- .modify_cq = hns_roce_v1_modify_cq,
.clear_hem = hns_roce_v1_clear_hem,
.modify_qp = hns_roce_v1_modify_qp,
.query_qp = hns_roce_v1_query_qp,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index ffd0156080f5..46ab0a321d21 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -419,7 +419,7 @@ struct hns_roce_wqe_data_seg {
struct hns_roce_wqe_raddr_seg {
__le32 rkey;
- __le32 len;/* reserved */
+ __le32 len; /* reserved */
__le64 raddr;
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 0468028ffe39..833e1f259936 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -214,25 +214,20 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
return 0;
}
-static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
- unsigned int *sge_ind, unsigned int valid_num_sge)
+static void set_extend_sge(struct hns_roce_qp *qp, struct ib_sge *sge,
+ unsigned int *sge_ind, unsigned int cnt)
{
struct hns_roce_v2_wqe_data_seg *dseg;
- unsigned int cnt = valid_num_sge;
- struct ib_sge *sge = wr->sg_list;
unsigned int idx = *sge_ind;
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
- cnt -= HNS_ROCE_SGE_IN_WQE;
- sge += HNS_ROCE_SGE_IN_WQE;
- }
-
while (cnt > 0) {
dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
- set_data_seg_v2(dseg, sge);
- idx++;
+ if (likely(sge->length)) {
+ set_data_seg_v2(dseg, sge);
+ idx++;
+ cnt--;
+ }
sge++;
- cnt--;
}
*sge_ind = idx;
@@ -340,7 +335,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
}
}
- set_extend_sge(qp, wr, sge_ind, valid_num_sge);
+ set_extend_sge(qp, wr->sg_list + i, sge_ind,
+ valid_num_sge - HNS_ROCE_SGE_IN_WQE);
}
roce_set_field(rc_sq_wqe->byte_16,
@@ -365,7 +361,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev,
} else if (unlikely(hr_qp->state == IB_QPS_RESET ||
hr_qp->state == IB_QPS_INIT ||
hr_qp->state == IB_QPS_RTR)) {
- ibdev_err(ibdev, "failed to post WQE, QP state %d!\n",
+ ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n",
hr_qp->state);
return -EINVAL;
} else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
@@ -422,19 +418,54 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
return 0;
}
+static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+ struct hns_roce_ah *ah)
+{
+ struct ib_device *ib_dev = ah->ibah.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+
+ roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
+ V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport);
+
+ roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
+ V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit);
+ roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
+ V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass);
+ roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
+ V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel);
+
+ if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL))
+ return -EINVAL;
+
+ roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M,
+ V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl);
+
+ ud_sq_wqe->sgid_index = ah->av.gid_index;
+
+ memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN);
+ memcpy(ud_sq_wqe->dgid, ah->av.dgid, GID_LEN_V2);
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
+
+ roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
+ ah->av.vlan_en);
+ roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M,
+ V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id);
+
+ return 0;
+}
+
static inline int set_ud_wqe(struct hns_roce_qp *qp,
const struct ib_send_wr *wr,
void *wqe, unsigned int *sge_idx,
unsigned int owner_bit)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe;
unsigned int curr_idx = *sge_idx;
- int valid_num_sge;
+ unsigned int valid_num_sge;
u32 msg_len = 0;
- bool loopback;
- u8 *smac;
int ret;
valid_num_sge = calc_wr_sge_num(wr, &msg_len);
@@ -444,38 +475,13 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
if (WARN_ON(ret))
return ret;
- roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M,
- V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]);
- roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M,
- V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]);
- roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M,
- V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]);
- roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M,
- V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]);
- roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_4_M,
- V2_UD_SEND_WQE_BYTE_48_DMAC_4_S, ah->av.mac[4]);
- roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M,
- V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]);
-
- /* MAC loopback */
- smac = (u8 *)hr_dev->dev_addr[qp->port];
- loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0;
-
- roce_set_bit(ud_sq_wqe->byte_40,
- V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback);
-
ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
- /* Set sig attr */
roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S,
- (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
+ !!(wr->send_flags & IB_SEND_SIGNALED));
- /* Set se attr */
roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S,
- (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
-
- roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S,
- owner_bit);
+ !!(wr->send_flags & IB_SEND_SOLICITED));
roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M,
V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn);
@@ -488,36 +494,29 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
curr_idx & (qp->sge.sge_cnt - 1));
- roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
- V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport);
ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
qp->qkey : ud_wr(wr)->remote_qkey);
roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M,
V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn);
- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M,
- V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id);
- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
- V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit);
- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
- V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass);
- roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
- V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel);
- roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M,
- V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl);
- roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M,
- V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port);
-
- roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
- ah->av.vlan_en ? 1 : 0);
- roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
- V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index);
+ ret = fill_ud_av(ud_sq_wqe, ah);
+ if (ret)
+ return ret;
- memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2);
+ set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge);
- set_extend_sge(qp, wr, &curr_idx, valid_num_sge);
+ /*
+ * The pipeline can sequentially post all valid WQEs into WQ buffer,
+ * including new WQEs waiting for the doorbell to update the PI again.
+ * Therefore, the owner bit of WQE MUST be updated after all fields
+ * and extSGEs have been written into DDR instead of cache.
+ */
+ if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ dma_wmb();
*sge_idx = curr_idx;
+ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S,
+ owner_bit);
return 0;
}
@@ -591,9 +590,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S,
(wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
- owner_bit);
-
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
set_atomic_seg(wr, rc_sq_wqe, valid_num_sge);
@@ -601,7 +597,18 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe,
&curr_idx, valid_num_sge);
+ /*
+ * The pipeline can sequentially post all valid WQEs into WQ buffer,
+ * including new WQEs waiting for the doorbell to update the PI again.
+ * Therefore, the owner bit of WQE MUST be updated after all fields
+ * and extSGEs have been written into DDR instead of cache.
+ */
+ if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ dma_wmb();
+
*sge_idx = curr_idx;
+ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
+ owner_bit);
return ret;
}
@@ -649,7 +656,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
unsigned int sge_idx;
unsigned int wqe_idx;
void *wqe = NULL;
- int nreq;
+ u32 nreq;
int ret;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -673,7 +680,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
if (unlikely(wr->num_sge > qp->sq.max_gs)) {
- ibdev_err(ibdev, "num_sge=%d > qp->sq.max_gs=%d\n",
+ ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n",
wr->num_sge, qp->sq.max_gs);
ret = -EINVAL;
*bad_wr = wr;
@@ -686,7 +693,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
/* Corresponding to the QP type, wqe process separately */
- if (ibqp->qp_type == IB_QPT_GSI)
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit);
else if (ibqp->qp_type == IB_QPT_RC)
ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit);
@@ -758,7 +765,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
- ibdev_err(ibdev, "rq:num_sge=%d >= qp->sq.max_gs=%d\n",
+ ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n",
wr->num_sge, hr_qp->rq.max_gs);
ret = -EINVAL;
*bad_wr = wr;
@@ -827,7 +834,7 @@ static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
}
-static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n)
+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n)
{
return hns_roce_buf_offset(idx_que->mtr.kmem,
n << idx_que->entry_shift);
@@ -868,12 +875,12 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
struct hns_roce_v2_wqe_data_seg *dseg;
struct hns_roce_v2_db srq_db;
unsigned long flags;
+ unsigned int ind;
__le32 *srq_idx;
int ret = 0;
int wqe_idx;
void *wqe;
int nreq;
- int ind;
int i;
spin_lock_irqsave(&srq->lock, flags);
@@ -1018,8 +1025,8 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev)
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hnae3_handle *handle = priv->handle;
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
- unsigned long instance_stage; /* the current instance stage */
- unsigned long reset_stage; /* the current reset stage */
+ unsigned long instance_stage; /* the current instance stage */
+ unsigned long reset_stage; /* the current reset stage */
unsigned long reset_cnt;
bool sw_resetting;
bool hw_resetting;
@@ -1118,7 +1125,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type)
roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG,
upper_32_bits(dma));
roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG,
- ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
+ (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0);
roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0);
} else {
@@ -1126,7 +1133,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type)
roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG,
upper_32_bits(dma));
roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG,
- ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
+ (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0);
roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 0);
}
@@ -1573,6 +1580,10 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
PF_RES_DATA_4_PF_SCCC_BT_NUM_M,
PF_RES_DATA_4_PF_SCCC_BT_NUM_S);
+ hr_dev->caps.gmv_bt_num = roce_get_field(req_b->gmv_idx_num,
+ PF_RES_DATA_5_PF_GMV_BT_NUM_M,
+ PF_RES_DATA_5_PF_GMV_BT_NUM_S);
+
return 0;
}
@@ -1896,11 +1907,20 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
caps->qpc_sz = HNS_ROCE_V3_QPC_SZ;
+ caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ;
+ caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ;
+ caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE /
+ caps->gmv_entry_sz);
+ caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->gmv_ba_pg_sz = 0;
+ caps->gmv_buf_pg_sz = 0;
+ caps->gid_table_len[0] = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE /
+ caps->gmv_entry_sz);
}
}
-static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num,
- int *buf_page_size, int *bt_page_size, u32 hem_type)
+static void calc_pg_sz(u32 obj_num, u32 obj_size, u32 hop_num, u32 ctx_bt_num,
+ u32 *buf_page_size, u32 *bt_page_size, u32 hem_type)
{
u64 obj_per_chunk;
u64 bt_chunk_size = PAGE_SIZE;
@@ -1930,8 +1950,8 @@ static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num,
obj_per_chunk = ctx_bt_num * obj_per_chunk_default;
break;
default:
- pr_err("Table %d not support hop_num = %d!\n", hem_type,
- hop_num);
+ pr_err("table %u not support hop_num = %u!\n", hem_type,
+ hop_num);
return;
}
@@ -2122,6 +2142,14 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
caps->qpc_sz = HNS_ROCE_V3_QPC_SZ;
caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ;
+ caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ;
+ caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE /
+ caps->gmv_entry_sz);
+ caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->gmv_ba_pg_sz = 0;
+ caps->gmv_buf_pg_sz = 0;
+ caps->gid_table_len[0] = caps->gmv_bt_num *
+ (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz);
}
calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num,
@@ -2371,10 +2399,10 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev,
u32 buf_chk_sz;
dma_addr_t t;
int func_num = 1;
- int pg_num_a;
- int pg_num_b;
- int pg_num;
- int size;
+ u32 pg_num_a;
+ u32 pg_num_b;
+ u32 pg_num;
+ u32 size;
int i;
switch (type) {
@@ -2423,7 +2451,6 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev,
if (i < (pg_num - 1))
entry[i].blk_ba1_nxt_ptr |=
(i + 1) << HNS_ROCE_LINK_TABLE_NXT_PTR_S;
-
}
link_tbl->npages = pg_num;
link_tbl->pg_sz = buf_chk_sz;
@@ -2465,24 +2492,13 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev,
link_tbl->table.map);
}
-static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
+static int get_hem_table(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_v2_priv *priv = hr_dev->priv;
- int qpc_count, cqc_count;
- int ret, i;
-
- /* TSQ includes SQ doorbell and ack doorbell */
- ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE);
- if (ret) {
- dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret);
- return ret;
- }
-
- ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE);
- if (ret) {
- dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret);
- goto err_tpq_init_failed;
- }
+ unsigned int qpc_count;
+ unsigned int cqc_count;
+ unsigned int gmv_count;
+ int ret;
+ int i;
/* Alloc memory for QPC Timer buffer space chunk */
for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num;
@@ -2506,8 +2522,23 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
}
}
+ /* Alloc memory for GMV(GID/MAC/VLAN) table buffer space chunk */
+ for (gmv_count = 0; gmv_count < hr_dev->caps.gmv_entry_num;
+ gmv_count++) {
+ ret = hns_roce_table_get(hr_dev, &hr_dev->gmv_table, gmv_count);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to get gmv table, ret = %d.\n", ret);
+ goto err_gmv_failed;
+ }
+ }
+
return 0;
+err_gmv_failed:
+ for (i = 0; i < gmv_count; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i);
+
err_cqc_timer_failed:
for (i = 0; i < cqc_count; i++)
hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i);
@@ -2516,6 +2547,34 @@ err_qpc_timer_failed:
for (i = 0; i < qpc_count; i++)
hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i);
+ return ret;
+}
+
+static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ int ret;
+
+ /* TSQ includes SQ doorbell and ack doorbell */
+ ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to init TSQ, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to init TPQ, ret = %d.\n", ret);
+ goto err_tpq_init_failed;
+ }
+
+ ret = get_hem_table(hr_dev);
+ if (ret)
+ goto err_get_hem_table_failed;
+
+ return 0;
+
+err_get_hem_table_failed:
hns_roce_free_link_table(hr_dev, &priv->tpq);
err_tpq_init_failed:
@@ -2539,7 +2598,7 @@ static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev)
struct hns_roce_cmq_desc desc;
struct hns_roce_mbox_status *mb_st =
(struct hns_roce_mbox_status *)desc.data;
- enum hns_roce_cmd_return_status status;
+ int status;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true);
@@ -2610,7 +2669,7 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
}
static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
- unsigned long timeout)
+ unsigned int timeout)
{
struct device *dev = hr_dev->dev;
unsigned long end;
@@ -2637,14 +2696,27 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
return 0;
}
-static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev,
- int gid_index, const union ib_gid *gid,
- enum hns_roce_sgid_type sgid_type)
+static void copy_gid(void *dest, const union ib_gid *gid)
+{
+#define GID_SIZE 4
+ const union ib_gid *src = gid;
+ __le32 (*p)[GID_SIZE] = dest;
+ int i;
+
+ if (!gid)
+ src = &zgid;
+
+ for (i = 0; i < GID_SIZE; i++)
+ (*p)[i] = cpu_to_le32(*(u32 *)&src->raw[i * sizeof(u32)]);
+}
+
+static int config_sgid_table(struct hns_roce_dev *hr_dev,
+ int gid_index, const union ib_gid *gid,
+ enum hns_roce_sgid_type sgid_type)
{
struct hns_roce_cmq_desc desc;
struct hns_roce_cfg_sgid_tb *sgid_tb =
(struct hns_roce_cfg_sgid_tb *)desc.data;
- u32 *p;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
@@ -2653,19 +2725,54 @@ static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev,
roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M,
CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type);
- p = (u32 *)&gid->raw[0];
- sgid_tb->vf_sgid_l = cpu_to_le32(*p);
+ copy_gid(&sgid_tb->vf_sgid_l, gid);
- p = (u32 *)&gid->raw[4];
- sgid_tb->vf_sgid_ml = cpu_to_le32(*p);
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
- p = (u32 *)&gid->raw[8];
- sgid_tb->vf_sgid_mh = cpu_to_le32(*p);
+static int config_gmv_table(struct hns_roce_dev *hr_dev,
+ int gid_index, const union ib_gid *gid,
+ enum hns_roce_sgid_type sgid_type,
+ const struct ib_gid_attr *attr)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cfg_gmv_tb_a *tb_a =
+ (struct hns_roce_cfg_gmv_tb_a *)desc[0].data;
+ struct hns_roce_cfg_gmv_tb_b *tb_b =
+ (struct hns_roce_cfg_gmv_tb_b *)desc[1].data;
- p = (u32 *)&gid->raw[0xc];
- sgid_tb->vf_sgid_h = cpu_to_le32(*p);
+ u16 vlan_id = VLAN_CFI_MASK;
+ u8 mac[ETH_ALEN] = {};
+ int ret;
- return hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (gid) {
+ ret = rdma_read_gid_l2_fields(attr, &vlan_id, mac);
+ if (ret)
+ return ret;
+ }
+
+ hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_CFG_GMV_TBL, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+ hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_CFG_GMV_TBL, false);
+
+ copy_gid(&tb_a->vf_sgid_l, gid);
+
+ roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M,
+ CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type);
+ roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S,
+ vlan_id < VLAN_CFI_MASK);
+ roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M,
+ CFG_GMV_TB_VF_VLAN_ID_S, vlan_id);
+
+ tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac);
+ roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M,
+ CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]);
+
+ roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M,
+ CFG_GMV_TB_SGID_IDX_S, gid_index);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
}
static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port,
@@ -2675,23 +2782,24 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port,
enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1;
int ret;
- if (!gid || !attr)
- return -EINVAL;
-
- if (attr->gid_type == IB_GID_TYPE_ROCE)
- sgid_type = GID_TYPE_FLAG_ROCE_V1;
-
- if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
- if (ipv6_addr_v4mapped((void *)gid))
- sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4;
- else
- sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6;
+ if (gid) {
+ if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ if (ipv6_addr_v4mapped((void *)gid))
+ sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4;
+ else
+ sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6;
+ } else if (attr->gid_type == IB_GID_TYPE_ROCE) {
+ sgid_type = GID_TYPE_FLAG_ROCE_V1;
+ }
}
- ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type);
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ ret = config_gmv_table(hr_dev, gid_index, gid, sgid_type, attr);
+ else
+ ret = config_sgid_table(hr_dev, gid_index, gid, sgid_type);
+
if (ret)
- ibdev_err(&hr_dev->ib_dev,
- "failed to configure sgid table, ret = %d!\n",
+ ibdev_err(&hr_dev->ib_dev, "failed to set gid, ret = %d!\n",
ret);
return ret;
@@ -2959,7 +3067,7 @@ static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size);
}
-static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
+static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, unsigned int n)
{
struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe);
@@ -3060,6 +3168,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size ==
HNS_ROCE_V3_CQE_SIZE ? 1 : 0);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+ hr_reg_enable(cq_context, CQC_STASH);
+
cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
roce_set_field(cq_context->byte_16_hop_addr,
@@ -3303,7 +3414,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
int is_send;
u16 wqe_ctr;
u32 opcode;
- int qpn;
+ u32 qpn;
int ret;
/* Find cqe according to consumer index */
@@ -3572,7 +3683,7 @@ static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type,
break;
default:
dev_warn(hr_dev->dev,
- "Table %d not to be written by mailbox!\n", type);
+ "table %u not to be written by mailbox!\n", type);
return -EINVAL;
}
@@ -3583,9 +3694,25 @@ static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba,
u32 hem_type, int step_idx)
{
struct hns_roce_cmd_mailbox *mailbox;
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cfg_gmv_bt *gmv_bt =
+ (struct hns_roce_cfg_gmv_bt *)desc.data;
int ret;
int op;
+ if (hem_type == HEM_TYPE_GMV) {
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT,
+ false);
+
+ gmv_bt->gmv_ba_l = cpu_to_le32(bt_ba >> HNS_HW_PAGE_SHIFT);
+ gmv_bt->gmv_ba_h = cpu_to_le32(bt_ba >> (HNS_HW_PAGE_SHIFT +
+ 32));
+ gmv_bt->gmv_bt_idx = cpu_to_le32(obj /
+ (HNS_HW_PAGE_SIZE / hr_dev->caps.gmv_entry_sz));
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+ }
+
op = get_op_for_set_hem(hr_dev, hem_type, step_idx);
if (op < 0)
return 0;
@@ -3683,24 +3810,20 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
case HEM_TYPE_CQC:
op = HNS_ROCE_CMD_DESTROY_CQC_BT0;
break;
- case HEM_TYPE_SCCC:
- case HEM_TYPE_QPC_TIMER:
- case HEM_TYPE_CQC_TIMER:
- break;
case HEM_TYPE_SRQC:
op = HNS_ROCE_CMD_DESTROY_SRQC_BT0;
break;
+ case HEM_TYPE_SCCC:
+ case HEM_TYPE_QPC_TIMER:
+ case HEM_TYPE_CQC_TIMER:
+ case HEM_TYPE_GMV:
+ return 0;
default:
- dev_warn(dev, "Table %d not to be destroyed by mailbox!\n",
+ dev_warn(dev, "table %u not to be destroyed by mailbox!\n",
table->type);
return 0;
}
- if (table->type == HEM_TYPE_SCCC ||
- table->type == HEM_TYPE_QPC_TIMER ||
- table->type == HEM_TYPE_CQC_TIMER)
- return 0;
-
op += step_idx;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
@@ -3851,9 +3974,14 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
- hr_qp->access_flags = attr->qp_access_flags;
roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
+
+ if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ)
+ return;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+ hr_reg_enable(&context->ext, QPCEX_STASH);
}
static void modify_qp_init_to_init(struct ib_qp *ibqp,
@@ -3874,51 +4002,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
V2_QPC_BYTE_4_TST_S, 0);
- if (attr_mask & IB_QP_ACCESS_FLAGS) {
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
- 0);
-
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
- !!(attr->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
- 0);
-
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
- !!(attr->qp_access_flags &
- IB_ACCESS_REMOTE_ATOMIC));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
- 0);
- roce_set_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_EXT_ATE_S,
- !!(attr->qp_access_flags &
- IB_ACCESS_REMOTE_ATOMIC));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_EXT_ATE_S, 0);
- } else {
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
- !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
- 0);
-
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
- !!(hr_qp->access_flags & IB_ACCESS_REMOTE_WRITE));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
- 0);
-
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
- !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
- 0);
- roce_set_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_EXT_ATE_S,
- !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_EXT_ATE_S, 0);
- }
-
roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn);
roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
@@ -4328,7 +4411,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask);
if (ret) {
- ibdev_err(ibdev, "failed to config sq buf, ret %d\n", ret);
+ ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret);
return ret;
}
@@ -4421,7 +4504,9 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
IB_GID_TYPE_ROCE_UDP_ENCAP);
}
- if (vlan_id < VLAN_N_VID) {
+ /* Only HIP08 needs to set the vlan_en bits in QPC */
+ if (vlan_id < VLAN_N_VID &&
+ hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
roce_set_bit(context->byte_76_srqn_op_en,
V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
roce_set_bit(qpc_mask->byte_76_srqn_op_en,
@@ -4468,15 +4553,11 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
- if (is_udp)
- roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
- V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2);
- else
- roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
- V2_QPC_BYTE_24_TC_S, grh->traffic_class);
-
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+ V2_QPC_BYTE_24_TC_S, get_tclass(&attr->ah_attr.grh));
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, 0);
+
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
V2_QPC_BYTE_28_FL_S, grh->flow_label);
roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
@@ -4758,6 +4839,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
unsigned long rq_flag = 0;
int ret;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
/*
* In v2 engine, software pass context and context mask to hardware
* when modifying qp. If software need modify some fields in context,
@@ -4818,7 +4902,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
/* SW pass context to HW */
ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp);
if (ret) {
- ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret);
+ ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret);
goto out;
}
@@ -4911,7 +4995,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context);
if (ret) {
- ibdev_err(ibdev, "failed to query QPC, ret = %d\n", ret);
+ ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret);
ret = -EINVAL;
goto out;
}
@@ -5026,13 +5110,15 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
unsigned long flags;
int ret = 0;
- if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) {
+ if ((hr_qp->ibqp.qp_type == IB_QPT_RC ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD) &&
+ hr_qp->state != IB_QPS_RESET) {
/* Modify qp to reset before destroying qp */
ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
hr_qp->state, IB_QPS_RESET);
if (ret)
ibdev_err(ibdev,
- "failed to modify QP to RST, ret = %d\n",
+ "failed to modify QP to RST, ret = %d.\n",
ret);
}
@@ -5071,7 +5157,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
if (ret)
ibdev_err(&hr_dev->ib_dev,
- "failed to destroy QP 0x%06lx, ret = %d\n",
+ "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
hr_qp->qpn, ret);
hns_roce_qp_destroy(hr_dev, hr_qp, udata);
@@ -5094,7 +5180,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret) {
- ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d\n", ret);
+ ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d.\n", ret);
goto out;
}
@@ -5104,7 +5190,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
clr->qpn = cpu_to_le32(hr_qp->qpn);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret) {
- ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d\n", ret);
+ ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d.\n", ret);
goto out;
}
@@ -5353,7 +5439,7 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret)
ibdev_err(&hr_dev->ib_dev,
- "failed to process cmd when modifying CQ, ret = %d\n",
+ "failed to process cmd when modifying CQ, ret = %d.\n",
ret);
return ret;
@@ -5364,8 +5450,6 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
struct hns_roce_work *irq_work =
container_of(work, struct hns_roce_work, work);
struct ib_device *ibdev = &irq_work->hr_dev->ib_dev;
- u32 qpn = irq_work->qpn;
- u32 cqn = irq_work->cqn;
switch (irq_work->event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -5381,15 +5465,15 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
break;
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
ibdev_err(ibdev, "Local work queue 0x%x catast error, sub_event type is: %d\n",
- qpn, irq_work->sub_type);
+ irq_work->queue_num, irq_work->sub_type);
break;
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
ibdev_err(ibdev, "Invalid request local work queue 0x%x error.\n",
- qpn);
+ irq_work->queue_num);
break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
ibdev_err(ibdev, "Local access violation work queue 0x%x error, sub_event type is: %d\n",
- qpn, irq_work->sub_type);
+ irq_work->queue_num, irq_work->sub_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
ibdev_warn(ibdev, "SRQ limit reach.\n");
@@ -5401,10 +5485,10 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
ibdev_err(ibdev, "SRQ catas error.\n");
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- ibdev_err(ibdev, "CQ 0x%x access err.\n", cqn);
+ ibdev_err(ibdev, "CQ 0x%x access err.\n", irq_work->queue_num);
break;
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- ibdev_warn(ibdev, "CQ 0x%x overflow\n", cqn);
+ ibdev_warn(ibdev, "CQ 0x%x overflow\n", irq_work->queue_num);
break;
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
ibdev_warn(ibdev, "DB overflow.\n");
@@ -5420,8 +5504,7 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
}
static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq,
- u32 qpn, u32 cqn)
+ struct hns_roce_eq *eq, u32 queue_num)
{
struct hns_roce_work *irq_work;
@@ -5431,10 +5514,9 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
irq_work->hr_dev = hr_dev;
- irq_work->qpn = qpn;
- irq_work->cqn = cqn;
irq_work->event_type = eq->event_type;
irq_work->sub_type = eq->sub_type;
+ irq_work->queue_num = queue_num;
queue_work(hr_dev->irq_workq, &(irq_work->work));
}
@@ -5486,10 +5568,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
int aeqe_found = 0;
int event_type;
+ u32 queue_num;
int sub_type;
- u32 srqn;
- u32 qpn;
- u32 cqn;
while (aeqe) {
/* Make sure we read AEQ entry after we have checked the
@@ -5503,15 +5583,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
sub_type = roce_get_field(aeqe->asyn,
HNS_ROCE_V2_AEQE_SUB_TYPE_M,
HNS_ROCE_V2_AEQE_SUB_TYPE_S);
- qpn = roce_get_field(aeqe->event.qp_event.qp,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
- cqn = roce_get_field(aeqe->event.cq_event.cq,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
- srqn = roce_get_field(aeqe->event.srq_event.srq,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+ queue_num = roce_get_field(aeqe->event.queue_event.num,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -5522,17 +5596,15 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_qp_event(hr_dev, qpn, event_type);
+ hns_roce_qp_event(hr_dev, queue_num, event_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
- hns_roce_srq_event(hr_dev, srqn, event_type);
+ hns_roce_srq_event(hr_dev, queue_num, event_type);
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- hns_roce_cq_event(hr_dev, cqn, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
+ hns_roce_cq_event(hr_dev, queue_num, event_type);
break;
case HNS_ROCE_EVENT_TYPE_MB:
hns_roce_cmd_event(hr_dev,
@@ -5540,8 +5612,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
aeqe->event.cmd.status,
le64_to_cpu(aeqe->event.cmd.out_param));
break;
- case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
- break;
+ case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
case HNS_ROCE_EVENT_TYPE_FLR:
break;
default:
@@ -5558,7 +5629,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
if (eq->cons_index > (2 * eq->entries - 1))
eq->cons_index = 0;
- hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
+ hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
aeqe = next_aeqe_sw_v2(eq);
}
@@ -6193,6 +6264,7 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), 0},
/* required last entry */
{0, }
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index be7f2fe1e883..bdaccf86460d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -44,6 +44,7 @@
#define HNS_ROCE_VF_SMAC_NUM 32
#define HNS_ROCE_VF_SGID_NUM 32
#define HNS_ROCE_VF_SL_NUM 8
+#define HNS_ROCE_VF_GMV_BT_NUM 256
#define HNS_ROCE_V2_MAX_QP_NUM 0x100000
#define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200
@@ -89,6 +90,7 @@
#define HNS_ROCE_V2_SCCC_SZ 32
#define HNS_ROCE_V3_SCCC_SZ 64
+#define HNS_ROCE_V3_GMV_ENTRY_SZ 32
#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
@@ -241,6 +243,8 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_CLR_SCCC = 0x8509,
HNS_ROCE_OPC_QUERY_SCCC = 0x850a,
HNS_ROCE_OPC_RESET_SCCC = 0x850b,
+ HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f,
+ HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
HNS_SWITCH_PARAMETER_CFG = 0x1033,
};
@@ -263,23 +267,24 @@ enum hns_roce_sgid_type {
};
struct hns_roce_v2_cq_context {
- __le32 byte_4_pg_ceqn;
- __le32 byte_8_cqn;
- __le32 cqe_cur_blk_addr;
- __le32 byte_16_hop_addr;
- __le32 cqe_nxt_blk_addr;
- __le32 byte_24_pgsz_addr;
- __le32 byte_28_cq_pi;
- __le32 byte_32_cq_ci;
- __le32 cqe_ba;
- __le32 byte_40_cqe_ba;
- __le32 byte_44_db_record;
- __le32 db_record_addr;
- __le32 byte_52_cqe_cnt;
- __le32 byte_56_cqe_period_maxcnt;
- __le32 cqe_report_timer;
- __le32 byte_64_se_cqe_idx;
+ __le32 byte_4_pg_ceqn;
+ __le32 byte_8_cqn;
+ __le32 cqe_cur_blk_addr;
+ __le32 byte_16_hop_addr;
+ __le32 cqe_nxt_blk_addr;
+ __le32 byte_24_pgsz_addr;
+ __le32 byte_28_cq_pi;
+ __le32 byte_32_cq_ci;
+ __le32 cqe_ba;
+ __le32 byte_40_cqe_ba;
+ __le32 byte_44_db_record;
+ __le32 db_record_addr;
+ __le32 byte_52_cqe_cnt;
+ __le32 byte_56_cqe_period_maxcnt;
+ __le32 cqe_report_timer;
+ __le32 byte_64_se_cqe_idx;
};
+
#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
@@ -356,6 +361,10 @@ struct hns_roce_v2_cq_context {
#define V2_CQC_BYTE_64_SE_CQE_IDX_S 0
#define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0)
+#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l)
+
+#define CQC_STASH CQC_FIELD_LOC(63, 63)
+
struct hns_roce_srq_context {
__le32 byte_4_srqn_srqst;
__le32 byte_8_limit_wl;
@@ -440,7 +449,7 @@ struct hns_roce_srq_context {
#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1
#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1)
-enum{
+enum {
V2_MPT_ST_VALID = 0x1,
V2_MPT_ST_FREE = 0x2,
};
@@ -457,68 +466,72 @@ enum hns_roce_v2_qp_state {
HNS_ROCE_QP_NUM_ST
};
+struct hns_roce_v2_qp_context_ex {
+ __le32 data[64];
+};
struct hns_roce_v2_qp_context {
- __le32 byte_4_sqpn_tst;
- __le32 wqe_sge_ba;
- __le32 byte_12_sq_hop;
- __le32 byte_16_buf_ba_pg_sz;
- __le32 byte_20_smac_sgid_idx;
- __le32 byte_24_mtu_tc;
- __le32 byte_28_at_fl;
- u8 dgid[GID_LEN_V2];
- __le32 dmac;
- __le32 byte_52_udpspn_dmac;
- __le32 byte_56_dqpn_err;
- __le32 byte_60_qpst_tempid;
- __le32 qkey_xrcd;
- __le32 byte_68_rq_db;
- __le32 rq_db_record_addr;
- __le32 byte_76_srqn_op_en;
- __le32 byte_80_rnr_rx_cqn;
- __le32 byte_84_rq_ci_pi;
- __le32 rq_cur_blk_addr;
- __le32 byte_92_srq_info;
- __le32 byte_96_rx_reqmsn;
- __le32 rq_nxt_blk_addr;
- __le32 byte_104_rq_sge;
- __le32 byte_108_rx_reqepsn;
- __le32 rq_rnr_timer;
- __le32 rx_msg_len;
- __le32 rx_rkey_pkt_info;
- __le64 rx_va;
- __le32 byte_132_trrl;
- __le32 trrl_ba;
- __le32 byte_140_raq;
- __le32 byte_144_raq;
- __le32 byte_148_raq;
- __le32 byte_152_raq;
- __le32 byte_156_raq;
- __le32 byte_160_sq_ci_pi;
- __le32 sq_cur_blk_addr;
- __le32 byte_168_irrl_idx;
- __le32 byte_172_sq_psn;
- __le32 byte_176_msg_pktn;
- __le32 sq_cur_sge_blk_addr;
- __le32 byte_184_irrl_idx;
- __le32 cur_sge_offset;
- __le32 byte_192_ext_sge;
- __le32 byte_196_sq_psn;
- __le32 byte_200_sq_max;
- __le32 irrl_ba;
- __le32 byte_208_irrl;
- __le32 byte_212_lsn;
- __le32 sq_timer;
- __le32 byte_220_retry_psn_msn;
- __le32 byte_224_retry_msg;
- __le32 rx_sq_cur_blk_addr;
- __le32 byte_232_irrl_sge;
- __le32 irrl_cur_sge_offset;
- __le32 byte_240_irrl_tail;
- __le32 byte_244_rnr_rxack;
- __le32 byte_248_ack_psn;
- __le32 byte_252_err_txcqn;
- __le32 byte_256_sqflush_rqcqe;
- __le32 ext[64];
+ __le32 byte_4_sqpn_tst;
+ __le32 wqe_sge_ba;
+ __le32 byte_12_sq_hop;
+ __le32 byte_16_buf_ba_pg_sz;
+ __le32 byte_20_smac_sgid_idx;
+ __le32 byte_24_mtu_tc;
+ __le32 byte_28_at_fl;
+ u8 dgid[GID_LEN_V2];
+ __le32 dmac;
+ __le32 byte_52_udpspn_dmac;
+ __le32 byte_56_dqpn_err;
+ __le32 byte_60_qpst_tempid;
+ __le32 qkey_xrcd;
+ __le32 byte_68_rq_db;
+ __le32 rq_db_record_addr;
+ __le32 byte_76_srqn_op_en;
+ __le32 byte_80_rnr_rx_cqn;
+ __le32 byte_84_rq_ci_pi;
+ __le32 rq_cur_blk_addr;
+ __le32 byte_92_srq_info;
+ __le32 byte_96_rx_reqmsn;
+ __le32 rq_nxt_blk_addr;
+ __le32 byte_104_rq_sge;
+ __le32 byte_108_rx_reqepsn;
+ __le32 rq_rnr_timer;
+ __le32 rx_msg_len;
+ __le32 rx_rkey_pkt_info;
+ __le64 rx_va;
+ __le32 byte_132_trrl;
+ __le32 trrl_ba;
+ __le32 byte_140_raq;
+ __le32 byte_144_raq;
+ __le32 byte_148_raq;
+ __le32 byte_152_raq;
+ __le32 byte_156_raq;
+ __le32 byte_160_sq_ci_pi;
+ __le32 sq_cur_blk_addr;
+ __le32 byte_168_irrl_idx;
+ __le32 byte_172_sq_psn;
+ __le32 byte_176_msg_pktn;
+ __le32 sq_cur_sge_blk_addr;
+ __le32 byte_184_irrl_idx;
+ __le32 cur_sge_offset;
+ __le32 byte_192_ext_sge;
+ __le32 byte_196_sq_psn;
+ __le32 byte_200_sq_max;
+ __le32 irrl_ba;
+ __le32 byte_208_irrl;
+ __le32 byte_212_lsn;
+ __le32 sq_timer;
+ __le32 byte_220_retry_psn_msn;
+ __le32 byte_224_retry_msg;
+ __le32 rx_sq_cur_blk_addr;
+ __le32 byte_232_irrl_sge;
+ __le32 irrl_cur_sge_offset;
+ __le32 byte_240_irrl_tail;
+ __le32 byte_244_rnr_rxack;
+ __le32 byte_248_ack_psn;
+ __le32 byte_252_err_txcqn;
+ __le32 byte_256_sqflush_rqcqe;
+
+ struct hns_roce_v2_qp_context_ex ext;
};
#define V2_QPC_BYTE_4_TST_S 0
@@ -887,6 +900,10 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16
#define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16)
+#define QPCEX_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context_ex, h, l)
+
+#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82)
+
#define V2_QP_RWE_S 1 /* rdma write enable */
#define V2_QP_RRE_S 2 /* rdma read enable */
#define V2_QP_ATE_S 3 /* rdma atomic enable */
@@ -1073,12 +1090,13 @@ struct hns_roce_v2_ud_send_wqe {
__le32 byte_32;
__le32 byte_36;
__le32 byte_40;
- __le32 dmac;
- __le32 byte_48;
+ u8 dmac[ETH_ALEN];
+ u8 sgid_index;
+ u8 smac_index;
u8 dgid[GID_LEN_V2];
-
};
-#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0
+
+#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0
#define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
#define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7
@@ -1117,37 +1135,10 @@ struct hns_roce_v2_ud_send_wqe {
#define V2_UD_SEND_WQE_BYTE_40_SL_S 20
#define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20)
-#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24
-#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24)
-
#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
-#define V2_UD_SEND_WQE_DMAC_0_S 0
-#define V2_UD_SEND_WQE_DMAC_0_M GENMASK(7, 0)
-
-#define V2_UD_SEND_WQE_DMAC_1_S 8
-#define V2_UD_SEND_WQE_DMAC_1_M GENMASK(15, 8)
-
-#define V2_UD_SEND_WQE_DMAC_2_S 16
-#define V2_UD_SEND_WQE_DMAC_2_M GENMASK(23, 16)
-
-#define V2_UD_SEND_WQE_DMAC_3_S 24
-#define V2_UD_SEND_WQE_DMAC_3_M GENMASK(31, 24)
-
-#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_S 0
-#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_M GENMASK(7, 0)
-
-#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_S 8
-#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_M GENMASK(15, 8)
-
-#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S 16
-#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M GENMASK(23, 16)
-
-#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_S 24
-#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_M GENMASK(31, 24)
-
struct hns_roce_v2_rc_send_wqe {
__le32 byte_4;
__le32 msg_len;
@@ -1334,7 +1325,7 @@ struct hns_roce_pf_res_b {
__le32 sgid_idx_num;
__le32 qid_idx_sl_num;
__le32 sccc_bt_idx_num;
- __le32 rsv;
+ __le32 gmv_idx_num;
};
#define PF_RES_DATA_1_PF_SMAC_IDX_S 0
@@ -1361,6 +1352,12 @@ struct hns_roce_pf_res_b {
#define PF_RES_DATA_4_PF_SCCC_BT_NUM_S 9
#define PF_RES_DATA_4_PF_SCCC_BT_NUM_M GENMASK(17, 9)
+#define PF_RES_DATA_5_PF_GMV_BT_IDX_S 0
+#define PF_RES_DATA_5_PF_GMV_BT_IDX_M GENMASK(7, 0)
+
+#define PF_RES_DATA_5_PF_GMV_BT_NUM_S 8
+#define PF_RES_DATA_5_PF_GMV_BT_NUM_M GENMASK(16, 8)
+
struct hns_roce_pf_timer_res_a {
__le32 rsv0;
__le32 qpc_timer_bt_idx_num;
@@ -1425,7 +1422,7 @@ struct hns_roce_vf_res_b {
__le32 vf_sgid_idx_num;
__le32 vf_qid_idx_sl_num;
__le32 vf_sccc_idx_num;
- __le32 rsv1;
+ __le32 vf_gmv_idx_num;
};
#define VF_RES_B_DATA_0_VF_ID_S 0
@@ -1455,6 +1452,12 @@ struct hns_roce_vf_res_b {
#define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S 9
#define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M GENMASK(17, 9)
+#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_S 0
+#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_M GENMASK(7, 0)
+
+#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_S 16
+#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_M GENMASK(24, 16)
+
struct hns_roce_vf_switch {
__le32 rocee_sel;
__le32 fun_id;
@@ -1577,6 +1580,46 @@ struct hns_roce_cfg_smac_tb {
#define CFG_SMAC_TB_VF_SMAC_H_S 0
#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0)
+struct hns_roce_cfg_gmv_bt {
+ __le32 gmv_ba_l;
+ __le32 gmv_ba_h;
+ __le32 gmv_bt_idx;
+ __le32 rsv[3];
+};
+
+#define CFG_GMV_BA_H_S 0
+#define CFG_GMV_BA_H_M GENMASK(19, 0)
+
+struct hns_roce_cfg_gmv_tb_a {
+ __le32 vf_sgid_l;
+ __le32 vf_sgid_ml;
+ __le32 vf_sgid_mh;
+ __le32 vf_sgid_h;
+ __le32 vf_sgid_type_vlan;
+ __le32 resv;
+};
+
+#define CFG_GMV_TB_SGID_IDX_S 0
+#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0)
+
+#define CFG_GMV_TB_VF_SGID_TYPE_S 0
+#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0)
+
+#define CFG_GMV_TB_VF_VLAN_EN_S 2
+
+#define CFG_GMV_TB_VF_VLAN_ID_S 16
+#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16)
+
+struct hns_roce_cfg_gmv_tb_b {
+ __le32 vf_smac_l;
+ __le32 vf_smac_h;
+ __le32 table_idx_rsv;
+ __le32 resv[3];
+};
+
+#define CFG_GMV_TB_SMAC_H_S 0
+#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0)
+
#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5
struct hns_roce_query_pf_caps_a {
u8 number_ports;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index afeffafc59f9..d9179bae4989 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -33,13 +33,13 @@
#include <linux/acpi.h>
#include <linux/of_platform.h>
#include <linux/module.h>
+#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
-#include <rdma/hns-abi.h>
#include "hns_roce_hem.h"
/**
@@ -53,7 +53,7 @@
* GID[0][0], GID[1][0],.....GID[N - 1][0],
* And so on
*/
-int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index)
+u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index)
{
return gid_index * hr_dev->caps.num_ports + port;
}
@@ -61,7 +61,10 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index)
static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
{
u8 phy_port;
- u32 i = 0;
+ u32 i;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN))
return 0;
@@ -90,14 +93,13 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
{
struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
- struct ib_gid_attr zattr = {};
u8 port = attr->port_num - 1;
int ret;
if (port >= hr_dev->caps.num_ports)
return -EINVAL;
- ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr);
+ ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, NULL, NULL);
return ret;
}
@@ -325,7 +327,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
resp.cqe_size = hr_dev->caps.cqe_sz;
- ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
if (ret)
goto error_fail_copy_to_udata;
@@ -421,6 +424,7 @@ static const struct ib_device_ops hns_roce_dev_ops = {
.alloc_pd = hns_roce_alloc_pd,
.alloc_ucontext = hns_roce_alloc_ucontext,
.create_ah = hns_roce_create_ah,
+ .create_user_ah = hns_roce_create_ah,
.create_cq = hns_roce_create_cq,
.create_qp = hns_roce_create_qp,
.dealloc_pd = hns_roce_dealloc_pd,
@@ -491,36 +495,13 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;
- ib_dev->uverbs_cmd_mask =
- (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ULL << IB_USER_VERBS_CMD_REG_MR) |
- (1ULL << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ULL << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ULL << IB_USER_VERBS_CMD_DESTROY_QP);
-
- ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
-
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) {
- ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR)
ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops);
- }
/* MW */
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) {
- ib_dev->uverbs_cmd_mask |=
- (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) |
- (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW)
ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops);
- }
/* FRMR */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
@@ -528,12 +509,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
/* SRQ */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
- ib_dev->uverbs_cmd_mask |=
- (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV);
ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops);
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops);
}
@@ -580,8 +555,8 @@ error_failed_setup_mtu_mac:
static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
{
- int ret;
struct device *dev = hr_dev->dev;
+ int ret;
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table,
HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz,
@@ -631,7 +606,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
goto err_unmap_trrl;
}
- if (hr_dev->caps.srqc_entry_sz) {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table,
HEM_TYPE_SRQC,
hr_dev->caps.srqc_entry_sz,
@@ -643,7 +618,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
}
}
- if (hr_dev->caps.sccc_sz) {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
ret = hns_roce_init_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table,
HEM_TYPE_SCCC,
@@ -680,18 +655,35 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
}
}
+ if (hr_dev->caps.gmv_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table,
+ HEM_TYPE_GMV,
+ hr_dev->caps.gmv_entry_sz,
+ hr_dev->caps.gmv_entry_num, 1);
+ if (ret) {
+ dev_err(dev,
+ "failed to init gmv table memory, ret = %d\n",
+ ret);
+ goto err_unmap_cqc_timer;
+ }
+ }
+
return 0;
+err_unmap_cqc_timer:
+ if (hr_dev->caps.cqc_timer_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table);
+
err_unmap_qpc_timer:
if (hr_dev->caps.qpc_timer_entry_sz)
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table);
err_unmap_ctx:
- if (hr_dev->caps.sccc_sz)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table);
err_unmap_srq:
- if (hr_dev->caps.srqc_entry_sz)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table);
err_unmap_cq:
@@ -721,8 +713,8 @@ err_unmap_dmpt:
*/
static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
{
- int ret;
struct device *dev = hr_dev->dev;
+ int ret;
spin_lock_init(&hr_dev->sm_lock);
spin_lock_init(&hr_dev->bt_cmd_lock);
@@ -846,8 +838,8 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev)
int hns_roce_init(struct hns_roce_dev *hr_dev)
{
- int ret;
struct device *dev = hr_dev->dev;
+ int ret;
if (hr_dev->hw->reset) {
ret = hr_dev->hw->reset(hr_dev, true);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 7f81a695e9af..1bcffd93ff3e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -167,10 +167,10 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr)
{
- int ret;
unsigned long mtpt_idx = key_to_hw_index(mr->key);
- struct device *dev = hr_dev->dev;
struct hns_roce_cmd_mailbox *mailbox;
+ struct device *dev = hr_dev->dev;
+ int ret;
/* Allocate mailbox memory */
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
@@ -185,14 +185,14 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
else
ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
if (ret) {
- dev_err(dev, "Write mtpt fail!\n");
+ dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
goto err_page;
}
ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
mtpt_idx & (hr_dev->caps.num_mtpts - 1));
if (ret) {
- dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
+ dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
goto err_page;
}
@@ -328,9 +328,10 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
return ret;
}
-int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
- struct ib_udata *udata)
+struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
struct ib_device *ib_dev = &hr_dev->ib_dev;
@@ -341,11 +342,11 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
int ret;
if (!mr->enabled)
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
+ return ERR_CAST(mailbox);
mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
@@ -390,12 +391,12 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- return 0;
+ return NULL;
free_cmd_mbox:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- return ret;
+ return ERR_PTR(ret);
}
int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
@@ -495,7 +496,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
if (ret < 1) {
- ibdev_err(ibdev, "failed to store sg pages %d %d, cnt = %d.\n",
+ ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
goto err_page_list;
}
@@ -509,7 +510,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
ret = 0;
} else {
- mr->pbl_mtr.hem_cfg.buf_pg_shift = ilog2(ibmr->page_size);
+ mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
ret = mr->npages;
}
@@ -695,15 +696,6 @@ static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
return size;
}
-static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size,
- unsigned int page_shift)
-{
- if (is_direct)
- return ALIGN(alloc_size, 1 << page_shift);
- else
- return HNS_HW_DIRECT_PAGE_COUNT << page_shift;
-}
-
/*
* check the given pages in continuous address space
* Returns 0 on success, or the error page num.
@@ -732,7 +724,6 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
/* release kernel buffers */
if (mtr->kmem) {
hns_roce_buf_free(hr_dev, mtr->kmem);
- kfree(mtr->kmem);
mtr->kmem = NULL;
}
}
@@ -744,13 +735,12 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
struct ib_device *ibdev = &hr_dev->ib_dev;
unsigned int best_pg_shift;
int all_pg_count = 0;
- size_t direct_size;
size_t total_size;
int ret;
total_size = mtr_bufs_size(buf_attr);
if (total_size < 1) {
- ibdev_err(ibdev, "Failed to check mtr size\n");
+ ibdev_err(ibdev, "failed to check mtr size\n.");
return -EINVAL;
}
@@ -762,7 +752,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
buf_attr->user_access);
if (IS_ERR_OR_NULL(mtr->umem)) {
- ibdev_err(ibdev, "Failed to get umem, ret %ld\n",
+ ibdev_err(ibdev, "failed to get umem, ret = %ld.\n",
PTR_ERR(mtr->umem));
return -ENOMEM;
}
@@ -780,19 +770,16 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
ret = 0;
} else {
mtr->umem = NULL;
- mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL);
- if (!mtr->kmem) {
- ibdev_err(ibdev, "Failed to alloc kmem\n");
- return -ENOMEM;
- }
- direct_size = mtr_kmem_direct_size(is_direct, total_size,
- buf_attr->page_shift);
- ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size,
- mtr->kmem, buf_attr->page_shift);
- if (ret) {
- ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret);
- goto err_alloc_mem;
+ mtr->kmem =
+ hns_roce_buf_alloc(hr_dev, total_size,
+ buf_attr->page_shift,
+ is_direct ? HNS_ROCE_BUF_DIRECT : 0);
+ if (IS_ERR(mtr->kmem)) {
+ ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
+ PTR_ERR(mtr->kmem));
+ return PTR_ERR(mtr->kmem);
}
+
best_pg_shift = buf_attr->page_shift;
all_pg_count = mtr->kmem->npages;
}
@@ -800,7 +787,8 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
/* must bigger than minimum hardware page shift */
if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) {
ret = -EINVAL;
- ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n",
+ ibdev_err(ibdev,
+ "failed to check mtr, page shift = %u count = %d.\n",
best_pg_shift, all_pg_count);
goto err_alloc_mem;
}
@@ -841,12 +829,12 @@ static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
}
int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- dma_addr_t *pages, int page_cnt)
+ dma_addr_t *pages, unsigned int page_cnt)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_buf_region *r;
+ unsigned int i;
int err;
- int i;
/*
* Only use the first page address as root ba when hopnum is 0, this
@@ -862,7 +850,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
if (r->offset + r->count > page_cnt) {
err = -EINVAL;
ibdev_err(ibdev,
- "Failed to check mtr%d end %d + %d, max %d\n",
+ "failed to check mtr%u end %u + %u, max %u.\n",
i, r->offset, r->count, page_cnt);
return err;
}
@@ -870,7 +858,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r);
if (err) {
ibdev_err(ibdev,
- "Failed to map mtr%d offset %d, err %d\n",
+ "failed to map mtr%u offset %u, ret = %d.\n",
i, r->offset, err);
return err;
}
@@ -883,13 +871,12 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
{
struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ int mtt_count, left;
int start_index;
- int mtt_count;
int total = 0;
__le64 *mtts;
- int npage;
+ u32 npage;
u64 addr;
- int left;
if (!mtt_buf || mtt_max < 1)
goto done;
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 98f69496adb4..cca818d05a8f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -32,7 +32,6 @@
#include <linux/platform_device.h>
#include <linux/pci.h>
-#include <uapi/rdma/hns-abi.h>
#include "hns_roce_device.h"
static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn)
@@ -65,21 +64,22 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn);
if (ret) {
- ibdev_err(ib_dev, "failed to alloc pd, ret = %d\n", ret);
+ ibdev_err(ib_dev, "failed to alloc pd, ret = %d.\n", ret);
return ret;
}
if (udata) {
- struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn};
+ struct hns_roce_ib_alloc_pd_resp resp = {.pdn = pd->pdn};
- if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret) {
hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn);
- ibdev_err(ib_dev, "failed to copy to udata\n");
- return -EFAULT;
+ ibdev_err(ib_dev, "failed to copy to udata, ret = %d\n", ret);
}
}
- return 0;
+ return ret;
}
int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 6c081dd985fc..d8e2fe5558d2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -39,7 +39,6 @@
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
-#include <rdma/hns-abi.h>
static void flush_work_handle(struct work_struct *work)
{
@@ -114,8 +113,8 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
enum hns_roce_event type)
{
- struct ib_event event;
struct ib_qp *ibqp = &hr_qp->ibqp;
+ struct ib_event event;
if (ibqp->event_handler) {
event.device = ibqp->device;
@@ -154,9 +153,50 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
}
}
+static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank)
+{
+ u32 least_load = bank[0].inuse;
+ u8 bankid = 0;
+ u32 bankcnt;
+ u8 i;
+
+ for (i = 1; i < HNS_ROCE_QP_BANK_NUM; i++) {
+ bankcnt = bank[i].inuse;
+ if (bankcnt < least_load) {
+ least_load = bankcnt;
+ bankid = i;
+ }
+ }
+
+ return bankid;
+}
+
+static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid,
+ unsigned long *qpn)
+{
+ int id;
+
+ id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL);
+ if (id < 0) {
+ id = ida_alloc_range(&bank->ida, bank->min, bank->max,
+ GFP_KERNEL);
+ if (id < 0)
+ return id;
+ }
+
+ /* the QPN should keep increasing until the max value is reached. */
+ bank->next = (id + 1) > bank->max ? bank->min : id + 1;
+
+ /* the lower 3 bits is bankid */
+ *qpn = (id << 3) | bankid;
+
+ return 0;
+}
static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
+ struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
unsigned long num = 0;
+ u8 bankid;
int ret;
if (hr_qp->ibqp.qp_type == IB_QPT_GSI) {
@@ -169,13 +209,21 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
hr_qp->doorbell_qpn = 1;
} else {
- ret = hns_roce_bitmap_alloc_range(&hr_dev->qp_table.bitmap,
- 1, 1, &num);
+ spin_lock(&qp_table->bank_lock);
+ bankid = get_least_load_bankid_for_qp(qp_table->bank);
+
+ ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid,
+ &num);
if (ret) {
- ibdev_err(&hr_dev->ib_dev, "Failed to alloc bitmap\n");
- return -ENOMEM;
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to alloc QPN, ret = %d\n", ret);
+ spin_unlock(&qp_table->bank_lock);
+ return ret;
}
+ qp_table->bank[bankid].inuse++;
+ spin_unlock(&qp_table->bank_lock);
+
hr_qp->doorbell_qpn = (u32)num;
}
@@ -286,7 +334,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
}
}
- if (hr_dev->caps.sccc_sz) {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
/* Alloc memory for SCC CTX */
ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
hr_qp->qpn);
@@ -340,9 +388,15 @@ static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
}
+static inline u8 get_qp_bankid(unsigned long qpn)
+{
+ /* The lower 3 bits of QPN are used to hash to different banks */
+ return (u8)(qpn & GENMASK(2, 0));
+}
+
static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
- struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ u8 bankid;
if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
return;
@@ -350,7 +404,13 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
if (hr_qp->qpn < hr_dev->caps.reserved_qps)
return;
- hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR);
+ bankid = get_qp_bankid(hr_qp->qpn);
+
+ ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3);
+
+ spin_lock(&hr_dev->qp_table.bank_lock);
+ hr_dev->qp_table.bank[bankid].inuse--;
+ spin_unlock(&hr_dev->qp_table.bank_lock);
}
static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
@@ -404,37 +464,43 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
return 0;
}
-static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
- struct hns_roce_qp *hr_qp,
- struct ib_qp_cap *cap)
+static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
{
- u32 cnt;
+ /* GSI/UD QP only has extended sge */
+ if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
+ return qp->sq.max_gs;
- cnt = max(1U, cap->max_send_sge);
- if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
- hr_qp->sq.max_gs = roundup_pow_of_two(cnt);
- hr_qp->sge.sge_cnt = 0;
+ if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
+ return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
- return 0;
- }
+ return 0;
+}
- hr_qp->sq.max_gs = cnt;
+static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
+ struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
+{
+ u32 total_sge_cnt;
+ u32 wqe_sge_cnt;
- /* UD sqwqe's sge use extend sge */
- if (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
- hr_qp->ibqp.qp_type == IB_QPT_UD) {
- cnt = roundup_pow_of_two(sq_wqe_cnt * hr_qp->sq.max_gs);
- } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) {
- cnt = roundup_pow_of_two(sq_wqe_cnt *
- (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE));
- } else {
- cnt = 0;
+ hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+
+ if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
+ hr_qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE;
+ return;
}
- hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
- hr_qp->sge.sge_cnt = cnt;
+ hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
- return 0;
+ wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp);
+
+ /* If the number of extended sge is not zero, they MUST use the
+ * space of HNS_HW_PAGE_SIZE at least.
+ */
+ if (wqe_sge_cnt) {
+ total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt);
+ hr_qp->sge.sge_cnt = max(total_sge_cnt,
+ (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
+ }
}
static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
@@ -447,12 +513,12 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
/* Sanity check SQ size before proceeding */
if (ucmd->log_sq_stride > max_sq_stride ||
ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
- ibdev_err(&hr_dev->ib_dev, "Failed to check SQ stride size\n");
+ ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n");
return -EINVAL;
}
if (cap->max_send_sge > hr_dev->caps.max_sq_sg) {
- ibdev_err(&hr_dev->ib_dev, "Failed to check SQ SGE size %d\n",
+ ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n",
cap->max_send_sge);
return -EINVAL;
}
@@ -479,9 +545,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
return ret;
}
- ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap);
- if (ret)
- return ret;
+ set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
hr_qp->sq.wqe_cnt = cnt;
@@ -546,7 +610,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
{
struct ib_device *ibdev = &hr_dev->ib_dev;
u32 cnt;
- int ret;
if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
cap->max_send_sge > hr_dev->caps.max_sq_sg) {
@@ -558,7 +621,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes));
if (cnt > hr_dev->caps.max_wqes) {
- ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n",
+ ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n",
cnt);
return -EINVAL;
}
@@ -566,9 +629,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
hr_qp->sq.wqe_cnt = cnt;
- ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap);
- if (ret)
- return ret;
+ set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
/* sync the parameters of kernel QP to user's configuration */
cap->max_send_wr = cnt;
@@ -725,13 +786,17 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
struct ib_device *ibdev = &hr_dev->ib_dev;
int ret;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE)
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB;
+
if (udata) {
if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) {
ret = hns_roce_db_map_user(uctx, udata, ucmd->sdb_addr,
&hr_qp->sdb);
if (ret) {
ibdev_err(ibdev,
- "Failed to map user SQ doorbell\n");
+ "failed to map user SQ doorbell, ret = %d.\n",
+ ret);
goto err_out;
}
hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
@@ -743,7 +808,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
&hr_qp->rdb);
if (ret) {
ibdev_err(ibdev,
- "Failed to map user RQ doorbell\n");
+ "failed to map user RQ doorbell, ret = %d.\n",
+ ret);
goto err_sdb;
}
hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
@@ -760,7 +826,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
if (ret) {
ibdev_err(ibdev,
- "Failed to alloc kernel RQ doorbell\n");
+ "failed to alloc kernel RQ doorbell, ret = %d.\n",
+ ret);
goto err_out;
}
*hr_qp->rdb.db_record = 0;
@@ -803,14 +870,14 @@ static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev,
sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL);
if (ZERO_OR_NULL_PTR(sq_wrid)) {
- ibdev_err(ibdev, "Failed to alloc SQ wrid\n");
+ ibdev_err(ibdev, "failed to alloc SQ wrid.\n");
return -ENOMEM;
}
if (hr_qp->rq.wqe_cnt) {
rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL);
if (ZERO_OR_NULL_PTR(rq_wrid)) {
- ibdev_err(ibdev, "Failed to alloc RQ wrid\n");
+ ibdev_err(ibdev, "failed to alloc RQ wrid.\n");
ret = -ENOMEM;
goto err_sq;
}
@@ -860,29 +927,25 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
}
if (udata) {
- if (ib_copy_from_udata(ucmd, udata, sizeof(*ucmd))) {
- ibdev_err(ibdev, "Failed to copy QP ucmd\n");
- return -EFAULT;
+ ret = ib_copy_from_udata(ucmd, udata,
+ min(udata->inlen, sizeof(*ucmd)));
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to copy QP ucmd, ret = %d\n", ret);
+ return ret;
}
ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
if (ret)
- ibdev_err(ibdev, "Failed to set user SQ size\n");
+ ibdev_err(ibdev,
+ "failed to set user SQ size, ret = %d.\n",
+ ret);
} else {
- if (init_attr->create_flags &
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
- ibdev_err(ibdev, "Failed to check multicast loopback\n");
- return -EINVAL;
- }
-
- if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
- ibdev_err(ibdev, "Failed to check ipoib ud lso\n");
- return -EINVAL;
- }
-
ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
if (ret)
- ibdev_err(ibdev, "Failed to set kernel SQ size\n");
+ ibdev_err(ibdev,
+ "failed to set kernel SQ size, ret = %d.\n",
+ ret);
}
return ret;
@@ -906,47 +969,53 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
hr_qp->state = IB_QPS_RESET;
hr_qp->flush_flag = 0;
+ if (init_attr->create_flags)
+ return -EOPNOTSUPP;
+
ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd);
if (ret) {
- ibdev_err(ibdev, "Failed to set QP param\n");
+ ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret);
return ret;
}
if (!udata) {
ret = alloc_kernel_wrid(hr_dev, hr_qp);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc wrid\n");
+ ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n",
+ ret);
return ret;
}
}
ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc QP doorbell\n");
+ ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n",
+ ret);
goto err_wrid;
}
ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc QP buffer\n");
+ ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret);
goto err_db;
}
ret = alloc_qpn(hr_dev, hr_qp);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc QPN\n");
+ ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret);
goto err_buf;
}
ret = alloc_qpc(hr_dev, hr_qp);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc QP context\n");
+ ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n",
+ ret);
goto err_qpn;
}
ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr);
if (ret) {
- ibdev_err(ibdev, "Failed to store QP\n");
+ ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret);
goto err_qpc;
}
@@ -1003,6 +1072,30 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
kfree(hr_qp);
}
+static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
+ bool is_user)
+{
+ switch (type) {
+ case IB_QPT_UD:
+ if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 &&
+ is_user)
+ goto out;
+ fallthrough;
+ case IB_QPT_RC:
+ case IB_QPT_GSI:
+ break;
+ default:
+ goto out;
+ }
+
+ return 0;
+
+out:
+ ibdev_err(&hr_dev->ib_dev, "not support QP type %d\n", type);
+
+ return -EOPNOTSUPP;
+}
+
struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
@@ -1012,15 +1105,9 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
struct hns_roce_qp *hr_qp;
int ret;
- switch (init_attr->qp_type) {
- case IB_QPT_RC:
- case IB_QPT_GSI:
- break;
- default:
- ibdev_err(ibdev, "not support QP type %d\n",
- init_attr->qp_type);
- return ERR_PTR(-EOPNOTSUPP);
- }
+ ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata);
+ if (ret)
+ return ERR_PTR(ret);
hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
if (!hr_qp)
@@ -1035,10 +1122,11 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
if (ret) {
ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n",
init_attr->qp_type, ret);
- ibdev_err(ibdev, "Create GSI QP failed!\n");
+
kfree(hr_qp);
return ERR_PTR(ret);
}
+
return &hr_qp->ibqp;
}
@@ -1091,9 +1179,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
- ibdev_err(&hr_dev->ib_dev,
- "attr port_num invalid.attr->port_num=%d\n",
- attr->port_num);
+ ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n",
+ attr->port_num);
return -EINVAL;
}
@@ -1101,8 +1188,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr,
p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
ibdev_err(&hr_dev->ib_dev,
- "attr pkey_index invalid.attr->pkey_index=%d\n",
- attr->pkey_index);
+ "invalid attr, pkey_index = %u.\n",
+ attr->pkey_index);
return -EINVAL;
}
}
@@ -1110,16 +1197,16 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
ibdev_err(&hr_dev->ib_dev,
- "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n",
- attr->max_rd_atomic);
+ "invalid attr, max_rd_atomic = %u.\n",
+ attr->max_rd_atomic);
return -EINVAL;
}
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
ibdev_err(&hr_dev->ib_dev,
- "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n",
- attr->max_dest_rd_atomic);
+ "invalid attr, max_dest_rd_atomic = %u.\n",
+ attr->max_dest_rd_atomic);
return -EINVAL;
}
@@ -1244,22 +1331,22 @@ static inline void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
return hns_roce_buf_offset(hr_qp->mtr.kmem, offset);
}
-void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
{
return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
}
-void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
{
return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
}
-void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n)
{
return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift));
}
-bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
+bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
struct ib_cq *ib_cq)
{
struct hns_roce_cq *hr_cq;
@@ -1280,22 +1367,24 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
- int reserved_from_top = 0;
- int reserved_from_bot;
- int ret;
+ unsigned int reserved_from_bot;
+ unsigned int i;
mutex_init(&qp_table->scc_mutex);
xa_init(&hr_dev->qp_table_xa);
reserved_from_bot = hr_dev->caps.reserved_qps;
- ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
- hr_dev->caps.num_qps - 1, reserved_from_bot,
- reserved_from_top);
- if (ret) {
- dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n",
- ret);
- return ret;
+ for (i = 0; i < reserved_from_bot; i++) {
+ hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++;
+ hr_dev->qp_table.bank[get_qp_bankid(i)].min++;
+ }
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
+ ida_init(&hr_dev->qp_table.bank[i].ida);
+ hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps /
+ HNS_ROCE_QP_BANK_NUM - 1;
+ hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min;
}
return 0;
@@ -1303,5 +1392,8 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
{
- hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap);
+ int i;
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
+ ida_destroy(&hr_dev->qp_table.bank[i].ida);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 8caf74e44efd..c4ae57e4173a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -4,7 +4,6 @@
*/
#include <rdma/ib_umem.h>
-#include <rdma/hns-abi.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
@@ -93,7 +92,8 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
if (ret < 1) {
- ibdev_err(ibdev, "Failed to find mtr for SRQ WQE\n");
+ ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
+ ret);
return -ENOBUFS;
}
@@ -101,32 +101,34 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx,
ARRAY_SIZE(mtts_idx), &dma_handle_idx);
if (ret < 1) {
- ibdev_err(ibdev, "Failed to find mtr for SRQ idx\n");
+ ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
+ ret);
return -ENOBUFS;
}
ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc SRQ number, err %d\n", ret);
+ ibdev_err(ibdev,
+ "failed to alloc SRQ number, ret = %d.\n", ret);
return -ENOMEM;
}
ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
if (ret) {
- ibdev_err(ibdev, "Failed to get SRQC table, err %d\n", ret);
+ ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret);
goto err_out;
}
ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
if (ret) {
- ibdev_err(ibdev, "Failed to store SRQC, err %d\n", ret);
+ ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret);
goto err_put;
}
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR_OR_NULL(mailbox)) {
ret = -ENOMEM;
- ibdev_err(ibdev, "Failed to alloc mailbox for SRQC\n");
+ ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n");
goto err_xa;
}
@@ -137,7 +139,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret) {
- ibdev_err(ibdev, "Failed to config SRQC, err %d\n", ret);
+ ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret);
goto err_xa;
}
@@ -198,7 +200,8 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
hr_dev->caps.srqwqe_ba_pg_sz +
HNS_HW_PAGE_SHIFT, udata, addr);
if (err)
- ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err);
+ ibdev_err(ibdev,
+ "failed to alloc SRQ buf mtr, ret = %d.\n", err);
return err;
}
@@ -229,18 +232,18 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT,
udata, addr);
if (err) {
- ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err);
+ ibdev_err(ibdev,
+ "failed to alloc SRQ idx mtr, ret = %d.\n", err);
return err;
}
if (!udata) {
idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL);
if (!idx_que->bitmap) {
- ibdev_err(ibdev, "Failed to alloc SRQ idx bitmap\n");
+ ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n");
err = -ENOMEM;
goto err_idx_mtr;
}
-
}
return 0;
@@ -288,6 +291,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
int ret;
u32 cqn;
+ if (init_attr->srq_type != IB_SRQT_BASIC &&
+ init_attr->srq_type != IB_SRQT_XRC)
+ return -EOPNOTSUPP;
+
/* Check the actual SRQ wqe and SRQ sge num */
if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
@@ -300,9 +307,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
srq->max_gs = init_attr->attr.max_sge;
if (udata) {
- ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+ ret = ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd)));
if (ret) {
- ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n",
+ ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n",
ret);
return ret;
}
@@ -310,20 +318,21 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc SRQ buffer, err %d\n", ret);
+ ibdev_err(ibdev,
+ "failed to alloc SRQ buffer, ret = %d.\n", ret);
return ret;
}
ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc SRQ idx, err %d\n", ret);
+ ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret);
goto err_buf_alloc;
}
if (!udata) {
ret = alloc_srq_wrid(hr_dev, srq);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc SRQ wrid, err %d\n",
+ ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n",
ret);
goto err_idx_alloc;
}
@@ -335,7 +344,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0);
if (ret) {
- ibdev_err(ibdev, "Failed to alloc SRQ context, err %d\n", ret);
+ ibdev_err(ibdev,
+ "failed to alloc SRQ context, ret = %d.\n", ret);
goto err_wrid_alloc;
}
@@ -343,11 +353,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
resp.srqn = srq->srqn;
if (udata) {
- if (ib_copy_to_udata(udata, &resp,
- min(udata->outlen, sizeof(resp)))) {
- ret = -EFAULT;
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret)
goto err_srqc_alloc;
- }
}
return 0;
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 832b80de004f..6a79502c8b53 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -274,7 +274,6 @@ struct i40iw_device {
u8 max_sge;
u8 iw_status;
u8 send_term_ok;
- bool push_mode; /* Initialized from parameter passed to driver */
/* x710 specific */
struct mutex pbl_mutex;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 3053c345a5a3..9acc0ecc9a43 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -2426,7 +2426,7 @@ static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node,
}
break;
case I40IW_CM_STATE_MPAREQ_RCVD:
- atomic_add_return(1, &cm_node->passive_state);
+ atomic_inc(&cm_node->passive_state);
break;
case I40IW_CM_STATE_ESTABLISHED:
case I40IW_CM_STATE_SYN_RCVD:
@@ -3020,7 +3020,7 @@ static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8
i40iw_cleanup_retrans_entry(cm_node);
if (!loopback) {
- passive_state = atomic_add_return(1, &cm_node->passive_state);
+ passive_state = atomic_inc_return(&cm_node->passive_state);
if (passive_state == I40IW_SEND_RESET_EVENT) {
cm_node->state = I40IW_CM_STATE_CLOSED;
i40iw_rem_ref_cm_node(cm_node);
@@ -3678,7 +3678,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
return -EINVAL;
}
- passive_state = atomic_add_return(1, &cm_node->passive_state);
+ passive_state = atomic_inc_return(&cm_node->passive_state);
if (passive_state == I40IW_SEND_RESET_EVENT) {
i40iw_rem_ref_cm_node(cm_node);
return -ECONNRESET;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index 86d3f8aff329..c943d491b72b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -820,46 +820,6 @@ static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done(
}
/**
- * i40iw_sc_manage_push_page - Handle push page
- * @cqp: struct for cqp hw
- * @info: push page info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_manage_push_page(
- struct i40iw_sc_cqp *cqp,
- struct i40iw_cqp_manage_push_page_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- if (info->push_idx >= I40IW_MAX_PUSH_PAGE_COUNT)
- return I40IW_ERR_INVALID_PUSH_PAGE_INDEX;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, info->qs_handle);
-
- header = LS_64(info->push_idx, I40IW_CQPSQ_MPP_PPIDX) |
- LS_64(I40IW_CQP_OP_MANAGE_PUSH_PAGES, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
- LS_64(info->free_page, I40IW_CQPSQ_MPP_FREE_PAGE);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
* i40iw_sc_manage_hmc_pm_func_table - manage of function table
* @cqp: struct for cqp hw
* @scratch: u64 saved to be used during cqp completion
@@ -2859,9 +2819,7 @@ static enum i40iw_status_code i40iw_sc_qp_setctx(
LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) |
LS_64(qp->xmit_tph_en, I40IWQPC_XMITTPHEN) |
LS_64(qp->rq_tph_en, I40IWQPC_RQTPHEN) |
- LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN) |
- LS_64(info->push_idx, I40IWQPC_PPIDX) |
- LS_64(info->push_mode_en, I40IWQPC_PMENA);
+ LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN);
set_64bit_val(qp_ctx, 8, qp->sq_pa);
set_64bit_val(qp_ctx, 16, qp->rq_pa);
@@ -4291,13 +4249,6 @@ static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev,
pcmdinfo->in.u.add_arp_cache_entry.scratch,
pcmdinfo->post_sq);
break;
- case OP_MANAGE_PUSH_PAGE:
- status = i40iw_sc_manage_push_page(
- pcmdinfo->in.u.manage_push_page.cqp,
- &pcmdinfo->in.u.manage_push_page.info,
- pcmdinfo->in.u.manage_push_page.scratch,
- pcmdinfo->post_sq);
- break;
case OP_UPDATE_PE_SDS:
/* case I40IW_CQP_OP_UPDATE_PE_SDS */
status = i40iw_update_pe_sds(
@@ -5098,7 +5049,7 @@ void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi)
i40iw_hw_stats_stop_timer(vsi);
}
-static struct i40iw_cqp_ops iw_cqp_ops = {
+static const struct i40iw_cqp_ops iw_cqp_ops = {
.cqp_init = i40iw_sc_cqp_init,
.cqp_create = i40iw_sc_cqp_create,
.cqp_post_sq = i40iw_sc_cqp_post_sq,
@@ -5107,7 +5058,7 @@ static struct i40iw_cqp_ops iw_cqp_ops = {
.poll_for_cqp_op_done = i40iw_sc_poll_for_cqp_op_done
};
-static struct i40iw_ccq_ops iw_ccq_ops = {
+static const struct i40iw_ccq_ops iw_ccq_ops = {
.ccq_init = i40iw_sc_ccq_init,
.ccq_create = i40iw_sc_ccq_create,
.ccq_destroy = i40iw_sc_ccq_destroy,
@@ -5116,7 +5067,7 @@ static struct i40iw_ccq_ops iw_ccq_ops = {
.ccq_arm = i40iw_sc_ccq_arm
};
-static struct i40iw_ceq_ops iw_ceq_ops = {
+static const struct i40iw_ceq_ops iw_ceq_ops = {
.ceq_init = i40iw_sc_ceq_init,
.ceq_create = i40iw_sc_ceq_create,
.cceq_create_done = i40iw_sc_cceq_create_done,
@@ -5126,7 +5077,7 @@ static struct i40iw_ceq_ops iw_ceq_ops = {
.process_ceq = i40iw_sc_process_ceq
};
-static struct i40iw_aeq_ops iw_aeq_ops = {
+static const struct i40iw_aeq_ops iw_aeq_ops = {
.aeq_init = i40iw_sc_aeq_init,
.aeq_create = i40iw_sc_aeq_create,
.aeq_destroy = i40iw_sc_aeq_destroy,
@@ -5137,11 +5088,11 @@ static struct i40iw_aeq_ops iw_aeq_ops = {
};
/* iwarp pd ops */
-static struct i40iw_pd_ops iw_pd_ops = {
+static const struct i40iw_pd_ops iw_pd_ops = {
.pd_init = i40iw_sc_pd_init,
};
-static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
+static const struct i40iw_priv_qp_ops iw_priv_qp_ops = {
.qp_init = i40iw_sc_qp_init,
.qp_create = i40iw_sc_qp_create,
.qp_modify = i40iw_sc_qp_modify,
@@ -5156,14 +5107,14 @@ static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
.iw_mr_fast_register = i40iw_sc_mr_fast_register
};
-static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
+static const struct i40iw_priv_cq_ops iw_priv_cq_ops = {
.cq_init = i40iw_sc_cq_init,
.cq_create = i40iw_sc_cq_create,
.cq_destroy = i40iw_sc_cq_destroy,
.cq_modify = i40iw_sc_cq_modify,
};
-static struct i40iw_mr_ops iw_mr_ops = {
+static const struct i40iw_mr_ops iw_mr_ops = {
.alloc_stag = i40iw_sc_alloc_stag,
.mr_reg_non_shared = i40iw_sc_mr_reg_non_shared,
.mr_reg_shared = i40iw_sc_mr_reg_shared,
@@ -5172,8 +5123,7 @@ static struct i40iw_mr_ops iw_mr_ops = {
.mw_alloc = i40iw_sc_mw_alloc
};
-static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = {
- .manage_push_page = i40iw_sc_manage_push_page,
+static const struct i40iw_cqp_misc_ops iw_cqp_misc_ops = {
.manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table,
.set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile,
.commit_fpm_values = i40iw_sc_commit_fpm_values,
@@ -5195,7 +5145,7 @@ static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = {
.update_resume_qp = i40iw_sc_resume_qp
};
-static struct i40iw_hmc_ops iw_hmc_ops = {
+static const struct i40iw_hmc_ops iw_hmc_ops = {
.init_iw_hmc = i40iw_sc_init_iw_hmc,
.parse_fpm_query_buf = i40iw_sc_parse_fpm_query_buf,
.configure_iw_fpm = i40iw_sc_configure_iw_fpm,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index e8367d67575d..86d5a33c57cc 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -40,11 +40,6 @@
#define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024)
#define I40IW_VF_DB_ADDR_OFFSET (64 * 1024)
-#define I40IW_PUSH_OFFSET (4 * 1024 * 1024)
-#define I40IW_PF_FIRST_PUSH_PAGE_INDEX 16
-#define I40IW_VF_PUSH_OFFSET ((8 + 64) * 1024)
-#define I40IW_VF_FIRST_PUSH_PAGE_INDEX 2
-
#define I40IW_PE_DB_SIZE_4M 1
#define I40IW_PE_DB_SIZE_8M 2
@@ -402,7 +397,6 @@
#define I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE 0x0e
#define I40IW_CQP_OP_MANAGE_ARP 0x0f
#define I40IW_CQP_OP_MANAGE_VF_PBLE_BP 0x10
-#define I40IW_CQP_OP_MANAGE_PUSH_PAGES 0x11
#define I40IW_CQP_OP_QUERY_RDMA_FEATURES 0x12
#define I40IW_CQP_OP_UPLOAD_CONTEXT 0x13
#define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14
@@ -843,7 +837,6 @@
#define I40IW_CQPSQ_MVPBP_PD_PLPBA_MASK \
(0x1fffffffffffffffULL << I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT)
-/* Manage Push Page - MPP */
#define I40IW_INVALID_PUSH_PAGE_INDEX 0xffff
#define I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT 0
@@ -1352,9 +1345,6 @@
#define I40IWQPSQ_ADDFRAGCNT_SHIFT 38
#define I40IWQPSQ_ADDFRAGCNT_MASK (0x7ULL << I40IWQPSQ_ADDFRAGCNT_SHIFT)
-#define I40IWQPSQ_PUSHWQE_SHIFT 56
-#define I40IWQPSQ_PUSHWQE_MASK (1ULL << I40IWQPSQ_PUSHWQE_SHIFT)
-
#define I40IWQPSQ_STREAMMODE_SHIFT 58
#define I40IWQPSQ_STREAMMODE_MASK (1ULL << I40IWQPSQ_STREAMMODE_SHIFT)
@@ -1740,18 +1730,17 @@ enum i40iw_alignment {
#define OP_MW_ALLOC 20
#define OP_QP_FLUSH_WQES 21
#define OP_ADD_ARP_CACHE_ENTRY 22
-#define OP_MANAGE_PUSH_PAGE 23
-#define OP_UPDATE_PE_SDS 24
-#define OP_MANAGE_HMC_PM_FUNC_TABLE 25
-#define OP_SUSPEND 26
-#define OP_RESUME 27
-#define OP_MANAGE_VF_PBLE_BP 28
-#define OP_QUERY_FPM_VALUES 29
-#define OP_COMMIT_FPM_VALUES 30
-#define OP_REQUESTED_COMMANDS 31
-#define OP_COMPLETED_COMMANDS 32
-#define OP_GEN_AE 33
-#define OP_QUERY_RDMA_FEATURES 34
-#define OP_SIZE_CQP_STAT_ARRAY 35
+#define OP_UPDATE_PE_SDS 23
+#define OP_MANAGE_HMC_PM_FUNC_TABLE 24
+#define OP_SUSPEND 25
+#define OP_RESUME 26
+#define OP_MANAGE_VF_PBLE_BP 27
+#define OP_QUERY_FPM_VALUES 28
+#define OP_COMMIT_FPM_VALUES 29
+#define OP_REQUESTED_COMMANDS 30
+#define OP_COMPLETED_COMMANDS 31
+#define OP_GEN_AE 32
+#define OP_QUERY_RDMA_FEATURES 33
+#define OP_SIZE_CQP_STAT_ARRAY 34
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h
index d1c5855bd8c3..36a19c4e5bba 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_status.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_status.h
@@ -61,7 +61,6 @@ enum i40iw_status_code {
I40IW_ERR_QUEUE_EMPTY = -22,
I40IW_ERR_INVALID_ALIGNMENT = -23,
I40IW_ERR_FLUSHED_QUEUE = -24,
- I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25,
I40IW_ERR_INVALID_INLINE_DATA_SIZE = -26,
I40IW_ERR_TIMEOUT = -27,
I40IW_ERR_OPCODE_MISMATCH = -28,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index c3babf3cbb8e..394e182686cf 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -387,7 +387,6 @@ struct i40iw_sc_qp {
u8 *q2_buf;
u64 qp_compl_ctx;
u16 qs_handle;
- u16 push_idx;
u8 sq_tph_val;
u8 rq_tph_val;
u8 qp_state;
@@ -493,16 +492,16 @@ struct i40iw_sc_dev {
struct i40iw_sc_aeq *aeq;
struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT];
struct i40iw_sc_cq *ccq;
- struct i40iw_cqp_ops *cqp_ops;
- struct i40iw_ccq_ops *ccq_ops;
- struct i40iw_ceq_ops *ceq_ops;
- struct i40iw_aeq_ops *aeq_ops;
- struct i40iw_pd_ops *iw_pd_ops;
- struct i40iw_priv_qp_ops *iw_priv_qp_ops;
- struct i40iw_priv_cq_ops *iw_priv_cq_ops;
- struct i40iw_mr_ops *mr_ops;
- struct i40iw_cqp_misc_ops *cqp_misc_ops;
- struct i40iw_hmc_ops *hmc_ops;
+ const struct i40iw_cqp_ops *cqp_ops;
+ const struct i40iw_ccq_ops *ccq_ops;
+ const struct i40iw_ceq_ops *ceq_ops;
+ const struct i40iw_aeq_ops *aeq_ops;
+ const struct i40iw_pd_ops *iw_pd_ops;
+ const struct i40iw_priv_qp_ops *iw_priv_qp_ops;
+ const struct i40iw_priv_cq_ops *iw_priv_cq_ops;
+ const struct i40iw_mr_ops *mr_ops;
+ const struct i40iw_cqp_misc_ops *cqp_misc_ops;
+ const struct i40iw_hmc_ops *hmc_ops;
struct i40iw_vchnl_if vchnl_if;
const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
@@ -749,8 +748,6 @@ struct i40iw_qp_host_ctx_info {
struct i40iwarp_offload_info *iwarp_info;
u32 send_cq_num;
u32 rcv_cq_num;
- u16 push_idx;
- bool push_mode_en;
bool tcp_info_valid;
bool iwarp_info_valid;
bool err_rq_idx_valid;
@@ -937,12 +934,6 @@ struct i40iw_local_mac_ipaddr_entry_info {
u8 entry_idx;
};
-struct i40iw_cqp_manage_push_page_info {
- u32 push_idx;
- u16 qs_handle;
- u8 free_page;
-};
-
struct i40iw_qp_flush_info {
u16 sq_minor_code;
u16 sq_major_code;
@@ -1114,9 +1105,6 @@ struct i40iw_mr_ops {
};
struct i40iw_cqp_misc_ops {
- enum i40iw_status_code (*manage_push_page)(struct i40iw_sc_cqp *,
- struct i40iw_cqp_manage_push_page_info *,
- u64, bool);
enum i40iw_status_code (*manage_hmc_pm_func_table)(struct i40iw_sc_cqp *,
u64, u8, bool, bool);
enum i40iw_status_code (*set_hmc_resource_profile)(struct i40iw_sc_cqp *,
@@ -1254,12 +1242,6 @@ struct cqp_info {
} manage_vf_pble_bp;
struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_cqp_manage_push_page_info info;
- u64 scratch;
- } manage_push_page;
-
- struct {
struct i40iw_sc_dev *dev;
struct i40iw_upload_context_info info;
u64 scratch;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index 8afa5a67a86b..c3633c9944db 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -115,17 +115,6 @@ void i40iw_qp_post_wr(struct i40iw_qp_uk *qp)
}
/**
- * i40iw_qp_ring_push_db - ring qp doorbell
- * @qp: hw qp ptr
- * @wqe_idx: wqe index
- */
-static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx)
-{
- set_32bit_val(qp->push_db, 0, LS_32((wqe_idx >> 2), I40E_PFPE_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id);
- qp->initial_ring.head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
-}
-
-/**
* i40iw_qp_get_next_send_wqe - return next wqe ptr
* @qp: hw qp ptr
* @wqe_idx: return wqe index
@@ -426,7 +415,6 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
u64 *wqe;
u8 *dest, *src;
struct i40iw_inline_rdma_write *op_info;
- u64 *push;
u64 header = 0;
u32 wqe_idx;
enum i40iw_status_code ret_code;
@@ -453,7 +441,6 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
- LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
LS_64(read_fence, I40IWQPSQ_READFENCE) |
LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
@@ -475,14 +462,8 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
set_64bit_val(wqe, 24, header);
- if (qp->push_db) {
- push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
- memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
- i40iw_qp_ring_push_db(qp, wqe_idx);
- } else {
- if (post_sq)
- i40iw_qp_post_wr(qp);
- }
+ if (post_sq)
+ i40iw_qp_post_wr(qp);
return 0;
}
@@ -507,7 +488,6 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
enum i40iw_status_code ret_code;
bool read_fence = false;
u8 wqe_size;
- u64 *push;
op_info = &info->op.inline_send;
if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE)
@@ -526,7 +506,6 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
LS_64(info->op_type, I40IWQPSQ_OPCODE) |
LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
- LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
LS_64(read_fence, I40IWQPSQ_READFENCE) |
LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
@@ -548,14 +527,8 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
set_64bit_val(wqe, 24, header);
- if (qp->push_db) {
- push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
- memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
- i40iw_qp_ring_push_db(qp, wqe_idx);
- } else {
- if (post_sq)
- i40iw_qp_post_wr(qp);
- }
+ if (post_sq)
+ i40iw_qp_post_wr(qp);
return 0;
}
@@ -772,7 +745,6 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
q_type = (u8)RS_64(qword3, I40IW_CQ_SQ);
info->error = (bool)RS_64(qword3, I40IW_CQ_ERROR);
- info->push_dropped = (bool)RS_64(qword3, I40IWCQ_PSHDROP);
if (info->error) {
info->comp_status = I40IW_COMPL_STATUS_FLUSHED;
info->major_err = (bool)RS_64(qword3, I40IW_CQ_MAJERR);
@@ -951,7 +923,6 @@ enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth)
static const struct i40iw_qp_uk_ops iw_qp_uk_ops = {
.iw_qp_post_wr = i40iw_qp_post_wr,
- .iw_qp_ring_push_db = i40iw_qp_ring_push_db,
.iw_rdma_write = i40iw_rdma_write,
.iw_rdma_read = i40iw_rdma_read,
.iw_send = i40iw_send,
@@ -1009,11 +980,7 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
qp->wqe_alloc_reg = info->wqe_alloc_reg;
qp->qp_id = info->qp_id;
-
qp->sq_size = info->sq_size;
- qp->push_db = info->push_db;
- qp->push_wqe = info->push_wqe;
-
qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
sq_ring_size = qp->sq_size << sqshift;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index b125925641e0..93fc3081dd65 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -64,13 +64,11 @@ enum i40iw_device_capabilities_const {
I40IW_MAX_SGE_RD = 1,
I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647,
I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647,
- I40IW_MAX_PUSH_PAGE_COUNT = 4096,
I40IW_MAX_PE_ENABLED_VF_COUNT = 32,
I40IW_MAX_VF_FPM_ID = 47,
I40IW_MAX_VF_PER_PF = 127,
I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
I40IW_MAX_INLINE_DATA_SIZE = 48,
- I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48,
I40IW_MAX_IRD_SIZE = 64,
I40IW_MAX_ORD_SIZE = 127,
I40IW_MAX_WQ_ENTRIES = 2048,
@@ -272,7 +270,6 @@ struct i40iw_cq_poll_info {
u16 minor_err;
u8 op_type;
bool stag_invalid_set;
- bool push_dropped;
bool error;
bool is_srq;
bool solicited_event;
@@ -280,7 +277,6 @@ struct i40iw_cq_poll_info {
struct i40iw_qp_uk_ops {
void (*iw_qp_post_wr)(struct i40iw_qp_uk *);
- void (*iw_qp_ring_push_db)(struct i40iw_qp_uk *, u32);
enum i40iw_status_code (*iw_rdma_write)(struct i40iw_qp_uk *,
struct i40iw_post_sq_info *, bool);
enum i40iw_status_code (*iw_rdma_read)(struct i40iw_qp_uk *,
@@ -340,8 +336,6 @@ struct i40iw_qp_uk {
struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
u64 *rq_wrid_array;
u64 *shadow_area;
- u32 *push_db;
- u64 *push_wqe;
struct i40iw_ring sq_ring;
struct i40iw_ring rq_ring;
struct i40iw_ring initial_ring;
@@ -381,8 +375,6 @@ struct i40iw_qp_uk_init_info {
u64 *shadow_area;
struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
u64 *rq_wrid_array;
- u32 *push_db;
- u64 *push_wqe;
u32 qp_id;
u32 sq_size;
u32 rq_size;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 533f3caecb7a..65aedfe57e77 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -180,78 +180,6 @@ static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
}
/**
- * i40iw_alloc_push_page - allocate a push page for qp
- * @iwdev: iwarp device
- * @qp: hardware control qp
- */
-static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX)
- return;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return;
-
- atomic_inc(&cqp_request->refcount);
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
- cqp_info->post_sq = 1;
-
- cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
- cqp_info->in.u.manage_push_page.info.free_page = 0;
- cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
-
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (!status)
- qp->push_idx = cqp_request->compl_info.op_ret_val;
- else
- i40iw_pr_err("CQP-OP Push page fail");
- i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
-}
-
-/**
- * i40iw_dealloc_push_page - free a push page for qp
- * @iwdev: iwarp device
- * @qp: hardware control qp
- */
-static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX)
- return;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
- cqp_info->post_sq = 1;
-
- cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
- cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
- cqp_info->in.u.manage_push_page.info.free_page = 1;
- cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
-
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (!status)
- qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
- else
- i40iw_pr_err("CQP-OP Push page fail");
-}
-
-/**
* i40iw_alloc_pd - allocate protection domain
* @pd: PD pointer
* @udata: user data
@@ -348,7 +276,6 @@ void i40iw_free_qp_resources(struct i40iw_qp *iwqp)
u32 qp_num = iwqp->ibqp.qp_num;
i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
- i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
if (qp_num)
i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num);
if (iwpbl->pbl_allocated)
@@ -533,7 +460,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
return ERR_PTR(-ENODEV);
if (init_attr->create_flags)
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-EOPNOTSUPP);
if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
@@ -561,8 +488,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
qp = &iwqp->sc_qp;
qp->back_qp = (void *)iwqp;
- qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
-
iwqp->iwdev = iwdev;
iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
@@ -606,8 +531,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
err_code = -EOPNOTSUPP;
goto error;
}
- if (iwdev->push_mode)
- i40iw_alloc_push_page(iwdev, qp);
if (udata) {
err_code = ib_copy_from_udata(&req, udata, sizeof(req));
if (err_code) {
@@ -666,13 +589,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
ctx_info->iwarp_info_valid = true;
ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
- if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) {
- ctx_info->push_mode_en = false;
- } else {
- ctx_info->push_mode_en = true;
- ctx_info->push_idx = qp->push_idx;
- }
-
ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
(u64 *)iwqp->host_ctx.va,
ctx_info);
@@ -712,7 +628,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
uresp.actual_sq_size = sq_size;
uresp.actual_rq_size = rq_size;
uresp.qp_id = qp_num;
- uresp.push_idx = qp->push_idx;
+ uresp.push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (err_code) {
i40iw_pr_err("copy_to_udata failed\n");
@@ -832,6 +748,9 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
u32 err;
unsigned long flags;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
memset(&info, 0, sizeof(info));
ctx_info = &iwqp->ctx_info;
iwarp_info = &iwqp->iwarp_info;
@@ -1081,6 +1000,9 @@ static int i40iw_create_cq(struct ib_cq *ibcq,
int err_code;
int entries = attr->cqe;
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
if (iwdev->closing)
return -ENODEV;
@@ -2033,7 +1955,7 @@ static ssize_t hw_rev_show(struct device *dev,
rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev);
u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev;
- return sprintf(buf, "%x\n", hw_rev);
+ return sysfs_emit(buf, "%x\n", hw_rev);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -2043,7 +1965,7 @@ static DEVICE_ATTR_RO(hw_rev);
static ssize_t hca_type_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "I40IW\n");
+ return sysfs_emit(buf, "I40IW\n");
}
static DEVICE_ATTR_RO(hca_type);
@@ -2053,7 +1975,7 @@ static DEVICE_ATTR_RO(hca_type);
static ssize_t board_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
+ return sysfs_emit(buf, "%.*s\n", 32, "I40IW Board ID");
}
static DEVICE_ATTR_RO(board_id);
@@ -2661,27 +2583,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
iwibdev->ibdev.node_type = RDMA_NODE_RNIC;
ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr);
- iwibdev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND);
iwibdev->ibdev.phys_port_cnt = 1;
iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
iwibdev->ibdev.dev.parent = &pcidev->dev;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 8bd16474708f..f3ace85552f3 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1523,6 +1523,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
return;
} else
*slave_id = slave;
+ break;
default:
/* nothing */;
}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index cd0fba6b0964..e3cd402c079a 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2024,7 +2024,8 @@ static ssize_t hca_type_show(struct device *device,
{
struct mlx4_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
- return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
+
+ return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
static DEVICE_ATTR_RO(hca_type);
@@ -2033,7 +2034,8 @@ static ssize_t hw_rev_show(struct device *device,
{
struct mlx4_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
- return sprintf(buf, "%x\n", dev->dev->rev_id);
+
+ return sysfs_emit(buf, "%x\n", dev->dev->rev_id);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -2043,8 +2045,7 @@ static ssize_t board_id_show(struct device *device,
struct mlx4_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
- return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
- dev->dev->board_id);
+ return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id);
}
static DEVICE_ATTR_RO(board_id);
@@ -2264,10 +2265,7 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
u64 release_mac = MLX4_IB_INVALID_MAC;
struct mlx4_ib_qp *qp;
- read_lock(&dev_base_lock);
new_smac = mlx4_mac_to_u64(dev->dev_addr);
- read_unlock(&dev_base_lock);
-
atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
/* no need for update QP1 and mac registration in non-SRIOV */
@@ -2657,73 +2655,25 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev;
- ibdev->ib_dev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_REREG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
- (1ull << IB_USER_VERBS_CMD_OPEN_QP);
-
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops);
- ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) ||
(mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
- IB_LINK_LAYER_ETHERNET))) {
- ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ IB_LINK_LAYER_ETHERNET)))
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops);
- }
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
- dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
- ibdev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops);
- }
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
- ibdev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops);
}
if (check_flow_steering_support(dev)) {
ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
- ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops);
}
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 5e4ec9786081..33f525b744f2 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -988,53 +988,63 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
}
static ssize_t sysfs_show_group(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
struct mcast_group *group =
container_of(attr, struct mcast_group, dentry);
struct mcast_req *req = NULL;
- char pending_str[40];
char state_str[40];
- ssize_t len = 0;
- int f;
+ char pending_str[40];
+ int len;
+ int i;
+ u32 hoplimit;
if (group->state == MCAST_IDLE)
- sprintf(state_str, "%s", get_state_string(group->state));
+ scnprintf(state_str, sizeof(state_str), "%s",
+ get_state_string(group->state));
else
- sprintf(state_str, "%s(TID=0x%llx)",
- get_state_string(group->state),
- be64_to_cpu(group->last_req_tid));
+ scnprintf(state_str, sizeof(state_str), "%s(TID=0x%llx)",
+ get_state_string(group->state),
+ be64_to_cpu(group->last_req_tid));
+
if (list_empty(&group->pending_list)) {
- sprintf(pending_str, "No");
+ scnprintf(pending_str, sizeof(pending_str), "No");
} else {
- req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
- sprintf(pending_str, "Yes(TID=0x%llx)",
- be64_to_cpu(req->sa_mad.mad_hdr.tid));
+ req = list_first_entry(&group->pending_list, struct mcast_req,
+ group_list);
+ scnprintf(pending_str, sizeof(pending_str), "Yes(TID=0x%llx)",
+ be64_to_cpu(req->sa_mad.mad_hdr.tid));
}
- len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
- group->rec.scope_join_state & 0xf,
- group->members[2], group->members[1], group->members[0],
- atomic_read(&group->refcount),
- pending_str,
- state_str);
- for (f = 0; f < MAX_VFS; ++f)
- if (group->func[f].state == MCAST_MEMBER)
- len += sprintf(buf + len, "%d[%1x] ",
- f, group->func[f].join_state);
-
- len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x "
- "%4x %4x %2x %2x)\n",
- be16_to_cpu(group->rec.pkey),
- be32_to_cpu(group->rec.qkey),
- (group->rec.mtusel_mtu & 0xc0) >> 6,
- group->rec.mtusel_mtu & 0x3f,
- group->rec.tclass,
- (group->rec.ratesel_rate & 0xc0) >> 6,
- group->rec.ratesel_rate & 0x3f,
- (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28,
- (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8,
- be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff,
- group->rec.proxy_join);
+
+ len = sysfs_emit(buf, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
+ group->rec.scope_join_state & 0xf,
+ group->members[2],
+ group->members[1],
+ group->members[0],
+ atomic_read(&group->refcount),
+ pending_str,
+ state_str);
+
+ for (i = 0; i < MAX_VFS; i++) {
+ if (group->func[i].state == MCAST_MEMBER)
+ len += sysfs_emit_at(buf, len, "%d[%1x] ", i,
+ group->func[i].join_state);
+ }
+
+ hoplimit = be32_to_cpu(group->rec.sl_flowlabel_hoplimit);
+ len += sysfs_emit_at(buf, len,
+ "\t\t(%4hx %4x %2x %2x %2x %2x %2x %4x %4x %2x %2x)\n",
+ be16_to_cpu(group->rec.pkey),
+ be32_to_cpu(group->rec.qkey),
+ (group->rec.mtusel_mtu & 0xc0) >> 6,
+ (group->rec.mtusel_mtu & 0x3f),
+ group->rec.tclass,
+ (group->rec.ratesel_rate & 0xc0) >> 6,
+ (group->rec.ratesel_rate & 0x3f),
+ (hoplimit & 0xf0000000) >> 28,
+ (hoplimit & 0x0fffff00) >> 8,
+ (hoplimit & 0x000000ff),
+ group->rec.proxy_join);
return len;
}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 58df06492d69..78c9bb79ec75 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -908,10 +908,10 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach);
-int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
- u64 start, u64 length, u64 virt_addr,
- int mr_access_flags, struct ib_pd *pd,
- struct ib_udata *udata);
+struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
const struct ib_gid_attr *attr);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 426fed005d53..50becc0e4b62 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -456,10 +456,10 @@ err_free:
return ERR_PTR(err);
}
-int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
- u64 start, u64 length, u64 virt_addr,
- int mr_access_flags, struct ib_pd *pd,
- struct ib_udata *udata)
+struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(mr->device);
struct mlx4_ib_mr *mmr = to_mmr(mr);
@@ -472,9 +472,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
* race exists.
*/
err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
-
if (err)
- return err;
+ return ERR_PTR(err);
if (flags & IB_MR_REREG_PD) {
err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
@@ -542,8 +541,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
release_mpt_entry:
mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
-
- return err;
+ if (err)
+ return ERR_PTR(err);
+ return NULL;
}
static int
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5cb8e602294c..651785bd57f2 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1493,7 +1493,7 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp,
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
MLX4_IB_QP_CREATE_ROCE_V2_GSI))
- return -EINVAL;
+ return -EOPNOTSUPP;
if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
if (init_attr->qp_type != IB_QPT_UD)
@@ -1561,6 +1561,11 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp,
if (err)
return err;
+ if (init_attr->create_flags &
+ (MLX4_IB_SRIOV_SQP | MLX4_IB_SRIOV_TUNNEL_QP))
+ /* Internal QP created with ib_create_qp */
+ rdma_restrack_no_track(&qp->ibqp.res);
+
qp->port = init_attr->port_num;
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
@@ -2787,6 +2792,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
int ret;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
@@ -4007,7 +4015,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
qp_attr->qp_access_flags =
to_ib_qp_access_flags(be32_to_cpu(context.params2));
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ qp->ibqp.qp_type == IB_QPT_XRC_TGT) {
to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index bf618529e734..6a381751c0d8 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -86,6 +86,10 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq,
int err;
int i;
+ if (init_attr->srq_type != IB_SRQT_BASIC &&
+ init_attr->srq_type != IB_SRQT_XRC)
+ return -EOPNOTSUPP;
+
/* Sanity check SRQ size before proceeding */
if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index ea1f3a081b05..1b5891130aab 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -56,7 +56,7 @@ static ssize_t show_admin_alias_guid(struct device *dev,
mlx4_ib_iov_dentry->entry_num,
port->num);
- return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
+ return sysfs_emit(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
}
/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
@@ -117,22 +117,24 @@ static ssize_t show_port_gid(struct device *dev,
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
union ib_gid gid;
- ssize_t ret;
+ int ret;
+ __be16 *raw;
ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num,
mlx4_ib_iov_dentry->entry_num, &gid, 1);
if (ret)
return ret;
- ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) gid.raw)[0]),
- be16_to_cpu(((__be16 *) gid.raw)[1]),
- be16_to_cpu(((__be16 *) gid.raw)[2]),
- be16_to_cpu(((__be16 *) gid.raw)[3]),
- be16_to_cpu(((__be16 *) gid.raw)[4]),
- be16_to_cpu(((__be16 *) gid.raw)[5]),
- be16_to_cpu(((__be16 *) gid.raw)[6]),
- be16_to_cpu(((__be16 *) gid.raw)[7]));
- return ret;
+
+ raw = (__be16 *)gid.raw;
+ return sysfs_emit(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(raw[0]),
+ be16_to_cpu(raw[1]),
+ be16_to_cpu(raw[2]),
+ be16_to_cpu(raw[3]),
+ be16_to_cpu(raw[4]),
+ be16_to_cpu(raw[5]),
+ be16_to_cpu(raw[6]),
+ be16_to_cpu(raw[7]));
}
static ssize_t show_phys_port_pkey(struct device *dev,
@@ -151,7 +153,7 @@ static ssize_t show_phys_port_pkey(struct device *dev,
if (ret)
return ret;
- return sprintf(buf, "0x%04x\n", pkey);
+ return sysfs_emit(buf, "0x%04x\n", pkey);
}
#define DENTRY_REMOVE(_dentry) \
@@ -441,16 +443,12 @@ static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
- ssize_t ret = -ENODEV;
-
- if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >=
- (p->dev->dev->caps.pkey_table_len[p->port_num]))
- ret = sprintf(buf, "none\n");
- else
- ret = sprintf(buf, "%d\n",
- p->dev->pkeys.virt2phys_pkey[p->slave]
- [p->port_num - 1][tab_attr->index]);
- return ret;
+ struct pkey_mgt *m = &p->dev->pkeys;
+ u8 key = m->virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index];
+
+ if (key >= p->dev->dev->caps.pkey_table_len[p->port_num])
+ return sysfs_emit(buf, "none\n");
+ return sysfs_emit(buf, "%d\n", key);
}
static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
@@ -488,7 +486,7 @@ static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
static ssize_t show_port_gid_idx(struct mlx4_port *p,
struct port_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", p->slave);
+ return sysfs_emit(buf, "%d\n", p->slave);
}
static struct attribute **
@@ -542,14 +540,10 @@ static ssize_t sysfs_show_smi_enabled(struct device *dev,
{
struct mlx4_port *p =
container_of(attr, struct mlx4_port, smi_enabled);
- ssize_t len = 0;
- if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num))
- len = sprintf(buf, "%d\n", 1);
- else
- len = sprintf(buf, "%d\n", 0);
-
- return len;
+ return sysfs_emit(buf, "%d\n",
+ !!mlx4_vf_smi_enabled(p->dev->dev, p->slave,
+ p->port_num));
}
static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
@@ -558,14 +552,10 @@ static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
{
struct mlx4_port *p =
container_of(attr, struct mlx4_port, enable_smi_admin);
- ssize_t len = 0;
-
- if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num))
- len = sprintf(buf, "%d\n", 1);
- else
- len = sprintf(buf, "%d\n", 0);
- return len;
+ return sysfs_emit(buf, "%d\n",
+ !!mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave,
+ p->port_num));
}
static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index fb62f1d04afa..eb92cefffd77 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -707,10 +707,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
int *cqe_size, int *index, int *inlen)
{
struct mlx5_ib_create_cq ucmd = {};
+ unsigned long page_size;
+ unsigned int page_offset_quantized;
size_t ucmdlen;
- int page_shift;
__be64 *pas;
- int npages;
int ncont;
void *cqc;
int err;
@@ -742,14 +742,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
return err;
}
+ page_size = mlx5_umem_find_best_cq_quantized_pgoff(
+ cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
+
err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db);
if (err)
goto err_umem;
- mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift,
- &ncont, NULL);
- mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
- ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
+ ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size);
+ mlx5_ib_dbg(
+ dev,
+ "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n",
+ ucmd.buf_addr, entries * ucmd.cqe_size,
+ ib_umem_num_pages(cq->buf.umem), page_size, ncont);
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
@@ -760,11 +770,12 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
}
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
- mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);
+ mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0);
cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
- page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
*index = ucmd.uar_page_index;
@@ -1128,13 +1139,12 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
}
static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
- int entries, struct ib_udata *udata, int *npas,
- int *page_shift, int *cqe_size)
+ int entries, struct ib_udata *udata,
+ int *cqe_size)
{
struct mlx5_ib_resize_cq ucmd;
struct ib_umem *umem;
int err;
- int npages;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (err)
@@ -1155,9 +1165,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
return err;
}
- mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift,
- npas, NULL);
-
cq->resize_umem = umem;
*cqe_size = ucmd.cqe_size;
@@ -1250,7 +1257,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
int err;
int npas;
__be64 *pas;
- int page_shift;
+ unsigned int page_offset_quantized = 0;
+ unsigned int page_shift;
int inlen;
int cqe_size;
unsigned long flags;
@@ -1277,22 +1285,34 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mutex_lock(&cq->resize_mutex);
if (udata) {
- err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
- &cqe_size);
+ unsigned long page_size;
+
+ err = resize_user(dev, cq, entries, udata, &cqe_size);
+ if (err)
+ goto ex;
+
+ page_size = mlx5_umem_find_best_cq_quantized_pgoff(
+ cq->resize_umem, cqc, log_page_size,
+ MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto ex_resize;
+ }
+ npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size);
+ page_shift = order_base_2(page_size);
} else {
+ struct mlx5_frag_buf *frag_buf;
+
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
- if (!err) {
- struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf;
-
- npas = frag_buf->npages;
- page_shift = frag_buf->page_shift;
- }
+ if (err)
+ goto ex;
+ frag_buf = &cq->resize_buf->frag_buf;
+ npas = frag_buf->npages;
+ page_shift = frag_buf->page_shift;
}
- if (err)
- goto ex;
-
inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
@@ -1304,8 +1324,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
if (udata)
- mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
- pas, 0);
+ mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas,
+ 0);
else
mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas);
@@ -1319,6 +1339,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
MLX5_SET(cqc, cqc, log_page_size,
page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
MLX5_SET(cqc, cqc, cqe_sz,
cqe_sz_to_mlx_sz(cqe_size,
cq->private_flags &
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 9e3d8b826498..819c142857d6 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -93,9 +93,6 @@ struct devx_async_event_file {
struct devx_umem {
struct mlx5_core_dev *mdev;
struct ib_umem *umem;
- u32 page_offset;
- int page_shift;
- int ncont;
u32 dinlen;
u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
};
@@ -1311,7 +1308,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
else
ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
obj->dinlen, out, sizeof(out));
- if (ib_is_destroy_retryable(ret, why, uobject))
+ if (ret)
return ret;
devx_event_table = &dev->devx_event_table;
@@ -2057,9 +2054,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
u64 addr;
size_t size;
u32 access;
- int npages;
int err;
- u32 page_mask;
if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
@@ -2073,57 +2068,62 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
if (err)
return err;
- err = ib_check_mr_access(access);
+ err = ib_check_mr_access(&dev->ib_dev, access);
if (err)
return err;
obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access);
if (IS_ERR(obj->umem))
return PTR_ERR(obj->umem);
-
- mlx5_ib_cont_pages(obj->umem, obj->umem->address,
- MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
- &obj->page_shift, &obj->ncont, NULL);
-
- if (!npages) {
- ib_umem_release(obj->umem);
- return -EINVAL;
- }
-
- page_mask = (1 << obj->page_shift) - 1;
- obj->page_offset = obj->umem->address & page_mask;
-
return 0;
}
-static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
+static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev,
+ struct uverbs_attr_bundle *attrs,
struct devx_umem *obj,
struct devx_umem_reg_cmd *cmd)
{
+ unsigned int page_size;
+ __be64 *mtt;
+ void *umem;
+
+ /*
+ * We don't know what the user intends to use this umem for, but the HW
+ * restrictions must be met. MR, doorbell records, QP, WQ and CQ all
+ * have different requirements. Since we have no idea how to sort this
+ * out, only support PAGE_SIZE with the expectation that userspace will
+ * provide the necessary alignments inside the known PAGE_SIZE and that
+ * FW will check everything.
+ */
+ page_size = ib_umem_find_best_pgoff(
+ obj->umem, PAGE_SIZE,
+ __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset),
+ 0));
+ if (!page_size)
+ return -EINVAL;
+
cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
- (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
+ (MLX5_ST_SZ_BYTES(mtt) *
+ ib_umem_num_dma_blocks(obj->umem, page_size));
cmd->in = uverbs_zalloc(attrs, cmd->inlen);
- return PTR_ERR_OR_ZERO(cmd->in);
-}
-
-static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
- struct devx_umem *obj,
- struct devx_umem_reg_cmd *cmd)
-{
- void *umem;
- __be64 *mtt;
+ if (IS_ERR(cmd->in))
+ return PTR_ERR(cmd->in);
umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
- MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
- MLX5_SET(umem, umem, log_page_size, obj->page_shift -
- MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(umem, umem, page_offset, obj->page_offset);
- mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
+ MLX5_SET64(umem, umem, num_of_mtt,
+ ib_umem_num_dma_blocks(obj->umem, page_size));
+ MLX5_SET(umem, umem, log_page_size,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(umem, umem, page_offset,
+ ib_umem_dma_offset(obj->umem, page_size));
+
+ mlx5_ib_populate_pas(obj->umem, page_size, mtt,
(obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
- MLX5_IB_MTT_READ);
+ MLX5_IB_MTT_READ);
+ return 0;
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
@@ -2150,12 +2150,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
if (err)
goto err_obj_free;
- err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
+ err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd);
if (err)
goto err_umem_release;
- devx_umem_reg_cmd_build(dev, obj, &cmd);
-
MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
sizeof(cmd.out));
@@ -2187,7 +2185,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
int err;
err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
- if (ib_is_destroy_retryable(err, why, uobject))
+ if (err)
return err;
ib_umem_release(obj->umem);
@@ -2600,8 +2598,8 @@ static const struct file_operations devx_async_event_fops = {
.llseek = no_llseek,
};
-static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
{
struct devx_async_cmd_event_file *comp_ev_file =
container_of(uobj, struct devx_async_cmd_event_file,
@@ -2623,11 +2621,10 @@ static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
kvfree(entry);
}
spin_unlock_irq(&comp_ev_file->ev_queue.lock);
- return 0;
};
-static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+static void devx_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
{
struct devx_async_event_file *ev_file =
container_of(uobj, struct devx_async_event_file,
@@ -2671,7 +2668,6 @@ static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
mutex_unlock(&dev->devx_event_table.event_xa_lock);
put_device(&dev->ib_dev.dev);
- return 0;
};
DECLARE_UVERBS_NAMED_METHOD(
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 492cfe063bca..25da0b05b4e2 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -2035,11 +2035,9 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_flow_matcher *obj = uobject->object;
- int ret;
- ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&obj->usecnt))
+ return -EBUSY;
kfree(obj);
return 0;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 55545f1286e5..3bae9ba0ead8 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -75,12 +75,6 @@ static LIST_HEAD(mlx5_ib_dev_list);
*/
static DEFINE_MUTEX(mlx5_ib_multiport_mutex);
-/* We can't use an array for xlt_emergency_page because dma_map_single
- * doesn't work on kernel modules memory
- */
-static unsigned long xlt_emergency_page;
-static struct mutex xlt_emergency_page_mutex;
-
struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
{
struct mlx5_ib_dev *dev;
@@ -425,10 +419,22 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
*active_width = IB_WIDTH_2X;
*active_speed = IB_SPEED_HDR;
break;
+ case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_NDR;
+ break;
case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4):
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_HDR;
break;
+ case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2):
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_NDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_NDR;
+ break;
default:
return -EINVAL;
}
@@ -2628,7 +2634,7 @@ static ssize_t fw_pages_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
+ return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages);
}
static DEVICE_ATTR_RO(fw_pages);
@@ -2638,7 +2644,7 @@ static ssize_t reg_pages_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
+ return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
static DEVICE_ATTR_RO(reg_pages);
@@ -2648,7 +2654,7 @@ static ssize_t hca_type_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
+ return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device);
}
static DEVICE_ATTR_RO(hca_type);
@@ -2658,7 +2664,7 @@ static ssize_t hw_rev_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%x\n", dev->mdev->rev_id);
+ return sysfs_emit(buf, "%x\n", dev->mdev->rev_id);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -2668,8 +2674,8 @@ static ssize_t board_id_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
- dev->mdev->board_id);
+ return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
+ dev->mdev->board_id);
}
static DEVICE_ATTR_RO(board_id);
@@ -4024,6 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.create_cq = mlx5_ib_create_cq,
.create_qp = mlx5_ib_create_qp,
.create_srq = mlx5_ib_create_srq,
+ .create_user_ah = mlx5_ib_create_ah,
.dealloc_pd = mlx5_ib_dealloc_pd,
.dealloc_ucontext = mlx5_ib_dealloc_ucontext,
.del_gid = mlx5_ib_del_gid,
@@ -4141,42 +4148,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
struct mlx5_core_dev *mdev = dev->mdev;
int err;
- dev->ib_dev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_REREG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
- (1ull << IB_USER_VERBS_CMD_OPEN_QP);
- dev->ib_dev.uverbs_ex_cmd_mask =
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
-
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
ib_set_device_ops(&dev->ib_dev,
@@ -4187,19 +4158,11 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
- if (MLX5_CAP_GEN(mdev, imaicl)) {
- dev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ if (MLX5_CAP_GEN(mdev, imaicl))
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops);
- }
- if (MLX5_CAP_GEN(mdev, xrc)) {
- dev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ if (MLX5_CAP_GEN(mdev, xrc))
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops);
- }
if (MLX5_CAP_DEV_MEM(mdev, memic) ||
MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
@@ -4278,12 +4241,6 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
@@ -4878,30 +4835,17 @@ static struct auxiliary_driver mlx5r_driver = {
.id_table = mlx5r_id_table,
};
-unsigned long mlx5_ib_get_xlt_emergency_page(void)
-{
- mutex_lock(&xlt_emergency_page_mutex);
- return xlt_emergency_page;
-}
-
-void mlx5_ib_put_xlt_emergency_page(void)
-{
- mutex_unlock(&xlt_emergency_page_mutex);
-}
-
static int __init mlx5_ib_init(void)
{
int ret;
- xlt_emergency_page = __get_free_page(GFP_KERNEL);
+ xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL);
if (!xlt_emergency_page)
return -ENOMEM;
- mutex_init(&xlt_emergency_page_mutex);
-
mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0);
if (!mlx5_ib_event_wq) {
- free_page(xlt_emergency_page);
+ free_page((unsigned long)xlt_emergency_page);
return -ENOMEM;
}
@@ -4934,8 +4878,7 @@ static void __exit mlx5_ib_cleanup(void)
mlx5r_rep_cleanup();
destroy_workqueue(mlx5_ib_event_wq);
- mutex_destroy(&xlt_emergency_page_mutex);
- free_page(xlt_emergency_page);
+ free_page((unsigned long)xlt_emergency_page);
}
module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 13de3d2edd34..844545064c9e 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -36,161 +36,65 @@
#include "mlx5_ib.h"
#include <linux/jiffies.h>
-/* @umem: umem object to scan
- * @addr: ib virtual address requested by the user
- * @max_page_shift: high limit for page_shift - 0 means no limit
- * @count: number of PAGE_SIZE pages covered by umem
- * @shift: page shift for the compound pages found in the region
- * @ncont: number of compund pages
- * @order: log2 of the number of compound pages
+/*
+ * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
+ * filled in the pas array.
*/
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
- unsigned long max_page_shift,
- int *count, int *shift,
- int *ncont, int *order)
+void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
+ u64 access_flags)
{
- unsigned long tmp;
- unsigned long m;
- u64 base = ~0, p = 0;
- u64 len, pfn;
- int i = 0;
- struct scatterlist *sg;
- int entry;
-
- addr = addr >> PAGE_SHIFT;
- tmp = (unsigned long)addr;
- m = find_first_bit(&tmp, BITS_PER_LONG);
- if (max_page_shift)
- m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> PAGE_SHIFT;
- pfn = sg_dma_address(sg) >> PAGE_SHIFT;
- if (base + p != pfn) {
- /* If either the offset or the new
- * base are unaligned update m
- */
- tmp = (unsigned long)(pfn | p);
- if (!IS_ALIGNED(tmp, 1 << m))
- m = find_first_bit(&tmp, BITS_PER_LONG);
-
- base = pfn;
- p = 0;
- }
-
- p += len;
- i += len;
- }
-
- if (i) {
- m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
-
- if (order)
- *order = ilog2(roundup_pow_of_two(i) >> m);
-
- *ncont = DIV_ROUND_UP(i, (1 << m));
- } else {
- m = 0;
+ struct ib_block_iter biter;
- if (order)
- *order = 0;
-
- *ncont = 0;
+ rdma_umem_for_each_dma_block (umem, &biter, page_size) {
+ *pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+ access_flags);
+ pas++;
}
- *shift = PAGE_SHIFT + m;
- *count = i;
}
/*
- * Populate the given array with bus addresses from the umem.
- *
- * dev - mlx5_ib device
- * umem - umem to use to fill the pages
- * page_shift - determines the page size used in the resulting array
- * offset - offset into the umem to start from,
- * only implemented for ODP umems
- * num_pages - total number of pages to fill
- * pas - bus addresses array to fill
- * access_flags - access flags to set on all present pages.
- use enum mlx5_ib_mtt_access_flags for this.
+ * Compute the page shift and page_offset for mailboxes that use a quantized
+ * page_offset. The granulatity of the page offset scales according to page
+ * size.
*/
-void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, size_t offset, size_t num_pages,
- __be64 *pas, int access_flags)
+unsigned long __mlx5_umem_find_best_quantized_pgoff(
+ struct ib_umem *umem, unsigned long pgsz_bitmap,
+ unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale,
+ unsigned int *page_offset_quantized)
{
- int shift = page_shift - PAGE_SHIFT;
- int mask = (1 << shift) - 1;
- int i, k, idx;
- u64 cur = 0;
- u64 base;
- int len;
- struct scatterlist *sg;
- int entry;
-
- i = 0;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> PAGE_SHIFT;
- base = sg_dma_address(sg);
-
- /* Skip elements below offset */
- if (i + len < offset << shift) {
- i += len;
- continue;
- }
-
- /* Skip pages below offset */
- if (i < offset << shift) {
- k = (offset << shift) - i;
- i = offset << shift;
- } else {
- k = 0;
- }
-
- for (; k < len; k++) {
- if (!(i & mask)) {
- cur = base + (k << PAGE_SHIFT);
- cur |= access_flags;
- idx = (i >> shift) - offset;
-
- pas[idx] = cpu_to_be64(cur);
- mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
- i >> shift, be64_to_cpu(pas[idx]));
- }
- i++;
-
- /* Stop after num_pages reached */
- if (i >> shift >= offset + num_pages)
- return;
- }
+ const u64 page_offset_mask = (1UL << page_offset_bits) - 1;
+ unsigned long page_size;
+ u64 page_offset;
+
+ page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask);
+ if (!page_size)
+ return 0;
+
+ /*
+ * page size is the largest possible page size.
+ *
+ * Reduce the page_size, and thus the page_offset and quanta, until the
+ * page_offset fits into the mailbox field. Once page_size < scale this
+ * loop is guaranteed to terminate.
+ */
+ page_offset = ib_umem_dma_offset(umem, page_size);
+ while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) {
+ page_size /= 2;
+ page_offset = ib_umem_dma_offset(umem, page_size);
}
-}
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, __be64 *pas, int access_flags)
-{
- return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
- ib_umem_num_dma_blocks(umem, PAGE_SIZE),
- pas, access_flags);
-}
-int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
-{
- u64 page_size;
- u64 page_mask;
- u64 off_size;
- u64 off_mask;
- u64 buf_off;
-
- page_size = (u64)1 << page_shift;
- page_mask = page_size - 1;
- buf_off = addr & page_mask;
- off_size = page_size >> 6;
- off_mask = off_size - 1;
-
- if (buf_off & off_mask)
- return -EINVAL;
-
- *offset = buf_off >> ilog2(off_size);
- return 0;
+ /*
+ * The address is not aligned, or otherwise cannot be represented by the
+ * page_offset.
+ */
+ if (!(pgsz_bitmap & page_size))
+ return 0;
+
+ *page_offset_quantized =
+ (unsigned long)page_offset / (page_size / scale);
+ if (WARN_ON(*page_offset_quantized > page_offset_mask))
+ return 0;
+ return page_size;
}
#define WR_ID_BF 0xBF
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index ea5243815cf6..b0fdc1b08e06 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -40,7 +40,73 @@
#define MLX5_IB_DEFAULT_UIDX 0xffffff
#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
-#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
+static __always_inline unsigned long
+__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits,
+ unsigned int pgsz_shift)
+{
+ unsigned int largest_pg_shift =
+ min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift,
+ BITS_PER_LONG - 1);
+
+ /*
+ * Despite a command allowing it, the device does not support lower than
+ * 4k page size.
+ */
+ pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift);
+ return GENMASK(largest_pg_shift, pgsz_shift);
+}
+
+/*
+ * For mkc users, instead of a page_offset the command has a start_iova which
+ * specifies both the page_offset and the on-the-wire IOVA
+ */
+#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova) \
+ ib_umem_find_best_pgsz(umem, \
+ __mlx5_log_page_size_to_bitmap( \
+ __mlx5_bit_sz(typ, log_pgsz_fld), \
+ pgsz_shift), \
+ iova)
+
+static __always_inline unsigned long
+__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits,
+ unsigned int offset_shift)
+{
+ unsigned int largest_offset_shift =
+ min_t(unsigned long, page_offset_bits - 1 + offset_shift,
+ BITS_PER_LONG - 1);
+
+ return GENMASK(largest_offset_shift, offset_shift);
+}
+
+/*
+ * QP/CQ/WQ/etc type commands take a page offset that satisifies:
+ * page_offset_quantized * (page_size/scale) = page_offset
+ * Which restricts allowed page sizes to ones that satisify the above.
+ */
+unsigned long __mlx5_umem_find_best_quantized_pgoff(
+ struct ib_umem *umem, unsigned long pgsz_bitmap,
+ unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale,
+ unsigned int *page_offset_quantized);
+#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \
+ pgsz_shift, page_offset_fld, \
+ scale, page_offset_quantized) \
+ __mlx5_umem_find_best_quantized_pgoff( \
+ umem, \
+ __mlx5_log_page_size_to_bitmap( \
+ __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \
+ __mlx5_bit_sz(typ, page_offset_fld), \
+ GENMASK(31, order_base_2(scale)), scale, \
+ page_offset_quantized)
+
+#define mlx5_umem_find_best_cq_quantized_pgoff(umem, typ, log_pgsz_fld, \
+ pgsz_shift, page_offset_fld, \
+ scale, page_offset_quantized) \
+ __mlx5_umem_find_best_quantized_pgoff( \
+ umem, \
+ __mlx5_log_page_size_to_bitmap( \
+ __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \
+ __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \
+ page_offset_quantized)
enum {
MLX5_IB_MMAP_OFFSET_START = 9,
@@ -597,14 +663,12 @@ struct mlx5_ib_mr {
int max_descs;
int desc_size;
int access_mode;
+ unsigned int page_shift;
struct mlx5_core_mkey mmkey;
struct ib_umem *umem;
struct mlx5_shared_mr_info *smr_info;
struct list_head list;
- unsigned int order;
struct mlx5_cache_ent *cache_ent;
- int npages;
- struct mlx5_ib_dev *dev;
u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
struct mlx5_core_sig_ctx *sig;
void *descs_alloc;
@@ -1042,6 +1106,11 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
}
+static inline struct mlx5_ib_dev *mr_to_mdev(struct mlx5_ib_mr *mr)
+{
+ return to_mdev(mr->ibmr.device);
+}
+
static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata)
{
struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
@@ -1189,9 +1258,9 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr);
-int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
- u64 length, u64 virt_addr, int access_flags,
- struct ib_pd *pd, struct ib_udata *udata);
+struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+ u64 length, u64 virt_addr, int access_flags,
+ struct ib_pd *pd, struct ib_udata *udata);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
@@ -1210,7 +1279,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
size_t *out_mad_size, u16 *out_mad_pkey_index);
int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
-int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
struct ib_smp *out_mad);
@@ -1230,15 +1298,8 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
- unsigned long max_page_shift,
- int *count, int *shift,
- int *ncont, int *order);
-void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, size_t offset, size_t num_pages,
- __be64 *pas, int access_flags);
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, __be64 *pas, int access_flags);
+void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
+ u64 access_flags);
void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
@@ -1283,7 +1344,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge);
-int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable);
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@@ -1305,7 +1366,7 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
{
return -EOPNOTSUPP;
}
-static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
return -EOPNOTSUPP;
}
@@ -1456,8 +1517,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;
}
-unsigned long mlx5_ib_get_xlt_emergency_page(void);
-void mlx5_ib_put_xlt_emergency_page(void);
+extern void *xlt_emergency_page;
int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index b261797b258f..24f8d59a42ea 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -41,6 +41,13 @@
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"
+/*
+ * We can't use an array for xlt_emergency_page because dma_map_single doesn't
+ * work on kernel modules memory
+ */
+void *xlt_emergency_page;
+static DEFINE_MUTEX(xlt_emergency_page_mutex);
+
enum {
MAX_PENDING_REG_MR = 8,
};
@@ -49,6 +56,9 @@ enum {
static void
create_mkey_callback(int status, struct mlx5_async_work *context);
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ unsigned int page_size, bool populate);
static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
struct ib_pd *pd)
@@ -123,19 +133,12 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
-static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start,
- u64 length)
-{
- return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
- length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
-}
-
static void create_mkey_callback(int status, struct mlx5_async_work *context)
{
struct mlx5_ib_mr *mr =
container_of(context, struct mlx5_ib_mr, cb_work);
- struct mlx5_ib_dev *dev = mr->dev;
struct mlx5_cache_ent *ent = mr->cache_ent;
+ struct mlx5_ib_dev *dev = ent->dev;
unsigned long flags;
if (status) {
@@ -172,9 +175,7 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return NULL;
- mr->order = ent->order;
mr->cache_ent = ent;
- mr->dev = ent->dev;
set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
MLX5_SET(mkc, mkc, free, 1);
@@ -642,6 +643,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
if (mlx5_mr_cache_invalidate(mr)) {
detach_mr_from_cache(mr);
destroy_mkey(dev, mr);
+ kfree(mr);
return;
}
@@ -867,57 +869,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
return MLX5_MAX_UMR_SHIFT;
}
-static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length,
- int access_flags, struct ib_umem **umem, int *npages,
- int *page_shift, int *ncont, int *order)
-{
- struct ib_umem *u;
-
- *umem = NULL;
-
- if (access_flags & IB_ACCESS_ON_DEMAND) {
- struct ib_umem_odp *odp;
-
- odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
- &mlx5_mn_ops);
- if (IS_ERR(odp)) {
- mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
- PTR_ERR(odp));
- return PTR_ERR(odp);
- }
-
- u = &odp->umem;
-
- *page_shift = odp->page_shift;
- *ncont = ib_umem_odp_num_pages(odp);
- *npages = *ncont << (*page_shift - PAGE_SHIFT);
- if (order)
- *order = ilog2(roundup_pow_of_two(*ncont));
- } else {
- u = ib_umem_get(&dev->ib_dev, start, length, access_flags);
- if (IS_ERR(u)) {
- mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
- return PTR_ERR(u);
- }
-
- mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
- page_shift, ncont, order);
- }
-
- if (!*npages) {
- mlx5_ib_warn(dev, "avoid zero region\n");
- ib_umem_release(u);
- return -EINVAL;
- }
-
- *umem = u;
-
- mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
- *npages, *ncont, *order, *page_shift);
-
- return 0;
-}
-
static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct mlx5_ib_umr_context *context =
@@ -974,25 +925,49 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
return &cache->ent[order];
}
-static struct mlx5_ib_mr *
-alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
- u64 len, int npages, int page_shift, unsigned int order,
- int access_flags)
+static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ u64 length, int access_flags)
+{
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+ mr->ibmr.length = length;
+ mr->ibmr.device = &dev->ib_dev;
+ mr->access_flags = access_flags;
+}
+
+static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
+ struct ib_umem *umem, u64 iova,
+ int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
+ struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
+ unsigned int page_size;
- if (!ent)
- return ERR_PTR(-E2BIG);
-
- /* Matches access in alloc_cache_mr() */
- if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
- return ERR_PTR(-EOPNOTSUPP);
+ page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
+ if (WARN_ON(!page_size))
+ return ERR_PTR(-EINVAL);
+ ent = mr_cache_ent_from_order(
+ dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
+ /*
+ * Matches access in alloc_cache_mr(). If the MR can't come from the
+ * cache then synchronously create an uncached one.
+ */
+ if (!ent || ent->limit == 0 ||
+ !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
+ mutex_lock(&dev->slow_path_mutex);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, false);
+ mutex_unlock(&dev->slow_path_mutex);
+ return mr;
+ }
mr = get_cache_mr(ent);
if (!mr) {
mr = create_cache_mr(ent);
+ /*
+ * The above already tried to do the same stuff as reg_create(),
+ * no reason to try it again.
+ */
if (IS_ERR(mr))
return mr;
}
@@ -1001,9 +976,12 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
mr->umem = umem;
mr->access_flags = access_flags;
mr->desc_size = sizeof(struct mlx5_mtt);
- mr->mmkey.iova = virt_addr;
- mr->mmkey.size = len;
+ mr->mmkey.iova = iova;
+ mr->mmkey.size = umem->length;
mr->mmkey.pd = to_mpd(pd)->pdn;
+ mr->page_shift = order_base_2(page_size);
+ mr->umem = umem;
+ set_mr_fields(dev, mr, umem->length, access_flags);
return mr;
}
@@ -1012,14 +990,144 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
+/*
+ * Allocate a temporary buffer to hold the per-page information to transfer to
+ * HW. For efficiency this should be as large as it can be, but buffer
+ * allocation failure is not allowed, so try smaller sizes.
+ */
+static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
+{
+ const size_t xlt_chunk_align =
+ MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size);
+ size_t size;
+ void *res = NULL;
+
+ static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
+
+ /*
+ * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
+ * allocation can't trigger any kind of reclaim.
+ */
+ might_sleep();
+
+ gfp_mask |= __GFP_ZERO;
+
+ /*
+ * If the system already has a suitable high order page then just use
+ * that, but don't try hard to create one. This max is about 1M, so a
+ * free x86 huge page will satisfy it.
+ */
+ size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
+ MLX5_MAX_UMR_CHUNK);
+ *nents = size / ent_size;
+ res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+ get_order(size));
+ if (res)
+ return res;
+
+ if (size > MLX5_SPARE_UMR_CHUNK) {
+ size = MLX5_SPARE_UMR_CHUNK;
+ *nents = get_order(size) / ent_size;
+ res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+ get_order(size));
+ if (res)
+ return res;
+ }
+
+ *nents = PAGE_SIZE / ent_size;
+ res = (void *)__get_free_page(gfp_mask);
+ if (res)
+ return res;
+
+ mutex_lock(&xlt_emergency_page_mutex);
+ memset(xlt_emergency_page, 0, PAGE_SIZE);
+ return xlt_emergency_page;
+}
+
+static void mlx5_ib_free_xlt(void *xlt, size_t length)
+{
+ if (xlt == xlt_emergency_page) {
+ mutex_unlock(&xlt_emergency_page_mutex);
+ return;
+ }
+
+ free_pages((unsigned long)xlt, get_order(length));
+}
+
+/*
+ * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
+ * submission.
+ */
+static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
+ struct mlx5_umr_wr *wr, struct ib_sge *sg,
+ size_t nents, size_t ent_size,
+ unsigned int flags)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
+ dma_addr_t dma;
+ void *xlt;
+
+ xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
+ flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
+ GFP_KERNEL);
+ sg->length = nents * ent_size;
+ dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, dma)) {
+ mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
+ mlx5_ib_free_xlt(xlt, sg->length);
+ return NULL;
+ }
+ sg->addr = dma;
+ sg->lkey = dev->umrc.pd->local_dma_lkey;
+
+ memset(wr, 0, sizeof(*wr));
+ wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
+ if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+ wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+ wr->wr.sg_list = sg;
+ wr->wr.num_sge = 1;
+ wr->wr.opcode = MLX5_IB_WR_UMR;
+ wr->pd = mr->ibmr.pd;
+ wr->mkey = mr->mmkey.key;
+ wr->length = mr->mmkey.size;
+ wr->virt_addr = mr->mmkey.iova;
+ wr->access_flags = mr->access_flags;
+ wr->page_shift = mr->page_shift;
+ wr->xlt_size = sg->length;
+ return xlt;
+}
+
+static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
+ struct ib_sge *sg)
+{
+ struct device *ddev = &dev->mdev->pdev->dev;
+
+ dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
+ mlx5_ib_free_xlt(xlt, sg->length);
+}
+
+static unsigned int xlt_wr_final_send_flags(unsigned int flags)
+{
+ unsigned int res = 0;
+
+ if (flags & MLX5_IB_UPD_XLT_ENABLE)
+ res |= MLX5_IB_SEND_UMR_ENABLE_MR |
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
+ MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
+ res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
+ if (flags & MLX5_IB_UPD_XLT_ADDR)
+ res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ return res;
+}
+
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags)
{
- struct mlx5_ib_dev *dev = mr->dev;
- struct device *ddev = dev->ib_dev.dev.parent;
- int size;
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
void *xlt;
- dma_addr_t dma;
struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
@@ -1030,15 +1138,17 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
const int page_mask = page_align - 1;
size_t pages_mapped = 0;
size_t pages_to_map = 0;
- size_t pages_iter = 0;
+ size_t pages_iter;
size_t size_to_map = 0;
- gfp_t gfp;
- bool use_emergency_page = false;
+ size_t orig_sg_length;
if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
!umr_can_use_indirect_mkey(dev))
return -EPERM;
+ if (WARN_ON(!mr->umem->is_odp))
+ return -EINVAL;
+
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
* so we need to align the offset and length accordingly
*/
@@ -1046,63 +1156,21 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
npages += idx & page_mask;
idx &= ~page_mask;
}
-
- gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
- gfp |= __GFP_ZERO | __GFP_NOWARN;
-
pages_to_map = ALIGN(npages, page_align);
- size = desc_size * pages_to_map;
- size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
-
- xlt = (void *)__get_free_pages(gfp, get_order(size));
- if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
- mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n",
- size, get_order(size), MLX5_SPARE_UMR_CHUNK);
-
- size = MLX5_SPARE_UMR_CHUNK;
- xlt = (void *)__get_free_pages(gfp, get_order(size));
- }
- if (!xlt) {
- mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
- xlt = (void *)mlx5_ib_get_xlt_emergency_page();
- size = PAGE_SIZE;
- memset(xlt, 0, size);
- use_emergency_page = true;
- }
- pages_iter = size / desc_size;
- dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, dma)) {
- mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
- err = -ENOMEM;
- goto free_xlt;
- }
+ xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
+ if (!xlt)
+ return -ENOMEM;
+ pages_iter = sg.length / desc_size;
+ orig_sg_length = sg.length;
- if (mr->umem->is_odp) {
- if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
- struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
+ if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
- pages_to_map = min_t(size_t, pages_to_map, max_pages);
- }
+ pages_to_map = min_t(size_t, pages_to_map, max_pages);
}
- sg.addr = dma;
- sg.lkey = dev->umrc.pd->local_dma_lkey;
-
- memset(&wr, 0, sizeof(wr));
- wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
- if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
- wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- wr.wr.sg_list = &sg;
- wr.wr.num_sge = 1;
- wr.wr.opcode = MLX5_IB_WR_UMR;
-
- wr.pd = mr->ibmr.pd;
- wr.mkey = mr->mmkey.key;
- wr.length = mr->mmkey.size;
- wr.virt_addr = mr->mmkey.iova;
- wr.access_flags = mr->access_flags;
wr.page_shift = page_shift;
for (pages_mapped = 0;
@@ -1110,50 +1178,87 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
pages_mapped += pages_iter, idx += pages_iter) {
npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
size_to_map = npages * desc_size;
- dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
- if (mr->umem->is_odp) {
- mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
- } else {
- __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
- npages, xlt,
- MLX5_IB_MTT_PRESENT);
- /* Clear padding after the pages
- * brought from the umem.
- */
- memset(xlt + size_to_map, 0, size - size_to_map);
- }
- dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+ dma_sync_single_for_device(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
- if (pages_mapped + pages_iter >= pages_to_map) {
- if (flags & MLX5_IB_UPD_XLT_ENABLE)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_ENABLE_MR |
- MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
- MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- if (flags & MLX5_IB_UPD_XLT_PD ||
- flags & MLX5_IB_UPD_XLT_ACCESS)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
- if (flags & MLX5_IB_UPD_XLT_ADDR)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- }
+ if (pages_mapped + pages_iter >= pages_to_map)
+ wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
wr.offset = idx * desc_size;
wr.xlt_size = sg.length;
err = mlx5_ib_post_send_wait(dev, &wr);
}
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+ sg.length = orig_sg_length;
+ mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
+ return err;
+}
+
+/*
+ * Send the DMA list to the HW for a normal MR using UMR.
+ */
+static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
+ struct ib_block_iter biter;
+ struct mlx5_mtt *cur_mtt;
+ struct mlx5_umr_wr wr;
+ size_t orig_sg_length;
+ struct mlx5_mtt *mtt;
+ size_t final_size;
+ struct ib_sge sg;
+ int err = 0;
+
+ if (WARN_ON(mr->umem->is_odp))
+ return -EINVAL;
+
+ mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
+ ib_umem_num_dma_blocks(mr->umem,
+ 1 << mr->page_shift),
+ sizeof(*mtt), flags);
+ if (!mtt)
+ return -ENOMEM;
+ orig_sg_length = sg.length;
+
+ cur_mtt = mtt;
+ rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap,
+ BIT(mr->page_shift)) {
+ if (cur_mtt == (void *)mtt + sg.length) {
+ dma_sync_single_for_device(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ err = mlx5_ib_post_send_wait(dev, &wr);
+ if (err)
+ goto err;
+ dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ wr.offset += sg.length;
+ cur_mtt = mtt;
+ }
+
+ cur_mtt->ptag =
+ cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+ MLX5_IB_MTT_PRESENT);
+ cur_mtt++;
+ }
-free_xlt:
- if (use_emergency_page)
- mlx5_ib_put_xlt_emergency_page();
- else
- free_pages((unsigned long)xlt, get_order(size));
+ final_size = (void *)cur_mtt - (void *)mtt;
+ sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
+ memset(cur_mtt, 0, sg.length - final_size);
+ wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
+ wr.xlt_size = sg.length;
+ dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
+ err = mlx5_ib_post_send_wait(dev, &wr);
+
+err:
+ sg.length = orig_sg_length;
+ mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
return err;
}
@@ -1161,11 +1266,9 @@ free_xlt:
* If ibmr is NULL it will be allocated by reg_create.
* Else, the given ibmr will be used.
*/
-static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
- u64 virt_addr, u64 length,
- struct ib_umem *umem, int npages,
- int page_shift, int access_flags,
- bool populate)
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ unsigned int page_size, bool populate)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr;
@@ -1176,16 +1279,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
int err;
bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
- mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!page_size)
+ return ERR_PTR(-EINVAL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->ibmr.pd = pd;
mr->access_flags = access_flags;
+ mr->page_shift = order_base_2(page_size);
inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
if (populate)
- inlen += sizeof(*pas) * roundup(npages, 2);
+ inlen += sizeof(*pas) *
+ roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
@@ -1197,7 +1304,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
err = -EINVAL;
goto err_2;
}
- mlx5_ib_populate_pas(dev, umem, page_shift, pas,
+ mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
pg_cap ? MLX5_IB_MTT_PRESENT : 0);
}
@@ -1206,20 +1313,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr,
+ set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
populate ? pd : dev->umrc.pd);
MLX5_SET(mkc, mkc, free, !populate);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET64(mkc, mkc, len, length);
+ MLX5_SET64(mkc, mkc, len, umem->length);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
MLX5_SET(mkc, mkc, translations_octword_size,
- get_octo_len(virt_addr, length, page_shift));
- MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ get_octo_len(iova, umem->length, mr->page_shift));
+ MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
if (populate) {
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
- get_octo_len(virt_addr, length, page_shift));
+ get_octo_len(iova, umem->length, mr->page_shift));
}
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
@@ -1229,7 +1336,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
}
mr->mmkey.type = MLX5_MKEY_MR;
mr->desc_size = sizeof(struct mlx5_mtt);
- mr->dev = dev;
+ mr->umem = umem;
+ set_mr_fields(dev, mr, umem->length, access_flags);
kvfree(in);
mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
@@ -1238,25 +1346,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
err_2:
kvfree(in);
-
err_1:
- if (!ibmr)
- kfree(mr);
-
+ kfree(mr);
return ERR_PTR(err);
}
-static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
- int npages, u64 length, int access_flags)
-{
- mr->npages = npages;
- atomic_add(npages, &dev->mdev->priv.reg_pages);
- mr->ibmr.lkey = mr->mmkey.key;
- mr->ibmr.rkey = mr->mmkey.key;
- mr->ibmr.length = length;
- mr->access_flags = access_flags;
-}
-
static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
u64 length, int acc, int mode)
{
@@ -1290,8 +1384,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
kfree(in);
- mr->umem = NULL;
- set_mr_fields(dev, mr, 0, length, acc);
+ set_mr_fields(dev, mr, length, acc);
return &mr->ibmr;
@@ -1352,116 +1445,128 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
attr->access_flags, mode);
}
-struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt_addr, int access_flags,
- struct ib_udata *udata)
+static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
bool xlt_with_umr;
- struct ib_umem *umem;
- int page_shift;
- int npages;
- int ncont;
- int order;
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
- return ERR_PTR(-EOPNOTSUPP);
-
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
- start, virt_addr, length, access_flags);
-
- xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
- /* ODP requires xlt update via umr to work. */
- if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
- return ERR_PTR(-EINVAL);
-
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
- length == U64_MAX) {
- if (virt_addr != start)
- return ERR_PTR(-EINVAL);
- if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
- !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
- return ERR_PTR(-EINVAL);
-
- mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
- if (IS_ERR(mr))
- return ERR_CAST(mr);
- return &mr->ibmr;
- }
-
- err = mr_umem_get(dev, start, length, access_flags, &umem,
- &npages, &page_shift, &ncont, &order);
-
- if (err < 0)
- return ERR_PTR(err);
-
+ xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
if (xlt_with_umr) {
- mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
- page_shift, order, access_flags);
- if (IS_ERR(mr))
- mr = NULL;
- }
+ mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
+ } else {
+ unsigned int page_size = mlx5_umem_find_best_pgsz(
+ umem, mkc, log_page_size, 0, iova);
- if (!mr) {
mutex_lock(&dev->slow_path_mutex);
- mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
- page_shift, access_flags, !xlt_with_umr);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, true);
mutex_unlock(&dev->slow_path_mutex);
}
-
if (IS_ERR(mr)) {
- err = PTR_ERR(mr);
- goto error;
+ ib_umem_release(umem);
+ return ERR_CAST(mr);
}
mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
- mr->umem = umem;
- set_mr_fields(dev, mr, npages, length, access_flags);
+ atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
- if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
+ if (xlt_with_umr) {
/*
* If the MR was created with reg_create then it will be
* configured properly but left disabled. It is safe to go ahead
* and configure it again via UMR while enabling it.
*/
- int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
-
- err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
- update_xlt_flags);
+ err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
if (err) {
dereg_mr(dev, mr);
return ERR_PTR(err);
}
}
+ return &mr->ibmr;
+}
- if (is_odp_mr(mr)) {
- to_ib_umem_odp(mr->umem)->private = mr;
- init_waitqueue_head(&mr->q_deferred_work);
- atomic_set(&mr->num_deferred_work, 0);
- err = xa_err(xa_store(&dev->odp_mkeys,
- mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
- GFP_KERNEL));
- if (err) {
- dereg_mr(dev, mr);
- return ERR_PTR(err);
- }
+static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem_odp *odp;
+ struct mlx5_ib_mr *mr;
+ int err;
- err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
- if (err) {
- dereg_mr(dev, mr);
- return ERR_PTR(err);
- }
+ if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!start && length == U64_MAX) {
+ if (iova != 0)
+ return ERR_PTR(-EINVAL);
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return ERR_PTR(-EINVAL);
+
+ mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
+ if (IS_ERR(mr))
+ return ERR_CAST(mr);
+ return &mr->ibmr;
+ }
+
+ /* ODP requires xlt update via umr to work. */
+ if (!mlx5_ib_can_load_pas_with_umr(dev, length))
+ return ERR_PTR(-EINVAL);
+
+ odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
+ &mlx5_mn_ops);
+ if (IS_ERR(odp))
+ return ERR_CAST(odp);
+
+ mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
+ if (IS_ERR(mr)) {
+ ib_umem_release(&odp->umem);
+ return ERR_CAST(mr);
}
+ odp->private = mr;
+ init_waitqueue_head(&mr->q_deferred_work);
+ atomic_set(&mr->num_deferred_work, 0);
+ err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_KERNEL));
+ if (err)
+ goto err_dereg_mr;
+
+ err = mlx5_ib_init_odp_mr(mr);
+ if (err)
+ goto err_dereg_mr;
return &mr->ibmr;
-error:
- ib_umem_release(umem);
+
+err_dereg_mr:
+ dereg_mr(dev, mr);
return ERR_PTR(err);
}
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem *umem;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, iova, length, access_flags);
+
+ if (access_flags & IB_ACCESS_ON_DEMAND)
+ return create_user_odp_mr(pd, start, length, iova, access_flags,
+ udata);
+ umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+ return create_real_mr(pd, umem, iova, access_flags);
+}
+
/**
* mlx5_mr_cache_invalidate - Fence all DMA on the MR
* @mr: The MR to fence
@@ -1474,151 +1579,224 @@ int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
{
struct mlx5_umr_wr umrwr = {};
- if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
umrwr.wr.opcode = MLX5_IB_WR_UMR;
- umrwr.pd = mr->dev->umrc.pd;
+ umrwr.pd = mr_to_mdev(mr)->umrc.pd;
umrwr.mkey = mr->mmkey.key;
umrwr.ignore_free_state = 1;
- return mlx5_ib_post_send_wait(mr->dev, &umrwr);
+ return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr);
}
-static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
- int access_flags, int flags)
-{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_umr_wr umrwr = {};
+/*
+ * True if the change in access flags can be done via UMR, only some access
+ * flags can be updated.
+ */
+static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
+ unsigned int current_access_flags,
+ unsigned int target_access_flags)
+{
+ unsigned int diffs = current_access_flags ^ target_access_flags;
+
+ if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
+ return false;
+ return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags,
+ target_access_flags);
+}
+
+static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct mlx5_umr_wr umrwr = {
+ .wr = {
+ .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS,
+ .opcode = MLX5_IB_WR_UMR,
+ },
+ .mkey = mr->mmkey.key,
+ .pd = pd,
+ .access_flags = access_flags,
+ };
int err;
- umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
-
- umrwr.wr.opcode = MLX5_IB_WR_UMR;
- umrwr.mkey = mr->mmkey.key;
-
- if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
- umrwr.pd = pd;
- umrwr.access_flags = access_flags;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
- }
-
err = mlx5_ib_post_send_wait(dev, &umrwr);
+ if (err)
+ return err;
- return err;
+ mr->access_flags = access_flags;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
+ return 0;
}
-int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
- u64 length, u64 virt_addr, int new_access_flags,
- struct ib_pd *new_pd, struct ib_udata *udata)
+static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
+ struct ib_umem *new_umem,
+ int new_access_flags, u64 iova,
+ unsigned long *page_size)
{
- struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
- struct mlx5_ib_mr *mr = to_mmr(ib_mr);
- struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
- int access_flags = flags & IB_MR_REREG_ACCESS ?
- new_access_flags :
- mr->access_flags;
- int page_shift = 0;
- int upd_flags = 0;
- int npages = 0;
- int ncont = 0;
- int order = 0;
- u64 addr, len;
- int err;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
- start, virt_addr, length, access_flags);
+ /* We only track the allocated sizes of MRs from the cache */
+ if (!mr->cache_ent)
+ return false;
+ if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length))
+ return false;
- atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
+ *page_size =
+ mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
+ if (WARN_ON(!*page_size))
+ return false;
+ return (1ULL << mr->cache_ent->order) >=
+ ib_umem_num_dma_blocks(new_umem, *page_size);
+}
- if (!mr->umem)
- return -EINVAL;
+static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags, int flags, struct ib_umem *new_umem,
+ u64 iova, unsigned long page_size)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE;
+ struct ib_umem *old_umem = mr->umem;
+ int err;
- if (is_odp_mr(mr))
- return -EOPNOTSUPP;
+ /*
+ * To keep everything simple the MR is revoked before we start to mess
+ * with it. This ensure the change is atomic relative to any use of the
+ * MR.
+ */
+ err = mlx5_mr_cache_invalidate(mr);
+ if (err)
+ return err;
- if (flags & IB_MR_REREG_TRANS) {
- addr = virt_addr;
- len = length;
- } else {
- addr = mr->umem->address;
- len = mr->umem->length;
+ if (flags & IB_MR_REREG_PD) {
+ mr->ibmr.pd = pd;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
+ upd_flags |= MLX5_IB_UPD_XLT_PD;
+ }
+ if (flags & IB_MR_REREG_ACCESS) {
+ mr->access_flags = access_flags;
+ upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
}
- if (flags != IB_MR_REREG_PD) {
+ mr->ibmr.length = new_umem->length;
+ mr->mmkey.iova = iova;
+ mr->mmkey.size = new_umem->length;
+ mr->page_shift = order_base_2(page_size);
+ mr->umem = new_umem;
+ err = mlx5_ib_update_mr_pas(mr, upd_flags);
+ if (err) {
/*
- * Replace umem. This needs to be done whether or not UMR is
- * used.
+ * The MR is revoked at this point so there is no issue to free
+ * new_umem.
*/
- flags |= IB_MR_REREG_TRANS;
- ib_umem_release(mr->umem);
- mr->umem = NULL;
- err = mr_umem_get(dev, addr, len, access_flags, &mr->umem,
- &npages, &page_shift, &ncont, &order);
- if (err)
- goto err;
+ mr->umem = old_umem;
+ return err;
}
- if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags,
- access_flags) ||
- !mlx5_ib_can_load_pas_with_umr(dev, len) ||
- (flags & IB_MR_REREG_TRANS &&
- !mlx5_ib_pas_fits_in_mr(mr, addr, len))) {
- /*
- * UMR can't be used - MKey needs to be replaced.
- */
- if (mr->cache_ent)
- detach_mr_from_cache(mr);
- err = destroy_mkey(dev, mr);
- if (err)
- goto err;
+ atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
+ ib_umem_release(old_umem);
+ atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
+ return 0;
+}
- mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
- page_shift, access_flags, true);
+struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+ u64 length, u64 iova, int new_access_flags,
+ struct ib_pd *new_pd,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
+ struct mlx5_ib_mr *mr = to_mmr(ib_mr);
+ int err;
- if (IS_ERR(mr)) {
- err = PTR_ERR(mr);
- mr = to_mmr(ib_mr);
- goto err;
- }
- } else {
- /*
- * Send a UMR WQE
- */
- mr->ibmr.pd = pd;
- mr->access_flags = access_flags;
- mr->mmkey.iova = addr;
- mr->mmkey.size = len;
- mr->mmkey.pd = to_mpd(pd)->pdn;
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+ return ERR_PTR(-EOPNOTSUPP);
- if (flags & IB_MR_REREG_TRANS) {
- upd_flags = MLX5_IB_UPD_XLT_ADDR;
- if (flags & IB_MR_REREG_PD)
- upd_flags |= MLX5_IB_UPD_XLT_PD;
- if (flags & IB_MR_REREG_ACCESS)
- upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
- err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
- upd_flags);
- } else {
- err = rereg_umr(pd, mr, access_flags, flags);
+ mlx5_ib_dbg(
+ dev,
+ "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, iova, length, new_access_flags);
+
+ if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!(flags & IB_MR_REREG_ACCESS))
+ new_access_flags = mr->access_flags;
+ if (!(flags & IB_MR_REREG_PD))
+ new_pd = ib_mr->pd;
+
+ if (!(flags & IB_MR_REREG_TRANS)) {
+ struct ib_umem *umem;
+
+ /* Fast path for PD/access change */
+ if (can_use_umr_rereg_access(dev, mr->access_flags,
+ new_access_flags)) {
+ err = umr_rereg_pd_access(mr, new_pd, new_access_flags);
+ if (err)
+ return ERR_PTR(err);
+ return NULL;
}
+ /* DM or ODP MR's don't have a umem so we can't re-use it */
+ if (!mr->umem || is_odp_mr(mr))
+ goto recreate;
+ /*
+ * Only one active MR can refer to a umem at one time, revoke
+ * the old MR before assigning the umem to the new one.
+ */
+ err = mlx5_mr_cache_invalidate(mr);
if (err)
- goto err;
- }
-
- set_mr_fields(dev, mr, npages, len, access_flags);
+ return ERR_PTR(err);
+ umem = mr->umem;
+ mr->umem = NULL;
+ atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
- return 0;
+ return create_real_mr(new_pd, umem, mr->mmkey.iova,
+ new_access_flags);
+ }
-err:
- ib_umem_release(mr->umem);
- mr->umem = NULL;
+ /*
+ * DM doesn't have a PAS list so we can't re-use it, odp does but the
+ * logic around releasing the umem is different
+ */
+ if (!mr->umem || is_odp_mr(mr))
+ goto recreate;
+
+ if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
+ can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
+ struct ib_umem *new_umem;
+ unsigned long page_size;
+
+ new_umem = ib_umem_get(&dev->ib_dev, start, length,
+ new_access_flags);
+ if (IS_ERR(new_umem))
+ return ERR_CAST(new_umem);
+
+ /* Fast path for PAS change */
+ if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova,
+ &page_size)) {
+ err = umr_rereg_pas(mr, new_pd, new_access_flags, flags,
+ new_umem, iova, page_size);
+ if (err) {
+ ib_umem_release(new_umem);
+ return ERR_PTR(err);
+ }
+ return NULL;
+ }
+ return create_real_mr(new_pd, new_umem, iova, new_access_flags);
+ }
- clean_mr(dev, mr);
- return err;
+ /*
+ * Everything else has no state we can preserve, just create a new MR
+ * from scratch
+ */
+recreate:
+ return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
+ new_access_flags, udata);
}
static int
@@ -1627,6 +1805,8 @@ mlx5_alloc_priv_descs(struct ib_device *device,
int ndescs,
int desc_size)
{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct device *ddev = &dev->mdev->pdev->dev;
int size = ndescs * desc_size;
int add_size;
int ret;
@@ -1639,9 +1819,8 @@ mlx5_alloc_priv_descs(struct ib_device *device,
mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
- mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
- size, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
+ mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, mr->desc_map)) {
ret = -ENOMEM;
goto err;
}
@@ -1659,9 +1838,10 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
if (mr->descs) {
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
+ struct mlx5_ib_dev *dev = to_mdev(device);
- dma_unmap_single(device->dev.parent, mr->desc_map,
- size, DMA_TO_DEVICE);
+ dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
+ DMA_TO_DEVICE);
kfree(mr->descs_alloc);
mr->descs = NULL;
}
@@ -1691,7 +1871,6 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
- int npages = mr->npages;
struct ib_umem *umem = mr->umem;
/* Stop all DMA */
@@ -1700,14 +1879,17 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
else
clean_mr(dev, mr);
+ if (umem) {
+ if (!is_odp_mr(mr))
+ atomic_sub(ib_umem_num_pages(umem),
+ &dev->mdev->priv.reg_pages);
+ ib_umem_release(umem);
+ }
+
if (mr->cache_ent)
mlx5_mr_cache_free(dev, mr);
else
kfree(mr);
-
- ib_umem_release(umem);
- atomic_sub(npages, &dev->mdev->priv.reg_pages);
-
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 5c853ec1b0d8..aa2413b50adc 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -102,7 +102,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
if (flags & MLX5_IB_UPD_XLT_ZAP) {
for (; pklm != end; pklm++, idx++) {
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
- pklm->key = cpu_to_be32(imr->dev->null_mkey);
+ pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey);
pklm->va = 0;
}
return;
@@ -129,7 +129,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
* locking around the xarray.
*/
lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
- lockdep_assert_held(&imr->dev->odp_srcu);
+ lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu);
for (; pklm != end; pklm++, idx++) {
struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
@@ -139,7 +139,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
pklm->key = cpu_to_be32(mtt->ibmr.lkey);
pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE);
} else {
- pklm->key = cpu_to_be32(imr->dev->null_mkey);
+ pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey);
pklm->va = 0;
}
}
@@ -199,7 +199,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
mutex_unlock(&odp->umem_mutex);
if (!mr->cache_ent) {
- mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey);
+ mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey);
WARN_ON(mr->descs);
}
}
@@ -222,19 +222,19 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
WARN_ON(atomic_read(&mr->num_deferred_work));
if (need_imr_xlt) {
- srcu_key = srcu_read_lock(&mr->dev->odp_srcu);
+ srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu);
mutex_lock(&odp_imr->umem_mutex);
mlx5_ib_update_xlt(mr->parent, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
- srcu_read_unlock(&mr->dev->odp_srcu, srcu_key);
+ srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key);
}
dma_fence_odp_mr(mr);
mr->parent = NULL;
- mlx5_mr_cache_free(mr->dev, mr);
+ mlx5_mr_cache_free(mr_to_mdev(mr), mr);
ib_umem_odp_release(odp);
if (atomic_dec_and_test(&imr->num_deferred_work))
wake_up(&imr->q_deferred_work);
@@ -274,7 +274,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
goto out_unlock;
atomic_inc(&imr->num_deferred_work);
- call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu,
+ call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu,
free_implicit_child_mr_rcu);
out_unlock:
@@ -476,12 +476,13 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
if (IS_ERR(odp))
return ERR_CAST(odp);
- ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY,
- imr->access_flags);
+ ret = mr = mlx5_mr_cache_alloc(
+ mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags);
if (IS_ERR(mr))
goto out_umem;
mr->ibmr.pd = imr->ibmr.pd;
+ mr->ibmr.device = &mr_to_mdev(imr)->ib_dev;
mr->umem = &odp->umem;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
@@ -517,11 +518,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
goto out_mr;
}
- mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr);
+ mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr);
return mr;
out_mr:
- mlx5_mr_cache_free(imr->dev, mr);
+ mlx5_mr_cache_free(mr_to_mdev(imr), mr);
out_umem:
ib_umem_odp_release(odp);
return ret;
@@ -536,6 +537,10 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct mlx5_ib_mr *imr;
int err;
+ if (!mlx5_ib_can_load_pas_with_umr(dev,
+ MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
+ return ERR_PTR(-EOPNOTSUPP);
+
umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
if (IS_ERR(umem_odp))
return ERR_CAST(umem_odp);
@@ -551,6 +556,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
imr->umem = &umem_odp->umem;
imr->ibmr.lkey = imr->mmkey.key;
imr->ibmr.rkey = imr->mmkey.key;
+ imr->ibmr.device = &dev->ib_dev;
imr->umem = &umem_odp->umem;
imr->is_odp_implicit = true;
atomic_set(&imr->num_deferred_work, 0);
@@ -584,7 +590,7 @@ out_umem:
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
{
struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
- struct mlx5_ib_dev *dev = imr->dev;
+ struct mlx5_ib_dev *dev = mr_to_mdev(imr);
struct list_head destroy_list;
struct mlx5_ib_mr *mtt;
struct mlx5_ib_mr *tmp;
@@ -654,10 +660,10 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
{
/* Prevent new page faults and prefetch requests from succeeding */
- xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
/* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&mr->dev->odp_srcu);
+ synchronize_srcu(&mr_to_mdev(mr)->odp_srcu);
wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work));
@@ -701,7 +707,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
if (ret < 0) {
if (ret != -EAGAIN)
- mlx5_ib_err(mr->dev,
+ mlx5_ib_err(mr_to_mdev(mr),
"Failed to update mkey page tables\n");
goto out;
}
@@ -791,7 +797,7 @@ out:
MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
if (err) {
- mlx5_ib_err(imr->dev, "Failed to update PAS\n");
+ mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n");
return err;
}
return ret;
@@ -811,7 +817,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
{
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- lockdep_assert_held(&mr->dev->odp_srcu);
+ lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu);
if (unlikely(io_virt < mr->mmkey.iova))
return -EFAULT;
@@ -831,17 +837,13 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
flags);
}
-int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
- u32 flags = MLX5_PF_FLAGS_SNAPSHOT;
int ret;
- if (enable)
- flags |= MLX5_PF_FLAGS_ENABLE;
-
- ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem),
- mr->umem->address, mr->umem->length, NULL,
- flags);
+ ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address,
+ mr->umem->length, NULL,
+ MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE);
return ret >= 0 ? 0 : ret;
}
@@ -1783,7 +1785,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
/* We rely on IB/core that work is executed if we have num_sge != 0 only. */
WARN_ON(!work->num_sge);
- dev = work->frags[0].mr->dev;
+ dev = mr_to_mdev(work->frags[0].mr);
/* SRCU should be held when calling to mlx5_odp_populate_xlt() */
srcu_key = srcu_read_lock(&dev->odp_srcu);
for (i = 0; i < work->num_sge; ++i) {
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 600e056798c0..0cb7cc642d87 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -778,39 +778,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
return bfregi->sys_pages[index_of_sys_page] + offset;
}
-static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
- unsigned long addr, size_t size,
- struct ib_umem **umem, int *npages, int *page_shift,
- int *ncont, u32 *offset)
-{
- int err;
-
- *umem = ib_umem_get(&dev->ib_dev, addr, size, 0);
- if (IS_ERR(*umem)) {
- mlx5_ib_dbg(dev, "umem_get failed\n");
- return PTR_ERR(*umem);
- }
-
- mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL);
-
- err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
-
- mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
- addr, size, *npages, *page_shift, *ncont, *offset);
-
- return 0;
-
-err_umem:
- ib_umem_release(*umem);
- *umem = NULL;
-
- return err;
-}
-
static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_rwq *rwq, struct ib_udata *udata)
{
@@ -833,10 +800,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
{
struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
- int page_shift = 0;
- int npages;
+ unsigned long page_size = 0;
u32 offset = 0;
- int ncont = 0;
int err;
if (!ucmd->buf_addr)
@@ -849,23 +814,26 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return err;
}
- mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift,
- &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
- &rwq->rq_page_offset);
- if (err) {
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &rwq->rq_page_offset);
+ if (!page_size) {
mlx5_ib_warn(dev, "bad offset\n");
+ err = -EINVAL;
goto err_umem;
}
- rwq->rq_num_pas = ncont;
- rwq->page_shift = page_shift;
- rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size);
+ rwq->page_shift = order_base_2(page_size);
+ rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT;
rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);
- mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n",
- (unsigned long long)ucmd->buf_addr, rwq->buf_size,
- npages, page_shift, ncont, offset);
+ mlx5_ib_dbg(
+ dev,
+ "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n",
+ (unsigned long long)ucmd->buf_addr, rwq->buf_size,
+ ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas,
+ offset);
err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db);
if (err) {
@@ -896,10 +864,9 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
{
struct mlx5_ib_ucontext *context;
struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
- int page_shift = 0;
+ unsigned int page_offset_quantized = 0;
+ unsigned long page_size = 0;
int uar_index = 0;
- int npages;
- u32 offset = 0;
int bfregn;
int ncont = 0;
__be64 *pas;
@@ -950,11 +917,21 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (ucmd->buf_addr && ubuffer->buf_size) {
ubuffer->buf_addr = ucmd->buf_addr;
- err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr,
- ubuffer->buf_size, &ubuffer->umem,
- &npages, &page_shift, &ncont, &offset);
- if (err)
+ ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
+ ubuffer->buf_size, 0);
+ if (IS_ERR(ubuffer->umem)) {
+ err = PTR_ERR(ubuffer->umem);
goto err_bfreg;
+ }
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ ubuffer->umem, qpc, log_page_size,
+ MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
+ ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size);
} else {
ubuffer->umem = NULL;
}
@@ -969,15 +946,14 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0;
MLX5_SET(create_qp_in, *in, uid, uid);
- pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
- if (ubuffer->umem)
- mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
-
qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
-
- MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(qpc, qpc, page_offset, offset);
-
+ pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
+ if (ubuffer->umem) {
+ mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0);
+ MLX5_SET(qpc, qpc, log_page_size,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, page_offset, page_offset_quantized);
+ }
MLX5_SET(qpc, qpc, uar_page, uar_index);
if (bfregn != MLX5_IB_INVALID_BFREG)
resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
@@ -1209,18 +1185,24 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
void *wq;
int inlen;
int err;
- int page_shift = 0;
- int npages;
- int ncont = 0;
- u32 offset = 0;
-
- err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size,
- &sq->ubuffer.umem, &npages, &page_shift, &ncont,
- &offset);
- if (err)
- return err;
+ unsigned int page_offset_quantized;
+ unsigned long page_size;
+
+ sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
+ ubuffer->buf_size, 0);
+ if (IS_ERR(sq->ubuffer.umem))
+ return PTR_ERR(sq->ubuffer.umem);
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
- inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) *
+ ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
@@ -1248,11 +1230,12 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
- MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(wq, wq, page_offset, offset);
+ MLX5_SET(wq, wq, log_wq_pg_sz,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(wq, wq, page_offset, page_offset_quantized);
pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+ mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0);
err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp);
@@ -1278,40 +1261,31 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
ib_umem_release(sq->ubuffer.umem);
}
-static size_t get_rq_pas_size(void *qpc)
-{
- u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
- u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
- u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size);
- u32 page_offset = MLX5_GET(qpc, qpc, page_offset);
- u32 po_quanta = 1 << (log_page_size - 6);
- u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride);
- u32 page_size = 1 << log_page_size;
- u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
- u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
-
- return rq_num_pas * sizeof(u64);
-}
-
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, void *qpin,
- size_t qpinlen, struct ib_pd *pd)
+ struct ib_pd *pd)
{
struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
- __be64 *qp_pas;
void *in;
void *rqc;
void *wq;
void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
- size_t rq_pas_size = get_rq_pas_size(qpc);
+ struct ib_umem *umem = rq->base.ubuffer.umem;
+ unsigned int page_offset_quantized;
+ unsigned long page_size = 0;
size_t inlen;
int err;
- if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas))
+ page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz,
+ MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size)
return -EINVAL;
- inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
+ sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1333,16 +1307,16 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING)
MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
- MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+ MLX5_SET(wq, wq, page_offset, page_offset_quantized);
MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
- MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+ MLX5_SET(wq, wq, log_wq_pg_sz,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
- memcpy(pas, qp_pas, rq_pas_size);
+ mlx5_ib_populate_pas(umem, page_size, pas, 0);
err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp);
@@ -1463,7 +1437,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING)
rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
- err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd);
+ err = create_raw_packet_qp_rq(dev, rq, in, pd);
if (err)
goto err_destroy_sq;
@@ -2436,7 +2410,7 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
qp->state = IB_QPS_RESET;
-
+ rdma_restrack_no_track(&qp->ibqp.res);
return 0;
}
@@ -2460,6 +2434,7 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
case IB_QPT_GSI:
if (dev->profile == &raw_eth_profile)
goto out;
+ fallthrough;
case IB_QPT_RAW_PACKET:
case IB_QPT_UD:
case MLX5_IB_QPT_REG_UMR:
@@ -2712,11 +2687,12 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
true, qp);
- if (create_flags)
+ if (create_flags) {
mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n",
create_flags);
-
- return (create_flags) ? -EINVAL : 0;
+ return -EOPNOTSUPP;
+ }
+ return 0;
}
static int process_udata_size(struct mlx5_ib_dev *dev,
@@ -3102,7 +3078,7 @@ static int ib_to_mlx5_rate_map(u8 rate)
return 5;
default:
return rate + MLX5_STAT_RATE_OFFSET;
- };
+ }
return 0;
}
@@ -4247,6 +4223,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int err = -EINVAL;
int port;
+ if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
+ return -EOPNOTSUPP;
+
if (ibqp->rwq_ind_tbl)
return -ENOSYS;
@@ -4576,7 +4555,9 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ qp->ibqp.qp_type == IB_QPT_XRC_TGT) {
to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path);
to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path);
qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index);
@@ -4882,7 +4863,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
MLX5_SET(rqc, rqc, delay_drop_en, 1);
}
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
+ mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0);
err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp);
if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
err = set_delay_drop(dev);
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
index 887270dd3ce2..4ac429e72004 100644
--- a/drivers/infiniband/hw/mlx5/restrack.c
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -116,7 +116,7 @@ static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr)
{
struct mlx5_ib_mr *mr = to_mmr(ibmr);
- return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY,
+ return fill_res_raw(msg, mr_to_mdev(mr), MLX5_SGMT_TYPE_PRM_QUERY_MKEY,
mlx5_mkey_to_idx(mr->mmkey.key));
}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index e2f720eec1e1..fab6736e4d6a 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -51,10 +51,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
udata, struct mlx5_ib_ucontext, ibucontext);
size_t ucmdlen;
int err;
- int npages;
- int page_shift;
- int ncont;
- u32 offset;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
ucmdlen = min(udata->inlen, sizeof(ucmd));
@@ -86,32 +82,14 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
err = PTR_ERR(srq->umem);
return err;
}
-
- mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages,
- &page_shift, &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
- &offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
-
- in->pas = kvcalloc(ncont, sizeof(*in->pas), GFP_KERNEL);
- if (!in->pas) {
- err = -ENOMEM;
- goto err_umem;
- }
-
- mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
+ in->umem = srq->umem;
err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db);
if (err) {
mlx5_ib_dbg(dev, "map doorbell failed\n");
- goto err_in;
+ goto err_umem;
}
- in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
- in->page_offset = offset;
in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type != IB_SRQT_BASIC)
@@ -119,9 +97,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
return 0;
-err_in:
- kvfree(in->pas);
-
err_umem:
ib_umem_release(srq->umem);
@@ -226,6 +201,11 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
struct mlx5_srq_attr in = {};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ if (init_attr->srq_type != IB_SRQT_BASIC &&
+ init_attr->srq_type != IB_SRQT_XRC &&
+ init_attr->srq_type != IB_SRQT_TM)
+ return -EOPNOTSUPP;
+
/* Sanity check SRQ size before proceeding */
if (init_attr->attr.max_wr >= max_srq_wqes) {
mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h
index 2c3627b2509d..a7e3dc5564ac 100644
--- a/drivers/infiniband/hw/mlx5/srq.h
+++ b/drivers/infiniband/hw/mlx5/srq.h
@@ -28,6 +28,7 @@ struct mlx5_srq_attr {
u32 user_index;
u64 db_record;
__be64 *pas;
+ struct ib_umem *umem;
u32 tm_log_list_size;
u32 tm_next_tag;
u32 tm_hw_phase_cnt;
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index db889ec3fd48..8b3385396599 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -92,6 +92,25 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
return srq;
}
+static int __set_srq_page_size(struct mlx5_srq_attr *in,
+ unsigned long page_size)
+{
+ if (!page_size)
+ return -EINVAL;
+ in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT;
+
+ if (WARN_ON(get_pas_size(in) !=
+ ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64)))
+ return -EINVAL;
+ return 0;
+}
+
+#define set_srq_page_size(in, typ, log_pgsz_fld) \
+ __set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff( \
+ (in)->umem, typ, log_pgsz_fld, \
+ MLX5_ADAPTER_PAGE_SHIFT, page_offset, \
+ 64, &(in)->page_offset))
+
static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in)
{
@@ -103,6 +122,12 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
int inlen;
int err;
+ if (in->umem) {
+ err = set_srq_page_size(in, srqc, log_page_size);
+ if (err)
+ return err;
+ }
+
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
create_in = kvzalloc(inlen, GFP_KERNEL);
@@ -114,7 +139,13 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
set_srqc(srqc, in);
- memcpy(pas, in->pas, pas_size);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
MLX5_SET(create_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_SRQ);
@@ -194,6 +225,12 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev,
int inlen;
int err;
+ if (in->umem) {
+ err = set_srq_page_size(in, xrc_srqc, log_page_size);
+ if (err)
+ return err;
+ }
+
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
create_in = kvzalloc(inlen, GFP_KERNEL);
@@ -207,7 +244,13 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev,
set_srqc(xrc_srqc, in);
MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
- memcpy(pas, in->pas, pas_size);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
MLX5_SET(create_xrc_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_XRC_SRQ);
@@ -289,11 +332,18 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
void *create_in = NULL;
void *rmpc;
void *wq;
+ void *pas;
int pas_size;
int outlen;
int inlen;
int err;
+ if (in->umem) {
+ err = set_srq_page_size(in, wq, log_wq_pg_sz);
+ if (err)
+ return err;
+ }
+
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
outlen = MLX5_ST_SZ_BYTES(create_rmp_out);
@@ -309,8 +359,16 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
MLX5_SET(create_rmp_in, create_in, uid, in->uid);
+ pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas);
+
set_wq(wq, in);
- memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP);
err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen);
@@ -421,10 +479,17 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
void *create_in;
void *xrqc;
void *wq;
+ void *pas;
int pas_size;
int inlen;
int err;
+ if (in->umem) {
+ err = set_srq_page_size(in, wq, log_wq_pg_sz);
+ if (err)
+ return err;
+ }
+
pas_size = get_pas_size(in);
inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
create_in = kvzalloc(inlen, GFP_KERNEL);
@@ -433,9 +498,16 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+ pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas);
set_wq(wq, in);
- memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
if (in->type == IB_SRQT_TM) {
MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 119b2573c9a0..26c3408dcaca 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -604,7 +604,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
default:
- entry->opcode = MTHCA_OPCODE_INVALID;
+ entry->opcode = 0xFF;
break;
}
} else {
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 9dbbf4d16796..a445160de3e1 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -105,7 +105,6 @@ enum {
MTHCA_OPCODE_ATOMIC_CS = 0x11,
MTHCA_OPCODE_ATOMIC_FA = 0x12,
MTHCA_OPCODE_BIND_MW = 0x18,
- MTHCA_OPCODE_INVALID = 0xff
};
enum {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index c4d9cdc4ee97..1a3dd07f993b 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -470,7 +470,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
int err;
if (init_attr->create_flags)
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-EOPNOTSUPP);
switch (init_attr->qp_type) {
case IB_QPT_RC:
@@ -612,7 +612,7 @@ static int mthca_create_cq(struct ib_cq *ibcq,
udata, struct mthca_ucontext, ibucontext);
if (attr->flags)
- return -EINVAL;
+ return -EOPNOTSUPP;
if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
return -EINVAL;
@@ -961,29 +961,34 @@ static ssize_t hw_rev_show(struct device *device,
struct mthca_dev *dev =
rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);
- return sprintf(buf, "%x\n", dev->rev_id);
+ return sysfs_emit(buf, "%x\n", dev->rev_id);
}
static DEVICE_ATTR_RO(hw_rev);
-static ssize_t hca_type_show(struct device *device,
- struct device_attribute *attr, char *buf)
+static const char *hca_type_string(int hca_type)
{
- struct mthca_dev *dev =
- rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);
-
- switch (dev->pdev->device) {
+ switch (hca_type) {
case PCI_DEVICE_ID_MELLANOX_TAVOR:
- return sprintf(buf, "MT23108\n");
+ return "MT23108";
case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
- return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
+ return "MT25208 (MT23108 compat mode)";
case PCI_DEVICE_ID_MELLANOX_ARBEL:
- return sprintf(buf, "MT25208\n");
+ return "MT25208";
case PCI_DEVICE_ID_MELLANOX_SINAI:
case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
- return sprintf(buf, "MT25204\n");
- default:
- return sprintf(buf, "unknown\n");
+ return "MT25204";
}
+
+ return "unknown";
+}
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct mthca_dev *dev =
+ rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);
+
+ return sysfs_emit(buf, "%s\n", hca_type_string(dev->pdev->device));
}
static DEVICE_ATTR_RO(hca_type);
@@ -993,7 +998,7 @@ static ssize_t board_id_show(struct device *device,
struct mthca_dev *dev =
rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);
- return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
+ return sysfs_emit(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
}
static DEVICE_ATTR_RO(board_id);
@@ -1158,36 +1163,12 @@ int mthca_register_device(struct mthca_dev *dev)
if (ret)
return ret;
- dev->ib_dev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.num_comp_vectors = 1;
dev->ib_dev.dev.parent = &dev->pdev->dev;
if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
- dev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
-
if (mthca_is_memfree(dev))
ib_set_device_ops(&dev->ib_dev,
&mthca_dev_arbel_srq_ops);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 08a2a7afafd3..07cfc0934b17 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -863,6 +863,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
mutex_lock(&qp->mutex);
if (attr_mask & IB_QP_CUR_STATE) {
cur_state = attr->cur_qp_state;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 9b96661a7143..9a834a9cca0e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -119,7 +119,7 @@ static ssize_t hw_rev_show(struct device *device,
struct ocrdma_dev *dev =
rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev);
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
+ return sysfs_emit(buf, "0x%x\n", dev->nic_info.pdev->vendor);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -129,7 +129,7 @@ static ssize_t hca_type_show(struct device *device,
struct ocrdma_dev *dev =
rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev);
- return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
+ return sysfs_emit(buf, "%s\n", &dev->model_number[0]);
}
static DEVICE_ATTR_RO(hca_type);
@@ -154,6 +154,7 @@ static const struct ib_device_ops ocrdma_dev_ops = {
.create_ah = ocrdma_create_ah,
.create_cq = ocrdma_create_cq,
.create_qp = ocrdma_create_qp,
+ .create_user_ah = ocrdma_create_ah,
.dealloc_pd = ocrdma_dealloc_pd,
.dealloc_ucontext = ocrdma_dealloc_ucontext,
.dereg_mr = ocrdma_dereg_mr,
@@ -204,32 +205,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
sizeof(OCRDMA_NODE_DESC));
- dev->ibdev.uverbs_cmd_mask =
- OCRDMA_UVERBS(GET_CONTEXT) |
- OCRDMA_UVERBS(QUERY_DEVICE) |
- OCRDMA_UVERBS(QUERY_PORT) |
- OCRDMA_UVERBS(ALLOC_PD) |
- OCRDMA_UVERBS(DEALLOC_PD) |
- OCRDMA_UVERBS(REG_MR) |
- OCRDMA_UVERBS(DEREG_MR) |
- OCRDMA_UVERBS(CREATE_COMP_CHANNEL) |
- OCRDMA_UVERBS(CREATE_CQ) |
- OCRDMA_UVERBS(RESIZE_CQ) |
- OCRDMA_UVERBS(DESTROY_CQ) |
- OCRDMA_UVERBS(REQ_NOTIFY_CQ) |
- OCRDMA_UVERBS(CREATE_QP) |
- OCRDMA_UVERBS(MODIFY_QP) |
- OCRDMA_UVERBS(QUERY_QP) |
- OCRDMA_UVERBS(DESTROY_QP) |
- OCRDMA_UVERBS(POLL_CQ) |
- OCRDMA_UVERBS(POST_SEND) |
- OCRDMA_UVERBS(POST_RECV);
-
- dev->ibdev.uverbs_cmd_mask |=
- OCRDMA_UVERBS(CREATE_AH) |
- OCRDMA_UVERBS(MODIFY_AH) |
- OCRDMA_UVERBS(QUERY_AH) |
- OCRDMA_UVERBS(DESTROY_AH);
dev->ibdev.node_type = RDMA_NODE_IB_CA;
dev->ibdev.phys_port_cnt = 1;
@@ -240,16 +215,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops);
- if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
- dev->ibdev.uverbs_cmd_mask |=
- OCRDMA_UVERBS(CREATE_SRQ) |
- OCRDMA_UVERBS(MODIFY_SRQ) |
- OCRDMA_UVERBS(QUERY_SRQ) |
- OCRDMA_UVERBS(DESTROY_SRQ) |
- OCRDMA_UVERBS(POST_SRQ_RECV);
-
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R)
ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops);
- }
+
rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group);
ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1);
if (ret)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 7350fe16f164..bc98bd950d99 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -974,7 +974,7 @@ int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ocrdma_create_cq_ureq ureq;
if (attr->flags)
- return -EINVAL;
+ return -EOPNOTSUPP;
if (udata) {
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
@@ -1299,6 +1299,9 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
struct ocrdma_create_qp_ureq ureq;
u16 dpp_credit_lmt, dpp_offset;
+ if (attrs->create_flags)
+ return ERR_PTR(-EOPNOTSUPP);
+
status = ocrdma_check_qp_params(ibpd, dev, attrs, udata);
if (status)
goto gen_err;
@@ -1391,6 +1394,9 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ocrdma_dev *dev;
enum ib_qp_state old_qps, new_qps;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
qp = get_ocrdma_qp(ibqp);
dev = get_ocrdma_dev(ibqp->device);
@@ -1770,6 +1776,9 @@ int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq);
+ if (init_attr->srq_type != IB_SRQT_BASIC)
+ return -EOPNOTSUPP;
+
if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
return -EINVAL;
if (init_attr->attr.max_wr > dev->attr.max_rqe)
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 967641662b24..8e7c069e1a2d 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -124,7 +124,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
struct qedr_dev *dev =
rdma_device_to_drv_device(device, struct qedr_dev, ibdev);
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->attr.hw_ver);
+ return sysfs_emit(buf, "0x%x\n", dev->attr.hw_ver);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -134,10 +134,9 @@ static ssize_t hca_type_show(struct device *device,
struct qedr_dev *dev =
rdma_device_to_drv_device(device, struct qedr_dev, ibdev);
- return scnprintf(buf, PAGE_SIZE, "FastLinQ QL%x %s\n",
- dev->pdev->device,
- rdma_protocol_iwarp(&dev->ibdev, 1) ?
- "iWARP" : "RoCE");
+ return sysfs_emit(buf, "FastLinQ QL%x %s\n", dev->pdev->device,
+ rdma_protocol_iwarp(&dev->ibdev, 1) ? "iWARP" :
+ "RoCE");
}
static DEVICE_ATTR_RO(hca_type);
@@ -188,10 +187,6 @@ static void qedr_roce_register_device(struct qedr_dev *dev)
dev->ibdev.node_type = RDMA_NODE_IB_CA;
ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops);
-
- dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(OPEN_XRCD) |
- QEDR_UVERBS(CLOSE_XRCD) |
- QEDR_UVERBS(CREATE_XSRQ);
}
static const struct ib_device_ops qedr_dev_ops = {
@@ -249,31 +244,6 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.node_guid = dev->attr.node_guid;
memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
- dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) |
- QEDR_UVERBS(QUERY_DEVICE) |
- QEDR_UVERBS(QUERY_PORT) |
- QEDR_UVERBS(ALLOC_PD) |
- QEDR_UVERBS(DEALLOC_PD) |
- QEDR_UVERBS(CREATE_COMP_CHANNEL) |
- QEDR_UVERBS(CREATE_CQ) |
- QEDR_UVERBS(RESIZE_CQ) |
- QEDR_UVERBS(DESTROY_CQ) |
- QEDR_UVERBS(REQ_NOTIFY_CQ) |
- QEDR_UVERBS(CREATE_QP) |
- QEDR_UVERBS(MODIFY_QP) |
- QEDR_UVERBS(QUERY_QP) |
- QEDR_UVERBS(DESTROY_QP) |
- QEDR_UVERBS(CREATE_SRQ) |
- QEDR_UVERBS(DESTROY_SRQ) |
- QEDR_UVERBS(QUERY_SRQ) |
- QEDR_UVERBS(MODIFY_SRQ) |
- QEDR_UVERBS(POST_SRQ_RECV) |
- QEDR_UVERBS(REG_MR) |
- QEDR_UVERBS(DEREG_MR) |
- QEDR_UVERBS(POLL_CQ) |
- QEDR_UVERBS(POST_SEND) |
- QEDR_UVERBS(POST_RECV);
-
if (IS_IWARP(dev)) {
rc = qedr_iw_register_device(dev);
if (rc)
@@ -796,6 +766,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
}
xa_unlock_irqrestore(&dev->srqs, flags);
DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq);
+ break;
default:
break;
}
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 511c95bb3d01..0eb6a7a618e0 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -928,6 +928,9 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
"create_cq: called from %s. entries=%d, vector=%d\n",
udata ? "User Lib" : "Kernel", entries, vector);
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
if (entries > QEDR_MAX_CQES) {
DP_ERR(dev,
"create cq: the number of entries %d is too high. Must be equal or below %d.\n",
@@ -1546,6 +1549,10 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
"create SRQ called from %s (pd %p)\n",
(udata) ? "User lib" : "kernel", pd);
+ if (init_attr->srq_type != IB_SRQT_BASIC &&
+ init_attr->srq_type != IB_SRQT_XRC)
+ return -EOPNOTSUPP;
+
rc = qedr_check_srq_params(dev, init_attr, udata);
if (rc)
return -EINVAL;
@@ -2241,6 +2248,9 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
struct ib_qp *ibqp;
int rc = 0;
+ if (attrs->create_flags)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (attrs->qp_type == IB_QPT_XRC_TGT) {
xrcd = get_qedr_xrcd(attrs->xrcd);
dev = get_qedr_dev(xrcd->ibxrcd.device);
@@ -2477,6 +2487,9 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
"modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
attr->qp_state);
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
old_qp_state = qedr_get_ibqp_state(qp->state);
if (attr_mask & IB_QP_STATE)
new_qp_state = attr->qp_state;
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 3dc6ce033319..2e07b3749b88 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -90,25 +90,18 @@ int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
goto bail;
}
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (ret) {
/*
* If the 64 bit setup fails, try 32 bit. Some systems
* do not setup 64 bit maps on systems with 2GB or less
* memory installed.
*/
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (ret) {
qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret);
goto bail;
}
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- } else
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- if (ret) {
- qib_early_err(&pdev->dev,
- "Unable to set DMA consistent mask: %d\n", ret);
- goto bail;
}
pci_set_master(pdev);
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 021df0654ba7..62c179fc764b 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -43,11 +43,8 @@
static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf)
{
struct qib_devdata *dd = ppd->dd;
- int ret;
- ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT);
- ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
- return ret;
+ return sysfs_emit(buf, "%d\n", dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT));
}
static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf,
@@ -106,14 +103,10 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf,
static ssize_t show_status(struct qib_pportdata *ppd, char *buf)
{
- ssize_t ret;
-
if (!ppd->statusp)
- ret = -EINVAL;
- else
- ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
- (unsigned long long) *(ppd->statusp));
- return ret;
+ return -EINVAL;
+
+ return sysfs_emit(buf, "0x%llx\n", (unsigned long long)*(ppd->statusp));
}
/*
@@ -392,7 +385,7 @@ static ssize_t sl2vl_attr_show(struct kobject *kobj, struct attribute *attr,
container_of(kobj, struct qib_pportdata, sl2vl_kobj);
struct qib_ibport *qibp = &ppd->ibport_data;
- return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]);
+ return sysfs_emit(buf, "%u\n", qibp->sl_to_vl[sattr->sl]);
}
static const struct sysfs_ops qib_sl2vl_ops = {
@@ -501,17 +494,18 @@ static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr,
struct qib_pportdata *ppd =
container_of(kobj, struct qib_pportdata, diagc_kobj);
struct qib_ibport *qibp = &ppd->ibport_data;
+ u64 val;
if (!strncmp(dattr->attr.name, "rc_acks", 7))
- return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks));
+ val = READ_PER_CPU_CNTR(rc_acks);
else if (!strncmp(dattr->attr.name, "rc_qacks", 8))
- return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks));
+ val = READ_PER_CPU_CNTR(rc_qacks);
else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15))
- return sprintf(buf, "%llu\n",
- READ_PER_CPU_CNTR(rc_delayed_comp));
+ val = READ_PER_CPU_CNTR(rc_delayed_comp);
else
- return sprintf(buf, "%u\n",
- *(u32 *)((char *)qibp + dattr->counter));
+ val = *(u32 *)((char *)qibp + dattr->counter);
+
+ return sysfs_emit(buf, "%llu\n", val);
}
static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
@@ -565,7 +559,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
struct qib_ibdev *dev =
rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
- return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
+ return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -575,13 +569,10 @@ static ssize_t hca_type_show(struct device *device,
struct qib_ibdev *dev =
rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
struct qib_devdata *dd = dd_from_dev(dev);
- int ret;
if (!dd->boardname)
- ret = -EINVAL;
- else
- ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
- return ret;
+ return -EINVAL;
+ return sysfs_emit(buf, "%s\n", dd->boardname);
}
static DEVICE_ATTR_RO(hca_type);
static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL);
@@ -590,7 +581,7 @@ static ssize_t version_show(struct device *device,
struct device_attribute *attr, char *buf)
{
/* The string printed here is already newline-terminated. */
- return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version);
+ return sysfs_emit(buf, "%s", (char *)ib_qib_version);
}
static DEVICE_ATTR_RO(version);
@@ -602,7 +593,7 @@ static ssize_t boardversion_show(struct device *device,
struct qib_devdata *dd = dd_from_dev(dev);
/* The string printed here is already newline-terminated. */
- return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
+ return sysfs_emit(buf, "%s", dd->boardversion);
}
static DEVICE_ATTR_RO(boardversion);
@@ -614,7 +605,7 @@ static ssize_t localbus_info_show(struct device *device,
struct qib_devdata *dd = dd_from_dev(dev);
/* The string printed here is already newline-terminated. */
- return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info);
+ return sysfs_emit(buf, "%s", dd->lbus_info);
}
static DEVICE_ATTR_RO(localbus_info);
@@ -628,9 +619,10 @@ static ssize_t nctxts_show(struct device *device,
/* Return the number of user ports (contexts) available. */
/* The calculation below deals with a special case where
* cfgctxts is set to 1 on a single-port board. */
- return scnprintf(buf, PAGE_SIZE, "%u\n",
- (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
- (dd->cfgctxts - dd->first_user_ctxt));
+ return sysfs_emit(buf, "%u\n",
+ (dd->first_user_ctxt > dd->cfgctxts) ?
+ 0 :
+ (dd->cfgctxts - dd->first_user_ctxt));
}
static DEVICE_ATTR_RO(nctxts);
@@ -642,21 +634,20 @@ static ssize_t nfreectxts_show(struct device *device,
struct qib_devdata *dd = dd_from_dev(dev);
/* Return the number of free user ports (contexts) available. */
- return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
+ return sysfs_emit(buf, "%u\n", dd->freectxts);
}
static DEVICE_ATTR_RO(nfreectxts);
-static ssize_t serial_show(struct device *device,
- struct device_attribute *attr, char *buf)
+static ssize_t serial_show(struct device *device, struct device_attribute *attr,
+ char *buf)
{
struct qib_ibdev *dev =
rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
struct qib_devdata *dd = dd_from_dev(dev);
+ const u8 *end = memchr(dd->serial, 0, ARRAY_SIZE(dd->serial));
+ int size = end ? end - dd->serial : ARRAY_SIZE(dd->serial);
- buf[sizeof(dd->serial)] = '\0';
- memcpy(buf, dd->serial, sizeof(dd->serial));
- strcat(buf, "\n");
- return strlen(buf);
+ return sysfs_emit(buf, ".%*s\n", size, dd->serial);
}
static DEVICE_ATTR_RO(serial);
@@ -689,27 +680,26 @@ static ssize_t tempsense_show(struct device *device,
struct qib_ibdev *dev =
rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
struct qib_devdata *dd = dd_from_dev(dev);
- int ret;
- int idx;
+ int i;
u8 regvals[8];
- ret = -ENXIO;
- for (idx = 0; idx < 8; ++idx) {
- if (idx == 6)
+ for (i = 0; i < 8; i++) {
+ int ret;
+
+ if (i == 6)
continue;
- ret = dd->f_tempsense_rd(dd, idx);
+ ret = dd->f_tempsense_rd(dd, i);
if (ret < 0)
- break;
- regvals[idx] = ret;
+ return ret; /* return error on bad read */
+ regvals[i] = ret;
}
- if (idx == 8)
- ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
- *(signed char *)(regvals),
- *(signed char *)(regvals + 1),
- regvals[2], regvals[3],
- *(signed char *)(regvals + 5),
- *(signed char *)(regvals + 7));
- return ret;
+ return sysfs_emit(buf, "%d %d %02X %02X %d %d\n",
+ (signed char)regvals[0],
+ (signed char)regvals[1],
+ regvals[2],
+ regvals[3],
+ (signed char)regvals[5],
+ (signed char)regvals[7]);
}
static DEVICE_ATTR_RO(tempsense);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index aa2e65fc5cd6..1b63a491fa72 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -398,25 +398,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
us_ibdev->ib_dev.dev.parent = &dev->dev;
- us_ibdev->ib_dev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_OPEN_QP);
-
ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops);
rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index c85d48ae7442..e59615a4c9d9 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -57,7 +57,7 @@ static ssize_t board_id_show(struct device *device,
subsystem_device_id = us_ibdev->pdev->subsystem_device;
mutex_unlock(&us_ibdev->usdev_lock);
- return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id);
+ return sysfs_emit(buf, "%u\n", subsystem_device_id);
}
static DEVICE_ATTR_RO(board_id);
@@ -69,19 +69,13 @@ config_show(struct device *device, struct device_attribute *attr, char *buf)
{
struct usnic_ib_dev *us_ibdev =
rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev);
- char *ptr;
- unsigned left;
- unsigned n;
enum usnic_vnic_res_type res_type;
-
- /* Buffer space limit is 1 page */
- ptr = buf;
- left = PAGE_SIZE;
+ int len;
mutex_lock(&us_ibdev->usdev_lock);
if (kref_read(&us_ibdev->vf_cnt) > 0) {
char *busname;
-
+ char *sep = "";
/*
* bus name seems to come with annoying prefix.
* Remove it if it is predictable
@@ -90,39 +84,35 @@ config_show(struct device *device, struct device_attribute *attr, char *buf)
if (strncmp(busname, "PCI Bus ", 8) == 0)
busname += 8;
- n = scnprintf(ptr, left,
- "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:",
- dev_name(&us_ibdev->ib_dev.dev),
- busname,
- PCI_SLOT(us_ibdev->pdev->devfn),
- PCI_FUNC(us_ibdev->pdev->devfn),
- netdev_name(us_ibdev->netdev),
- us_ibdev->ufdev->mac,
- kref_read(&us_ibdev->vf_cnt));
- UPDATE_PTR_LEFT(n, ptr, left);
+ len = sysfs_emit(buf, "%s: %s:%d.%d, %s, %pM, %u VFs\n",
+ dev_name(&us_ibdev->ib_dev.dev),
+ busname,
+ PCI_SLOT(us_ibdev->pdev->devfn),
+ PCI_FUNC(us_ibdev->pdev->devfn),
+ netdev_name(us_ibdev->netdev),
+ us_ibdev->ufdev->mac,
+ kref_read(&us_ibdev->vf_cnt));
+ len += sysfs_emit_at(buf, len, " Per VF:");
for (res_type = USNIC_VNIC_RES_TYPE_EOL;
- res_type < USNIC_VNIC_RES_TYPE_MAX;
- res_type++) {
+ res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
if (us_ibdev->vf_res_cnt[res_type] == 0)
continue;
- n = scnprintf(ptr, left, " %d %s%s",
- us_ibdev->vf_res_cnt[res_type],
- usnic_vnic_res_type_to_str(res_type),
- (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ?
- "," : "");
- UPDATE_PTR_LEFT(n, ptr, left);
+ len += sysfs_emit_at(buf, len, "%s %d %s",
+ sep,
+ us_ibdev->vf_res_cnt[res_type],
+ usnic_vnic_res_type_to_str(res_type));
+ sep = ",";
}
- n = scnprintf(ptr, left, "\n");
- UPDATE_PTR_LEFT(n, ptr, left);
+ len += sysfs_emit_at(buf, len, "\n");
} else {
- n = scnprintf(ptr, left, "%s: no VFs\n",
- dev_name(&us_ibdev->ib_dev.dev));
- UPDATE_PTR_LEFT(n, ptr, left);
+ len = sysfs_emit(buf, "%s: no VFs\n",
+ dev_name(&us_ibdev->ib_dev.dev));
}
+
mutex_unlock(&us_ibdev->usdev_lock);
- return ptr - buf;
+ return len;
}
static DEVICE_ATTR_RO(config);
@@ -132,8 +122,7 @@ iface_show(struct device *device, struct device_attribute *attr, char *buf)
struct usnic_ib_dev *us_ibdev =
rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev);
- return scnprintf(buf, PAGE_SIZE, "%s\n",
- netdev_name(us_ibdev->netdev));
+ return sysfs_emit(buf, "%s\n", netdev_name(us_ibdev->netdev));
}
static DEVICE_ATTR_RO(iface);
@@ -143,8 +132,7 @@ max_vf_show(struct device *device, struct device_attribute *attr, char *buf)
struct usnic_ib_dev *us_ibdev =
rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev);
- return scnprintf(buf, PAGE_SIZE, "%u\n",
- kref_read(&us_ibdev->vf_cnt));
+ return sysfs_emit(buf, "%u\n", kref_read(&us_ibdev->vf_cnt));
}
static DEVICE_ATTR_RO(max_vf);
@@ -158,8 +146,7 @@ qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
- return scnprintf(buf, PAGE_SIZE,
- "%d\n", qp_per_vf);
+ return sysfs_emit(buf, "%d\n", qp_per_vf);
}
static DEVICE_ATTR_RO(qp_per_vf);
@@ -169,8 +156,8 @@ cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
struct usnic_ib_dev *us_ibdev =
rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev);
- return scnprintf(buf, PAGE_SIZE, "%d\n",
- us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
+ return sysfs_emit(buf, "%d\n",
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
}
static DEVICE_ATTR_RO(cq_per_vf);
@@ -217,43 +204,36 @@ struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME)
static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
{
- return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx);
+ return sysfs_emit(buf, "0x%p\n", qp_grp->ctx);
}
static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
{
- int i, j, n;
- int left;
- char *ptr;
+ int i, j;
struct usnic_vnic_res_chunk *res_chunk;
struct usnic_vnic_res *vnic_res;
+ int len;
- left = PAGE_SIZE;
- ptr = buf;
-
- n = scnprintf(ptr, left,
- "QPN: %d State: (%s) PID: %u VF Idx: %hu ",
- qp_grp->ibqp.qp_num,
- usnic_ib_qp_grp_state_to_string(qp_grp->state),
- qp_grp->owner_pid,
- usnic_vnic_get_index(qp_grp->vf->vnic));
- UPDATE_PTR_LEFT(n, ptr, left);
+ len = sysfs_emit(buf, "QPN: %d State: (%s) PID: %u VF Idx: %hu ",
+ qp_grp->ibqp.qp_num,
+ usnic_ib_qp_grp_state_to_string(qp_grp->state),
+ qp_grp->owner_pid,
+ usnic_vnic_get_index(qp_grp->vf->vnic));
for (i = 0; qp_grp->res_chunk_list[i]; i++) {
res_chunk = qp_grp->res_chunk_list[i];
for (j = 0; j < res_chunk->cnt; j++) {
vnic_res = res_chunk->res[j];
- n = scnprintf(ptr, left, "%s[%d] ",
+ len += sysfs_emit_at(
+ buf, len, "%s[%d] ",
usnic_vnic_res_type_to_str(vnic_res->type),
vnic_res->vnic_idx);
- UPDATE_PTR_LEFT(n, ptr, left);
}
}
- n = scnprintf(ptr, left, "\n");
- UPDATE_PTR_LEFT(n, ptr, left);
+ len = sysfs_emit_at(buf, len, "\n");
- return ptr - buf;
+ return len;
}
static QPN_ATTR_RO(context);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 9e961f8ffa10..38a37770c016 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -474,7 +474,7 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
us_ibdev = to_usdev(pd->device);
if (init_attr->create_flags)
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-EOPNOTSUPP);
err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
if (err) {
@@ -557,6 +557,9 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int status;
usnic_dbg("\n");
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
qp_grp = to_uqp_grp(ibqp);
mutex_lock(&qp_grp->vf->pf->usdev_lock);
@@ -581,7 +584,7 @@ int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata)
{
if (attr->flags)
- return -EINVAL;
+ return -EOPNOTSUPP;
return 0;
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 319546a39a0d..a119ac3e103c 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -119,6 +119,9 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
entries = roundup_pow_of_two(entries);
if (entries < 1 || entries > dev->dsr->caps.max_cqe)
return -EINVAL;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 6895bac53990..00a330909bb3 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -68,21 +68,21 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);
static ssize_t hca_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
+ return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
}
static DEVICE_ATTR_RO(hca_type);
static ssize_t hw_rev_show(struct device *device,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", PVRDMA_REV_ID);
+ return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID);
}
static DEVICE_ATTR_RO(hw_rev);
static ssize_t board_id_show(struct device *device,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
+ return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID);
}
static DEVICE_ATTR_RO(board_id);
@@ -205,27 +205,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
dev->flags = 0;
dev->ib_dev.num_comp_vectors = 1;
dev->ib_dev.dev.parent = &dev->pdev->dev;
- dev->ib_dev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;
@@ -249,13 +228,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
/* Check if SRQ is supported by backend */
if (dev->dsr->caps.max_srq) {
- dev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-
ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops);
dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq,
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 428256c55065..1d3bdd7bb51d 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -209,7 +209,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
dev_warn(&dev->pdev->dev,
"invalid create queuepair flags %#x\n",
init_attr->create_flags);
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-EOPNOTSUPP);
}
if (init_attr->qp_type != IB_QPT_RC &&
@@ -544,6 +544,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum ib_qp_state cur_state, next_state;
int ret;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
/* Sanity checking. Should need lock here */
mutex_lock(&qp->mutex);
cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index 082208f9aa90..bdc2703532c6 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -121,7 +121,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
dev_warn(&dev->pdev->dev,
"shared receive queue type %d not supported\n",
init_attr->srq_type);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr ||
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
index c8e268082952..0df48b3a6b56 100644
--- a/drivers/infiniband/sw/rdmavt/Kconfig
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -4,6 +4,5 @@ config INFINIBAND_RDMAVT
depends on INFINIBAND_VIRT_DMA
depends on X86_64
depends on PCI
- select DMA_VIRT_OPS
help
This is a common software verbs provider for RDMA networks.
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index b938c4ffa99a..a3e5b368c5e7 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -126,10 +126,9 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
}
/**
- * rvt_destory_ah - Destory an address handle
+ * rvt_destroy_ah - Destroy an address handle
* @ibah: address handle
* @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
- *
* Return: 0 on success
*/
int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 19248be14093..20cc0799ac4b 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -211,7 +211,7 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
int err;
if (attr->flags)
- return -EINVAL;
+ return -EOPNOTSUPP;
if (entries < 1 || entries > rdi->dparms.props.max_cqe)
return -EINVAL;
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index dd11c6fcd060..5233a63d99a6 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -54,7 +54,7 @@
#include "mcast.h"
/**
- * rvt_driver_mcast - init resources for multicast
+ * rvt_driver_mcast_init - init resources for multicast
* @rdi: rvt dev struct
*
* This is per device that registers with rdmavt
@@ -69,7 +69,7 @@ void rvt_driver_mcast_init(struct rvt_dev_info *rdi)
}
/**
- * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
+ * rvt_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
* @qp: the QP to link
*/
static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp)
@@ -98,7 +98,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
}
/**
- * mcast_alloc - allocate the multicast GID structure
+ * rvt_mcast_alloc - allocate the multicast GID structure
* @mgid: the multicast GID
* @lid: the muilticast LID (host order)
*
@@ -181,7 +181,7 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
EXPORT_SYMBOL(rvt_mcast_find);
/**
- * mcast_add - insert mcast GID into table and attach QP struct
+ * rvt_mcast_add - insert mcast GID into table and attach QP struct
* @mcast: the mcast GID table
* @mqp: the QP to attach
*
@@ -426,8 +426,8 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
/**
- *rvt_mast_tree_empty - determine if any qps are attached to any mcast group
- *@rdi: rvt dev struct
+ * rvt_mcast_tree_empty - determine if any qps are attached to any mcast group
+ * @rdi: rvt dev struct
*
* Return: in use count
*/
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 8490fdb9c91e..90fc234f489a 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -324,8 +324,6 @@ static void __rvt_free_mr(struct rvt_mr *mr)
* @acc: access flags
*
* Return: the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the functions in
- * struct dma_virt_ops.
*/
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
@@ -766,7 +764,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
/*
* We use LKEY == zero for kernel virtual addresses
- * (see rvt_get_dma_mr() and dma_virt_ops).
+ * (see rvt_get_dma_mr()).
*/
if (sge->lkey == 0) {
struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
@@ -877,7 +875,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
/*
* We use RKEY == zero for kernel virtual addresses
- * (see rvt_get_dma_mr() and dma_virt_ops).
+ * (see rvt_get_dma_mr()).
*/
rcu_read_lock();
if (rkey == 0) {
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index ee48befc8978..22fa9bde5419 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1083,10 +1083,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
if (!rdi)
return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags & ~IB_QP_CREATE_NETDEV_USE)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge ||
- init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
- (init_attr->create_flags &&
- init_attr->create_flags != IB_QP_CREATE_NETDEV_USE))
+ init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr)
return ERR_PTR(-EINVAL);
/* Check receive queue parameters if no SRQ is specified. */
@@ -1469,6 +1470,9 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int pmtu = 0; /* for gcc warning only */
int opa_ah;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
spin_lock_irq(&qp->r_lock);
spin_lock(&qp->s_hlock);
spin_lock(&qp->s_lock);
@@ -1823,7 +1827,7 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
/**
- * rvt_post_receive - post a receive on a QP
+ * rvt_post_recv - post a receive on a QP
* @ibqp: the QP to post the receive on
* @wr: the WR to post
* @bad_wr: the first bad WR is put here
@@ -2245,7 +2249,7 @@ bail:
}
/**
- * rvt_post_srq_receive - post a receive on a shared receive queue
+ * rvt_post_srq_recv - post a receive on a shared receive queue
* @ibsrq: the SRQ to post the receive on
* @wr: the list of work requests to post
* @bad_wr: A pointer to the first WR to cause a problem is put here
@@ -2497,7 +2501,7 @@ bail:
EXPORT_SYMBOL(rvt_get_rwqe);
/**
- * qp_comm_est - handle trap with QP established
+ * rvt_comm_est - handle trap with QP established
* @qp: the QP
*/
void rvt_comm_est(struct rvt_qp *qp)
@@ -2943,7 +2947,7 @@ static enum ib_wc_status loopback_qp_drop(struct rvt_ibport *rvp,
}
/**
- * ruc_loopback - handle UC and RC loopback requests
+ * rvt_ruc_loopback - handle UC and RC loopback requests
* @sqp: the sending QP
*
* This is called from rvt_do_send() to forward a WQE addressed to the same HFI
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 670a9623b46e..49cec85a372a 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -384,6 +384,7 @@ static const struct ib_device_ops rvt_dev_ops = {
.create_cq = rvt_create_cq,
.create_qp = rvt_create_qp,
.create_srq = rvt_create_srq,
+ .create_user_ah = rvt_create_ah,
.dealloc_pd = rvt_dealloc_pd,
.dealloc_ucontext = rvt_dealloc_ucontext,
.dereg_mr = rvt_dereg_mr,
@@ -524,7 +525,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
int rvt_register_device(struct rvt_dev_info *rdi)
{
int ret = 0, i;
- u64 dma_mask;
if (!rdi)
return -EINVAL;
@@ -579,13 +579,6 @@ int rvt_register_device(struct rvt_dev_info *rdi)
/* Completion queues */
spin_lock_init(&rdi->n_cqs_lock);
- /* DMA Operations */
- rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms;
- dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
- ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask);
- if (ret)
- goto bail_wss;
-
/* Protection Domain */
spin_lock_init(&rdi->n_pds_lock);
rdi->n_pds_allocated = 0;
@@ -596,36 +589,11 @@ int rvt_register_device(struct rvt_dev_info *rdi)
* exactly which functions rdmavt supports, nor do they know the ABI
* version, so we do all of this sort of stuff here.
*/
- rdi->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
- (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ rdi->ibdev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
(1ull << IB_USER_VERBS_CMD_POST_SEND) |
(1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
rdi->ibdev.node_type = RDMA_NODE_IB_CA;
if (!rdi->ibdev.num_comp_vectors)
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 8810bfa68049..452149066792 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -5,7 +5,6 @@ config RDMA_RXE
depends on INFINIBAND_VIRT_DMA
select NET_UDP_TUNNEL
select CRYPTO_CRC32
- select DMA_VIRT_OPS
help
This driver implements the InfiniBand RDMA transport over
the Linux network stack. It enables a system with a
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 43394c3f29d4..b315ebf041ac 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -123,11 +123,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));
- /* make sure all changes to the CQ are written before we update the
- * producer pointer
- */
- smp_wmb();
-
advance_producer(cq->queue);
spin_unlock_irqrestore(&cq->cq_lock, flags);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index d2ce852447c1..6e8c41567ba0 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -31,7 +31,6 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
return 0;
case RXE_MEM_TYPE_MR:
- case RXE_MEM_TYPE_FMR:
if (iova < mem->iova ||
length > mem->length ||
iova > mem->iova + mem->length - length)
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 34bef7d8e6b4..c4b06ced30a7 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -8,7 +8,6 @@
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/if.h>
-#include <linux/if_vlan.h>
#include <net/udp_tunnel.h>
#include <net/sch_generic.h>
#include <linux/netfilter.h>
@@ -20,18 +19,6 @@
static struct rxe_recv_sockets recv_sockets;
-struct device *rxe_dma_device(struct rxe_dev *rxe)
-{
- struct net_device *ndev;
-
- ndev = rxe->ndev;
-
- if (is_vlan_dev(ndev))
- ndev = vlan_dev_real_dev(ndev);
-
- return ndev->dev.parent;
-}
-
int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
int err;
@@ -166,14 +153,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct udphdr *udph;
struct net_device *ndev = skb->dev;
- struct net_device *rdev = ndev;
struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
- if (!rxe && is_vlan_dev(rdev)) {
- rdev = vlan_dev_real_dev(ndev);
- rxe = rxe_get_dev_from_net(rdev);
- }
if (!rxe)
goto drop;
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 7d434a6837a7..2902ca7b288c 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -7,9 +7,11 @@
#ifndef RXE_QUEUE_H
#define RXE_QUEUE_H
+/* for definition of shared struct rxe_queue_buf */
+#include <uapi/rdma/rdma_user_rxe.h>
+
/* implements a simple circular buffer that can optionally be
* shared between user space and the kernel and can be resized
-
* the requested element size is rounded up to a power of 2
* and the number of elements in the buffer is also rounded
* up to a power of 2. Since the queue is empty when the
@@ -17,28 +19,6 @@
* of the queue is one less than the number of element slots
*/
-/* this data structure is shared between user space and kernel
- * space for those cases where the queue is shared. It contains
- * the producer and consumer indices. Is also contains a copy
- * of the queue size parameters for user space to use but the
- * kernel must use the parameters in the rxe_queue struct
- * this MUST MATCH the corresponding librxe struct
- * for performance reasons arrange to have producer and consumer
- * pointers in separate cache lines
- * the kernel should always mask the indices to avoid accessing
- * memory outside of the data area
- */
-struct rxe_queue_buf {
- __u32 log2_elem_size;
- __u32 index_mask;
- __u32 pad_1[30];
- __u32 producer_index;
- __u32 pad_2[31];
- __u32 consumer_index;
- __u32 pad_3[31];
- __u8 data[];
-};
-
struct rxe_queue {
struct rxe_dev *rxe;
struct rxe_queue_buf *buf;
@@ -46,7 +26,7 @@ struct rxe_queue {
size_t buf_size;
size_t elem_size;
unsigned int log2_elem_size;
- unsigned int index_mask;
+ u32 index_mask;
};
int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf,
@@ -76,26 +56,56 @@ static inline int next_index(struct rxe_queue *q, int index)
static inline int queue_empty(struct rxe_queue *q)
{
- return ((q->buf->producer_index - q->buf->consumer_index)
- & q->index_mask) == 0;
+ u32 prod;
+ u32 cons;
+
+ /* make sure all changes to queue complete before
+ * testing queue empty
+ */
+ prod = smp_load_acquire(&q->buf->producer_index);
+ /* same */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+
+ return ((prod - cons) & q->index_mask) == 0;
}
static inline int queue_full(struct rxe_queue *q)
{
- return ((q->buf->producer_index + 1 - q->buf->consumer_index)
- & q->index_mask) == 0;
+ u32 prod;
+ u32 cons;
+
+ /* make sure all changes to queue complete before
+ * testing queue full
+ */
+ prod = smp_load_acquire(&q->buf->producer_index);
+ /* same */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+
+ return ((prod + 1 - cons) & q->index_mask) == 0;
}
static inline void advance_producer(struct rxe_queue *q)
{
- q->buf->producer_index = (q->buf->producer_index + 1)
- & q->index_mask;
+ u32 prod;
+
+ prod = (q->buf->producer_index + 1) & q->index_mask;
+
+ /* make sure all changes to queue complete before
+ * changing producer index
+ */
+ smp_store_release(&q->buf->producer_index, prod);
}
static inline void advance_consumer(struct rxe_queue *q)
{
- q->buf->consumer_index = (q->buf->consumer_index + 1)
- & q->index_mask;
+ u32 cons;
+
+ cons = (q->buf->consumer_index + 1) & q->index_mask;
+
+ /* make sure all changes to queue complete before
+ * changing consumer index
+ */
+ smp_store_release(&q->buf->consumer_index, cons);
}
static inline void *producer_addr(struct rxe_queue *q)
@@ -112,12 +122,28 @@ static inline void *consumer_addr(struct rxe_queue *q)
static inline unsigned int producer_index(struct rxe_queue *q)
{
- return q->buf->producer_index;
+ u32 index;
+
+ /* make sure all changes to queue
+ * complete before getting producer index
+ */
+ index = smp_load_acquire(&q->buf->producer_index);
+ index &= q->index_mask;
+
+ return index;
}
static inline unsigned int consumer_index(struct rxe_queue *q)
{
- return q->buf->consumer_index;
+ u32 index;
+
+ /* make sure all changes to queue
+ * complete before getting consumer index
+ */
+ index = smp_load_acquire(&q->buf->consumer_index);
+ index &= q->index_mask;
+
+ return index;
}
static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index af3923bf0a36..d4917646641a 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -634,7 +634,8 @@ next_wqe:
}
if (unlikely(qp_type(qp) == IB_QPT_RC &&
- qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) {
+ psn_compare(qp->req.psn, (qp->comp.psn +
+ RXE_MAX_UNACKED_PSNS)) > 0)) {
qp->req.wait_psn = 1;
goto exit;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c7e3b6a4af38..5a098083a9d2 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -872,11 +872,6 @@ static enum resp_states do_complete(struct rxe_qp *qp,
else
wc->network_hdr_type = RDMA_NETWORK_IPV6;
- if (is_vlan_dev(skb->dev)) {
- wc->wc_flags |= IB_WC_WITH_VLAN;
- wc->vlan_id = vlan_dev_vlan_id(skb->dev);
- }
-
if (pkt->mask & RXE_IMMDT_MASK) {
wc->wc_flags |= IB_WC_WITH_IMM;
wc->ex.imm_data = immdt_imm(pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index f9c832e82552..a031514e2f41 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -244,11 +244,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
recv_wqe->dma.cur_sge = 0;
recv_wqe->dma.sge_offset = 0;
- /* make sure all changes to the work queue are written before we
- * update the producer pointer
- */
- smp_wmb();
-
advance_producer(rq->queue);
return 0;
@@ -265,6 +260,9 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
struct rxe_srq *srq = to_rsrq(ibsrq);
struct rxe_create_srq_resp __user *uresp = NULL;
+ if (init->srq_type != IB_SRQT_BASIC)
+ return -EOPNOTSUPP;
+
if (udata) {
if (udata->outlen < sizeof(*uresp))
return -EINVAL;
@@ -392,6 +390,9 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
uresp = udata->outbuf;
}
+ if (init->create_flags)
+ return ERR_PTR(-EOPNOTSUPP);
+
err = rxe_qp_chk_init(rxe, init);
if (err)
goto err1;
@@ -433,6 +434,9 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct rxe_dev *rxe = to_rdev(ibqp->device);
struct rxe_qp *qp = to_rqp(ibqp);
+ if (mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
err = rxe_qp_chk_attr(rxe, qp, attr, mask);
if (err)
goto err1;
@@ -624,12 +628,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
if (unlikely(err))
goto err1;
- /*
- * make sure all changes to the work queue are
- * written before we update the producer pointer
- */
- smp_wmb();
-
advance_producer(sq->queue);
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
@@ -765,7 +763,7 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
if (attr->flags)
- return -EINVAL;
+ return -EOPNOTSUPP;
err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
if (err)
@@ -1033,7 +1031,7 @@ static ssize_t parent_show(struct device *device,
struct rxe_dev *rxe =
rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);
- return scnprintf(buf, PAGE_SIZE, "%s\n", rxe_parent_name(rxe, 1));
+ return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1));
}
static DEVICE_ATTR_RO(parent);
@@ -1070,6 +1068,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.create_cq = rxe_create_cq,
.create_qp = rxe_create_qp,
.create_srq = rxe_create_srq,
+ .create_user_ah = rxe_create_ah,
.dealloc_driver = rxe_dealloc,
.dealloc_pd = rxe_dealloc_pd,
.dealloc_ucontext = rxe_dealloc_ucontext,
@@ -1118,56 +1117,18 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
int err;
struct ib_device *dev = &rxe->ib_dev;
struct crypto_shash *tfm;
- u64 dma_mask;
strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
dev->num_comp_vectors = num_possible_cpus();
- dev->dev.parent = rxe_dma_device(rxe);
dev->local_dma_lkey = 0;
addrconf_addr_eui48((unsigned char *)&dev->node_guid,
rxe->ndev->dev_addr);
- dev->dev.dma_parms = &rxe->dma_parms;
- dma_set_max_seg_size(&dev->dev, UINT_MAX);
- dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
- err = dma_coerce_mask_and_coherent(&dev->dev, dma_mask);
- if (err)
- return err;
- dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
- | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
- | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
- | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
- | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
- | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
- | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
- | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
- | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
- | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
- | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
- | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
- | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
- | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
- | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
- | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
- | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
- | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
- | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
- | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
- | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
- | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
- | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
- | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
- | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
- | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
- | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
- | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
- | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
- | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
- | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
- ;
+ dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
+ BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);
ib_set_device_ops(dev, &rxe_dev_ops);
err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 3414b341b709..79e0a5a878da 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -273,7 +273,6 @@ enum rxe_mem_type {
RXE_MEM_TYPE_NONE,
RXE_MEM_TYPE_DMA,
RXE_MEM_TYPE_MR,
- RXE_MEM_TYPE_FMR,
RXE_MEM_TYPE_MW,
};
@@ -352,7 +351,6 @@ struct rxe_port {
struct rxe_dev {
struct ib_device ib_dev;
struct ib_device_attr attr;
- struct device_dma_parameters dma_parms;
int max_ucontext;
int max_inline_data;
struct mutex usdev_lock;
diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig
index 3450ba5081df..1b5105cbabae 100644
--- a/drivers/infiniband/sw/siw/Kconfig
+++ b/drivers/infiniband/sw/siw/Kconfig
@@ -2,7 +2,6 @@ config RDMA_SIW
tristate "Software RDMA over TCP/IP (iWARP) driver"
depends on INET && INFINIBAND && LIBCRC32C
depends on INFINIBAND_VIRT_DMA
- select DMA_VIRT_OPS
help
This driver implements the iWARP RDMA transport over
the Linux TCP/IP network stack. It enables a system with a
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index e9753831ac3f..adda78996219 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -69,7 +69,6 @@ struct siw_pd {
struct siw_device {
struct ib_device base_dev;
- struct device_dma_parameters dma_parms;
struct net_device *netdev;
struct siw_dev_cap attrs;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 66764f7ef072..1f9e15b71504 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1047,7 +1047,7 @@ static void siw_cm_work_handler(struct work_struct *w)
cep->state);
}
}
- if (rv && rv != EAGAIN)
+ if (rv && rv != -EAGAIN)
release_cep = 1;
break;
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 181e06c1c43d..ee95cf29179d 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -305,25 +305,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
{
struct siw_device *sdev = NULL;
struct ib_device *base_dev;
- struct device *parent = netdev->dev.parent;
- u64 dma_mask;
int rv;
- if (!parent) {
- /*
- * The loopback device has no parent device,
- * so it appears as a top-level device. To support
- * loopback device connectivity, take this device
- * as the parent device. Skip all other devices
- * w/o parent device.
- */
- if (netdev->type != ARPHRD_LOOPBACK) {
- pr_warn("siw: device %s error: no parent device\n",
- netdev->name);
- return NULL;
- }
- parent = &netdev->dev;
- }
sdev = ib_alloc_device(siw_device, base_dev);
if (!sdev)
return NULL;
@@ -347,30 +330,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
addrconf_addr_eui48((unsigned char *)&base_dev->node_guid,
addr);
}
- base_dev->uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+
+ base_dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND);
base_dev->node_type = RDMA_NODE_RNIC;
memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON,
@@ -382,13 +343,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
* per physical port.
*/
base_dev->phys_port_cnt = 1;
- base_dev->dev.parent = parent;
- base_dev->dev.dma_parms = &sdev->dma_parms;
- dma_set_max_seg_size(&base_dev->dev, UINT_MAX);
- dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
- if (dma_coerce_mask_and_coherent(&base_dev->dev, dma_mask))
- goto error;
-
base_dev->num_comp_vectors = num_possible_cpus();
xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
@@ -430,7 +384,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
atomic_set(&sdev->num_mr, 0);
atomic_set(&sdev->num_pd, 0);
- sdev->numa_node = dev_to_node(parent);
+ sdev->numa_node = dev_to_node(&netdev->dev);
spin_lock_init(&sdev->lock);
return sdev;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 7cf3242ffb41..68fd053fc774 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -307,6 +307,9 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
siw_dbg(base_dev, "create new QP\n");
+ if (attrs->create_flags)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) {
siw_dbg(base_dev, "too many QP's\n");
rv = -ENOMEM;
@@ -544,6 +547,9 @@ int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr,
if (!attr_mask)
return 0;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
memset(&new_attrs, 0, sizeof(new_attrs));
if (attr_mask & IB_QP_ACCESS_FLAGS) {
@@ -1094,6 +1100,9 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
struct siw_cq *cq = to_siw_cq(base_cq);
int rv, size = attr->cqe;
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) {
siw_dbg(base_cq->device, "too many CQ's\n");
rv = -ENOMEM;
@@ -1555,6 +1564,9 @@ int siw_create_srq(struct ib_srq *base_srq,
base_ucontext);
int rv;
+ if (init_attrs->srq_type != IB_SRQT_BASIC)
+ return -EOPNOTSUPP;
+
if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) {
siw_dbg_pd(base_srq->pd, "too many SRQ's\n");
rv = -ENOMEM;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 8f0b598a46ec..d5d592bdab35 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1514,9 +1514,9 @@ static ssize_t show_mode(struct device *d, struct device_attribute *attr,
struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
- return sprintf(buf, "connected\n");
+ return sysfs_emit(buf, "connected\n");
else
- return sprintf(buf, "datagram\n");
+ return sysfs_emit(buf, "datagram\n");
}
static ssize_t set_mode(struct device *d, struct device_attribute *attr,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 67a21fdf5367..823f6831e7ea 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -166,6 +166,10 @@ static inline int ib_speed_enum_to_int(int speed)
return SPEED_14000;
case IB_SPEED_EDR:
return SPEED_25000;
+ case IB_SPEED_HDR:
+ return SPEED_50000;
+ case IB_SPEED_NDR:
+ return SPEED_100000;
}
return SPEED_UNKNOWN;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index abfab89423f4..a6f413491321 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2266,7 +2266,7 @@ static ssize_t show_pkey(struct device *dev,
struct net_device *ndev = to_net_dev(dev);
struct ipoib_dev_priv *priv = ipoib_priv(ndev);
- return sprintf(buf, "0x%04x\n", priv->pkey);
+ return sysfs_emit(buf, "0x%04x\n", priv->pkey);
}
static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
@@ -2276,7 +2276,8 @@ static ssize_t show_umcast(struct device *dev,
struct net_device *ndev = to_net_dev(dev);
struct ipoib_dev_priv *priv = ipoib_priv(ndev);
- return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
+ return sysfs_emit(buf, "%d\n",
+ test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
}
void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
@@ -2446,7 +2447,7 @@ static ssize_t dev_id_show(struct device *dev,
"\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n",
current->comm);
- return sprintf(buf, "%#x\n", ndev->dev_id);
+ return sysfs_emit(buf, "%#x\n", ndev->dev_id);
}
static DEVICE_ATTR_RO(dev_id);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 587252fd6f57..5a150a080ac2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -158,6 +158,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
int ret, size, req_vec;
int i;
+ static atomic_t counter;
size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
@@ -171,8 +172,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
if (ret != -EOPNOTSUPP)
return ret;
- req_vec = (priv->port - 1) * 2;
-
+ req_vec = atomic_inc_return(&counter) * 2;
cq_attr.cqe = size;
cq_attr.comp_vector = req_vec % priv->ca->num_comp_vectors;
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 4c50a87ed7cc..5958840dbeed 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -46,7 +46,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
struct net_device *dev = to_net_dev(d);
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- return sprintf(buf, "%s\n", priv->parent->name);
+ return sysfs_emit(buf, "%s\n", priv->parent->name);
}
static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 3690e28cc7ea..4792b9bf400f 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -187,23 +187,14 @@ iser_initialize_task_headers(struct iscsi_task *task,
struct iser_device *device = iser_conn->ib_conn.device;
struct iscsi_iser_task *iser_task = task->dd_data;
u64 dma_addr;
- const bool mgmt_task = !task->sc && !in_interrupt();
- int ret = 0;
- if (unlikely(mgmt_task))
- mutex_lock(&iser_conn->state_mutex);
-
- if (unlikely(iser_conn->state != ISER_CONN_UP)) {
- ret = -ENODEV;
- goto out;
- }
+ if (unlikely(iser_conn->state != ISER_CONN_UP))
+ return -ENODEV;
dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
ISER_HEADERS_LEN, DMA_TO_DEVICE);
- if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
- ret = -ENOMEM;
- goto out;
- }
+ if (ib_dma_mapping_error(device->ib_device, dma_addr))
+ return -ENOMEM;
tx_desc->inv_wr.next = NULL;
tx_desc->reg_wr.wr.next = NULL;
@@ -214,11 +205,8 @@ iser_initialize_task_headers(struct iscsi_task *task,
tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey;
iser_task->iser_conn = iser_conn;
-out:
- if (unlikely(mgmt_task))
- mutex_unlock(&iser_conn->state_mutex);
- return ret;
+ return 0;
}
/**
@@ -739,7 +727,7 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
}
/**
- * iscsi_iser_set_param() - set class connection parameter
+ * iscsi_iser_conn_get_stats() - get iscsi connection statistics
* @cls_conn: iscsi class connection
* @stats: iscsi stats to output
*
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 67f65dcb15a6..2ba27221ea85 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -28,6 +28,18 @@ static int isert_debug_level;
module_param_named(debug_level, isert_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)");
+static int isert_sg_tablesize_set(const char *val,
+ const struct kernel_param *kp);
+static const struct kernel_param_ops sg_tablesize_ops = {
+ .set = isert_sg_tablesize_set,
+ .get = param_get_int,
+};
+
+static int isert_sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE;
+module_param_cb(sg_tablesize, &sg_tablesize_ops, &isert_sg_tablesize, 0644);
+MODULE_PARM_DESC(sg_tablesize,
+ "Number of gather/scatter entries in a single scsi command, should >= 128 (default: 256, max: 4096)");
+
static DEFINE_MUTEX(device_list_mutex);
static LIST_HEAD(device_list);
static struct workqueue_struct *isert_comp_wq;
@@ -47,6 +59,19 @@ static void isert_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void isert_login_send_done(struct ib_cq *cq, struct ib_wc *wc);
+static int isert_sg_tablesize_set(const char *val, const struct kernel_param *kp)
+{
+ int n = 0, ret;
+
+ ret = kstrtoint(val, 10, &n);
+ if (ret != 0 || n < ISCSI_ISER_MIN_SG_TABLESIZE ||
+ n > ISCSI_ISER_MAX_SG_TABLESIZE)
+ return -EINVAL;
+
+ return param_set_int(val, kp);
+}
+
+
static inline bool
isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
{
@@ -101,7 +126,7 @@ isert_create_qp(struct isert_conn *isert_conn,
attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
factor = rdma_rw_mr_factor(device->ib_device, cma_id->port_num,
- ISCSI_ISER_MAX_SG_TABLESIZE);
+ isert_sg_tablesize);
attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX * factor;
attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge;
attr.cap.max_recv_sge = 1;
@@ -1076,7 +1101,7 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn,
sequence_cmd:
rc = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn);
- if (!rc && dump_payload == false && unsol_data)
+ if (!rc && !dump_payload && unsol_data)
iscsit_set_unsolicited_dataout(cmd);
else if (dump_payload && imm_data)
target_put_sess_cmd(&cmd->se_cmd);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 7fee4a65e181..6c5af13db4e0 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -65,6 +65,12 @@
*/
#define ISER_RX_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 1024)
+/* Default I/O size is 1MB */
+#define ISCSI_ISER_DEF_SG_TABLESIZE 256
+
+/* Minimum I/O size is 512KB */
+#define ISCSI_ISER_MIN_SG_TABLESIZE 128
+
/* Maximum support is 16MB I/O size */
#define ISCSI_ISER_MAX_SG_TABLESIZE 4096
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
index f64519872297..012fc27c5c93 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -437,7 +437,7 @@ struct opa_veswport_trap {
} __packed;
/**
- * struct opa_vnic_iface_macs_entry - single entry in the mac list
+ * struct opa_vnic_iface_mac_entry - single entry in the mac list
* @mac_addr: MAC address
*/
struct opa_vnic_iface_mac_entry {
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
index 868b5aec1537..292c037aa239 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
@@ -74,7 +74,7 @@ void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event)
}
/**
- * opa_vnic_get_error_counters - get summary counters
+ * opa_vnic_get_summary_counters - get summary counters
* @adapter: vnic port adapter
* @cntrs: pointer to destination summary counters structure
*
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
index ac4c49cbf153..ba00f0de14ca 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
@@ -52,7 +52,8 @@ static ssize_t max_reconnect_attempts_show(struct device *dev,
{
struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev);
- return sprintf(page, "%d\n", rtrs_clt_get_max_reconnect_attempts(clt));
+ return sysfs_emit(page, "%d\n",
+ rtrs_clt_get_max_reconnect_attempts(clt));
}
static ssize_t max_reconnect_attempts_store(struct device *dev,
@@ -95,11 +96,13 @@ static ssize_t mpath_policy_show(struct device *dev,
switch (clt->mp_policy) {
case MP_POLICY_RR:
- return sprintf(page, "round-robin (RR: %d)\n", clt->mp_policy);
+ return sysfs_emit(page, "round-robin (RR: %d)\n",
+ clt->mp_policy);
case MP_POLICY_MIN_INFLIGHT:
- return sprintf(page, "min-inflight (MI: %d)\n", clt->mp_policy);
+ return sysfs_emit(page, "min-inflight (MI: %d)\n",
+ clt->mp_policy);
default:
- return sprintf(page, "Unknown (%d)\n", clt->mp_policy);
+ return sysfs_emit(page, "Unknown (%d)\n", clt->mp_policy);
}
}
@@ -138,9 +141,10 @@ static DEVICE_ATTR_RW(mpath_policy);
static ssize_t add_path_show(struct device *dev,
struct device_attribute *attr, char *page)
{
- return scnprintf(page, PAGE_SIZE,
- "Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n",
- attr->attr.name);
+ return sysfs_emit(
+ page,
+ "Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n",
+ attr->attr.name);
}
static ssize_t add_path_store(struct device *dev,
@@ -184,20 +188,18 @@ static ssize_t rtrs_clt_state_show(struct kobject *kobj,
sess = container_of(kobj, struct rtrs_clt_sess, kobj);
if (sess->state == RTRS_CLT_CONNECTED)
- return sprintf(page, "connected\n");
+ return sysfs_emit(page, "connected\n");
- return sprintf(page, "disconnected\n");
+ return sysfs_emit(page, "disconnected\n");
}
static struct kobj_attribute rtrs_clt_state_attr =
__ATTR(state, 0444, rtrs_clt_state_show, NULL);
static ssize_t rtrs_clt_reconnect_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n",
- attr->attr.name);
+ return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name);
}
static ssize_t rtrs_clt_reconnect_store(struct kobject *kobj,
@@ -225,11 +227,9 @@ static struct kobj_attribute rtrs_clt_reconnect_attr =
rtrs_clt_reconnect_store);
static ssize_t rtrs_clt_disconnect_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n",
- attr->attr.name);
+ return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name);
}
static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj,
@@ -257,11 +257,9 @@ static struct kobj_attribute rtrs_clt_disconnect_attr =
rtrs_clt_disconnect_store);
static ssize_t rtrs_clt_remove_path_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n",
- attr->attr.name);
+ return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name);
}
static ssize_t rtrs_clt_remove_path_store(struct kobject *kobj,
@@ -324,7 +322,7 @@ static ssize_t rtrs_clt_hca_port_show(struct kobject *kobj,
sess = container_of(kobj, typeof(*sess), kobj);
- return scnprintf(page, PAGE_SIZE, "%u\n", sess->hca_port);
+ return sysfs_emit(page, "%u\n", sess->hca_port);
}
static struct kobj_attribute rtrs_clt_hca_port_attr =
@@ -338,7 +336,7 @@ static ssize_t rtrs_clt_hca_name_show(struct kobject *kobj,
sess = container_of(kobj, struct rtrs_clt_sess, kobj);
- return scnprintf(page, PAGE_SIZE, "%s\n", sess->hca_name);
+ return sysfs_emit(page, "%s\n", sess->hca_name);
}
static struct kobj_attribute rtrs_clt_hca_name_attr =
@@ -349,12 +347,13 @@ static ssize_t rtrs_clt_src_addr_show(struct kobject *kobj,
char *page)
{
struct rtrs_clt_sess *sess;
- int cnt;
+ int len;
sess = container_of(kobj, struct rtrs_clt_sess, kobj);
- cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr,
- page, PAGE_SIZE);
- return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n");
+ len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page,
+ PAGE_SIZE);
+ len += sysfs_emit_at(page, len, "\n");
+ return len;
}
static struct kobj_attribute rtrs_clt_src_addr_attr =
@@ -365,12 +364,13 @@ static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj,
char *page)
{
struct rtrs_clt_sess *sess;
- int cnt;
+ int len;
sess = container_of(kobj, struct rtrs_clt_sess, kobj);
- cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr,
- page, PAGE_SIZE);
- return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n");
+ len = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, page,
+ PAGE_SIZE);
+ len += sysfs_emit_at(page, len, "\n");
+ return len;
}
static struct kobj_attribute rtrs_clt_dst_addr_attr =
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index f298adc02acb..560865f65dc4 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -1236,8 +1236,7 @@ static void free_sess_reqs(struct rtrs_clt_sess *sess)
if (req->mr)
ib_dereg_mr(req->mr);
kfree(req->sge);
- rtrs_iu_free(req->iu, DMA_TO_DEVICE,
- sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(req->iu, sess->s.dev->ib_dev, 1);
}
kfree(sess->reqs);
sess->reqs = NULL;
@@ -1499,6 +1498,7 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid)
con->c.cid = cid;
con->c.sess = &sess->s;
atomic_set(&con->io_cnt, 0);
+ mutex_init(&con->con_mutex);
sess->s.con[cid] = &con->c;
@@ -1510,6 +1510,7 @@ static void destroy_con(struct rtrs_clt_con *con)
struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
sess->s.con[con->c.cid] = NULL;
+ mutex_destroy(&con->con_mutex);
kfree(con);
}
@@ -1520,15 +1521,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
int err, cq_vector;
struct rtrs_msg_rkey_rsp *rsp;
- /*
- * This function can fail, but still destroy_con_cq_qp() should
- * be called, this is because create_con_cq_qp() is called on cm
- * event path, thus caller/waiter never knows: have we failed before
- * create_con_cq_qp() or after. To solve this dilemma without
- * creating any additional flags just allow destroy_con_cq_qp() be
- * called many times.
- */
-
+ lockdep_assert_held(&con->con_mutex);
if (con->c.cid == 0) {
/*
* One completion for each receive and two for each send
@@ -1602,11 +1595,10 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con)
* Be careful here: destroy_con_cq_qp() can be called even
* create_con_cq_qp() failed, see comments there.
*/
-
+ lockdep_assert_held(&con->con_mutex);
rtrs_cq_qp_destroy(&con->c);
if (con->rsp_ius) {
- rtrs_iu_free(con->rsp_ius, DMA_FROM_DEVICE,
- sess->s.dev->ib_dev, con->queue_size);
+ rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_size);
con->rsp_ius = NULL;
con->queue_size = 0;
}
@@ -1634,16 +1626,16 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con)
struct rtrs_sess *s = con->c.sess;
int err;
+ mutex_lock(&con->con_mutex);
err = create_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
if (err) {
rtrs_err(s, "create_con_cq_qp(), err: %d\n", err);
return err;
}
err = rdma_resolve_route(con->c.cm_id, RTRS_CONNECT_TIMEOUT_MS);
- if (err) {
+ if (err)
rtrs_err(s, "Resolving route failed, err: %d\n", err);
- destroy_con_cq_qp(con);
- }
return err;
}
@@ -1837,8 +1829,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
cm_err = rtrs_rdma_route_resolved(con);
break;
case RDMA_CM_EVENT_ESTABLISHED:
- con->cm_err = rtrs_rdma_conn_established(con, ev);
- if (likely(!con->cm_err)) {
+ cm_err = rtrs_rdma_conn_established(con, ev);
+ if (likely(!cm_err)) {
/*
* Report success and wake up. Here we abuse state_wq,
* i.e. wake up without state change, but we set cm_err.
@@ -1851,20 +1843,22 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
case RDMA_CM_EVENT_REJECTED:
cm_err = rtrs_rdma_conn_rejected(con, ev);
break;
+ case RDMA_CM_EVENT_DISCONNECTED:
+ /* No message for disconnecting */
+ cm_err = -ECONNRESET;
+ break;
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
+ case RDMA_CM_EVENT_ADDR_CHANGE:
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
rtrs_wrn(s, "CM error event %d\n", ev->event);
cm_err = -ECONNRESET;
break;
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
+ rtrs_wrn(s, "CM error event %d\n", ev->event);
cm_err = -EHOSTUNREACH;
break;
- case RDMA_CM_EVENT_DISCONNECTED:
- case RDMA_CM_EVENT_ADDR_CHANGE:
- case RDMA_CM_EVENT_TIMEWAIT_EXIT:
- cm_err = -ECONNRESET;
- break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
/*
* Device removal is a special case. Queue close and return 0.
@@ -1949,8 +1943,9 @@ static int create_cm(struct rtrs_clt_con *con)
errr:
stop_cm(con);
- /* Is safe to call destroy if cq_qp is not inited */
+ mutex_lock(&con->con_mutex);
destroy_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
destroy_cm:
destroy_cm(con);
@@ -2057,7 +2052,9 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
if (!sess->s.con[cid])
break;
con = to_clt_con(sess->s.con[cid]);
+ mutex_lock(&con->con_mutex);
destroy_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
destroy_cm(con);
destroy_con(con);
}
@@ -2164,8 +2161,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess)
mutex_unlock(&clt->paths_mutex);
}
-static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess,
- struct rtrs_addr *addr)
+static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess)
{
struct rtrs_clt *clt = sess->clt;
@@ -2224,7 +2220,10 @@ destroy:
struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]);
stop_cm(con);
+
+ mutex_lock(&con->con_mutex);
destroy_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
destroy_cm(con);
destroy_con(con);
}
@@ -2245,7 +2244,7 @@ static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
struct rtrs_iu *iu;
iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
- rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
if (unlikely(wc->status != IB_WC_SUCCESS)) {
rtrs_err(sess->clt, "Sess info request send failed: %s\n",
@@ -2264,8 +2263,12 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
int i, sgi;
sg_cnt = le16_to_cpu(msg->sg_cnt);
- if (unlikely(!sg_cnt))
+ if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) {
+ rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n",
+ sg_cnt);
return -EINVAL;
+ }
+
/*
* Check if IB immediate data size is enough to hold the mem_id and
* the offset inside the memory chunk.
@@ -2278,11 +2281,6 @@ static int process_info_rsp(struct rtrs_clt_sess *sess,
MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size);
return -EINVAL;
}
- if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) {
- rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n",
- sg_cnt);
- return -EINVAL;
- }
total_len = 0;
for (sgi = 0, i = 0; sgi < sg_cnt && i < sess->queue_depth; sgi++) {
const struct rtrs_sg_desc *desc = &msg->desc[sgi];
@@ -2374,7 +2372,7 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
out:
rtrs_clt_update_wc_stats(con);
- rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
rtrs_clt_change_state(sess, state);
}
@@ -2436,9 +2434,9 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess)
out:
if (tx_iu)
- rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1);
if (rx_iu)
- rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1);
if (unlikely(err))
/* If we've never taken async path because of malloc problems */
rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR);
@@ -2938,7 +2936,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
* IO will never grab it. Also it is very important to add
* path before init, since init fires LINK_CONNECTED event.
*/
- rtrs_clt_add_path_to_arr(sess, addr);
+ rtrs_clt_add_path_to_arr(sess);
err = init_sess(sess);
if (err)
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
index 167acd3c90fc..b8dbd701b3cb 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
@@ -72,6 +72,7 @@ struct rtrs_clt_con {
struct rtrs_iu *rsp_ius;
u32 queue_size;
unsigned int cpu;
+ struct mutex con_mutex;
atomic_t io_cnt;
int cm_err;
};
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index b8e43dc4d95a..3f2918671dbe 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -287,8 +287,7 @@ struct rtrs_msg_rdma_hdr {
struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t t,
struct ib_device *dev, enum dma_data_direction,
void (*done)(struct ib_cq *cq, struct ib_wc *wc));
-void rtrs_iu_free(struct rtrs_iu *iu, enum dma_data_direction dir,
- struct ib_device *dev, u32 queue_size);
+void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_size);
int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
struct ib_send_wr *head);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
index 07fbb063555d..d2edff3b8f0d 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
@@ -27,11 +27,9 @@ static struct kobj_type ktype = {
};
static ssize_t rtrs_srv_disconnect_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n",
- attr->attr.name);
+ return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name);
}
static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj,
@@ -72,8 +70,7 @@ static ssize_t rtrs_srv_hca_port_show(struct kobject *kobj,
sess = container_of(kobj, typeof(*sess), kobj);
usr_con = sess->s.con[0];
- return scnprintf(page, PAGE_SIZE, "%u\n",
- usr_con->cm_id->port_num);
+ return sysfs_emit(page, "%u\n", usr_con->cm_id->port_num);
}
static struct kobj_attribute rtrs_srv_hca_port_attr =
@@ -87,8 +84,7 @@ static ssize_t rtrs_srv_hca_name_show(struct kobject *kobj,
sess = container_of(kobj, struct rtrs_srv_sess, kobj);
- return scnprintf(page, PAGE_SIZE, "%s\n",
- sess->s.dev->ib_dev->name);
+ return sysfs_emit(page, "%s\n", sess->s.dev->ib_dev->name);
}
static struct kobj_attribute rtrs_srv_hca_name_attr =
@@ -115,12 +111,13 @@ static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj,
char *page)
{
struct rtrs_srv_sess *sess;
- int cnt;
+ int len;
sess = container_of(kobj, struct rtrs_srv_sess, kobj);
- cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr,
- page, PAGE_SIZE);
- return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n");
+ len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page,
+ PAGE_SIZE);
+ len += sysfs_emit_at(page, len, "\n");
+ return len;
}
static struct kobj_attribute rtrs_srv_dst_addr_attr =
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index d6f93601712e..c42fd470c4eb 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -113,28 +113,18 @@ static bool __rtrs_srv_change_state(struct rtrs_srv_sess *sess,
return changed;
}
-static bool rtrs_srv_change_state_get_old(struct rtrs_srv_sess *sess,
- enum rtrs_srv_state new_state,
- enum rtrs_srv_state *old_state)
+static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess,
+ enum rtrs_srv_state new_state)
{
bool changed;
spin_lock_irq(&sess->state_lock);
- *old_state = sess->state;
changed = __rtrs_srv_change_state(sess, new_state);
spin_unlock_irq(&sess->state_lock);
return changed;
}
-static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess,
- enum rtrs_srv_state new_state)
-{
- enum rtrs_srv_state old_state;
-
- return rtrs_srv_change_state_get_old(sess, new_state, &old_state);
-}
-
static void free_id(struct rtrs_srv_op *id)
{
if (!id)
@@ -471,10 +461,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id,
void close_sess(struct rtrs_srv_sess *sess)
{
- enum rtrs_srv_state old_state;
-
- if (rtrs_srv_change_state_get_old(sess, RTRS_SRV_CLOSING,
- &old_state))
+ if (rtrs_srv_change_state(sess, RTRS_SRV_CLOSING))
queue_work(rtrs_wq, &sess->close_work);
WARN_ON(sess->state != RTRS_SRV_CLOSING);
}
@@ -577,8 +564,7 @@ static void unmap_cont_bufs(struct rtrs_srv_sess *sess)
struct rtrs_srv_mr *srv_mr;
srv_mr = &sess->mrs[i];
- rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE,
- sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1);
ib_dereg_mr(srv_mr->mr);
ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl,
srv_mr->sgt.nents, DMA_BIDIRECTIONAL);
@@ -682,8 +668,7 @@ err:
sgt = &srv_mr->sgt;
mr = srv_mr->mr;
free_iu:
- rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE,
- sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1);
dereg_mr:
ib_dereg_mr(mr);
unmap_sg:
@@ -735,7 +720,7 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc)
struct rtrs_iu *iu;
iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe);
- rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
if (unlikely(wc->status != IB_WC_SUCCESS)) {
rtrs_err(s, "Sess info response send failed: %s\n",
@@ -861,7 +846,7 @@ static int process_info_req(struct rtrs_srv_con *con,
if (unlikely(err)) {
rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err);
iu_free:
- rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1);
}
rwr_free:
kfree(rwr);
@@ -906,7 +891,7 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc)
goto close;
out:
- rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(iu, sess->s.dev->ib_dev, 1);
return;
close:
close_sess(sess);
@@ -929,7 +914,7 @@ static int post_recv_info_req(struct rtrs_srv_con *con)
err = rtrs_iu_post_recv(&con->c, rx_iu);
if (unlikely(err)) {
rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err);
- rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1);
+ rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1);
return err;
}
@@ -1328,17 +1313,42 @@ static void rtrs_srv_dev_release(struct device *dev)
kfree(srv);
}
-static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx,
- const uuid_t *paths_uuid)
+static void free_srv(struct rtrs_srv *srv)
+{
+ int i;
+
+ WARN_ON(refcount_read(&srv->refcount));
+ for (i = 0; i < srv->queue_depth; i++)
+ mempool_free(srv->chunks[i], chunk_pool);
+ kfree(srv->chunks);
+ mutex_destroy(&srv->paths_mutex);
+ mutex_destroy(&srv->paths_ev_mutex);
+ /* last put to release the srv structure */
+ put_device(&srv->dev);
+}
+
+static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx,
+ const uuid_t *paths_uuid)
{
struct rtrs_srv *srv;
int i;
+ mutex_lock(&ctx->srv_mutex);
+ list_for_each_entry(srv, &ctx->srv_list, ctx_list) {
+ if (uuid_equal(&srv->paths_uuid, paths_uuid) &&
+ refcount_inc_not_zero(&srv->refcount)) {
+ mutex_unlock(&ctx->srv_mutex);
+ return srv;
+ }
+ }
+
+ /* need to allocate a new srv */
srv = kzalloc(sizeof(*srv), GFP_KERNEL);
- if (!srv)
+ if (!srv) {
+ mutex_unlock(&ctx->srv_mutex);
return NULL;
+ }
- refcount_set(&srv->refcount, 1);
INIT_LIST_HEAD(&srv->paths_list);
mutex_init(&srv->paths_mutex);
mutex_init(&srv->paths_ev_mutex);
@@ -1347,6 +1357,8 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx,
srv->ctx = ctx;
device_initialize(&srv->dev);
srv->dev.release = rtrs_srv_dev_release;
+ list_add(&srv->ctx_list, &ctx->srv_list);
+ mutex_unlock(&ctx->srv_mutex);
srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks),
GFP_KERNEL);
@@ -1358,7 +1370,7 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx,
if (!srv->chunks[i])
goto err_free_chunks;
}
- list_add(&srv->ctx_list, &ctx->srv_list);
+ refcount_set(&srv->refcount, 1);
return srv;
@@ -1369,52 +1381,9 @@ err_free_chunks:
err_free_srv:
kfree(srv);
-
- return NULL;
-}
-
-static void free_srv(struct rtrs_srv *srv)
-{
- int i;
-
- WARN_ON(refcount_read(&srv->refcount));
- for (i = 0; i < srv->queue_depth; i++)
- mempool_free(srv->chunks[i], chunk_pool);
- kfree(srv->chunks);
- mutex_destroy(&srv->paths_mutex);
- mutex_destroy(&srv->paths_ev_mutex);
- /* last put to release the srv structure */
- put_device(&srv->dev);
-}
-
-static inline struct rtrs_srv *__find_srv_and_get(struct rtrs_srv_ctx *ctx,
- const uuid_t *paths_uuid)
-{
- struct rtrs_srv *srv;
-
- list_for_each_entry(srv, &ctx->srv_list, ctx_list) {
- if (uuid_equal(&srv->paths_uuid, paths_uuid) &&
- refcount_inc_not_zero(&srv->refcount))
- return srv;
- }
-
return NULL;
}
-static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx,
- const uuid_t *paths_uuid)
-{
- struct rtrs_srv *srv;
-
- mutex_lock(&ctx->srv_mutex);
- srv = __find_srv_and_get(ctx, paths_uuid);
- if (!srv)
- srv = __alloc_srv(ctx, paths_uuid);
- mutex_unlock(&ctx->srv_mutex);
-
- return srv;
-}
-
static void put_srv(struct rtrs_srv *srv)
{
if (refcount_dec_and_test(&srv->refcount)) {
@@ -1813,7 +1782,11 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id,
}
recon_cnt = le16_to_cpu(msg->recon_cnt);
srv = get_or_create_srv(ctx, &msg->paths_uuid);
- if (!srv) {
+ /*
+ * "refcount == 0" happens if a previous thread calls get_or_create_srv
+ * allocate srv, but chunks of srv are not allocated yet.
+ */
+ if (!srv || refcount_read(&srv->refcount) == 0) {
err = -ENOMEM;
goto reject_w_err;
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
index 08b0b8a6eebe..9543ae19996c 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h
@@ -62,7 +62,7 @@ struct rtrs_srv_op {
/*
* server side memory region context, when always_invalidate=Y, we need
- * queue_depth of memory regrion to invalidate each memory region.
+ * queue_depth of memory region to invalidate each memory region.
*/
struct rtrs_srv_mr {
struct ib_mr *mr;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c
index ff1093d6e4bc..2e3a849e0a77 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs.c
@@ -31,6 +31,7 @@ struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask,
return NULL;
for (i = 0; i < queue_size; i++) {
iu = &ius[i];
+ iu->direction = dir;
iu->buf = kzalloc(size, gfp_mask);
if (!iu->buf)
goto err;
@@ -41,17 +42,15 @@ struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask,
iu->cqe.done = done;
iu->size = size;
- iu->direction = dir;
}
return ius;
err:
- rtrs_iu_free(ius, dir, dma_dev, i);
+ rtrs_iu_free(ius, dma_dev, i);
return NULL;
}
EXPORT_SYMBOL_GPL(rtrs_iu_alloc);
-void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir,
- struct ib_device *ibdev, u32 queue_size)
+void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_size)
{
struct rtrs_iu *iu;
int i;
@@ -61,7 +60,7 @@ void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir,
for (i = 0; i < queue_size; i++) {
iu = &ius[i];
- ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, dir);
+ ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, iu->direction);
kfree(iu->buf);
}
kfree(ius);
@@ -105,6 +104,22 @@ int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe)
}
EXPORT_SYMBOL_GPL(rtrs_post_recv_empty);
+static int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head,
+ struct ib_send_wr *wr)
+{
+ if (head) {
+ struct ib_send_wr *tail = head;
+
+ while (tail->next)
+ tail = tail->next;
+ tail->next = wr;
+ } else {
+ head = wr;
+ }
+
+ return ib_post_send(qp, head, NULL);
+}
+
int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
struct ib_send_wr *head)
{
@@ -127,17 +142,7 @@ int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
.send_flags = IB_SEND_SIGNALED,
};
- if (head) {
- struct ib_send_wr *tail = head;
-
- while (tail->next)
- tail = tail->next;
- tail->next = &wr;
- } else {
- head = &wr;
- }
-
- return ib_post_send(con->qp, head, NULL);
+ return rtrs_post_send(con->qp, head, &wr);
}
EXPORT_SYMBOL_GPL(rtrs_iu_post_send);
@@ -169,17 +174,7 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
if (WARN_ON(sge[i].length == 0))
return -EINVAL;
- if (head) {
- struct ib_send_wr *tail = head;
-
- while (tail->next)
- tail = tail->next;
- tail->next = &wr.wr;
- } else {
- head = &wr.wr;
- }
-
- return ib_post_send(con->qp, head, NULL);
+ return rtrs_post_send(con->qp, head, &wr.wr);
}
EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm);
@@ -196,17 +191,7 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe,
.ex.imm_data = cpu_to_be32(imm_data),
};
- if (head) {
- struct ib_send_wr *tail = head;
-
- while (tail->next)
- tail = tail->next;
- tail->next = &wr;
- } else {
- head = &wr;
- }
-
- return ib_post_send(con->qp, head, NULL);
+ return rtrs_post_send(con->qp, head, &wr);
}
EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index d8fcd21ab472..5492b66a8153 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -169,9 +169,9 @@ static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
int tmo = *(int *)kp->arg;
if (tmo >= 0)
- return sprintf(buffer, "%d\n", tmo);
+ return sysfs_emit(buffer, "%d\n", tmo);
else
- return sprintf(buffer, "off\n");
+ return sysfs_emit(buffer, "off\n");
}
static int srp_tmo_set(const char *val, const struct kernel_param *kp)
@@ -2896,7 +2896,7 @@ static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
+ return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
}
static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
@@ -2904,7 +2904,7 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
+ return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
}
static ssize_t show_service_id(struct device *dev,
@@ -2914,8 +2914,8 @@ static ssize_t show_service_id(struct device *dev,
if (target->using_rdma_cm)
return -ENOENT;
- return sprintf(buf, "0x%016llx\n",
- be64_to_cpu(target->ib_cm.service_id));
+ return sysfs_emit(buf, "0x%016llx\n",
+ be64_to_cpu(target->ib_cm.service_id));
}
static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
@@ -2925,7 +2925,8 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
if (target->using_rdma_cm)
return -ENOENT;
- return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
+
+ return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
}
static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
@@ -2933,7 +2934,7 @@ static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%pI6\n", target->sgid.raw);
+ return sysfs_emit(buf, "%pI6\n", target->sgid.raw);
}
static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
@@ -2944,7 +2945,8 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
if (target->using_rdma_cm)
return -ENOENT;
- return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
+
+ return sysfs_emit(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
}
static ssize_t show_orig_dgid(struct device *dev,
@@ -2954,7 +2956,8 @@ static ssize_t show_orig_dgid(struct device *dev,
if (target->using_rdma_cm)
return -ENOENT;
- return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
+
+ return sysfs_emit(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
}
static ssize_t show_req_lim(struct device *dev,
@@ -2968,7 +2971,8 @@ static ssize_t show_req_lim(struct device *dev,
ch = &target->ch[i];
req_lim = min(req_lim, ch->req_lim);
}
- return sprintf(buf, "%d\n", req_lim);
+
+ return sysfs_emit(buf, "%d\n", req_lim);
}
static ssize_t show_zero_req_lim(struct device *dev,
@@ -2976,7 +2980,7 @@ static ssize_t show_zero_req_lim(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%d\n", target->zero_req_lim);
+ return sysfs_emit(buf, "%d\n", target->zero_req_lim);
}
static ssize_t show_local_ib_port(struct device *dev,
@@ -2984,7 +2988,7 @@ static ssize_t show_local_ib_port(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%d\n", target->srp_host->port);
+ return sysfs_emit(buf, "%d\n", target->srp_host->port);
}
static ssize_t show_local_ib_device(struct device *dev,
@@ -2992,8 +2996,8 @@ static ssize_t show_local_ib_device(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%s\n",
- dev_name(&target->srp_host->srp_dev->dev->dev));
+ return sysfs_emit(buf, "%s\n",
+ dev_name(&target->srp_host->srp_dev->dev->dev));
}
static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
@@ -3001,7 +3005,7 @@ static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%d\n", target->ch_count);
+ return sysfs_emit(buf, "%d\n", target->ch_count);
}
static ssize_t show_comp_vector(struct device *dev,
@@ -3009,7 +3013,7 @@ static ssize_t show_comp_vector(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%d\n", target->comp_vector);
+ return sysfs_emit(buf, "%d\n", target->comp_vector);
}
static ssize_t show_tl_retry_count(struct device *dev,
@@ -3017,7 +3021,7 @@ static ssize_t show_tl_retry_count(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%d\n", target->tl_retry_count);
+ return sysfs_emit(buf, "%d\n", target->tl_retry_count);
}
static ssize_t show_cmd_sg_entries(struct device *dev,
@@ -3025,7 +3029,7 @@ static ssize_t show_cmd_sg_entries(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%u\n", target->cmd_sg_cnt);
+ return sysfs_emit(buf, "%u\n", target->cmd_sg_cnt);
}
static ssize_t show_allow_ext_sg(struct device *dev,
@@ -3033,7 +3037,7 @@ static ssize_t show_allow_ext_sg(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
+ return sysfs_emit(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
}
static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
@@ -3893,7 +3897,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
{
struct srp_host *host = container_of(dev, struct srp_host, dev);
- return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
+ return sysfs_emit(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -3903,7 +3907,7 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr,
{
struct srp_host *host = container_of(dev, struct srp_host, dev);
- return sprintf(buf, "%d\n", host->port);
+ return sysfs_emit(buf, "%d\n", host->port);
}
static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index a17c56cd8312..6be60aa5ffe2 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -3448,7 +3448,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item,
struct se_portal_group *se_tpg = attrib_to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
- return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size);
+ return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rdma_size);
}
static ssize_t srpt_tpg_attrib_srp_max_rdma_size_store(struct config_item *item,
@@ -3485,7 +3485,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rsp_size_show(struct config_item *item,
struct se_portal_group *se_tpg = attrib_to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
- return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size);
+ return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rsp_size);
}
static ssize_t srpt_tpg_attrib_srp_max_rsp_size_store(struct config_item *item,
@@ -3522,7 +3522,7 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_show(struct config_item *item,
struct se_portal_group *se_tpg = attrib_to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
- return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size);
+ return sysfs_emit(page, "%u\n", sport->port_attrib.srp_sq_size);
}
static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item,
@@ -3559,7 +3559,7 @@ static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item,
struct se_portal_group *se_tpg = attrib_to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
- return sprintf(page, "%d\n", sport->port_attrib.use_srq);
+ return sysfs_emit(page, "%d\n", sport->port_attrib.use_srq);
}
static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item,
@@ -3649,7 +3649,7 @@ out:
static ssize_t srpt_rdma_cm_port_show(struct config_item *item, char *page)
{
- return sprintf(page, "%d\n", rdma_cm_port);
+ return sysfs_emit(page, "%d\n", rdma_cm_port);
}
static ssize_t srpt_rdma_cm_port_store(struct config_item *item,
@@ -3705,7 +3705,7 @@ static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page)
struct se_portal_group *se_tpg = to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
- return snprintf(page, PAGE_SIZE, "%d\n", sport->enabled);
+ return sysfs_emit(page, "%d\n", sport->enabled);
}
static ssize_t srpt_tpg_enable_store(struct config_item *item,
@@ -3812,7 +3812,7 @@ static void srpt_drop_tport(struct se_wwn *wwn)
static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf)
{
- return scnprintf(buf, PAGE_SIZE, "\n");
+ return sysfs_emit(buf, "\n");
}
CONFIGFS_ATTR_RO(srpt_wwn_, version);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index bdeb010efee6..76e66f630c17 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -347,7 +347,7 @@ struct srpt_nexus {
};
/**
- * struct srpt_port_attib - attributes for SRPT port
+ * struct srpt_port_attrib - attributes for SRPT port
* @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections.
* @srp_max_rsp_size: Maximum size of SRP response messages in bytes.
* @srp_sq_size: Shared receive queue (SRQ) size.
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index df9f6f4549f1..cf6c49d09c82 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -853,7 +853,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
return error;
ctrl->device = ctrl->queues[0].device;
- ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
+ ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev);
/* T10-PI support */
if (ctrl->device->dev->attrs.device_cap_flags &
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index ae6620489457..5c1e7cb7fe0d 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -414,7 +414,8 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
if (ib_dma_mapping_error(ndev->device, r->send_sge.addr))
goto out_free_rsp;
- r->req.p2p_client = &ndev->device->dev;
+ if (!ib_uses_virt_dma(ndev->device))
+ r->req.p2p_client = &ndev->device->dev;
r->send_sge.length = sizeof(*r->req.cqe);
r->send_sge.lkey = ndev->pd->local_dma_lkey;
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index bace04145c5f..196382630363 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -556,15 +556,6 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
return -1;
for (i = 0; i < num_clients; i++) {
-#ifdef CONFIG_DMA_VIRT_OPS
- if (clients[i]->dma_ops == &dma_virt_ops) {
- if (verbose)
- dev_warn(clients[i],
- "cannot be used for peer-to-peer DMA because the driver makes use of dma_virt_ops\n");
- return -1;
- }
-#endif
-
pci_client = find_parent_pci_dev(clients[i]);
if (!pci_client) {
if (verbose)
@@ -834,24 +825,10 @@ static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap,
struct device *dev, struct scatterlist *sg, int nents)
{
struct scatterlist *s;
- phys_addr_t paddr;
int i;
- /*
- * p2pdma mappings are not compatible with devices that use
- * dma_virt_ops. If the upper layers do the right thing
- * this should never happen because it will be prevented
- * by the check in pci_p2pdma_distance_many()
- */
-#ifdef CONFIG_DMA_VIRT_OPS
- if (WARN_ON_ONCE(dev->dma_ops == &dma_virt_ops))
- return 0;
-#endif
-
for_each_sg(sg, s, nents, i) {
- paddr = sg_phys(s);
-
- s->dma_address = paddr - p2p_pgmap->bus_offset;
+ s->dma_address = sg_phys(s) - p2p_pgmap->bus_offset;
sg_dma_len(s) = s->length;
}
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 956151052d45..2aaed35b556d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -565,6 +565,4 @@ static inline int dma_mmap_wc(struct device *dev,
int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start,
dma_addr_t dma_start, u64 size);
-extern const struct dma_map_ops dma_virt_ops;
-
#endif /* _LINUX_DMA_MAPPING_H */
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 70597508c765..7752211c9638 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -34,6 +34,13 @@ static inline int ib_umem_offset(struct ib_umem *umem)
return umem->address & ~PAGE_MASK;
}
+static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem,
+ unsigned long pgsz)
+{
+ return (sg_dma_address(umem->sg_head.sgl) + ib_umem_offset(umem)) &
+ (pgsz - 1);
+}
+
static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
unsigned long pgsz)
{
@@ -79,6 +86,35 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
unsigned long pgsz_bitmap,
unsigned long virt);
+/**
+ * ib_umem_find_best_pgoff - Find best HW page size
+ *
+ * @umem: umem struct
+ * @pgsz_bitmap bitmap of HW supported page sizes
+ * @pgoff_bitmask: Mask of bits that can be represented with an offset
+ *
+ * This is very similar to ib_umem_find_best_pgsz() except instead of accepting
+ * an IOVA it accepts a bitmask specifying what address bits can be represented
+ * with a page offset.
+ *
+ * For instance if the HW has multiple page sizes, requires 64 byte alignemnt,
+ * and can support aligned offsets up to 4032 then pgoff_bitmask would be
+ * "111111000000".
+ *
+ * If the pgoff_bitmask requires either alignment in the low bit or an
+ * unavailable page size for the high bits, this function returns 0.
+ */
+static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ u64 pgoff_bitmask)
+{
+ struct scatterlist *sg = umem->sg_head.sgl;
+ dma_addr_t dma_addr;
+
+ dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK);
+ return ib_umem_find_best_pgsz(umem, pgsz_bitmap,
+ dma_addr & pgoff_bitmask);
+}
#else /* CONFIG_INFINIBAND_USER_MEM */
@@ -101,6 +137,12 @@ static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
{
return 0;
}
+static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ u64 pgoff_bitmask)
+{
+ return 0;
+}
#endif /* CONFIG_INFINIBAND_USER_MEM */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3883efd588aa..9fed65bf9279 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1235,6 +1235,8 @@ enum ib_qp_attr_mask {
IB_QP_RESERVED3 = (1<<23),
IB_QP_RESERVED4 = (1<<24),
IB_QP_RATE_LIMIT = (1<<25),
+
+ IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0),
};
enum ib_qp_state {
@@ -1470,6 +1472,8 @@ enum rdma_remove_reason {
RDMA_REMOVE_DRIVER_REMOVE,
/* uobj is being cleaned-up before being committed */
RDMA_REMOVE_ABORT,
+ /* The driver failed to destroy the uobject and is being disconnected */
+ RDMA_REMOVE_DRIVER_FAILURE,
};
struct ib_rdmacg_object {
@@ -1482,8 +1486,6 @@ struct ib_ucontext {
struct ib_device *device;
struct ib_uverbs_file *ufile;
- bool cleanup_retryable;
-
struct ib_rdmacg_object cg_obj;
/*
* Implementation details of the RDMA core, don't use in drivers:
@@ -2402,6 +2404,8 @@ struct ib_device_ops {
int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
struct ib_udata *udata);
+ int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata);
int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int (*destroy_ah)(struct ib_ah *ah, u32 flags);
@@ -2430,9 +2434,10 @@ struct ib_device_ops {
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
- int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct ib_pd *pd, struct ib_udata *udata);
+ struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
@@ -2666,7 +2671,6 @@ struct ib_device {
const struct attribute_group *groups[3];
u64 uverbs_cmd_mask;
- u64 uverbs_ex_cmd_mask;
char node_desc[IB_DEVICE_NODE_DESC_MAX];
__be64 node_guid;
@@ -2902,46 +2906,6 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
}
/**
- * ib_is_destroy_retryable - Check whether the uobject destruction
- * is retryable.
- * @ret: The initial destruction return code
- * @why: remove reason
- * @uobj: The uobject that is destroyed
- *
- * This function is a helper function that IB layer and low-level drivers
- * can use to consider whether the destruction of the given uobject is
- * retry-able.
- * It checks the original return code, if it wasn't success the destruction
- * is retryable according to the ucontext state (i.e. cleanup_retryable) and
- * the remove reason. (i.e. why).
- * Must be called with the object locked for destroy.
- */
-static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
- struct ib_uobject *uobj)
-{
- return ret && (why == RDMA_REMOVE_DESTROY ||
- uobj->context->cleanup_retryable);
-}
-
-/**
- * ib_destroy_usecnt - Called during destruction to check the usecnt
- * @usecnt: The usecnt atomic
- * @why: remove reason
- * @uobj: The uobject that is destroyed
- *
- * Non-zero usecnts will block destruction unless destruction was triggered by
- * a ucontext cleanup.
- */
-static inline int ib_destroy_usecnt(atomic_t *usecnt,
- enum rdma_remove_reason why,
- struct ib_uobject *uobj)
-{
- if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
- return -EBUSY;
- return 0;
-}
-
-/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
* the given QP state transition.
@@ -3431,6 +3395,17 @@ enum ib_pd_flags {
struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
const char *caller);
+/**
+ * ib_alloc_pd - Allocates an unused protection domain.
+ * @device: The device on which to allocate the protection domain.
+ * @flags: protection domain flags
+ *
+ * A protection domain object provides an association between QPs, shared
+ * receive queues, address handles, memory regions, and memory windows.
+ *
+ * Every PD has a local_dma_lkey which can be used as the lkey value for local
+ * memory operations.
+ */
#define ib_alloc_pd(device, flags) \
__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
@@ -3656,8 +3631,14 @@ static inline int ib_post_srq_recv(struct ib_srq *srq,
bad_recv_wr ? : &dummy);
}
-struct ib_qp *ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr);
+struct ib_qp *ib_create_named_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ const char *caller);
+static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr)
+{
+ return ib_create_named_qp(pd, init_attr, KBUILD_MODNAME);
+}
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
@@ -3944,6 +3925,16 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
-ENOSYS;
}
+/*
+ * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to
+ * NULL. This causes the ib_dma* helpers to just stash the kernel virtual
+ * address into the dma address.
+ */
+static inline bool ib_uses_virt_dma(struct ib_device *dev)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device;
+}
+
/**
* ib_dma_mapping_error - check a DMA addr for error
* @dev: The device for which the dma_addr was created
@@ -3951,6 +3942,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
*/
static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
+ if (ib_uses_virt_dma(dev))
+ return 0;
return dma_mapping_error(dev->dma_device, dma_addr);
}
@@ -3965,6 +3958,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev,
void *cpu_addr, size_t size,
enum dma_data_direction direction)
{
+ if (ib_uses_virt_dma(dev))
+ return (uintptr_t)cpu_addr;
return dma_map_single(dev->dma_device, cpu_addr, size, direction);
}
@@ -3979,7 +3974,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
- dma_unmap_single(dev->dma_device, addr, size, direction);
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_single(dev->dma_device, addr, size, direction);
}
/**
@@ -3996,6 +3992,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev,
size_t size,
enum dma_data_direction direction)
{
+ if (ib_uses_virt_dma(dev))
+ return (uintptr_t)(page_address(page) + offset);
return dma_map_page(dev->dma_device, page, offset, size, direction);
}
@@ -4010,7 +4008,30 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
- dma_unmap_page(dev->dma_device, addr, size, direction);
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_page(dev->dma_device, addr, size, direction);
+}
+
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
+static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (ib_uses_virt_dma(dev))
+ return ib_dma_virt_map_sg(dev, sg, nents);
+ return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
+ dma_attrs);
+}
+
+static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
+ dma_attrs);
}
/**
@@ -4024,7 +4045,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- return dma_map_sg(dev->dma_device, sg, nents, direction);
+ return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0);
}
/**
@@ -4038,24 +4059,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- dma_unmap_sg(dev->dma_device, sg, nents, direction);
-}
-
-static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
-{
- return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
- dma_attrs);
-}
-
-static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
-{
- dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
+ ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0);
}
/**
@@ -4066,6 +4070,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
*/
static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
{
+ if (ib_uses_virt_dma(dev))
+ return UINT_MAX;
return dma_get_max_seg_size(dev->dma_device);
}
@@ -4081,7 +4087,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
size_t size,
enum dma_data_direction dir)
{
- dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+ if (!ib_uses_virt_dma(dev))
+ dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
}
/**
@@ -4096,36 +4103,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
size_t size,
enum dma_data_direction dir)
{
- dma_sync_single_for_device(dev->dma_device, addr, size, dir);
-}
-
-/**
- * ib_dma_alloc_coherent - Allocate memory and map it for DMA
- * @dev: The device for which the DMA address is requested
- * @size: The size of the region to allocate in bytes
- * @dma_handle: A pointer for returning the DMA address of the region
- * @flag: memory allocator flags
- */
-static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
- size_t size,
- dma_addr_t *dma_handle,
- gfp_t flag)
-{
- return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag);
-}
-
-/**
- * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent()
- * @dev: The device for which the DMA addresses were allocated
- * @size: The size of the region
- * @cpu_addr: the address returned by ib_dma_alloc_coherent()
- * @dma_handle: the DMA address returned by ib_dma_alloc_coherent()
- */
-static inline void ib_dma_free_coherent(struct ib_device *dev,
- size_t size, void *cpu_addr,
- dma_addr_t dma_handle)
-{
- dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
+ if (!ib_uses_virt_dma(dev))
+ dma_sync_single_for_device(dev->dma_device, addr, size, dir);
}
/* ib_reg_user_mr - register a memory region for virtual addresses from kernel
@@ -4217,7 +4196,8 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
struct inode *inode, struct ib_udata *udata);
int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
-static inline int ib_check_mr_access(int flags)
+static inline int ib_check_mr_access(struct ib_device *ib_dev,
+ unsigned int flags)
{
/*
* Local write permission is required if remote write or
@@ -4230,6 +4210,9 @@ static inline int ib_check_mr_access(int flags)
if (flags & ~IB_ACCESS_SUPPORTED)
return -EINVAL;
+ if (flags & IB_ACCESS_ON_DEMAND &&
+ !(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+ return -EINVAL;
return 0;
}
@@ -4617,6 +4600,19 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
}
/**
+ * ibdev_to_node - return the NUMA node for a given ib_device
+ * @dev: device to get the NUMA node for.
+ */
+static inline int ibdev_to_node(struct ib_device *ibdev)
+{
+ struct device *parent = ibdev->dev.parent;
+
+ if (!parent)
+ return NUMA_NO_NODE;
+ return dev_to_node(parent);
+}
+
+/**
* rdma_device_to_drv_device - Helper macro to reach back to driver's
* ib_device holder structure from device pointer.
*
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index d3a1cc5be7bc..05e18839eaff 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -68,6 +68,14 @@ struct rdma_restrack_entry {
* As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI
*/
bool valid;
+ /**
+ * @no_track: don't add this entry to restrack DB
+ *
+ * This field is used to mark an entry that doesn't need to be added to
+ * internal restrack DB and presented later to the users at the nldev
+ * query stage.
+ */
+ u8 no_track : 1;
/*
* @kref: Protect destroy of the resource
*/
@@ -145,4 +153,20 @@ int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev,
enum rdma_restrack_type type,
u32 id);
+
+/**
+ * rdma_restrack_no_track() - don't add resource to the DB
+ * @res: resource entry
+ *
+ * Every user of thie API should be cross examined.
+ * Probaby you don't need to use this function.
+ */
+static inline void rdma_restrack_no_track(struct rdma_restrack_entry *res)
+{
+ res->no_track = true;
+}
+static inline bool rdma_restrack_is_tracked(struct rdma_restrack_entry *res)
+{
+ return !res->no_track;
+}
#endif /* _RDMA_RESTRACK_H_ */
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index b00270c72740..39ef204753ec 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -647,12 +647,15 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b
* 'ucontext'.
*
*/
-#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \
- (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \
- driver_udata) \
- ->context, \
- drv_dev_struct, member) : \
- (drv_dev_struct *)NULL)
+static inline struct uverbs_attr_bundle *
+rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata)
+{
+ return container_of(udata, struct uverbs_attr_bundle, driver_udata);
+}
+
+#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \
+ (udata ? container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \
+ drv_dev_struct, member) : (drv_dev_struct *)NULL)
#define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT)
@@ -862,6 +865,16 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
{
return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
}
+
+static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle,
+ size_t n, size_t size)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(n, size, &bytes)))
+ return ERR_PTR(-EOVERFLOW);
+ return uverbs_zalloc(bundle, bytes);
+}
int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
size_t idx, s64 lower_bound, u64 upper_bound,
s64 *def_val);
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 06db27e35f40..ccd11631c167 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -71,6 +71,8 @@ struct uverbs_obj_type_class {
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs);
void (*remove_handle)(struct ib_uobject *uobj);
+ void (*swap_uobjects)(struct ib_uobject *obj_old,
+ struct ib_uobject *obj_new);
};
struct uverbs_obj_type {
@@ -116,6 +118,9 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
bool hw_obj_valid);
void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
struct uverbs_attr_bundle *attrs);
+void rdma_assign_uobject(struct ib_uobject *to_uobj,
+ struct ib_uobject *new_uobj,
+ struct uverbs_attr_bundle *attrs);
/*
* uverbs_uobject_get is called in order to increase the reference count on
@@ -138,8 +143,8 @@ struct uverbs_obj_fd_type {
* because the driver is removed or the FD is closed.
*/
struct uverbs_obj_type type;
- int (*destroy_object)(struct ib_uobject *uobj,
- enum rdma_remove_reason why);
+ void (*destroy_object)(struct ib_uobject *uobj,
+ enum rdma_remove_reason why);
const struct file_operations *fops;
const char *name;
int flags;
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 9ec85f76e9ac..90b739d05adf 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -43,6 +43,10 @@ struct hns_roce_ib_create_cq {
__u32 reserved;
};
+enum hns_roce_cq_cap_flags {
+ HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0,
+};
+
struct hns_roce_ib_create_cq_resp {
__aligned_u64 cqn; /* Only 32 bits used, 64 for compat */
__aligned_u64 cap_flags;
@@ -69,6 +73,12 @@ struct hns_roce_ib_create_qp {
__aligned_u64 sdb_addr;
};
+enum hns_roce_qp_cap_flags {
+ HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0,
+ HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1,
+ HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2,
+};
+
struct hns_roce_ib_create_qp_resp {
__aligned_u64 cap_flags;
};
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 456438c18c2c..7ee73a0652f1 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -596,20 +596,6 @@ enum {
IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE,
};
-enum {
- /*
- * This value is equal to IB_QP_DEST_QPN.
- */
- IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20,
-};
-
-enum {
- /*
- * This value is equal to IB_QP_RATE_LIMIT.
- */
- IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25,
-};
-
struct ib_uverbs_ex_create_qp {
__aligned_u64 user_handle;
__u32 pd_handle;
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index e591d8c1f3cf..068433e2229d 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -181,4 +181,25 @@ struct rxe_modify_srq_cmd {
__aligned_u64 mmap_info_addr;
};
+/* This data structure is stored at the base of work and
+ * completion queues shared between user space and kernel space.
+ * It contains the producer and consumer indices. Is also
+ * contains a copy of the queue size parameters for user space
+ * to use but the kernel must use the parameters in the
+ * rxe_queue struct. For performance reasons arrange to have
+ * producer and consumer indices in separate cache lines
+ * the kernel should always mask the indices to avoid accessing
+ * memory outside of the data area
+ */
+struct rxe_queue_buf {
+ __u32 log2_elem_size;
+ __u32 index_mask;
+ __u32 pad_1[30];
+ __u32 producer_index;
+ __u32 pad_2[31];
+ __u32 consumer_index;
+ __u32 pad_3[31];
+ __u8 data[];
+};
+
#endif /* RDMA_USER_RXE_H */
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index c99de4a21458..fd2db2665fc6 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -75,11 +75,6 @@ config ARCH_HAS_DMA_PREP_COHERENT
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
-config DMA_VIRT_OPS
- bool
- depends on HAS_DMA
- select DMA_OPS
-
config SWIOTLB
bool
select NEED_DMA_MAP_STATE
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index dc755ab68aab..cd1d86358a7a 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -5,7 +5,6 @@ obj-$(CONFIG_DMA_OPS) += ops_helpers.o
obj-$(CONFIG_DMA_OPS) += dummy.o
obj-$(CONFIG_DMA_CMA) += contiguous.o
obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o
-obj-$(CONFIG_DMA_VIRT_OPS) += virt.o
obj-$(CONFIG_DMA_API_DEBUG) += debug.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o
diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c
deleted file mode 100644
index 59d32317dd57..000000000000
--- a/kernel/dma/virt.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DMA operations that map to virtual addresses without flushing memory.
- */
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-map-ops.h>
-#include <linux/scatterlist.h>
-
-static void *dma_virt_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp,
- unsigned long attrs)
-{
- void *ret;
-
- ret = (void *)__get_free_pages(gfp | __GFP_ZERO, get_order(size));
- if (ret)
- *dma_handle = (uintptr_t)ret;
- return ret;
-}
-
-static void dma_virt_free(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_addr,
- unsigned long attrs)
-{
- free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- return (uintptr_t)(page_address(page) + offset);
-}
-
-static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-{
- int i;
- struct scatterlist *sg;
-
- for_each_sg(sgl, sg, nents, i) {
- BUG_ON(!sg_page(sg));
- sg_dma_address(sg) = (uintptr_t)sg_virt(sg);
- sg_dma_len(sg) = sg->length;
- }
-
- return nents;
-}
-
-const struct dma_map_ops dma_virt_ops = {
- .alloc = dma_virt_alloc,
- .free = dma_virt_free,
- .map_page = dma_virt_map_page,
- .map_sg = dma_virt_map_sg,
- .alloc_pages = dma_common_alloc_pages,
- .free_pages = dma_common_free_pages,
-};
-EXPORT_SYMBOL(dma_virt_ops);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index deecbdcdae84..24c9a9005a6f 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*
*/
-#include <linux/dmapool.h>
#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/if.h>
@@ -108,7 +107,6 @@ static void rds_ib_dev_free(struct work_struct *work)
rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool);
if (rds_ibdev->pd)
ib_dealloc_pd(rds_ibdev->pd);
- dma_pool_destroy(rds_ibdev->rid_hdrs_pool);
list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
list_del(&i_ipaddr->list);
@@ -191,14 +189,6 @@ static int rds_ib_add_one(struct ib_device *device)
rds_ibdev->pd = NULL;
goto put_dev;
}
- rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name,
- device->dma_device,
- sizeof(struct rds_header),
- L1_CACHE_BYTES, 0);
- if (!rds_ibdev->rid_hdrs_pool) {
- ret = -ENOMEM;
- goto put_dev;
- }
rds_ibdev->mr_1m_pool =
rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 8dfff43cf07f..2ba71102b1f1 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -246,7 +246,6 @@ struct rds_ib_device {
struct list_head conn_list;
struct ib_device *dev;
struct ib_pd *pd;
- struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */
u8 odp_capable:1;
unsigned int max_mrs;
@@ -264,13 +263,6 @@ struct rds_ib_device {
int *vector_load;
};
-static inline int ibdev_to_node(struct ib_device *ibdev)
-{
- struct device *parent;
-
- parent = ibdev->dev.parent;
- return parent ? dev_to_node(parent) : NUMA_NO_NODE;
-}
#define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
/* bits for i_ack_flags */
@@ -387,11 +379,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6);
void rds_ib_cm_connect_complete(struct rds_connection *conn,
struct rdma_cm_event *event);
-struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
- struct dma_pool *pool,
- dma_addr_t **dma_addrs, u32 num_hdrs);
-void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
- dma_addr_t *dma_addrs, u32 num_hdrs);
#define rds_ib_conn_error(conn, fmt...) \
__rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index b36b60668b1d..f5cbe963cd8f 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*
*/
-#include <linux/dmapool.h>
#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/slab.h>
@@ -441,42 +440,87 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index)
rds_ibdev->vector_load[index]--;
}
+static void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr,
+ dma_addr_t dma_addr, enum dma_data_direction dir)
+{
+ ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir);
+ kfree(hdr);
+}
+
+static struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev,
+ dma_addr_t *dma_addr, enum dma_data_direction dir)
+{
+ struct rds_header *hdr;
+
+ hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev));
+ if (!hdr)
+ return NULL;
+
+ *dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr),
+ DMA_BIDIRECTIONAL);
+ if (ib_dma_mapping_error(dev, *dma_addr)) {
+ kfree(hdr);
+ return NULL;
+ }
+
+ return hdr;
+}
+
+/* Free the DMA memory used to store struct rds_header.
+ *
+ * @dev: the RDS IB device
+ * @hdrs: pointer to the array storing DMA memory pointers
+ * @dma_addrs: pointer to the array storing DMA addresses
+ * @num_hdars: number of headers to free.
+ */
+static void rds_dma_hdrs_free(struct rds_ib_device *dev,
+ struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs,
+ enum dma_data_direction dir)
+{
+ u32 i;
+
+ for (i = 0; i < num_hdrs; i++)
+ rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir);
+ kvfree(hdrs);
+ kvfree(dma_addrs);
+}
+
+
/* Allocate DMA coherent memory to be used to store struct rds_header for
* sending/receiving packets. The pointers to the DMA memory and the
* associated DMA addresses are stored in two arrays.
*
- * @ibdev: the IB device
- * @pool: the DMA memory pool
+ * @dev: the RDS IB device
* @dma_addrs: pointer to the array for storing DMA addresses
* @num_hdrs: number of headers to allocate
*
* It returns the pointer to the array storing the DMA memory pointers. On
* error, NULL pointer is returned.
*/
-struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
- struct dma_pool *pool,
- dma_addr_t **dma_addrs, u32 num_hdrs)
+static struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev,
+ dma_addr_t **dma_addrs, u32 num_hdrs,
+ enum dma_data_direction dir)
{
struct rds_header **hdrs;
dma_addr_t *hdr_daddrs;
u32 i;
hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL,
- ibdev_to_node(ibdev));
+ ibdev_to_node(dev->dev));
if (!hdrs)
return NULL;
hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL,
- ibdev_to_node(ibdev));
+ ibdev_to_node(dev->dev));
if (!hdr_daddrs) {
kvfree(hdrs);
return NULL;
}
for (i = 0; i < num_hdrs; i++) {
- hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]);
+ hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir);
if (!hdrs[i]) {
- rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i);
+ rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir);
return NULL;
}
}
@@ -485,24 +529,6 @@ struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
return hdrs;
}
-/* Free the DMA memory used to store struct rds_header.
- *
- * @pool: the DMA memory pool
- * @hdrs: pointer to the array storing DMA memory pointers
- * @dma_addrs: pointer to the array storing DMA addresses
- * @num_hdars: number of headers to free.
- */
-void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
- dma_addr_t *dma_addrs, u32 num_hdrs)
-{
- u32 i;
-
- for (i = 0; i < num_hdrs; i++)
- dma_pool_free(pool, hdrs[i], dma_addrs[i]);
- kvfree(hdrs);
- kvfree(dma_addrs);
-}
-
/*
* This needs to be very careful to not leave IS_ERR pointers around for
* cleanup to trip over.
@@ -516,7 +542,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
struct rds_ib_device *rds_ibdev;
unsigned long max_wrs;
int ret, fr_queue_space;
- struct dma_pool *pool;
/*
* It's normal to see a null device if an incoming connection races
@@ -612,25 +637,26 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
goto recv_cq_out;
}
- pool = rds_ibdev->rid_hdrs_pool;
- ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma,
- ic->i_send_ring.w_nr);
+ ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma,
+ ic->i_send_ring.w_nr,
+ DMA_TO_DEVICE);
if (!ic->i_send_hdrs) {
ret = -ENOMEM;
rdsdebug("DMA send hdrs alloc failed\n");
goto qp_out;
}
- ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma,
- ic->i_recv_ring.w_nr);
+ ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma,
+ ic->i_recv_ring.w_nr,
+ DMA_FROM_DEVICE);
if (!ic->i_recv_hdrs) {
ret = -ENOMEM;
rdsdebug("DMA recv hdrs alloc failed\n");
goto send_hdrs_dma_out;
}
- ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL,
- &ic->i_ack_dma);
+ ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma,
+ DMA_TO_DEVICE);
if (!ic->i_ack) {
ret = -ENOMEM;
rdsdebug("DMA ack header alloc failed\n");
@@ -666,18 +692,19 @@ sends_out:
vfree(ic->i_sends);
ack_dma_out:
- dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
+ rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma,
+ DMA_TO_DEVICE);
ic->i_ack = NULL;
recv_hdrs_dma_out:
- rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma,
- ic->i_recv_ring.w_nr);
+ rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma,
+ ic->i_recv_ring.w_nr, DMA_FROM_DEVICE);
ic->i_recv_hdrs = NULL;
ic->i_recv_hdrs_dma = NULL;
send_hdrs_dma_out:
- rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma,
- ic->i_send_ring.w_nr);
+ rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma,
+ ic->i_send_ring.w_nr, DMA_TO_DEVICE);
ic->i_send_hdrs = NULL;
ic->i_send_hdrs_dma = NULL;
@@ -1110,29 +1137,30 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
}
if (ic->rds_ibdev) {
- struct dma_pool *pool;
-
- pool = ic->rds_ibdev->rid_hdrs_pool;
-
/* then free the resources that ib callbacks use */
if (ic->i_send_hdrs) {
- rds_dma_hdrs_free(pool, ic->i_send_hdrs,
+ rds_dma_hdrs_free(ic->rds_ibdev,
+ ic->i_send_hdrs,
ic->i_send_hdrs_dma,
- ic->i_send_ring.w_nr);
+ ic->i_send_ring.w_nr,
+ DMA_TO_DEVICE);
ic->i_send_hdrs = NULL;
ic->i_send_hdrs_dma = NULL;
}
if (ic->i_recv_hdrs) {
- rds_dma_hdrs_free(pool, ic->i_recv_hdrs,
+ rds_dma_hdrs_free(ic->rds_ibdev,
+ ic->i_recv_hdrs,
ic->i_recv_hdrs_dma,
- ic->i_recv_ring.w_nr);
+ ic->i_recv_ring.w_nr,
+ DMA_FROM_DEVICE);
ic->i_recv_hdrs = NULL;
ic->i_recv_hdrs_dma = NULL;
}
if (ic->i_ack) {
- dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
+ rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack,
+ ic->i_ack_dma, DMA_TO_DEVICE);
ic->i_ack = NULL;
}
} else {
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 3cffcec5fb37..6fdedd9dbbc2 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -662,10 +662,16 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
seq = rds_ib_get_ack(ic);
rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
+
+ ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma,
+ sizeof(*hdr), DMA_TO_DEVICE);
rds_message_populate_header(hdr, 0, 0, 0);
hdr->h_ack = cpu_to_be64(seq);
hdr->h_credit = adv_credits;
rds_message_make_checksum(hdr);
+ ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma,
+ sizeof(*hdr), DMA_TO_DEVICE);
+
ic->i_ack_queued = jiffies;
ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL);
@@ -845,6 +851,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
struct rds_ib_connection *ic = conn->c_transport_data;
struct rds_ib_incoming *ibinc = ic->i_ibinc;
struct rds_header *ihdr, *hdr;
+ dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
/* XXX shut down the connection if port 0,0 are seen? */
@@ -863,6 +870,8 @@ static void rds_ib_process_recv(struct rds_connection *conn,
ihdr = ic->i_recv_hdrs[recv - ic->i_recvs];
+ ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr,
+ sizeof(*ihdr), DMA_FROM_DEVICE);
/* Validate the checksum. */
if (!rds_message_verify_checksum(ihdr)) {
rds_ib_conn_error(conn, "incoming message "
@@ -870,7 +879,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
"forcing a reconnect\n",
&conn->c_faddr);
rds_stats_inc(s_recv_drop_bad_checksum);
- return;
+ goto done;
}
/* Process the ACK sequence which comes with every packet */
@@ -899,7 +908,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
*/
rds_ib_frag_free(ic, recv->r_frag);
recv->r_frag = NULL;
- return;
+ goto done;
}
/*
@@ -933,7 +942,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
hdr->h_dport != ihdr->h_dport) {
rds_ib_conn_error(conn,
"fragment header mismatch; forcing reconnect\n");
- return;
+ goto done;
}
}
@@ -965,6 +974,9 @@ static void rds_ib_process_recv(struct rds_connection *conn,
rds_inc_put(&ibinc->ii_inc);
}
+done:
+ ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr,
+ sizeof(*ihdr), DMA_FROM_DEVICE);
}
void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index dfe778220657..92b4a8689aae 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -638,6 +638,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
send->s_sge[0].length = sizeof(struct rds_header);
send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
+ ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev,
+ ic->i_send_hdrs_dma[pos],
+ sizeof(struct rds_header),
+ DMA_TO_DEVICE);
memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr,
sizeof(struct rds_header));
@@ -688,6 +692,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
adv_credits = 0;
rds_ib_stats_inc(s_ib_tx_credit_updates);
}
+ ib_dma_sync_single_for_device(ic->rds_ibdev->dev,
+ ic->i_send_hdrs_dma[pos],
+ sizeof(struct rds_header),
+ DMA_TO_DEVICE);
if (prev)
prev->s_wr.next = &send->s_wr;
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index 8a577c95496e..71c960dcd8a4 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -9,6 +9,7 @@ struct test {
int alloc_ret;
unsigned num_pages;
unsigned *pfn;
+ unsigned *pfn_app;
unsigned size;
unsigned int max_seg;
unsigned int expected_segments;
@@ -52,31 +53,39 @@ int main(void)
{
const unsigned int sgmax = UINT_MAX;
struct test *test, tests[] = {
- { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
- { 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
- { 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
- { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 },
- { 0, 1, pfn(0), 1, sgmax, 1 },
- { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 },
- { 0, 2, pfn(1, 0), 2 * PAGE_SIZE, sgmax, 2 },
- { 0, 3, pfn(0, 1, 2), 3 * PAGE_SIZE, sgmax, 1 },
- { 0, 3, pfn(0, 2, 1), 3 * PAGE_SIZE, sgmax, 3 },
- { 0, 3, pfn(0, 1, 3), 3 * PAGE_SIZE, sgmax, 2 },
- { 0, 3, pfn(1, 2, 4), 3 * PAGE_SIZE, sgmax, 2 },
- { 0, 3, pfn(1, 3, 4), 3 * PAGE_SIZE, sgmax, 2 },
- { 0, 4, pfn(0, 1, 3, 4), 4 * PAGE_SIZE, sgmax, 2 },
- { 0, 5, pfn(0, 1, 3, 4, 5), 5 * PAGE_SIZE, sgmax, 2 },
- { 0, 5, pfn(0, 1, 3, 4, 6), 5 * PAGE_SIZE, sgmax, 3 },
- { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, sgmax, 1 },
- { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
- { 0, 6, pfn(0, 1, 2, 3, 4, 5), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
- { 0, 6, pfn(0, 2, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 4 },
- { 0, 6, pfn(0, 1, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
- { 0, 0, NULL, 0, 0, 0 },
+ { -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 },
+ { 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 },
+ { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 },
+ { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 },
+ { 0, 1, pfn(0), NULL, 1, sgmax, 1 },
+ { 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 },
+ { 0, 2, pfn(1, 0), NULL, 2 * PAGE_SIZE, sgmax, 2 },
+ { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 },
+ { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 },
+ { 0, 3, pfn(0, 1, 2), pfn(3, 4, 5), 3 * PAGE_SIZE, sgmax, 1 },
+ { 0, 3, pfn(0, 1, 2), pfn(4, 5, 6), 3 * PAGE_SIZE, sgmax, 2 },
+ { 0, 3, pfn(0, 2, 1), NULL, 3 * PAGE_SIZE, sgmax, 3 },
+ { 0, 3, pfn(0, 1, 3), NULL, 3 * PAGE_SIZE, sgmax, 2 },
+ { 0, 3, pfn(1, 2, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 },
+ { 0, 3, pfn(1, 3, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 },
+ { 0, 4, pfn(0, 1, 3, 4), NULL, 4 * PAGE_SIZE, sgmax, 2 },
+ { 0, 5, pfn(0, 1, 3, 4, 5), NULL, 5 * PAGE_SIZE, sgmax, 2 },
+ { 0, 5, pfn(0, 1, 3, 4, 6), NULL, 5 * PAGE_SIZE, sgmax, 3 },
+ { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, sgmax, 1 },
+ { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, 2 * PAGE_SIZE,
+ 3 },
+ { 0, 6, pfn(0, 1, 2, 3, 4, 5), NULL, 6 * PAGE_SIZE,
+ 2 * PAGE_SIZE, 3 },
+ { 0, 6, pfn(0, 2, 3, 4, 5, 6), NULL, 6 * PAGE_SIZE,
+ 2 * PAGE_SIZE, 4 },
+ { 0, 6, pfn(0, 1, 3, 4, 5, 6), pfn(7, 8, 9, 10, 11, 12),
+ 6 * PAGE_SIZE, 12 * PAGE_SIZE, 2 },
+ { 0, 0, NULL, NULL, 0, 0, 0 },
};
unsigned int i;
for (i = 0, test = tests; test->expected_segments; test++, i++) {
+ int left_pages = test->pfn_app ? test->num_pages : 0;
struct page *pages[MAX_PAGES];
struct sg_table st;
struct scatterlist *sg;
@@ -84,14 +93,23 @@ int main(void)
set_pages(pages, test->pfn, test->num_pages);
sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
- test->size, test->max_seg, NULL, 0, GFP_KERNEL);
+ test->size, test->max_seg, NULL, left_pages, GFP_KERNEL);
assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
if (test->alloc_ret)
continue;
+ if (test->pfn_app) {
+ set_pages(pages, test->pfn_app, test->num_pages);
+ sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
+ test->size, test->max_seg, sg, 0, GFP_KERNEL);
+
+ assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
+ }
+
VALIDATE(st.nents == test->expected_segments, &st, test);
- VALIDATE(st.orig_nents == test->expected_segments, &st, test);
+ if (!test->pfn_app)
+ VALIDATE(st.orig_nents == test->expected_segments, &st, test);
sg_free_table(&st);
}