175 files changed, 3845 insertions(+), 3915 deletions(-)
@@ -345,3 +345,4 @@ Wolfram Sang <wsa@kernel.org> <w.sang@pengutronix.de> Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de> Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com> Yusuke Goda <goda.yusuke@renesas.com> +Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com> diff --git a/MAINTAINERS b/MAINTAINERS index a14cad29c031..8c1c5f9830c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16399,7 +16399,7 @@ F: drivers/infiniband/sw/siw/ F: include/uapi/rdma/siw-abi.h SOFT-ROCE DRIVER (rxe) -M: Zhu Yanjun <yanjunz@nvidia.com> +M: Zhu Yanjun <zyjzyj2000@gmail.com> L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/sw/rxe/ diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 5afd142fe8c7..98165589c8ab 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1251,7 +1251,8 @@ out: EXPORT_SYMBOL(ib_cm_listen); /** - * Create a new listening ib_cm_id and listen on the given service ID. + * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on + * the given service ID. * * If there's an existing ID listening on that same device and service ID, * return it. @@ -1765,7 +1766,7 @@ static u16 cm_get_bth_pkey(struct cm_work *work) } /** - * Convert OPA SGID to IB SGID + * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID * ULPs (such as IPoIB) do not understand OPA GIDs and will * reject them as the local_gid will not match the sgid. Therefore, * change the pathrec's SGID to an IB SGID. @@ -4273,8 +4274,8 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, group = container_of(obj, struct cm_counter_group, obj); cm_attr = container_of(attr, struct cm_counter_attribute, attr); - return sprintf(buf, "%ld\n", - atomic_long_read(&group->counter[cm_attr->index])); + return sysfs_emit(buf, "%ld\n", + atomic_long_read(&group->counter[cm_attr->index])); } static const struct sysfs_ops cm_counter_ops = { diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a77750b8954d..c51b84b2d2f3 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -477,6 +477,10 @@ static void cma_release_dev(struct rdma_id_private *id_priv) list_del(&id_priv->list); cma_dev_put(id_priv->cma_dev); id_priv->cma_dev = NULL; + if (id_priv->id.route.addr.dev_addr.sgid_attr) { + rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); + id_priv->id.route.addr.dev_addr.sgid_attr = NULL; + } mutex_unlock(&lock); } @@ -1861,9 +1865,6 @@ static void _destroy_id(struct rdma_id_private *id_priv, kfree(id_priv->id.route.path_rec); - if (id_priv->id.route.addr.dev_addr.sgid_attr) - rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); - put_net(id_priv->id.route.addr.dev_addr.net); rdma_restrack_del(&id_priv->res); kfree(id_priv); @@ -2495,8 +2496,9 @@ static int cma_listen_handler(struct rdma_cm_id *id, return id_priv->id.event_handler(id, event); } -static void cma_listen_on_dev(struct rdma_id_private *id_priv, - struct cma_device *cma_dev) +static int cma_listen_on_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev, + struct rdma_id_private **to_destroy) { struct rdma_id_private *dev_id_priv; struct net *net = id_priv->id.route.addr.dev_addr.net; @@ -2504,21 +2506,21 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, lockdep_assert_held(&lock); + *to_destroy = NULL; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) - return; + return 0; dev_id_priv = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 
id_priv->id.qp_type, id_priv); if (IS_ERR(dev_id_priv)) - return; + return PTR_ERR(dev_id_priv); dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); _cma_attach_to_dev(dev_id_priv, cma_dev); - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); cma_id_get(id_priv); dev_id_priv->internal_id = 1; dev_id_priv->afonly = id_priv->afonly; @@ -2527,19 +2529,42 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) - dev_warn(&cma_dev->device->dev, - "RDMA CMA: cma_listen_on_dev, error %d\n", ret); + goto err_listen; + list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); + return 0; +err_listen: + /* Caller must destroy this after releasing lock */ + *to_destroy = dev_id_priv; + dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret); + return ret; } -static void cma_listen_on_all(struct rdma_id_private *id_priv) +static int cma_listen_on_all(struct rdma_id_private *id_priv) { + struct rdma_id_private *to_destroy; struct cma_device *cma_dev; + int ret; mutex_lock(&lock); list_add_tail(&id_priv->list, &listen_any_list); - list_for_each_entry(cma_dev, &dev_list, list) - cma_listen_on_dev(id_priv, cma_dev); + list_for_each_entry(cma_dev, &dev_list, list) { + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); + if (ret) { + /* Prevent racing with cma_process_remove() */ + if (to_destroy) + list_del_init(&to_destroy->list); + goto err_listen; + } + } mutex_unlock(&lock); + return 0; + +err_listen: + list_del(&id_priv->list); + mutex_unlock(&lock); + if (to_destroy) + rdma_destroy_id(&to_destroy->id); + return ret; } void rdma_set_service_type(struct rdma_cm_id *id, int tos) @@ -3692,8 +3717,11 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) ret = -ENOSYS; goto err; } - } else - cma_listen_on_all(id_priv); + } else { + ret = cma_listen_on_all(id_priv); + if (ret) + goto err; + } return 0; err: @@ -4773,69 +4801,6 @@ static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback }; -static int cma_add_one(struct ib_device *device) -{ - struct cma_device *cma_dev; - struct rdma_id_private *id_priv; - unsigned int i; - unsigned long supported_gids = 0; - int ret; - - cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); - if (!cma_dev) - return -ENOMEM; - - cma_dev->device = device; - cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, - sizeof(*cma_dev->default_gid_type), - GFP_KERNEL); - if (!cma_dev->default_gid_type) { - ret = -ENOMEM; - goto free_cma_dev; - } - - cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, - sizeof(*cma_dev->default_roce_tos), - GFP_KERNEL); - if (!cma_dev->default_roce_tos) { - ret = -ENOMEM; - goto free_gid_type; - } - - rdma_for_each_port (device, i) { - supported_gids = roce_gid_type_mask_support(device, i); - WARN_ON(!supported_gids); - if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) - cma_dev->default_gid_type[i - rdma_start_port(device)] = - CMA_PREFERRED_ROCE_GID_TYPE; - else - cma_dev->default_gid_type[i - rdma_start_port(device)] = - find_first_bit(&supported_gids, BITS_PER_LONG); - cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; - } - - init_completion(&cma_dev->comp); - refcount_set(&cma_dev->refcount, 1); - INIT_LIST_HEAD(&cma_dev->id_list); - ib_set_client_data(device, &cma_client, cma_dev); - - mutex_lock(&lock); - list_add_tail(&cma_dev->list, &dev_list); - list_for_each_entry(id_priv, &listen_any_list, 
list) - cma_listen_on_dev(id_priv, cma_dev); - mutex_unlock(&lock); - - trace_cm_add_one(device); - return 0; - -free_gid_type: - kfree(cma_dev->default_gid_type); - -free_cma_dev: - kfree(cma_dev); - return ret; -} - static void cma_send_device_removal_put(struct rdma_id_private *id_priv) { struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL }; @@ -4898,6 +4863,80 @@ static void cma_process_remove(struct cma_device *cma_dev) wait_for_completion(&cma_dev->comp); } +static int cma_add_one(struct ib_device *device) +{ + struct rdma_id_private *to_destroy; + struct cma_device *cma_dev; + struct rdma_id_private *id_priv; + unsigned int i; + unsigned long supported_gids = 0; + int ret; + + cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL); + if (!cma_dev) + return -ENOMEM; + + cma_dev->device = device; + cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_gid_type), + GFP_KERNEL); + if (!cma_dev->default_gid_type) { + ret = -ENOMEM; + goto free_cma_dev; + } + + cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, + sizeof(*cma_dev->default_roce_tos), + GFP_KERNEL); + if (!cma_dev->default_roce_tos) { + ret = -ENOMEM; + goto free_gid_type; + } + + rdma_for_each_port (device, i) { + supported_gids = roce_gid_type_mask_support(device, i); + WARN_ON(!supported_gids); + if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) + cma_dev->default_gid_type[i - rdma_start_port(device)] = + CMA_PREFERRED_ROCE_GID_TYPE; + else + cma_dev->default_gid_type[i - rdma_start_port(device)] = + find_first_bit(&supported_gids, BITS_PER_LONG); + cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; + } + + init_completion(&cma_dev->comp); + refcount_set(&cma_dev->refcount, 1); + INIT_LIST_HEAD(&cma_dev->id_list); + ib_set_client_data(device, &cma_client, cma_dev); + + mutex_lock(&lock); + list_add_tail(&cma_dev->list, &dev_list); + list_for_each_entry(id_priv, &listen_any_list, list) { + ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); + if (ret) + goto free_listen; + } + mutex_unlock(&lock); + + trace_cm_add_one(device); + return 0; + +free_listen: + list_del(&cma_dev->list); + mutex_unlock(&lock); + + /* cma_process_remove() will delete to_destroy */ + cma_process_remove(cma_dev); + kfree(cma_dev->default_roce_tos); +free_gid_type: + kfree(cma_dev->default_gid_type); + +free_cma_dev: + kfree(cma_dev); + return ret; +} + static void cma_remove_one(struct ib_device *device, void *client_data) { struct cma_device *cma_dev = client_data; diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7ec4af2ed87a..7f70e5a7de10 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -115,7 +115,7 @@ static ssize_t default_roce_mode_show(struct config_item *item, if (gid_type < 0) return gid_type; - return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type)); + return sysfs_emit(buf, "%s\n", ib_cache_gid_type_str(gid_type)); } static ssize_t default_roce_mode_store(struct config_item *item, @@ -157,7 +157,7 @@ static ssize_t default_roce_tos_show(struct config_item *item, char *buf) tos = cma_get_default_roce_tos(cma_dev, group->port_num); cma_configfs_params_put(cma_dev); - return sprintf(buf, "%u\n", tos); + return sysfs_emit(buf, "%u\n", tos); } static ssize_t default_roce_tos_store(struct config_item *item, diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index e84b0fedaacb..315f7a297eee 100644 --- 
a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -318,15 +318,12 @@ struct ib_device *ib_device_get_by_index(const struct net *net, u32 index); void nldev_init(void); void nldev_exit(void); -static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, - struct ib_pd *pd, - struct ib_qp_init_attr *attr, - struct ib_udata *udata, - struct ib_uqp_object *uobj) +static inline struct ib_qp * +_ib_create_qp(struct ib_device *dev, struct ib_pd *pd, + struct ib_qp_init_attr *attr, struct ib_udata *udata, + struct ib_uqp_object *uobj, const char *caller) { - enum ib_qp_type qp_type = attr->qp_type; struct ib_qp *qp; - bool is_xrc; if (!dev->ops.create_qp) return ERR_PTR(-EOPNOTSUPP); @@ -347,6 +344,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->srq = attr->srq; qp->rwq_ind_tbl = attr->rwq_ind_tbl; qp->event_handler = attr->event_handler; + qp->port = attr->port_num; atomic_set(&qp->usecnt, 0); spin_lock_init(&qp->mr_lock); @@ -354,16 +352,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, INIT_LIST_HEAD(&qp->sig_mrs); rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP); - /* - * We don't track XRC QPs for now, because they don't have PD - * and more importantly they are created internaly by driver, - * see mlx5 create_dev_resources() as an example. - */ - is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT; - if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { - rdma_restrack_parent_name(&qp->res, &pd->res); - rdma_restrack_add(&qp->res); - } + WARN_ONCE(!udata && !caller, "Missing kernel QP owner"); + rdma_restrack_set_name(&qp->res, udata ? NULL : caller); + rdma_restrack_add(&qp->res); return qp; } @@ -411,7 +402,6 @@ void rdma_umap_priv_init(struct rdma_umap_priv *priv, struct vm_area_struct *vma, struct rdma_user_mmap_entry *entry); -void ib_cq_pool_init(struct ib_device *dev); -void ib_cq_pool_destroy(struct ib_device *dev); +void ib_cq_pool_cleanup(struct ib_device *dev); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index e4ff0d3328b6..92745522250e 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -64,8 +64,40 @@ out: return ret; } -static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, - enum rdma_nl_counter_mode mode) +static void auto_mode_init_counter(struct rdma_counter *counter, + const struct ib_qp *qp, + enum rdma_nl_counter_mask new_mask) +{ + struct auto_mode_param *param = &counter->mode.param; + + counter->mode.mode = RDMA_COUNTER_MODE_AUTO; + counter->mode.mask = new_mask; + + if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) + param->qp_type = qp->qp_type; +} + +static int __rdma_counter_bind_qp(struct rdma_counter *counter, + struct ib_qp *qp) +{ + int ret; + + if (qp->counter) + return -EINVAL; + + if (!qp->device->ops.counter_bind_qp) + return -EOPNOTSUPP; + + mutex_lock(&counter->lock); + ret = qp->device->ops.counter_bind_qp(counter, qp); + mutex_unlock(&counter->lock); + + return ret; +} + +static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u8 port, + struct ib_qp *qp, + enum rdma_nl_counter_mode mode) { struct rdma_port_counter *port_counter; struct rdma_counter *counter; @@ -88,11 +120,22 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, port_counter = &dev->port_data[port].port_counter; mutex_lock(&port_counter->lock); - if (mode == RDMA_COUNTER_MODE_MANUAL) { + switch (mode) { + case 
RDMA_COUNTER_MODE_MANUAL: ret = __counter_set_mode(&port_counter->mode, RDMA_COUNTER_MODE_MANUAL, 0); - if (ret) + if (ret) { + mutex_unlock(&port_counter->lock); goto err_mode; + } + break; + case RDMA_COUNTER_MODE_AUTO: + auto_mode_init_counter(counter, qp, port_counter->mode.mask); + break; + default: + ret = -EOPNOTSUPP; + mutex_unlock(&port_counter->lock); + goto err_mode; } port_counter->num_counters++; @@ -102,10 +145,15 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, kref_init(&counter->kref); mutex_init(&counter->lock); + ret = __rdma_counter_bind_qp(counter, qp); + if (ret) + goto err_mode; + + rdma_restrack_parent_name(&counter->res, &qp->res); + rdma_restrack_add(&counter->res); return counter; err_mode: - mutex_unlock(&port_counter->lock); kfree(counter->stats); err_stats: rdma_restrack_put(&counter->res); @@ -132,19 +180,6 @@ static void rdma_counter_free(struct rdma_counter *counter) kfree(counter); } -static void auto_mode_init_counter(struct rdma_counter *counter, - const struct ib_qp *qp, - enum rdma_nl_counter_mask new_mask) -{ - struct auto_mode_param *param = &counter->mode.param; - - counter->mode.mode = RDMA_COUNTER_MODE_AUTO; - counter->mode.mask = new_mask; - - if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) - param->qp_type = qp->qp_type; -} - static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, enum rdma_nl_counter_mask auto_mask) { @@ -161,24 +196,6 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, return match; } -static int __rdma_counter_bind_qp(struct rdma_counter *counter, - struct ib_qp *qp) -{ - int ret; - - if (qp->counter) - return -EINVAL; - - if (!qp->device->ops.counter_bind_qp) - return -EOPNOTSUPP; - - mutex_lock(&counter->lock); - ret = qp->device->ops.counter_bind_qp(counter, qp); - mutex_unlock(&counter->lock); - - return ret; -} - static int __rdma_counter_unbind_qp(struct ib_qp *qp) { struct rdma_counter *counter = qp->counter; @@ -247,13 +264,6 @@ next: return counter; } -static void rdma_counter_res_add(struct rdma_counter *counter, - struct ib_qp *qp) -{ - rdma_restrack_parent_name(&counter->res, &qp->res); - rdma_restrack_add(&counter->res); -} - static void counter_release(struct kref *kref) { struct rdma_counter *counter; @@ -275,7 +285,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; - if (!qp->res.valid || rdma_is_kernel_res(&qp->res)) + if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res)) return 0; if (!rdma_is_port_valid(dev, port)) @@ -293,19 +303,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) return ret; } } else { - counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO); + counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO); if (!counter) return -ENOMEM; - - auto_mode_init_counter(counter, qp, port_counter->mode.mask); - - ret = __rdma_counter_bind_qp(counter, qp); - if (ret) { - rdma_counter_free(counter); - return ret; - } - - rdma_counter_res_add(counter, qp); } return 0; @@ -419,15 +419,6 @@ err: return NULL; } -static int rdma_counter_bind_qp_manual(struct rdma_counter *counter, - struct ib_qp *qp) -{ - if ((counter->device != qp->device) || (counter->port != qp->port)) - return -EINVAL; - - return __rdma_counter_bind_qp(counter, qp); -} - static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev, u32 counter_id) { @@ -475,7 +466,12 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, goto err_task; } - ret = 
rdma_counter_bind_qp_manual(counter, qp); + if ((counter->device != qp->device) || (counter->port != qp->port)) { + ret = -EINVAL; + goto err_task; + } + + ret = __rdma_counter_bind_qp(counter, qp); if (ret) goto err_task; @@ -520,26 +516,18 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, goto err; } - counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_MANUAL); + counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL); if (!counter) { ret = -ENOMEM; goto err; } - ret = rdma_counter_bind_qp_manual(counter, qp); - if (ret) - goto err_bind; - if (counter_id) *counter_id = counter->id; - rdma_counter_res_add(counter, qp); - rdma_restrack_put(&qp->res); - return ret; + return 0; -err_bind: - rdma_counter_free(counter); err: rdma_restrack_put(&qp->res); return ret; diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 12ebacf52958..433b426729d4 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -123,7 +123,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs, } /** - * ib_process_direct_cq - process a CQ in caller context + * ib_process_cq_direct - process a CQ in caller context * @cq: CQ to process * @budget: number of CQEs to poll for * @@ -197,7 +197,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) } /** - * __ib_alloc_cq allocate a completion queue + * __ib_alloc_cq - allocate a completion queue * @dev: device to allocate the CQ for * @private: driver private data, accessible from cq->cq_context * @nr_cqe: number of CQEs to allocate @@ -349,16 +349,7 @@ void ib_free_cq(struct ib_cq *cq) } EXPORT_SYMBOL(ib_free_cq); -void ib_cq_pool_init(struct ib_device *dev) -{ - unsigned int i; - - spin_lock_init(&dev->cq_pools_lock); - for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) - INIT_LIST_HEAD(&dev->cq_pools[i]); -} - -void ib_cq_pool_destroy(struct ib_device *dev) +void ib_cq_pool_cleanup(struct ib_device *dev) { struct ib_cq *cq, *n; unsigned int i; @@ -367,6 +358,7 @@ void ib_cq_pool_destroy(struct ib_device *dev) list_for_each_entry_safe(cq, n, &dev->cq_pools[i], pool_entry) { WARN_ON(cq->cqe_used); + list_del(&cq->pool_entry); cq->shared = false; ib_free_cq(cq); } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a3b1fc84cdca..e96f979e6d52 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -284,6 +284,7 @@ static void ib_device_check_mandatory(struct ib_device *device) IB_MANDATORY_FUNC(poll_cq), IB_MANDATORY_FUNC(req_notify_cq), IB_MANDATORY_FUNC(get_dma_mr), + IB_MANDATORY_FUNC(reg_user_mr), IB_MANDATORY_FUNC(dereg_mr), IB_MANDATORY_FUNC(get_port_immutable) }; @@ -569,6 +570,7 @@ static void rdma_init_coredev(struct ib_core_device *coredev, struct ib_device *_ib_alloc_device(size_t size) { struct ib_device *device; + unsigned int i; if (WARN_ON(size < sizeof(struct ib_device))) return NULL; @@ -600,6 +602,41 @@ struct ib_device *_ib_alloc_device(size_t size) init_completion(&device->unreg_completion); INIT_WORK(&device->unregistration_work, ib_unregister_work); + spin_lock_init(&device->cq_pools_lock); + for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++) + INIT_LIST_HEAD(&device->cq_pools[i]); + + device->uverbs_cmd_mask = + BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) | + BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | + BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) | + BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + 
BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) | + BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) | + BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | + BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) | + BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | + BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) | + BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) | + BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) | + BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | + BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); return device; } EXPORT_SYMBOL(_ib_alloc_device); @@ -1177,25 +1214,6 @@ out: return ret; } -static void setup_dma_device(struct ib_device *device, - struct device *dma_device) -{ - /* - * If the caller does not provide a DMA capable device then the IB - * device will be used. In this case the caller should fully setup the - * ibdev for DMA. This usually means using dma_virt_ops. - */ -#ifdef CONFIG_DMA_VIRT_OPS - if (!dma_device) { - device->dev.dma_ops = &dma_virt_ops; - dma_device = &device->dev; - } -#endif - WARN_ON(!dma_device); - device->dma_device = dma_device; - WARN_ON(!device->dma_device->dma_parms); -} - /* * setup_device() allocates memory and sets up data that requires calling the * device ops, this is the only reason these actions are not done during @@ -1249,7 +1267,7 @@ static void disable_device(struct ib_device *device) remove_client_context(device, cid); } - ib_cq_pool_destroy(device); + ib_cq_pool_cleanup(device); /* Pairs with refcount_set in enable_device */ ib_device_put(device); @@ -1294,8 +1312,6 @@ static int enable_device_and_get(struct ib_device *device) goto out; } - ib_cq_pool_init(device); - down_read(&clients_rwsem); xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { ret = add_client_context(device, client); @@ -1341,7 +1357,14 @@ int ib_register_device(struct ib_device *device, const char *name, if (ret) return ret; - setup_dma_device(device, dma_device); + /* + * If the caller does not provide a DMA capable device then the IB core + * will set up ib_sge and scatterlist structures that stash the kernel + * virtual address into the address field. 
+ */ + WARN_ON(dma_device && !dma_device->dma_parms); + device->dma_device = dma_device; + ret = setup_device(device); if (ret) return ret; @@ -1374,9 +1397,6 @@ int ib_register_device(struct ib_device *device, const char *name, } ret = enable_device_and_get(device); - dev_set_uevent_suppress(&device->dev, false); - /* Mark for userspace that device is ready */ - kobject_uevent(&device->dev.kobj, KOBJ_ADD); if (ret) { void (*dealloc_fn)(struct ib_device *); @@ -1396,8 +1416,12 @@ int ib_register_device(struct ib_device *device, const char *name, ib_device_put(device); __ib_unregister_device(device); device->ops.dealloc_driver = dealloc_fn; + dev_set_uevent_suppress(&device->dev, false); return ret; } + dev_set_uevent_suppress(&device->dev, false); + /* Mark for userspace that device is ready */ + kobject_uevent(&device->dev.kobj, KOBJ_ADD); ib_device_put(device); return 0; @@ -2576,6 +2600,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_qp); SET_DEVICE_OP(dev_ops, create_rwq_ind_table); SET_DEVICE_OP(dev_ops, create_srq); + SET_DEVICE_OP(dev_ops, create_user_ah); SET_DEVICE_OP(dev_ops, create_wq); SET_DEVICE_OP(dev_ops, dealloc_dm); SET_DEVICE_OP(dev_ops, dealloc_driver); @@ -2675,6 +2700,21 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) } EXPORT_SYMBOL(ib_set_device_ops); +#ifdef CONFIG_INFINIBAND_VIRT_DMA +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + sg_dma_address(s) = (uintptr_t)sg_virt(s); + sg_dma_len(s) = s->length; + } + return nents; +} +EXPORT_SYMBOL(ib_dma_virt_map_sg); +#endif /* CONFIG_INFINIBAND_VIRT_DMA */ + static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { [RDMA_NL_LS_OP_RESOLVE] = { .doit = ib_nl_handle_resolve_resp, diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 1bf87d9fd0bd..eeb8e6010907 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -141,7 +141,7 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request); int iwpm_get_nlmsg_seq(void); /** - * iwpm_add_reminfo - Add remote address info of the connecting peer + * iwpm_add_remote_info - Add remote address info of the connecting peer * to the remote info hash table * @reminfo: The remote info to be added */ diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index ffe11b03724c..75eafd9208aa 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -137,15 +137,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, } else if (uobj->object) { ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, attrs); - if (ret) { - if (ib_is_destroy_retryable(ret, reason, uobj)) - return ret; - - /* Nothing to be done, dangle the memory and move on */ - WARN(true, - "ib_uverbs: failed to remove uobject id %d, driver err=%d", - uobj->id, ret); - } + if (ret) + /* Nothing to be done, wait till ucontext will clean it */ + return ret; uobj->object = NULL; } @@ -543,12 +537,7 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, struct uverbs_obj_idr_type, type); int ret = idr_type->destroy_object(uobj, why, attrs); - /* - * We can only fail gracefully if the user requested to destroy the - * object or when a retry may be called upon an error. - * In the rest of the cases, just remove whatever you can. 
- */ - if (ib_is_destroy_retryable(ret, why, uobj)) + if (ret) return ret; if (why == RDMA_REMOVE_ABORT) @@ -581,11 +570,8 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, { const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); - int ret = fd_type->destroy_object(uobj, why); - - if (ib_is_destroy_retryable(ret, why, uobj)) - return ret; + fd_type->destroy_object(uobj, why); return 0; } @@ -609,6 +595,27 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj) WARN_ON(old != NULL); } +static void swap_idr_uobjects(struct ib_uobject *obj_old, + struct ib_uobject *obj_new) +{ + struct ib_uverbs_file *ufile = obj_old->ufile; + void *old; + + /* + * New must be an object that been allocated but not yet committed, this + * moves the pre-committed state to obj_old, new still must be comitted. + */ + old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY, + GFP_KERNEL); + if (WARN_ON(old != obj_old)) + return; + + swap(obj_old->id, obj_new->id); + + old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL); + WARN_ON(old != NULL); +} + static void alloc_commit_fd_uobject(struct ib_uobject *uobj) { int fd = uobj->id; @@ -655,6 +662,35 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj, } /* + * new_uobj will be assigned to the handle currently used by to_uobj, and + * to_uobj will be destroyed. + * + * Upon return the caller must do: + * rdma_alloc_commit_uobject(new_uobj) + * uobj_put_destroy(to_uobj) + * + * to_uobj must have a write get but the put mode switches to destroy once + * this is called. + */ +void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj, + struct uverbs_attr_bundle *attrs) +{ + assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE); + + if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object || + !to_uobj->uapi_object->type_class->swap_uobjects)) + return; + + to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj); + + /* + * If this fails then the uobject is still completely valid (though with + * a new ID) and we leak it until context close. + */ + uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs); +} + +/* * This consumes the kref for uobj. It is up to the caller to unwind the HW * object and anything else connected to uobj before calling this. */ @@ -761,6 +797,7 @@ const struct uverbs_obj_type_class uverbs_idr_class = { .lookup_put = lookup_put_idr_uobject, .destroy_hw = destroy_hw_idr_uobject, .remove_handle = remove_handle_idr_uobject, + .swap_uobjects = swap_idr_uobjects, }; EXPORT_SYMBOL(uverbs_idr_class); @@ -863,11 +900,18 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * racing with a lookup_get. */ WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); + if (reason == RDMA_REMOVE_DRIVER_FAILURE) + obj->object = NULL; if (!uverbs_destroy_uobject(obj, reason, &attrs)) ret = 0; else atomic_set(&obj->usecnt, 0); } + + if (reason == RDMA_REMOVE_DRIVER_FAILURE) { + WARN_ON(!list_empty(&ufile->uobjects)); + return 0; + } return ret; } @@ -889,21 +933,12 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, if (!ufile->ucontext) goto done; - ufile->ucontext->cleanup_retryable = true; - while (!list_empty(&ufile->uobjects)) - if (__uverbs_cleanup_ufile(ufile, reason)) { - /* - * No entry was cleaned-up successfully during this - * iteration. It is a driver bug to fail destruction. 
- */ - WARN_ON(!list_empty(&ufile->uobjects)); - break; - } - - ufile->ucontext->cleanup_retryable = false; - if (!list_empty(&ufile->uobjects)) - __uverbs_cleanup_ufile(ufile, reason); + while (!list_empty(&ufile->uobjects) && + !__uverbs_cleanup_ufile(ufile, reason)) { + } + if (WARN_ON(!list_empty(&ufile->uobjects))) + __uverbs_cleanup_ufile(ufile, RDMA_REMOVE_DRIVER_FAILURE); ufile_destroy_ucontext(ufile, reason); done: diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 4aeeaaed0f17..e0a41c867002 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -221,19 +221,29 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); struct rdma_restrack_root *rt; - int ret; + int ret = 0; if (!dev) return; + if (res->no_track) + goto out; + rt = &dev->res[res->type]; if (res->type == RDMA_RESTRACK_QP) { /* Special case to ensure that LQPN points to right QP */ struct ib_qp *qp = container_of(res, struct ib_qp, res); - ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL); - res->id = ret ? 0 : qp->qp_num; + WARN_ONCE(qp->qp_num >> 24 || qp->port >> 8, + "QP number 0x%0X and port 0x%0X", qp->qp_num, + qp->port); + res->id = qp->qp_num; + if (qp->qp_type == IB_QPT_SMI || qp->qp_type == IB_QPT_GSI) + res->id |= qp->port << 24; + ret = xa_insert(&rt->xa, res->id, res, GFP_KERNEL); + if (ret) + res->id = 0; } else if (res->type == RDMA_RESTRACK_COUNTER) { /* Special case to ensure that cntn points to right counter */ struct rdma_counter *counter; @@ -246,6 +256,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) &rt->next_id, GFP_KERNEL); } +out: if (!ret) res->valid = true; } @@ -318,6 +329,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) return; } + if (res->no_track) + goto out; + dev = res_to_dev(res); if (WARN_ON(!dev)) return; @@ -328,8 +342,9 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP) return; WARN_ON(old != res); - res->valid = false; +out: + res->valid = false; rdma_restrack_put(res); wait_for_completion(&res->comp); } diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 13f43ab7220b..a96030b784eb 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -285,8 +285,11 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir) { - if (is_pci_p2pdma_page(sg_page(sg))) + if (is_pci_p2pdma_page(sg_page(sg))) { + if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) + return 0; return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + } return ib_dma_map_sg(dev, sg, sg_cnt, dir); } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8c930bf1df89..89a831fa1885 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1435,7 +1435,8 @@ enum opa_pr_supported { }; /** - * Check if current PR query can be an OPA query. + * opa_pr_query_possible - Check if current PR query can be an OPA query. 
+ * * Retuns PR_NOT_SUPPORTED if a path record query is not * possible, PR_OPA_SUPPORTED if an OPA path record query * is possible and PR_IB_SUPPORTED if an IB path record diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 914cddea525d..b8abb30f80df 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -165,9 +165,11 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "%d: %s\n", attr.state, - attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ? - state_name[attr.state] : "UNKNOWN"); + return sysfs_emit(buf, "%d: %s\n", attr.state, + attr.state >= 0 && + attr.state < ARRAY_SIZE(state_name) ? + state_name[attr.state] : + "UNKNOWN"); } static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused, @@ -180,7 +182,7 @@ static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%x\n", attr.lid); + return sysfs_emit(buf, "0x%x\n", attr.lid); } static ssize_t lid_mask_count_show(struct ib_port *p, @@ -194,7 +196,7 @@ static ssize_t lid_mask_count_show(struct ib_port *p, if (ret) return ret; - return sprintf(buf, "%d\n", attr.lmc); + return sysfs_emit(buf, "%d\n", attr.lmc); } static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused, @@ -207,7 +209,7 @@ static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%x\n", attr.sm_lid); + return sysfs_emit(buf, "0x%x\n", attr.sm_lid); } static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused, @@ -220,7 +222,7 @@ static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "%d\n", attr.sm_sl); + return sysfs_emit(buf, "%d\n", attr.sm_sl); } static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused, @@ -233,7 +235,7 @@ static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "0x%08x\n", attr.port_cap_flags); + return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags); } static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, @@ -273,6 +275,10 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, speed = " HDR"; rate = 500; break; + case IB_SPEED_NDR: + speed = " NDR"; + rate = 1000; + break; case IB_SPEED_SDR: default: /* default to SDR for invalid rates */ speed = " SDR"; @@ -284,9 +290,9 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, if (rate < 0) return -EINVAL; - return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", - rate / 10, rate % 10 ? ".5" : "", - ib_width_enum_to_int(attr.active_width), speed); + return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10, + rate % 10 ? 
".5" : "", + ib_width_enum_to_int(attr.active_width), speed); } static const char *phys_state_to_str(enum ib_port_phys_state phys_state) @@ -318,21 +324,28 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; - return sprintf(buf, "%d: %s\n", attr.phys_state, - phys_state_to_str(attr.phys_state)); + return sysfs_emit(buf, "%d: %s\n", attr.phys_state, + phys_state_to_str(attr.phys_state)); } static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, char *buf) { + const char *output; + switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) { case IB_LINK_LAYER_INFINIBAND: - return sprintf(buf, "%s\n", "InfiniBand"); + output = "InfiniBand"; + break; case IB_LINK_LAYER_ETHERNET: - return sprintf(buf, "%s\n", "Ethernet"); + output = "Ethernet"; + break; default: - return sprintf(buf, "%s\n", "Unknown"); + output = "Unknown"; + break; } + + return sysfs_emit(buf, "%s\n", output); } static PORT_ATTR_RO(state); @@ -358,27 +371,28 @@ static struct attribute *port_default_attrs[] = { NULL }; -static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) +static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { struct net_device *ndev; - size_t ret = -EINVAL; + int ret = -EINVAL; rcu_read_lock(); ndev = rcu_dereference(gid_attr->ndev); if (ndev) - ret = sprintf(buf, "%s\n", ndev->name); + ret = sysfs_emit(buf, "%s\n", ndev->name); rcu_read_unlock(); return ret; } -static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) +static ssize_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) { - return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); + return sysfs_emit(buf, "%s\n", + ib_cache_gid_type_str(gid_attr->gid_type)); } static ssize_t _show_port_gid_attr( struct ib_port *p, struct port_attribute *attr, char *buf, - size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) + ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf)) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); @@ -401,7 +415,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); const struct ib_gid_attr *gid_attr; - ssize_t ret; + int len; gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); if (IS_ERR(gid_attr)) { @@ -416,12 +430,12 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, * space throwing such error on fail to read gid, return zero * GID as before. This maintains backward compatibility. 
*/ - return sprintf(buf, "%pI6\n", zgid.raw); + return sysfs_emit(buf, "%pI6\n", zgid.raw); } - ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw); + len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw); rdma_put_gid_attr(gid_attr); - return ret; + return len; } static ssize_t show_port_gid_attr_ndev(struct ib_port *p, @@ -443,13 +457,13 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); u16 pkey; - ssize_t ret; + int ret; ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey); if (ret) return ret; - return sprintf(buf, "0x%04x\n", pkey); + return sysfs_emit(buf, "0x%04x\n", pkey); } #define PORT_PMA_ATTR(_name, _counter, _width, _offset) \ @@ -521,8 +535,9 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, container_of(attr, struct port_table_attribute, attr); int offset = tab_attr->index & 0xffff; int width = (tab_attr->index >> 16) & 0xff; - ssize_t ret; + int ret; u8 data[8]; + int len; ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data, 40 + offset / 8, sizeof(data)); @@ -531,30 +546,27 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, switch (width) { case 4: - ret = sprintf(buf, "%u\n", (*data >> - (4 - (offset % 8))) & 0xf); + len = sysfs_emit(buf, "%u\n", + (*data >> (4 - (offset % 8))) & 0xf); break; case 8: - ret = sprintf(buf, "%u\n", *data); + len = sysfs_emit(buf, "%u\n", *data); break; case 16: - ret = sprintf(buf, "%u\n", - be16_to_cpup((__be16 *)data)); + len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data)); break; case 32: - ret = sprintf(buf, "%u\n", - be32_to_cpup((__be32 *)data)); + len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data)); break; case 64: - ret = sprintf(buf, "%llu\n", - be64_to_cpup((__be64 *)data)); + len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data)); break; - default: - ret = 0; + len = 0; + break; } - return ret; + return len; } static PORT_PMA_ATTR(symbol_error , 0, 16, 32); @@ -815,12 +827,12 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, return 0; } -static ssize_t print_hw_stat(struct ib_device *dev, int port_num, - struct rdma_hw_stats *stats, int index, char *buf) +static int print_hw_stat(struct ib_device *dev, int port_num, + struct rdma_hw_stats *stats, int index, char *buf) { u64 v = rdma_counter_get_hwstat_value(dev, port_num, index); - return sprintf(buf, "%llu\n", stats->value[index] + v); + return sysfs_emit(buf, "%llu\n", stats->value[index] + v); } static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, @@ -877,7 +889,7 @@ static ssize_t show_stats_lifespan(struct kobject *kobj, msecs = jiffies_to_msecs(stats->lifespan); mutex_unlock(&stats->lock); - return sprintf(buf, "%d\n", msecs); + return sysfs_emit(buf, "%d\n", msecs); } static ssize_t set_stats_lifespan(struct kobject *kobj, @@ -1224,21 +1236,34 @@ err_put: return ret; } +static const char *node_type_string(int node_type) +{ + switch (node_type) { + case RDMA_NODE_IB_CA: + return "CA"; + case RDMA_NODE_IB_SWITCH: + return "switch"; + case RDMA_NODE_IB_ROUTER: + return "router"; + case RDMA_NODE_RNIC: + return "RNIC"; + case RDMA_NODE_USNIC: + return "usNIC"; + case RDMA_NODE_USNIC_UDP: + return "usNIC UDP"; + case RDMA_NODE_UNSPECIFIED: + return "unspecified"; + } + return "<unknown>"; +} + static ssize_t node_type_show(struct device *device, struct device_attribute *attr, char *buf) { 
struct ib_device *dev = rdma_device_to_ibdev(device); - switch (dev->node_type) { - case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); - case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); - case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type); - case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); - case RDMA_NODE_UNSPECIFIED: return sprintf(buf, "%d: unspecified\n", dev->node_type); - case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); - case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); - default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); - } + return sysfs_emit(buf, "%d: %s\n", dev->node_type, + node_type_string(dev->node_type)); } static DEVICE_ATTR_RO(node_type); @@ -1246,12 +1271,13 @@ static ssize_t sys_image_guid_show(struct device *device, struct device_attribute *dev_attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + __be16 *guid = (__be16 *)&dev->attrs.sys_image_guid; - return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]), - be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3])); + return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", + be16_to_cpu(guid[0]), + be16_to_cpu(guid[1]), + be16_to_cpu(guid[2]), + be16_to_cpu(guid[3])); } static DEVICE_ATTR_RO(sys_image_guid); @@ -1259,12 +1285,13 @@ static ssize_t node_guid_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + __be16 *node_guid = (__be16 *)&dev->node_guid; - return sprintf(buf, "%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) &dev->node_guid)[0]), - be16_to_cpu(((__be16 *) &dev->node_guid)[1]), - be16_to_cpu(((__be16 *) &dev->node_guid)[2]), - be16_to_cpu(((__be16 *) &dev->node_guid)[3])); + return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n", + be16_to_cpu(node_guid[0]), + be16_to_cpu(node_guid[1]), + be16_to_cpu(node_guid[2]), + be16_to_cpu(node_guid[3])); } static DEVICE_ATTR_RO(node_guid); @@ -1273,7 +1300,7 @@ static ssize_t node_desc_show(struct device *device, { struct ib_device *dev = rdma_device_to_ibdev(device); - return sprintf(buf, "%.64s\n", dev->node_desc); + return sysfs_emit(buf, "%.64s\n", dev->node_desc); } static ssize_t node_desc_store(struct device *device, @@ -1300,10 +1327,11 @@ static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = rdma_device_to_ibdev(device); + char version[IB_FW_VERSION_NAME_MAX] = {}; + + ib_get_device_fw_str(dev, version); - ib_get_device_fw_str(dev, buf); - strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); - return strlen(buf); + return sysfs_emit(buf, "%s\n", version); } static DEVICE_ATTR_RO(fw_ver); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ffe2563ad345..7dab9a27a145 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1825,7 +1825,7 @@ static ssize_t show_abi_version(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); + return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); } static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index e9fecbdf391b..7ca4112e3e8f 100644 --- 
a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -84,6 +84,15 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, dma_addr_t mask; int i; + if (umem->is_odp) { + unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift); + + /* ODP must always be self consistent. */ + if (!(pgsz_bitmap & page_size)) + return 0; + return page_size; + } + /* rdma_for_each_block() has a bug if the page size is smaller than the * page size used to build the umem. For now prevent smaller page sizes * from being returned. @@ -220,10 +229,10 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, cur_base += ret * PAGE_SIZE; npages -= ret; - sg = __sg_alloc_table_from_pages( - &umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT, - dma_get_max_seg_size(device->dma_device), sg, npages, - GFP_KERNEL); + sg = __sg_alloc_table_from_pages(&umem->sg_head, page_list, ret, + 0, ret << PAGE_SHIFT, + ib_dma_max_seg_size(device), sg, npages, + GFP_KERNEL); umem->sg_nents = umem->sg_head.nents; if (IS_ERR(sg)) { unpin_user_pages_dirty_lock(page_list, ret, 0); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index b0d0b522cc76..19104a675691 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1191,7 +1191,7 @@ static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr, if (!port) return -ENODEV; - return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev)); + return sysfs_emit(buf, "%s\n", dev_name(&port->ib_dev->dev)); } static DEVICE_ATTR_RO(ibdev); @@ -1203,7 +1203,7 @@ static ssize_t port_show(struct device *dev, struct device_attribute *attr, if (!port) return -ENODEV; - return sprintf(buf, "%d\n", port->port_num); + return sysfs_emit(buf, "%d\n", port->port_num); } static DEVICE_ATTR_RO(port); @@ -1222,7 +1222,7 @@ static char *umad_devnode(struct device *dev, umode_t *mode) static ssize_t abi_version_show(struct class *class, struct class_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); + return sysfs_emit(buf, "%d\n", IB_USER_MAD_ABI_VERSION); } static CLASS_ATTR_RO(abi_version); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 418d133a8fb0..98a5d36813ff 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -681,8 +681,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, return 0; ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata); - - if (ib_is_destroy_retryable(ret, why, uobject)) { + if (ret) { atomic_inc(&xrcd->usecnt); return ret; } @@ -690,7 +689,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, if (inode) xrcd_table_delete(dev, inode); - return ret; + return 0; } static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) @@ -710,29 +709,20 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; - ret = ib_check_mr_access(cmd.access_flags); - if (ret) - return ret; - uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); if (IS_ERR(uobj)) return PTR_ERR(uobj); + ret = ib_check_mr_access(ib_dev, cmd.access_flags); + if (ret) + goto err_free; + pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); if (!pd) { ret = -EINVAL; goto err_free; } - if (cmd.access_flags & IB_ACCESS_ON_DEMAND) { - if (!(pd->device->attrs.device_cap_flags & - IB_DEVICE_ON_DEMAND_PAGING)) { - pr_debug("ODP 
support not available\n"); - ret = -EINVAL; - goto err_put; - } - } - mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, cmd.access_flags, &attrs->driver_udata); @@ -774,23 +764,28 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_rereg_mr cmd; struct ib_uverbs_rereg_mr_resp resp; - struct ib_pd *pd = NULL; struct ib_mr *mr; - struct ib_pd *old_pd; int ret; struct ib_uobject *uobj; + struct ib_uobject *new_uobj; + struct ib_device *ib_dev; + struct ib_pd *orig_pd; + struct ib_pd *new_pd; + struct ib_mr *new_mr; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) return ret; - if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) + if (!cmd.flags) return -EINVAL; + if (cmd.flags & ~IB_MR_REREG_SUPPORTED) + return -EOPNOTSUPP; + if ((cmd.flags & IB_MR_REREG_TRANS) && - (!cmd.start || !cmd.hca_va || 0 >= cmd.length || - (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) - return -EINVAL; + (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) + return -EINVAL; uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs); if (IS_ERR(uobj)) @@ -804,36 +799,74 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) } if (cmd.flags & IB_MR_REREG_ACCESS) { - ret = ib_check_mr_access(cmd.access_flags); + ret = ib_check_mr_access(mr->device, cmd.access_flags); if (ret) goto put_uobjs; } + orig_pd = mr->pd; if (cmd.flags & IB_MR_REREG_PD) { - pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, - attrs); - if (!pd) { + new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, + attrs); + if (!new_pd) { ret = -EINVAL; goto put_uobjs; } + } else { + new_pd = mr->pd; } - old_pd = mr->pd; - ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start, - cmd.length, cmd.hca_va, - cmd.access_flags, pd, - &attrs->driver_udata); - if (ret) + /* + * The driver might create a new HW object as part of the rereg, we need + * to have a uobject ready to hold it. + */ + new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev); + if (IS_ERR(new_uobj)) { + ret = PTR_ERR(new_uobj); goto put_uobj_pd; - - if (cmd.flags & IB_MR_REREG_PD) { - atomic_inc(&pd->usecnt); - mr->pd = pd; - atomic_dec(&old_pd->usecnt); } - if (cmd.flags & IB_MR_REREG_TRANS) - mr->iova = cmd.hca_va; + new_mr = ib_dev->ops.rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length, + cmd.hca_va, cmd.access_flags, new_pd, + &attrs->driver_udata); + if (IS_ERR(new_mr)) { + ret = PTR_ERR(new_mr); + goto put_new_uobj; + } + if (new_mr) { + new_mr->device = new_pd->device; + new_mr->pd = new_pd; + new_mr->type = IB_MR_TYPE_USER; + new_mr->dm = NULL; + new_mr->sig_attrs = NULL; + new_mr->uobject = uobj; + atomic_inc(&new_pd->usecnt); + new_mr->iova = cmd.hca_va; + new_uobj->object = new_mr; + + rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR); + rdma_restrack_set_name(&new_mr->res, NULL); + rdma_restrack_add(&new_mr->res); + + /* + * The new uobj for the new HW object is put into the same spot + * in the IDR and the old uobj & HW object is deleted. 
+ */ + rdma_assign_uobject(uobj, new_uobj, attrs); + rdma_alloc_commit_uobject(new_uobj, attrs); + uobj_put_destroy(uobj); + new_uobj = NULL; + uobj = NULL; + mr = new_mr; + } else { + if (cmd.flags & IB_MR_REREG_PD) { + atomic_dec(&orig_pd->usecnt); + mr->pd = new_pd; + atomic_inc(&new_pd->usecnt); + } + if (cmd.flags & IB_MR_REREG_TRANS) + mr->iova = cmd.hca_va; + } memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; @@ -841,12 +874,16 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) ret = uverbs_response(attrs, &resp, sizeof(resp)); +put_new_uobj: + if (new_uobj) + uobj_alloc_abort(new_uobj, attrs); put_uobj_pd: if (cmd.flags & IB_MR_REREG_PD) - uobj_put_obj_read(pd); + uobj_put_obj_read(new_pd); put_uobjs: - uobj_put_write(uobj); + if (uobj) + uobj_put_write(uobj); return ret; } @@ -1401,8 +1438,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - obj); + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, + NULL); if (IS_ERR(qp)) { ret = PTR_ERR(qp); @@ -1906,8 +1943,7 @@ static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs) if (ret) return ret; - if (cmd.base.attr_mask & - ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) + if (cmd.base.attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; return modify_qp(attrs, &cmd); @@ -1929,10 +1965,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs) * Last bit is reserved for extending the attr_mask by * using another field. */ - BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31)); - - if (cmd.base.attr_mask & - ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) + if (cmd.base.attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) return -EOPNOTSUPP; ret = modify_qp(attrs, &cmd); @@ -3693,13 +3726,13 @@ const struct uapi_definition uverbs_def_write_intf[] = { ib_uverbs_create_ah, UAPI_DEF_WRITE_UDATA_IO( struct ib_uverbs_create_ah, - struct ib_uverbs_create_ah_resp), - UAPI_DEF_METHOD_NEEDS_FN(create_ah)), + struct ib_uverbs_create_ah_resp)), DECLARE_UVERBS_WRITE( IB_USER_VERBS_CMD_DESTROY_AH, ib_uverbs_destroy_ah, - UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah), - UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))), + UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah)), + UAPI_DEF_OBJ_NEEDS_FN(create_user_ah), + UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)), DECLARE_UVERBS_OBJECT( UVERBS_OBJECT_COMP_CHANNEL, @@ -3753,7 +3786,7 @@ const struct uapi_definition uverbs_def_write_intf[] = { IB_USER_VERBS_EX_CMD_MODIFY_CQ, ib_uverbs_ex_modify_cq, UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq), - UAPI_DEF_METHOD_NEEDS_FN(create_cq))), + UAPI_DEF_METHOD_NEEDS_FN(modify_cq))), DECLARE_UVERBS_OBJECT( UVERBS_OBJECT_DEVICE, @@ -3999,8 +4032,7 @@ const struct uapi_definition uverbs_def_write_intf[] = { DECLARE_UVERBS_WRITE( IB_USER_VERBS_CMD_CLOSE_XRCD, ib_uverbs_close_xrcd, - UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd), - UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)), + UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd)), DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP, ib_uverbs_open_qp, UAPI_DEF_WRITE_UDATA_IO( @@ -4010,8 +4042,9 @@ const struct uapi_definition uverbs_def_write_intf[] = { ib_uverbs_open_xrcd, UAPI_DEF_WRITE_UDATA_IO( struct ib_uverbs_open_xrcd, - struct ib_uverbs_open_xrcd_resp), - UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))), + struct ib_uverbs_open_xrcd_resp)), + UAPI_DEF_OBJ_NEEDS_FN(alloc_xrcd), + UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)), {}, }; diff --git 
a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 4bb7c642f80c..f173ecd102dc 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1046,7 +1046,7 @@ static ssize_t ibdev_show(struct device *device, struct device_attribute *attr, srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) - ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev)); + ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev)); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; @@ -1065,7 +1065,7 @@ static ssize_t abi_version_show(struct device *device, srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) - ret = sprintf(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); + ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 0658101fca00..13776a66e2e4 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -88,7 +88,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, return -EBUSY; ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; for (i = 0; i < table_size; i++) @@ -96,7 +96,7 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, kfree(rwq_ind_tbl); kfree(ind_tbl); - return ret; + return 0; } static int uverbs_free_xrcd(struct ib_uobject *uobject, @@ -108,9 +108,8 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, container_of(uobject, struct ib_uxrcd_object, uobject); int ret; - ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&uxrcd->refcnt)) + return -EBUSY; mutex_lock(&attrs->ufile->device->xrcd_tree_mutex); ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs); @@ -124,11 +123,9 @@ static int uverbs_free_pd(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_pd *pd = uobject->object; - int ret; - ret = ib_destroy_usecnt(&pd->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&pd->usecnt)) + return -EBUSY; return ib_dealloc_pd_user(pd, &attrs->driver_udata); } @@ -157,7 +154,7 @@ void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue) spin_unlock_irq(&event_queue->lock); } -static int +static void uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, enum rdma_remove_reason why) { @@ -166,7 +163,6 @@ uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, uobj); ib_uverbs_free_event_queue(&file->ev_queue); - return 0; } int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c index 61899eaf1f91..cc24cfdf7aee 100644 --- a/drivers/infiniband/core/uverbs_std_types_async_fd.c +++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c @@ -19,8 +19,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)( return 0; } -static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct ib_uverbs_async_event_file *event_file = container_of(uobj, 
struct ib_uverbs_async_event_file, uobj); @@ -30,7 +30,6 @@ static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, if (why == RDMA_REMOVE_DRIVER_REMOVE) ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL, NULL, NULL); - return 0; } int uverbs_async_event_release(struct inode *inode, struct file *filp) diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index b3c6c066b601..999da9c79866 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -42,9 +42,8 @@ static int uverbs_free_counters(struct ib_uobject *uobject, struct ib_counters *counters = uobject->object; int ret; - ret = ib_destroy_usecnt(&counters->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&counters->usecnt)) + return -EBUSY; ret = counters->device->ops.destroy_counters(counters); if (ret) diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 8dabd05988b2..370ad7c83f88 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -46,7 +46,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, int ret; ret = ib_destroy_cq_user(cq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; ib_uverbs_release_ucq( @@ -55,7 +55,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, ev_queue) : NULL, ucq); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 302f898c5833..9ec6971056fa 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -317,8 +317,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( struct ib_device *ib_dev; size_t user_entry_size; ssize_t num_entries; - size_t max_entries; - size_t num_bytes; + int max_entries; u32 flags; int ret; @@ -336,19 +335,16 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, user_entry_size); if (max_entries <= 0) - return -EINVAL; + return max_entries ?: -EINVAL; ucontext = ib_uverbs_get_ucontext(attrs); if (IS_ERR(ucontext)) return PTR_ERR(ucontext); ib_dev = ucontext->device; - if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes)) - return -EINVAL; - - entries = uverbs_zalloc(attrs, num_bytes); - if (!entries) - return -ENOMEM; + entries = uverbs_kcalloc(attrs, max_entries, sizeof(*entries)); + if (IS_ERR(entries)) + return PTR_ERR(entries); num_entries = rdma_query_gid_table(ib_dev, entries, max_entries); if (num_entries < 0) diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index d5a1de33c2c9..98c522cf86d6 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -39,11 +39,9 @@ static int uverbs_free_dm(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_dm *dm = uobject->object; - int ret; - ret = ib_destroy_usecnt(&dm->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&dm->usecnt)) + return -EBUSY; return dm->device->ops.dealloc_dm(dm, attrs); } diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index 459cf165b231..d42ed7ff223e 100644 --- 
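The QUERY_GID_TABLE hunk above folds the explicit check_mul_overflow() + uverbs_zalloc() pair into uverbs_kcalloc(). The underlying overflow-safe allocation, sketched with plain kernel allocators rather than the uverbs bundle allocator:

#include <linux/err.h>
#include <linux/overflow.h>
#include <linux/slab.h>

static void *alloc_entry_array(size_t nelem, size_t elem_size)
{
	size_t bytes;

	if (check_mul_overflow(nelem, elem_size, &bytes))
		return ERR_PTR(-EINVAL);	/* nelem * elem_size would overflow */
	return kzalloc(bytes, GFP_KERNEL) ?: ERR_PTR(-ENOMEM);
}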
a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -39,11 +39,9 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct ib_flow_action *action = uobject->object; - int ret; - ret = ib_destroy_usecnt(&action->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&action->usecnt)) + return -EBUSY; return action->device->ops.destroy_flow_action(action); } diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 9b22bb553e8b..dd4e76b26c74 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -33,6 +33,7 @@ #include "rdma_core.h" #include "uverbs.h" #include <rdma/uverbs_std_types.h> +#include "restrack.h" static int uverbs_free_mr(struct ib_uobject *uobject, enum rdma_remove_reason why, @@ -114,7 +115,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( if (!(attr.access_flags & IB_ZERO_BASED)) return -EINVAL; - ret = ib_check_mr_access(attr.access_flags); + ret = ib_check_mr_access(ib_dev, attr.access_flags); if (ret) return ret; @@ -134,6 +135,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( atomic_inc(&pd->usecnt); atomic_inc(&dm->usecnt); + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_set_name(&mr->res, NULL); + rdma_restrack_add(&mr->res); uobj->object = mr; uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE); diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c index 3bf8dcdfe7eb..c00cfb5ed387 100644 --- a/drivers/infiniband/core/uverbs_std_types_qp.c +++ b/drivers/infiniband/core/uverbs_std_types_qp.c @@ -32,14 +32,14 @@ static int uverbs_free_qp(struct ib_uobject *uobject, } ret = ib_destroy_qp_user(qp, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; if (uqp->uxrcd) atomic_dec(&uqp->uxrcd->refcnt); ib_uverbs_release_uevent(&uqp->uevent); - return ret; + return 0; } static int check_creation_flags(enum ib_qp_type qp_type, @@ -251,8 +251,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)( if (attr.qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - obj); + qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, obj, + NULL); if (IS_ERR(qp)) { ret = PTR_ERR(qp); diff --git a/drivers/infiniband/core/uverbs_std_types_srq.c b/drivers/infiniband/core/uverbs_std_types_srq.c index c0ecbba26bf4..e5513f828bdc 100644 --- a/drivers/infiniband/core/uverbs_std_types_srq.c +++ b/drivers/infiniband/core/uverbs_std_types_srq.c @@ -18,7 +18,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, int ret; ret = ib_destroy_srq_user(srq, &attrs->driver_udata); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; if (srq_type == IB_SRQT_XRC) { @@ -30,7 +30,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, } ib_uverbs_release_uevent(uevent); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c index f2e6a625724a..7ded8339346f 100644 --- a/drivers/infiniband/core/uverbs_std_types_wq.c +++ b/drivers/infiniband/core/uverbs_std_types_wq.c @@ -17,11 +17,11 @@ static int uverbs_free_wq(struct ib_uobject *uobject, int ret; ret = ib_destroy_wq_user(wq, &attrs->driver_udata); - if 
(ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; ib_uverbs_release_uevent(&uwq->uevent); - return ret; + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)( diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 5addc8fae3f3..62f5bcb712cf 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -79,10 +79,7 @@ static int uapi_create_write(struct uverbs_api *uapi, method_elm->is_ex = def->write.is_ex; method_elm->handler = def->func_write; - if (def->write.is_ex) - method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask & - BIT_ULL(def->write.command_num)); - else + if (!def->write.is_ex) method_elm->disabled = !(ibdev->uverbs_cmd_mask & BIT_ULL(def->write.command_num)); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 740f8454b6b4..9137a25bb521 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -244,7 +244,7 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); /* Protection domains */ /** - * ib_alloc_pd - Allocates an unused protection domain. + * __ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. * @flags: protection domain flags * @caller: caller's build-time module name @@ -516,7 +516,7 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); - if (!device->ops.create_ah) + if (!udata && !device->ops.create_ah) return ERR_PTR(-EOPNOTSUPP); ah = rdma_zalloc_drv_obj_gfp( @@ -533,7 +533,10 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, init_attr.flags = flags; init_attr.xmit_slave = xmit_slave; - ret = device->ops.create_ah(ah, &init_attr, udata); + if (udata) + ret = device->ops.create_user_ah(ah, &init_attr, udata); + else + ret = device->ops.create_ah(ah, &init_attr, NULL); if (ret) { kfree(ah); return ERR_PTR(ret); @@ -1188,17 +1191,19 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, } /** - * ib_create_qp - Creates a kernel QP associated with the specified protection + * ib_create_named_qp - Creates a kernel QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. * @qp_init_attr: A list of initial attributes required to create the * QP. If QP creation succeeds, then the attributes are updated to * the actual capabilities of the created QP. + * @caller: caller's build-time module name * * NOTE: for user qp use ib_create_qp_user with valid udata! */ -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr) +struct ib_qp *ib_create_named_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller) { struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device; struct ib_qp *qp; @@ -1223,7 +1228,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (qp_init_attr->cap.max_rdma_ctxs) rdma_rw_init_qp(device, qp_init_attr); - qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL); + qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL, caller); if (IS_ERR(qp)) return qp; @@ -1289,7 +1294,7 @@ err: return ERR_PTR(ret); } -EXPORT_SYMBOL(ib_create_qp); +EXPORT_SYMBOL(ib_create_named_qp); static const struct { int valid; @@ -1662,7 +1667,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp) qp->qp_type == IB_QPT_XRC_TGT); } -/** +/* * IB core internal function to perform QP attributes modification. 
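With ib_create_named_qp() exported above, the kernel-facing ib_create_qp() presumably becomes a thin wrapper that records the caller's module name, mirroring how __ib_alloc_pd() already takes a caller string. A sketch of what the ib_verbs.h side would look like, not quoted from the patch:

static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
					 struct ib_qp_init_attr *init_attr)
{
	/* KBUILD_MODNAME ties the QP to the owning module for restrack */
	return ib_create_named_qp(pd, init_attr, KBUILD_MODNAME);
}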
*/ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, @@ -1698,8 +1703,10 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, slave = rdma_lag_get_ah_roce_slave(qp->device, &attr->ah_attr, GFP_KERNEL); - if (IS_ERR(slave)) + if (IS_ERR(slave)) { + ret = PTR_ERR(slave); goto out_av; + } attr->xmit_slave = slave; } } diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index cf3db9628397..401bdc9e931e 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1271,10 +1271,12 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, } qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */ - if (init_attr->create_flags) + if (init_attr->create_flags) { ibdev_dbg(&rdev->ibdev, "QP create flags 0x%x not supported", init_attr->create_flags); + return -EOPNOTSUPP; + } /* Setup CQs */ if (init_attr->send_cq) { @@ -1657,8 +1659,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.max_wqe = entries; srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; - srq->qplib_srq.wqe_size = - bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); + /* 128 byte wqe size for SRQ . So use max sges */ + srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges); srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; @@ -1829,6 +1831,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, unsigned int flags; u8 nw_type; + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp->qplib_qp.modify_flags = 0; if (qp_attr_mask & IB_QP_STATE) { curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state); @@ -2078,6 +2083,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, goto out; } qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state); + qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state); qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 
1 : 0; qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access); qp_attr->pkey_index = qplib_qp->pkey_index; @@ -2827,6 +2833,9 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct bnxt_qplib_nq *nq = NULL; unsigned int nq_alloc_cnt; + if (attr->flags) + return -EOPNOTSUPP; + /* Validate CQ fields */ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) { ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded"); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 04621ba8fa76..fdb8c2478258 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -608,7 +608,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); + return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor); } static DEVICE_ATTR_RO(hw_rev); @@ -618,7 +618,7 @@ static ssize_t hca_type_show(struct device *device, struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); + return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc); } static DEVICE_ATTR_RO(hca_type); @@ -646,6 +646,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .create_cq = bnxt_re_create_cq, .create_qp = bnxt_re_create_qp, .create_srq = bnxt_re_create_srq, + .create_user_ah = bnxt_re_create_ah, .dealloc_driver = bnxt_re_dealloc_driver, .dealloc_pd = bnxt_re_dealloc_pd, .dealloc_ucontext = bnxt_re_dealloc_ucontext, @@ -701,35 +702,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->dev.parent = &rdev->en_dev->pdev->dev; ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; - /* User space */ - ibdev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */ - - rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ib_set_device_ops(ibdev, &bnxt_re_dev_ops); ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 64d44f51db4b..6316179583a6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -118,7 +118,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, * 128 WQEs needs to be reserved for the HW (8916). 
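The sprintf()/scnprintf()-to-sysfs_emit() conversions above all follow one shape: sysfs_emit() knows the buffer is a full sysfs page, so it cannot overrun it and warns when handed anything other than the start of that page. A minimal, self-contained example with a made-up attribute:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%u\n", 42);
}
static DEVICE_ATTR_RO(example);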
Prevent * reporting the max number */ - attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS; + attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ? 6 : sb->max_sge; attr->max_cq = le32_to_cpu(sb->max_cq); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 28349ed50885..44c2416588d4 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -1006,6 +1006,9 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, pr_debug("ib_dev %p entries %d\n", ibdev, entries); if (attr->flags) + return -EOPNOTSUPP; + + if (entries < 1 || entries > ibdev->attrs.max_cqe) return -EINVAL; if (vector >= rhp->rdev.lldi.nciq) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index a27899402f59..f85477f3b037 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -983,9 +983,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -int c4iw_dealloc_mw(struct ib_mw *mw); void c4iw_dealloc(struct uld_ctx *ctx); -int c4iw_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 42234df896fb..a2c71a1d93d5 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -365,22 +365,6 @@ static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, pbl_size, pbl_addr, skb, wr_waitp); } -static int allocate_window(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, - struct c4iw_wr_wait *wr_waitp) -{ - *stag = T4_STAG_UNSET; - return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, - 0UL, 0, 0, 0, 0, NULL, wr_waitp); -} - -static int deallocate_window(struct c4iw_rdev *rdev, u32 stag, - struct sk_buff *skb, - struct c4iw_wr_wait *wr_waitp) -{ - return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, - 0, skb, wr_waitp); -} - static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr, struct c4iw_wr_wait *wr_waitp) @@ -611,74 +595,6 @@ err_free_mhp: return ERR_PTR(err); } -int c4iw_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) -{ - struct c4iw_mw *mhp = to_c4iw_mw(ibmw); - struct c4iw_dev *rhp; - struct c4iw_pd *php; - u32 mmid; - u32 stag = 0; - int ret; - - if (ibmw->type != IB_MW_TYPE_1) - return -EINVAL; - - php = to_c4iw_pd(ibmw->pd); - rhp = php->rhp; - mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); - if (!mhp->wr_waitp) - return -ENOMEM; - - mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); - if (!mhp->dereg_skb) { - ret = -ENOMEM; - goto free_wr_wait; - } - - ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp); - if (ret) - goto free_skb; - - mhp->rhp = rhp; - mhp->attr.pdid = php->pdid; - mhp->attr.type = FW_RI_STAG_MW; - mhp->attr.stag = stag; - mmid = (stag) >> 8; - ibmw->rkey = stag; - if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { - ret = -ENOMEM; - goto dealloc_win; - } - pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); - return 0; - -dealloc_win: - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, - mhp->wr_waitp); -free_skb: - kfree_skb(mhp->dereg_skb); -free_wr_wait: - 
c4iw_put_wr_wait(mhp->wr_waitp); - return ret; -} - -int c4iw_dealloc_mw(struct ib_mw *mw) -{ - struct c4iw_dev *rhp; - struct c4iw_mw *mhp; - u32 mmid; - - mhp = to_c4iw_mw(mw); - rhp = mhp->rhp; - mmid = (mw->rkey) >> 8; - xa_erase_irq(&rhp->mrs, mmid); - deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, - mhp->wr_waitp); - kfree_skb(mhp->dereg_skb); - c4iw_put_wr_wait(mhp->wr_waitp); - return 0; -} - struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 8138c57a1e43..1f1f856f8715 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -322,8 +322,9 @@ static ssize_t hw_rev_show(struct device *dev, rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); pr_debug("dev 0x%p\n", dev); - return sprintf(buf, "%d\n", - CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); + return sysfs_emit( + buf, "%d\n", + CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); } static DEVICE_ATTR_RO(hw_rev); @@ -337,7 +338,7 @@ static ssize_t hca_type_show(struct device *dev, pr_debug("dev 0x%p\n", dev); lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.driver); + return sysfs_emit(buf, "%s\n", info.driver); } static DEVICE_ATTR_RO(hca_type); @@ -348,8 +349,8 @@ static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev); pr_debug("dev 0x%p\n", dev); - return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, - c4iw_dev->rdev.lldi.pdev->device); + return sysfs_emit(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, + c4iw_dev->rdev.lldi.pdev->device); } static DEVICE_ATTR_RO(board_id); @@ -456,13 +457,11 @@ static const struct ib_device_ops c4iw_dev_ops = { .alloc_hw_stats = c4iw_alloc_stats, .alloc_mr = c4iw_alloc_mr, - .alloc_mw = c4iw_alloc_mw, .alloc_pd = c4iw_allocate_pd, .alloc_ucontext = c4iw_alloc_ucontext, .create_cq = c4iw_create_cq, .create_qp = c4iw_create_qp, .create_srq = c4iw_create_srq, - .dealloc_mw = c4iw_dealloc_mw, .dealloc_pd = c4iw_deallocate_pd, .dealloc_ucontext = c4iw_dealloc_ucontext, .dereg_mr = c4iw_dereg_mr, @@ -533,28 +532,6 @@ void c4iw_register_device(struct work_struct *work) if (fastreg_support) dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.local_dma_lkey = 0; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); diff 
--git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index f20379e4e2ec..a7401398cb34 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2126,7 +2126,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, pr_debug("ib_pd %p\n", pd); - if (attrs->qp_type != IB_QPT_RC) + if (attrs->qp_type != IB_QPT_RC || attrs->create_flags) return ERR_PTR(-EOPNOTSUPP); php = to_c4iw_pd(pd); @@ -2374,6 +2374,9 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, pr_debug("ib_qp %p\n", ibqp); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* iwarp does not support the RTR state */ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) attr_mask &= ~IB_QP_STATE; @@ -2680,6 +2683,9 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, int ret; int wr_len; + if (attrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + pr_debug("%s ib_pd %p\n", __func__, pd); php = to_c4iw_pd(pd); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 6faed3a81e08..0f578734bddb 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -245,9 +245,9 @@ static const struct ib_device_ops efa_dev_ops = { .alloc_hw_stats = efa_alloc_hw_stats, .alloc_pd = efa_alloc_pd, .alloc_ucontext = efa_alloc_ucontext, - .create_ah = efa_create_ah, .create_cq = efa_create_cq, .create_qp = efa_create_qp, + .create_user_ah = efa_create_ah, .dealloc_pd = efa_dealloc_pd, .dealloc_ucontext = efa_dealloc_ucontext, .dereg_mr = efa_dereg_mr, @@ -308,27 +308,6 @@ static int efa_ib_device_add(struct efa_dev *dev) dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &pdev->dev; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - - dev->ibdev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); - ib_set_device_ops(&dev->ibdev, &efa_dev_ops); err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); @@ -405,19 +384,12 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev) return err; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_width)); if (err) { - dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err); + dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", err); return err; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); - if (err) { - dev_err(&pdev->dev, - "err_pci_set_consistent_dma_mask failed %d\n", - err); - return err; - } dma_set_max_seg_size(&pdev->dev, UINT_MAX); return 0; } diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 4e940fc50bba..479b604e533a 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -917,6 +917,9 @@ 
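The efa_main.c hunk above replaces the two legacy PCI calls with a single dma_set_mask_and_coherent(), which sets the streaming and coherent DMA masks together and fails if the platform cannot honor them. Minimal sketch of the modern idiom, with an illustrative helper name:

#include <linux/dma-mapping.h>
#include <linux/pci.h>

static int example_set_dma_mask(struct pci_dev *pdev, int dma_width)
{
	int err;

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_width));
	if (err)
		dev_err(&pdev->dev, "no usable DMA configuration: %d\n", err);
	return err;
}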
int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, enum ib_qp_state new_state; int err; + if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + if (udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen)) { ibdev_dbg(&dev->ibdev, @@ -1029,6 +1032,9 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, ibdev_dbg(ibdev, "create_cq entries %d\n", entries); + if (attr->flags) + return -EOPNOTSUPP; + if (entries < 1 || entries > dev->dev_attr.max_cq_depth) { ibdev_dbg(ibdev, "cq: requested entries[%u] non-positive or greater than max[%u]\n", diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 356518e17fa6..681bb4e918c9 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -339,6 +339,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) return -EINVAL; if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) return -EINVAL; + break; default: break; } diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 074ec71772d2..5650130e68d4 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -151,7 +151,7 @@ struct hfi1_port_attr { static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf) { - return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off"); + return sysfs_emit(buf, "%s\n", ppd->cc_prescan ? "on" : "off"); } static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf, @@ -296,7 +296,7 @@ static ssize_t sc2vl_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, sc2vl_kobj); struct hfi1_devdata *dd = ppd->dd; - return sprintf(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); + return sysfs_emit(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc)); } static const struct sysfs_ops hfi1_sc2vl_ops = { @@ -401,7 +401,7 @@ static ssize_t sl2sc_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, sl2sc_kobj); struct hfi1_ibport *ibp = &ppd->ibport_data; - return sprintf(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); + return sysfs_emit(buf, "%u\n", ibp->sl_to_sc[sattr->sl]); } static const struct sysfs_ops hfi1_sl2sc_ops = { @@ -475,7 +475,7 @@ static ssize_t vl2mtu_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct hfi1_pportdata, vl2mtu_kobj); struct hfi1_devdata *dd = ppd->dd; - return sprintf(buf, "%u\n", dd->vld[vlattr->vl].mtu); + return sysfs_emit(buf, "%u\n", dd->vld[vlattr->vl].mtu); } static const struct sysfs_ops hfi1_vl2mtu_ops = { @@ -500,7 +500,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct hfi1_ibdev *dev = rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); + return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev); } static DEVICE_ATTR_RO(hw_rev); @@ -510,13 +510,11 @@ static ssize_t board_id_show(struct device *device, struct hfi1_ibdev *dev = rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); - int ret; if (!dd->boardname) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); - return ret; + return -EINVAL; + + return sysfs_emit(buf, "%s\n", dd->boardname); } static DEVICE_ATTR_RO(board_id); @@ -528,7 +526,7 @@ static ssize_t boardversion_show(struct device *device, struct hfi1_devdata *dd = dd_from_dev(dev); /* The string printed here is 
already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); + return sysfs_emit(buf, "%s", dd->boardversion); } static DEVICE_ATTR_RO(boardversion); @@ -545,9 +543,9 @@ static ssize_t nctxts_show(struct device *device, * and a receive context, so returning the smaller of the two counts * give a more accurate picture of total contexts available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", - min(dd->num_user_contexts, - (u32)dd->sc_sizes[SC_USER].count)); + return sysfs_emit(buf, "%u\n", + min(dd->num_user_contexts, + (u32)dd->sc_sizes[SC_USER].count)); } static DEVICE_ATTR_RO(nctxts); @@ -559,7 +557,7 @@ static ssize_t nfreectxts_show(struct device *device, struct hfi1_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); + return sysfs_emit(buf, "%u\n", dd->freectxts); } static DEVICE_ATTR_RO(nfreectxts); @@ -570,7 +568,8 @@ static ssize_t serial_show(struct device *device, rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev); struct hfi1_devdata *dd = dd_from_dev(dev); - return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); + /* dd->serial is already newline terminated in chip.c */ + return sysfs_emit(buf, "%s", dd->serial); } static DEVICE_ATTR_RO(serial); @@ -598,9 +597,8 @@ static DEVICE_ATTR_WO(chip_reset); * Convert the reported temperature from an integer (reported in * units of 0.25C) to a floating point number. */ -#define temp2str(temp, buf, size, idx) \ - scnprintf((buf) + (idx), (size) - (idx), "%u.%02u ", \ - ((temp) >> 2), ((temp) & 0x3) * 25) +#define temp_d(t) ((t) >> 2) +#define temp_f(t) (((t)&0x3) * 25u) /* * Dump tempsense values, in decimal, to ease shell-scripts. @@ -615,19 +613,17 @@ static ssize_t tempsense_show(struct device *device, int ret; ret = hfi1_tempsense_rd(dd, &temp); - if (!ret) { - int idx = 0; - - idx += temp2str(temp.curr, buf, PAGE_SIZE, idx); - idx += temp2str(temp.lo_lim, buf, PAGE_SIZE, idx); - idx += temp2str(temp.hi_lim, buf, PAGE_SIZE, idx); - idx += temp2str(temp.crit_lim, buf, PAGE_SIZE, idx); - idx += scnprintf(buf + idx, PAGE_SIZE - idx, - "%u %u %u\n", temp.triggers & 0x1, - temp.triggers & 0x2, temp.triggers & 0x4); - ret = idx; - } - return ret; + if (ret) + return ret; + + return sysfs_emit(buf, "%u.%02u %u.%02u %u.%02u %u.%02u %u %u %u\n", + temp_d(temp.curr), temp_f(temp.curr), + temp_d(temp.lo_lim), temp_f(temp.lo_lim), + temp_d(temp.hi_lim), temp_f(temp.hi_lim), + temp_d(temp.crit_lim), temp_f(temp.crit_lim), + temp.triggers & 0x1, + temp.triggers & 0x2, + temp.triggers & 0x4); } static DEVICE_ATTR_RO(tempsense); @@ -817,7 +813,7 @@ static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf) if (vl < 0) return vl; - return snprintf(buf, PAGE_SIZE, "%d\n", vl); + return sysfs_emit(buf, "%d\n", vl); } static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO, diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 73d197e21730..92aa2a9b3b5a 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2826,6 +2826,7 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, default: break; } + break; default: break; } @@ -3005,6 +3006,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, default: break; } + break; default: break; } @@ -3221,6 +3223,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) req = wqe_to_tid_req(prev); if (req->ack_seg != req->total_segs) goto interlock; + 
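The hfi1 hunks above and below add the break statements that were missing before default: labels, closing unintended fallthroughs in nested switches. The shape of the fix, reduced to a self-contained illustration with made-up names:

#include <linux/types.h>

static bool needs_interlock(int opcode, bool prev_busy)
{
	switch (opcode) {
	case 1:				/* stand-in for the TID RDMA opcodes */
		if (prev_busy)
			return true;
		break;			/* the break the patch adds */
	default:
		break;
	}
	return false;
}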
break; default: break; } @@ -3239,9 +3242,11 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) req = wqe_to_tid_req(prev); if (req->ack_seg != req->total_segs) goto interlock; + break; default: break; } + break; default: break; } diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 75b06db60f7c..cc258edec331 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -31,14 +31,11 @@ */ #include <linux/platform_device.h> +#include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> #include "hns_roce_device.h" -#define HNS_ROCE_PORT_NUM_SHIFT 24 -#define HNS_ROCE_VLAN_SL_BIT_MASK 7 -#define HNS_ROCE_VLAN_SL_SHIFT 13 - static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) { u32 fl = ah_attr->grh.flow_label; @@ -58,47 +55,41 @@ static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); - const struct ib_gid_attr *gid_attr; - struct device *dev = hr_dev->dev; - struct hns_roce_ah *ah = to_hr_ah(ibah); struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - u16 vlan_id = 0xffff; - bool vlan_en = false; - int ret; - - gid_attr = ah_attr->grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); - if (ret) - return ret; - - /* Get mac address */ - memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); + struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); + struct hns_roce_ah *ah = to_hr_ah(ibah); + int ret = 0; - if (vlan_id < VLAN_N_VID) { - vlan_en = true; - vlan_id |= (rdma_ah_get_sl(ah_attr) & - HNS_ROCE_VLAN_SL_BIT_MASK) << - HNS_ROCE_VLAN_SL_SHIFT; - } + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && udata) + return -EOPNOTSUPP; ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan_id = vlan_id; - ah->av.vlan_en = vlan_en; - dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, - ah->av.vlan_id); if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; - memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); - ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.hop_limit = grh->hop_limit; ah->av.flowlabel = grh->flow_label; ah->av.udp_sport = get_ah_udp_sport(ah_attr); + ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.tclass = get_tclass(grh); - return 0; + memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); + memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); + + /* HIP08 needs to record vlan info in Address Vector */ + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { + ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, + &ah->av.vlan_id, NULL); + if (ret) + return ret; + + ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID; + } + + return ret; } int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index a6b23dec1adc..4bcaaa0524b1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -159,76 +159,96 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap) void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf) { - struct device *dev = hr_dev->dev; - u32 size = buf->size; - int i; + struct hns_roce_buf_list *trunks; + 
u32 i; - if (size == 0) + if (!buf) return; - buf->size = 0; + trunks = buf->trunk_list; + if (trunks) { + buf->trunk_list = NULL; + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift, + trunks[i].buf, trunks[i].map); - if (hns_roce_buf_is_direct(buf)) { - dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); - } else { - for (i = 0; i < buf->npages; ++i) - if (buf->page_list[i].buf) - dma_free_coherent(dev, 1 << buf->page_shift, - buf->page_list[i].buf, - buf->page_list[i].map); - kfree(buf->page_list); - buf->page_list = NULL; + kfree(trunks); } + + kfree(buf); } -int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift) +/* + * Allocate the dma buffer for storing ROCEE table entries + * + * @size: required size + * @page_shift: the unit size in a continuous dma address range + * @flags: HNS_ROCE_BUF_ flags to control the allocation flow. + */ +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags) { - struct hns_roce_buf_list *buf_list; - struct device *dev = hr_dev->dev; - u32 page_size; - int i; + u32 trunk_size, page_size, alloced_size; + struct hns_roce_buf_list *trunks; + struct hns_roce_buf *buf; + gfp_t gfp_flags; + u32 ntrunk, i; /* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */ - buf->page_shift = max_t(int, HNS_HW_PAGE_SHIFT, page_shift); + if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT)) + return ERR_PTR(-EINVAL); + + gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? GFP_ATOMIC : GFP_KERNEL; + buf = kzalloc(sizeof(*buf), gfp_flags); + if (!buf) + return ERR_PTR(-ENOMEM); + buf->page_shift = page_shift; page_size = 1 << buf->page_shift; - buf->npages = DIV_ROUND_UP(size, page_size); - - /* required size is not bigger than one trunk size */ - if (size <= max_direct) { - buf->page_list = NULL; - buf->direct.buf = dma_alloc_coherent(dev, size, - &buf->direct.map, - GFP_KERNEL); - if (!buf->direct.buf) - return -ENOMEM; + + /* Calc the trunk size and num by required size and page_shift */ + if (flags & HNS_ROCE_BUF_DIRECT) { + buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE)); + ntrunk = 1; } else { - buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL); - if (!buf_list) - return -ENOMEM; - - for (i = 0; i < buf->npages; i++) { - buf_list[i].buf = dma_alloc_coherent(dev, page_size, - &buf_list[i].map, - GFP_KERNEL); - if (!buf_list[i].buf) - break; - } + buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE)); + ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift); + } - if (i != buf->npages && i > 0) { - while (i-- > 0) - dma_free_coherent(dev, page_size, - buf_list[i].buf, - buf_list[i].map); - kfree(buf_list); - return -ENOMEM; - } - buf->page_list = buf_list; + trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags); + if (!trunks) { + kfree(buf); + return ERR_PTR(-ENOMEM); } - buf->size = size; - return 0; + trunk_size = 1 << buf->trunk_shift; + alloced_size = 0; + for (i = 0; i < ntrunk; i++) { + trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size, + &trunks[i].map, gfp_flags); + if (!trunks[i].buf) + break; + + alloced_size += trunk_size; + } + + buf->ntrunks = i; + + /* In nofail mode, it's only failed when the alloced size is 0 */ + if ((flags & HNS_ROCE_BUF_NOFAIL) ? 
i == 0 : i != ntrunk) { + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, trunk_size, + trunks[i].buf, trunks[i].map); + + kfree(trunks); + kfree(buf); + return ERR_PTR(-ENOMEM); + } + + buf->npages = DIV_ROUND_UP(alloced_size, page_size); + buf->trunk_list = trunks; + + return buf; } int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, @@ -240,7 +260,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, end = start + buf_cnt; if (end > buf->npages) { dev_err(hr_dev->dev, - "Failed to check kmem bufs, end %d + %d total %d!\n", + "failed to check kmem bufs, end %d + %d total %u!\n", start, buf_cnt, buf->npages); return -EINVAL; } @@ -262,7 +282,7 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, u64 addr; if (page_shift < HNS_HW_PAGE_SHIFT) { - dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n", + dev_err(hr_dev->dev, "failed to check umem page shift %u!\n", page_shift); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 455d533dd7c4..339e3fd98b0b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -36,9 +36,9 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" -#define CMD_POLL_TOKEN 0xffff -#define CMD_MAX_NUM 32 -#define CMD_TOKEN_MASK 0x1f +#define CMD_POLL_TOKEN 0xffff +#define CMD_MAX_NUM 32 +#define CMD_TOKEN_MASK 0x1f static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, @@ -60,7 +60,7 @@ static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { struct device *dev = hr_dev->dev; int ret; @@ -78,7 +78,7 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned long timeout) + u8 op_modifier, u16 op, unsigned int timeout) { int ret; @@ -93,8 +93,8 @@ static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, u64 out_param) { - struct hns_roce_cmd_context - *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask]; + struct hns_roce_cmd_context *context = + &hr_dev->cmd.context[token % hr_dev->cmd.max_cmds]; if (token != context->token) return; @@ -108,7 +108,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { struct hns_roce_cmdq *cmd = &hr_dev->cmd; struct hns_roce_cmd_context *context; @@ -159,13 +159,13 @@ out: static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned long timeout) + u8 op_modifier, u16 op, unsigned int timeout) { int ret; down(&hr_dev->cmd.event_sem); - ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, timeout); + ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, timeout); 
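hns_roce_buf_alloc() above sizes its DMA trunks in two modes: with HNS_ROCE_BUF_DIRECT a single trunk covers the whole page-aligned size, otherwise each trunk is one buffer page and the trunk count is rounded up. The arithmetic, pulled out into a standalone sketch that mirrors the hunk:

#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/mm.h>

static void calc_trunks(u32 size, u32 page_shift, bool direct,
			u32 *trunk_shift, u32 *ntrunks)
{
	if (direct) {
		*trunk_shift = ilog2(ALIGN(size, PAGE_SIZE));
		*ntrunks = 1;
	} else {
		*trunk_shift = ilog2(ALIGN(1U << page_shift, PAGE_SIZE));
		*ntrunks = DIV_ROUND_UP(size, 1U << *trunk_shift);
	}
}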
up(&hr_dev->cmd.event_sem); return ret; @@ -173,7 +173,7 @@ static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout) + unsigned int timeout) { int ret; @@ -231,9 +231,8 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; int i; - hr_cmd->context = kmalloc_array(hr_cmd->max_cmds, - sizeof(*hr_cmd->context), - GFP_KERNEL); + hr_cmd->context = + kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL); if (!hr_cmd->context) return -ENOMEM; @@ -262,8 +261,8 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) hr_cmd->use_events = 0; } -struct hns_roce_cmd_mailbox - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) +struct hns_roce_cmd_mailbox * +hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) { struct hns_roce_cmd_mailbox *mailbox; @@ -271,8 +270,8 @@ struct hns_roce_cmd_mailbox if (!mailbox) return ERR_PTR(-ENOMEM); - mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, - &mailbox->dma); + mailbox->buf = + dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, &mailbox->dma); if (!mailbox->buf) { kfree(mailbox); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 1915bacaded0..8025e7f657fa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -141,10 +141,10 @@ enum { int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned long timeout); + unsigned int timeout); -struct hns_roce_cmd_mailbox - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); +struct hns_roce_cmd_mailbox * +hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index f5669ff8cfeb..5afee04fb02c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -38,21 +38,33 @@ #define roce_raw_write(value, addr) \ __raw_writel((__force u32)cpu_to_le32(value), (addr)) -#define roce_get_field(origin, mask, shift) \ - (((le32_to_cpu(origin)) & (mask)) >> (shift)) +#define roce_get_field(origin, mask, shift) \ + ((le32_to_cpu(origin) & (mask)) >> (u32)(shift)) #define roce_get_bit(origin, shift) \ roce_get_field((origin), (1ul << (shift)), (shift)) -#define roce_set_field(origin, mask, shift, val) \ - do { \ - (origin) &= ~cpu_to_le32(mask); \ - (origin) |= cpu_to_le32(((u32)(val) << (shift)) & (mask)); \ +#define roce_set_field(origin, mask, shift, val) \ + do { \ + (origin) &= ~cpu_to_le32(mask); \ + (origin) |= cpu_to_le32(((u32)(val) << (u32)(shift)) & (mask)); \ } while (0) -#define roce_set_bit(origin, shift, val) \ +#define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val)) +#define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l + +#define _hr_reg_enable(ptr, field_type, field_h, field_l) \ + ({ \ + const field_type *_ptr = ptr; \ + *((__le32 *)_ptr + (field_h) / 32) |= \ + cpu_to_le32(BIT((field_l) % 32)) + \ + BUILD_BUG_ON_ZERO((field_h) != (field_l)); \ + }) + +#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) + #define 
ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 809b22aa5056..8533fc2d8df2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -36,43 +36,42 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" -#include <rdma/hns-abi.h> #include "hns_roce_common.h" static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_cq_table *cq_table; - struct ib_device *ibdev = &hr_dev->ib_dev; u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle; int ret; ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts), &dma_handle); - if (ret < 1) { - ibdev_err(ibdev, "Failed to find CQ mtr\n"); + if (!ret) { + ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret); return -EINVAL; } cq_table = &hr_dev->cq_table; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ bitmap, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret); return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - ibdev_err(ibdev, "Failed to get CQ(0x%lx) context, err %d\n", + ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n", hr_cq->cqn, ret); goto err_out; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - ibdev_err(ibdev, "Failed to xa_store CQ\n"); + ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret); goto err_put; } @@ -91,7 +90,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { ibdev_err(ibdev, - "Failed to send create cmd for CQ(0x%lx), err %d\n", + "failed to send create cmd for CQ(0x%lx), ret = %d.\n", hr_cq->cqn, ret); goto err_xa; } @@ -147,7 +146,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int err; + int ret; buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; @@ -155,13 +154,13 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, buf_attr.region_count = 1; buf_attr.fixed_page = true; - err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, + ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); - if (err) - ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err); + if (ret) + ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret); - return err; + return ret; } static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) @@ -251,14 +250,17 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, u32 cq_entries = attr->cqe; int ret; + if (attr->flags) + return -EOPNOTSUPP; + if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n", + ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n", cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } if (vector >= hr_dev->caps.num_comp_vectors) { - ibdev_err(ibdev, "Failed to check CQ vector=%d max=%d\n", + 
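The FIELD_LOC()/hr_reg_enable() helpers added to hns_roce_common.h above address a bit by its absolute position inside a hardware context built from __le32 words. A usage sketch with a made-up context layout:

struct example_context {
	__le32 data[4];
};

/* single-bit field at absolute bit 65, i.e. bit 1 of data[2] */
#define EXAMPLE_CTX_FIELD_EN FIELD_LOC(struct example_context, 65, 65)

static void example_ctx_enable(struct example_context *ctx)
{
	/* ORs in BIT(65 % 32) at word 65 / 32; the BUILD_BUG_ON_ZERO()
	 * in _hr_reg_enable() rejects multi-bit fields at compile time. */
	hr_reg_enable(ctx, EXAMPLE_CTX_FIELD_EN);
}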
ibdev_err(ibdev, "failed to check CQ vector = %d, max = %d.\n", vector, hr_dev->caps.num_comp_vectors); return -EINVAL; } @@ -274,9 +276,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, if (udata) { ret = ib_copy_from_udata(&ucmd, udata, - min(sizeof(ucmd), udata->inlen)); + min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", + ibdev_err(ibdev, "failed to copy CQ udata, ret = %d.\n", ret); return ret; } @@ -286,19 +288,20 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret); return ret; } ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ db, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret); goto err_cq_buf; } ret = alloc_cqc(hr_dev, hr_cq); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ context, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc CQ context, ret = %d.\n", ret); goto err_cq_db; } @@ -313,7 +316,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, if (udata) { resp.cqn = hr_cq->cqn; - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); if (ret) goto err_cqc; } diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index bff6abdccfb0..5cb7376ce978 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -95,8 +95,8 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir( static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir, struct hns_roce_db *db, int order) { - int o; - int i; + unsigned long o; + unsigned long i; for (o = order; o <= 1; ++o) { i = find_first_bit(pgdir->bits[o], HNS_ROCE_DB_PER_PAGE >> o); @@ -154,8 +154,8 @@ out: void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db) { - int o; - int i; + unsigned long o; + unsigned long i; mutex_lock(&hr_dev->pgdir_mutex); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6d2acff69f98..55d538625e36 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_DEVICE_H #include <rdma/ib_verbs.h> +#include <rdma/hns-abi.h> #define DRV_NAME "hns_roce" @@ -117,6 +118,8 @@ #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 +#define HNS_ROCE_QP_BANK_NUM 8 + /* The chip implementation of the consumer index is calculated * according to twice the actual EQ depth */ @@ -129,15 +132,6 @@ enum { SERV_TYPE_UD, }; -enum { - HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0), - HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), -}; - -enum hns_roce_cq_flags { - HNS_ROCE_CQ_FLAG_RECORD_DB = BIT(0), -}; - enum hns_roce_qp_state { HNS_ROCE_QP_STATE_RST, HNS_ROCE_QP_STATE_INIT, @@ -166,7 +160,6 @@ enum hns_roce_event { /* 0x10 and 0x11 is unused in currently application case */ HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12, HNS_ROCE_EVENT_TYPE_MB = 0x13, - HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14, HNS_ROCE_EVENT_TYPE_FLR = 0x15, }; @@ -221,6 +214,8 @@ enum { HNS_ROCE_CAP_FLAG_FRMR = BIT(8), HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), + HNS_ROCE_CAP_FLAG_SDI_MODE = 
BIT(14), + HNS_ROCE_CAP_FLAG_STASH = BIT(17), }; #define HNS_ROCE_DB_TYPE_COUNT 2 @@ -265,9 +260,6 @@ enum { #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) -/* The minimum page count for hardware access page directly. */ -#define HNS_HW_DIRECT_PAGE_COUNT 2 - struct hns_roce_uar { u64 pfn; unsigned long index; @@ -318,7 +310,7 @@ struct hns_roce_hem_table { }; struct hns_roce_buf_region { - int offset; /* page offset */ + u32 offset; /* page offset */ u32 count; /* page count */ int hopnum; /* addressing hop num */ }; @@ -338,10 +330,10 @@ struct hns_roce_buf_attr { size_t size; /* region size */ int hopnum; /* multi-hop addressing hop num */ } region[HNS_ROCE_MAX_BT_REGION]; - int region_count; /* valid region count */ + unsigned int region_count; /* valid region count */ unsigned int page_shift; /* buffer page shift */ bool fixed_page; /* decide page shift is fixed-size or maximum size */ - int user_access; /* umem access flag */ + unsigned int user_access; /* umem access flag */ bool mtt_only; /* only alloc buffer-required MTT memory */ }; @@ -352,7 +344,7 @@ struct hns_roce_hem_cfg { unsigned int buf_pg_shift; /* buffer page shift */ unsigned int buf_pg_count; /* buffer page count */ struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; - int region_count; + unsigned int region_count; }; /* memory translate region */ @@ -400,7 +392,7 @@ struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; u32 wqe_cnt; /* WQE num */ - int max_gs; + u32 max_gs; int offset; int wqe_shift; /* WQE size */ u32 head; @@ -419,11 +411,26 @@ struct hns_roce_buf_list { dma_addr_t map; }; +/* + * %HNS_ROCE_BUF_DIRECT indicates that the all memory must be in a continuous + * dma address range. + * + * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep. + * + * %HNS_ROCE_BUF_NOFAIL allocation only failed when allocated size is zero, even + * the allocated size is smaller than the required size. + */ +enum { + HNS_ROCE_BUF_DIRECT = BIT(0), + HNS_ROCE_BUF_NOSLEEP = BIT(1), + HNS_ROCE_BUF_NOFAIL = BIT(2), +}; + struct hns_roce_buf { - struct hns_roce_buf_list direct; - struct hns_roce_buf_list *page_list; + struct hns_roce_buf_list *trunk_list; + u32 ntrunks; u32 npages; - u32 size; + unsigned int trunk_shift; unsigned int page_shift; }; @@ -451,8 +458,8 @@ struct hns_roce_db { } u; dma_addr_t dma; void *virt_addr; - int index; - int order; + unsigned long index; + unsigned long order; }; struct hns_roce_cq { @@ -500,8 +507,8 @@ struct hns_roce_srq { u64 *wrid; struct hns_roce_idx_que idx_que; spinlock_t lock; - int head; - int tail; + u16 head; + u16 tail; struct mutex mutex; void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; @@ -510,13 +517,22 @@ struct hns_roce_uar_table { struct hns_roce_bitmap bitmap; }; +struct hns_roce_bank { + struct ida ida; + u32 inuse; /* Number of IDs allocated */ + u32 min; /* Lowest ID to allocate. */ + u32 max; /* Highest ID to allocate. */ + u32 next; /* Next ID to allocate. 
*/ +}; + struct hns_roce_qp_table { - struct hns_roce_bitmap bitmap; struct hns_roce_hem_table qp_table; struct hns_roce_hem_table irrl_table; struct hns_roce_hem_table trrl_table; struct hns_roce_hem_table sccc_table; struct mutex scc_mutex; + struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM]; + spinlock_t bank_lock; }; struct hns_roce_cq_table { @@ -547,7 +563,7 @@ struct hns_roce_av { u8 dgid[HNS_ROCE_GID_SIZE]; u8 mac[ETH_ALEN]; u16 vlan_id; - bool vlan_en; + u8 vlan_en; }; struct hns_roce_ah { @@ -619,10 +635,9 @@ enum { struct hns_roce_work { struct hns_roce_dev *hr_dev; struct work_struct work; - u32 qpn; - u32 cqn; int event_type; int sub_type; + u32 queue_num; }; struct hns_roce_qp { @@ -690,28 +705,10 @@ struct hns_roce_aeqe { __le32 asyn; union { struct { - __le32 qp; - u32 rsv0; - u32 rsv1; - } qp_event; - - struct { - __le32 srq; - u32 rsv0; - u32 rsv1; - } srq_event; - - struct { - __le32 cq; - u32 rsv0; - u32 rsv1; - } cq_event; - - struct { - __le32 ceqe; + __le32 num; u32 rsv0; u32 rsv1; - } ce_event; + } queue_event; struct { __le64 out_param; @@ -730,11 +727,11 @@ struct hns_roce_eq { int type_flag; /* Aeq:1 ceq:0 */ int eqn; u32 entries; - int log_entries; + u32 log_entries; int eqe_size; int irq; int log_page_size; - int cons_index; + u32 cons_index; struct hns_roce_buf_list *buf_list; int over_ignore; int coalesce; @@ -742,7 +739,7 @@ struct hns_roce_eq { int hop_num; struct hns_roce_mtr mtr; u16 eq_max_cnt; - int eq_period; + u32 eq_period; int shift; int event_type; int sub_type; @@ -765,8 +762,8 @@ struct hns_roce_caps { u32 max_sq_inline; u32 max_rq_sg; u32 max_extend_sg; - int num_qps; - int reserved_qps; + u32 num_qps; + u32 reserved_qps; int num_qpc_timer; int num_cqc_timer; int num_srqs; @@ -778,7 +775,7 @@ struct hns_roce_caps { u32 max_srq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; - int num_cqs; + u32 num_cqs; u32 max_cqes; u32 min_cqes; u32 min_wqes; @@ -787,7 +784,7 @@ struct hns_roce_caps { int num_aeq_vectors; int num_comp_vectors; int num_other_vectors; - int num_mtpts; + u32 num_mtpts; u32 num_mtt_segs; u32 num_cqe_segs; u32 num_srqwqe_segs; @@ -825,6 +822,7 @@ struct hns_roce_caps { u32 cqc_timer_bt_num; u32 mpt_bt_num; u32 sccc_bt_num; + u32 gmv_bt_num; u32 qpc_ba_pg_sz; u32 qpc_buf_pg_sz; u32 qpc_hop_num; @@ -864,6 +862,11 @@ struct hns_roce_caps { u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; + u32 gmv_entry_num; + u32 gmv_entry_sz; + u32 gmv_ba_pg_sz; + u32 gmv_buf_pg_sz; + u32 gmv_hop_num; u32 sl_num; u32 tsq_buf_pg_sz; u32 tpq_buf_pg_sz; @@ -898,7 +901,7 @@ struct hns_roce_hw { int (*post_mbox)(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event); - int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); + int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned int timeout); int (*rst_prc_mbox)(struct hns_roce_dev *hr_dev); int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr); @@ -999,6 +1002,10 @@ struct hns_roce_dev { struct hns_roce_eq_table eq_table; struct hns_roce_hem_table qpc_timer_table; struct hns_roce_hem_table cqc_timer_table; + /* GMV is the memory area that the driver allocates for the hardware + * to store SGID, SMAC and VLAN information. 
+ */ + struct hns_roce_hem_table gmv_table; int cmd_mod; int loop_idc; @@ -1069,29 +1076,19 @@ static inline struct hns_roce_qp return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); } -static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf) +static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, + unsigned int offset) { - if (buf->page_list) - return false; - - return true; + return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) + + (offset & ((1 << buf->trunk_shift) - 1)); } -static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) +static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) { - if (hns_roce_buf_is_direct(buf)) - return (char *)(buf->direct.buf) + (offset & (buf->size - 1)); - - return (char *)(buf->page_list[offset >> buf->page_shift].buf) + - (offset & ((1 << buf->page_shift) - 1)); -} + unsigned int offset = idx << buf->page_shift; -static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) -{ - if (hns_roce_buf_is_direct(buf)) - return buf->direct.map + ((dma_addr_t)idx << buf->page_shift); - else - return buf->page_list[idx].map; + return buf->trunk_list[offset >> buf->trunk_shift].map + + (offset & ((1 << buf->trunk_shift) - 1)); } #define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT) @@ -1132,6 +1129,14 @@ static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift) return ilog2(to_hr_hem_entries_count(count, buf_shift)); } +#define DSCP_SHIFT 2 + +static inline u8 get_tclass(const struct ib_global_route *grh) +{ + return grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP ? + grh->traffic_class >> DSCP_SHIFT : grh->traffic_class; +} + int hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); @@ -1155,7 +1160,7 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr); int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int page_cnt); + dma_addr_t *pages, unsigned int page_cnt); int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); @@ -1198,9 +1203,10 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata); +struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, @@ -1215,8 +1221,8 @@ int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int hns_roce_dealloc_mw(struct ib_mw *ibmw); void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); -int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift); +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags); int 
hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct hns_roce_buf *buf); @@ -1238,10 +1244,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); -void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n); -void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n); -void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n); -bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, +void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n); +void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n); +void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n); +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, struct ib_cq *ib_cq); enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, @@ -1271,7 +1277,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); -int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 7487cf3d2c37..edc9d6b98d95 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -75,6 +75,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) case HEM_TYPE_CQC_TIMER: hop_num = hr_dev->caps.cqc_timer_hop_num; break; + case HEM_TYPE_GMV: + hop_num = hr_dev->caps.gmv_hop_num; + break; default: return false; } @@ -183,8 +186,16 @@ static int get_hem_table_config(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = hr_dev->caps.srqc_bt_num; mhop->hop_num = hr_dev->caps.srqc_hop_num; break; + case HEM_TYPE_GMV: + mhop->buf_chunk_size = 1 << (hr_dev->caps.gmv_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.gmv_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = hr_dev->caps.gmv_bt_num; + mhop->hop_num = hr_dev->caps.gmv_hop_num; + break; default: - dev_err(dev, "Table %d not support multi-hop addressing!\n", + dev_err(dev, "table %u not support multi-hop addressing!\n", type); return -EINVAL; } @@ -198,9 +209,9 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, { struct device *dev = hr_dev->dev; u32 chunk_ba_num; + u32 chunk_size; u32 table_idx; u32 bt_num; - u32 chunk_size; if (get_hem_table_config(hr_dev, mhop, table->type)) return -EINVAL; @@ -232,8 +243,8 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, mhop->l0_idx = table_idx; break; default: - dev_err(dev, "Table %d not support hop_num = %d!\n", - table->type, mhop->hop_num); + dev_err(dev, "table %u not support hop_num = %u!\n", + table->type, mhop->hop_num); return -EINVAL; } if (mhop->l0_idx >= mhop->ba_l0_num) @@ -332,15 +343,15 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, { spinlock_t *lock = &hr_dev->bt_cmd_lock; struct device *dev = 
hr_dev->dev; - long end; - unsigned long flags; struct hns_roce_hem_iter iter; void __iomem *bt_cmd; __le32 bt_cmd_val[2]; __le32 bt_cmd_h = 0; + unsigned long flags; __le32 bt_cmd_l; - u64 bt_ba; int ret = 0; + u64 bt_ba; + long end; /* Find the HEM(Hardware Entry Memory) entry */ unsigned long i = (obj & (table->num_obj - 1)) / @@ -438,13 +449,13 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev, index->buf = l0_idx; break; default: - ibdev_err(ibdev, "Table %d not support mhop.hop_num = %d!\n", + ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n", table->type, mhop->hop_num); return -EINVAL; } if (unlikely(index->buf >= table->num_hem)) { - ibdev_err(ibdev, "Table %d exceed hem limt idx %llu,max %lu!\n", + ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n", table->type, index->buf, table->num_hem); return -EINVAL; } @@ -640,8 +651,8 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { struct device *dev = hr_dev->dev; - int ret = 0; unsigned long i; + int ret = 0; if (hns_roce_check_whether_mhop(hr_dev, table->type)) return hns_roce_table_mhop_get(hr_dev, table, obj); @@ -714,15 +725,15 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, step_idx = hop_num; if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx)) - ibdev_warn(ibdev, "Clear hop%d HEM failed.\n", hop_num); + ibdev_warn(ibdev, "failed to clear hop%u HEM.\n", hop_num); if (index->inited & HEM_INDEX_L1) if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) - ibdev_warn(ibdev, "Clear HEM step 1 failed.\n"); + ibdev_warn(ibdev, "failed to clear HEM step 1.\n"); if (index->inited & HEM_INDEX_L0) if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) - ibdev_warn(ibdev, "Clear HEM step 0 failed.\n"); + ibdev_warn(ibdev, "failed to clear HEM step 0.\n"); } } @@ -789,14 +800,14 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem_chunk *chunk; struct hns_roce_hem_mhop mhop; struct hns_roce_hem *hem; - void *addr = NULL; unsigned long mhop_obj = obj; unsigned long obj_per_chunk; unsigned long idx_offset; int offset, dma_offset; + void *addr = NULL; + u32 hem_idx = 0; int length; int i, j; - u32 hem_idx = 0; if (!table->lowmem) return NULL; @@ -876,7 +887,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, unsigned long buf_chunk_size; unsigned long bt_chunk_size; unsigned long bt_chunk_num; - unsigned long num_bt_l0 = 0; + unsigned long num_bt_l0; u32 hop_num; if (get_hem_table_config(hr_dev, &mhop, type)) @@ -966,8 +977,8 @@ static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev, { struct hns_roce_hem_mhop mhop; u32 buf_chunk_size; - int i; u64 obj; + int i; if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop)) return; @@ -1017,7 +1028,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { - if (hr_dev->caps.srqc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); @@ -1027,12 +1038,16 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) if (hr_dev->caps.cqc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); - if (hr_dev->caps.sccc_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.trrl_table); + + if 
(hr_dev->caps.gmv_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->gmv_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); @@ -1234,7 +1249,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, } if (offset < r->offset) { - dev_err(hr_dev->dev, "invalid offset %d,min %d!\n", + dev_err(hr_dev->dev, "invalid offset %d, min %u!\n", offset, r->offset); return -EINVAL; } @@ -1298,8 +1313,8 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, const struct hns_roce_buf_region *regions, int region_cnt) { - struct roce_hem_item *hem, *temp_hem, *root_hem; struct list_head temp_list[HNS_ROCE_MAX_BT_REGION]; + struct roce_hem_item *hem, *temp_hem, *root_hem; const struct hns_roce_buf_region *r; struct list_head temp_root; struct list_head temp_btm; @@ -1404,8 +1419,8 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; - int ret; int unit; + int ret; int i; if (region_cnt > HNS_ROCE_MAX_BT_REGION) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index b34c940077bb..13fdeb3274e7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -47,6 +47,7 @@ enum { HEM_TYPE_SCCC, HEM_TYPE_QPC_TIMER, HEM_TYPE_CQC_TIMER, + HEM_TYPE_GMV, /* UNMAP HEM */ HEM_TYPE_MTT, @@ -174,4 +175,4 @@ static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter) return sg_dma_address(&iter->chunk->mem[iter->page_idx]); } -#endif /*_HNS_ROCE_HEM_H*/ +#endif /* _HNS_ROCE_HEM_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5f4d8a32ed6d..f68585ff8e8a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -239,7 +239,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, break; } - /*Ctrl field, ctrl set type: sig, solic, imm, fence */ + /* Ctrl field, ctrl set type: sig, solic, imm, fence */ /* SO wait for conforming application scenarios */ ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ? cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | @@ -288,7 +288,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ret = -EINVAL; *bad_wr = wr; dev_err(dev, "inline len(1-%d)=%d, illegal", - ctrl->msg_length, + le32_to_cpu(ctrl->msg_length), hr_dev->caps.max_sq_inline); goto out; } @@ -300,7 +300,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, } ctrl->flag |= cpu_to_le32(HNS_ROCE_WQE_INLINE); } else { - /*sqe num is two */ + /* sqe num is two */ for (i = 0; i < wr->num_sge; i++) set_data_seg(dseg + i, wr->sg_list + i); @@ -353,8 +353,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, unsigned long flags = 0; unsigned int wqe_idx; int ret = 0; - int nreq = 0; - int i = 0; + int nreq; + int i; u32 reg_val; spin_lock_irqsave(&hr_qp->rq.lock, flags); @@ -1165,7 +1165,7 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) } raq->e_raq_buf->map = addr; - /* Configure raq extended address. 48bit 4K align*/ + /* Configure raq extended address. 
48bit 4K align */ roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12); /* Configure raq_shift */ @@ -1639,7 +1639,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, } static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, - unsigned long timeout) + unsigned int timeout) { u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG; unsigned long end; @@ -2062,11 +2062,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); } -static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) -{ - return -EOPNOTSUPP; -} - static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { @@ -2305,7 +2300,7 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) struct hns_roce_qp *cur_qp = NULL; unsigned long flags; int npolled; - int ret = 0; + int ret; spin_lock_irqsave(&hr_cq->lock, flags); @@ -2765,7 +2760,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qpc_bytes_16, QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); - } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { roce_set_field(context->qpc_bytes_4, QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, @@ -3261,6 +3255,8 @@ static int hns_roce_v1_modify_qp(struct ib_qp *ibqp, enum ib_qp_state cur_state, enum ib_qp_state new_state) { + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state, @@ -3604,10 +3600,10 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) return 0; } -static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not) +static void set_eq_cons_index_v1(struct hns_roce_eq *eq, u32 req_not) { roce_raw_write((eq->cons_index & HNS_ROCE_V1_CONS_IDX_M) | - (req_not << eq->log_entries), eq->doorbell); + (req_not << eq->log_entries), eq->doorbell); } static void hns_roce_v1_wq_catas_err_handle(struct hns_roce_dev *hr_dev, @@ -3687,10 +3683,10 @@ static void hns_roce_v1_qp_err_handle(struct hns_roce_dev *hr_dev, int phy_port; int qpn; - qpn = roce_get_field(aeqe->event.qp_event.qp, + qpn = roce_get_field(aeqe->event.queue_event.num, HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S); - phy_port = roce_get_field(aeqe->event.qp_event.qp, + phy_port = roce_get_field(aeqe->event.queue_event.num, HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M, HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S); if (qpn <= 1) @@ -3721,9 +3717,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; u32 cqn; - cqn = roce_get_field(aeqe->event.cq_event.cq, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); + cqn = roce_get_field(aeqe->event.queue_event.num, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: @@ -3798,7 +3794,6 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, int event_type; while ((aeqe = next_aeqe_sw_v1(eq))) { - /* Make sure we read the AEQ entry after we have checked the * ownership bit */ @@ -3853,12 +3848,6 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: hns_roce_v1_db_overflow_handle(hr_dev, aeqe); break; - case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: - 
dev_warn(dev, "CEQ 0x%lx overflow.\n", - roce_get_field(aeqe->event.ce_event.ceqe, - HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M, - HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S)); - break; default: dev_warn(dev, "Unhandled event %d on EQ %d at idx %u.\n", event_type, eq->eqn, eq->cons_index); @@ -3903,7 +3892,6 @@ static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev, u32 cqn; while ((ceqe = next_ceqe_sw_v1(eq))) { - /* Make sure we read CEQ entry after we have checked the * ownership bit */ @@ -4129,7 +4117,7 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn]; struct device *dev = &hr_dev->pdev->dev; dma_addr_t tmp_dma_addr; - u32 eqcuridx_val = 0; + u32 eqcuridx_val; u32 eqconsindx_val; u32 eqshift_val; __le32 tmp2 = 0; @@ -4347,7 +4335,6 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev) static const struct ib_device_ops hns_roce_v1_dev_ops = { .destroy_qp = hns_roce_v1_destroy_qp, - .modify_cq = hns_roce_v1_modify_cq, .poll_cq = hns_roce_v1_poll_cq, .post_recv = hns_roce_v1_post_recv, .post_send = hns_roce_v1_post_send, @@ -4367,7 +4354,6 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .set_mtu = hns_roce_v1_set_mtu, .write_mtpt = hns_roce_v1_write_mtpt, .write_cqc = hns_roce_v1_write_cqc, - .modify_cq = hns_roce_v1_modify_cq, .clear_hem = hns_roce_v1_clear_hem, .modify_qp = hns_roce_v1_modify_qp, .query_qp = hns_roce_v1_query_qp, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index ffd0156080f5..46ab0a321d21 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -419,7 +419,7 @@ struct hns_roce_wqe_data_seg { struct hns_roce_wqe_raddr_seg { __le32 rkey; - __le32 len;/* reserved */ + __le32 len; /* reserved */ __le64 raddr; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0468028ffe39..833e1f259936 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -214,25 +214,20 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, return 0; } -static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, - unsigned int *sge_ind, unsigned int valid_num_sge) +static void set_extend_sge(struct hns_roce_qp *qp, struct ib_sge *sge, + unsigned int *sge_ind, unsigned int cnt) { struct hns_roce_v2_wqe_data_seg *dseg; - unsigned int cnt = valid_num_sge; - struct ib_sge *sge = wr->sg_list; unsigned int idx = *sge_ind; - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - cnt -= HNS_ROCE_SGE_IN_WQE; - sge += HNS_ROCE_SGE_IN_WQE; - } - while (cnt > 0) { dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); - set_data_seg_v2(dseg, sge); - idx++; + if (likely(sge->length)) { + set_data_seg_v2(dseg, sge); + idx++; + cnt--; + } sge++; - cnt--; } *sge_ind = idx; @@ -340,7 +335,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, } } - set_extend_sge(qp, wr, sge_ind, valid_num_sge); + set_extend_sge(qp, wr->sg_list + i, sge_ind, + valid_num_sge - HNS_ROCE_SGE_IN_WQE); } roce_set_field(rc_sq_wqe->byte_16, @@ -365,7 +361,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, } else if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || hr_qp->state == IB_QPS_RTR)) { - ibdev_err(ibdev, "failed to post WQE, QP state %d!\n", + ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n", hr_qp->state); 
return -EINVAL; } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { @@ -422,19 +418,54 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, return 0; } +static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, + struct hns_roce_ah *ah) +{ + struct ib_device *ib_dev = ah->ibah.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, + V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); + + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, + V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, + V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, + V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); + + if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) + return -EINVAL; + + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, + V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); + + ud_sq_wqe->sgid_index = ah->av.gid_index; + + memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN); + memcpy(ud_sq_wqe->dgid, ah->av.dgid, GID_LEN_V2); + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; + + roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, + ah->av.vlan_en); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, + V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); + + return 0; +} + static inline int set_ud_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, unsigned int owner_bit) { - struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe; unsigned int curr_idx = *sge_idx; - int valid_num_sge; + unsigned int valid_num_sge; u32 msg_len = 0; - bool loopback; - u8 *smac; int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); @@ -444,38 +475,13 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, if (WARN_ON(ret)) return ret; - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M, - V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M, - V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M, - V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M, - V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_4_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_4_S, ah->av.mac[4]); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]); - - /* MAC loopback */ - smac = (u8 *)hr_dev->dev_addr[qp->port]; - loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0; - - roce_set_bit(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback); - ud_sq_wqe->msg_len = cpu_to_le32(msg_len); - /* Set sig attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, - (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); + !!(wr->send_flags & IB_SEND_SIGNALED)); - /* Set se attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, - (wr->send_flags & IB_SEND_SOLICITED) ? 
1 : 0); - - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); + !!(wr->send_flags & IB_SEND_SOLICITED)); roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); @@ -488,36 +494,29 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, curr_idx & (qp->sge.sge_cnt - 1)); - roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? qp->qkey : ud_wr(wr)->remote_qkey); roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, - V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, - V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, - V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M, - V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port); - - roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, - ah->av.vlan_en ? 1 : 0); - roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, - V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index); + ret = fill_ud_av(ud_sq_wqe, ah); + if (ret) + return ret; - memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); + set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge); - set_extend_sge(qp, wr, &curr_idx, valid_num_sge); + /* + * The pipeline can sequentially post all valid WQEs into WQ buffer, + * including new WQEs waiting for the doorbell to update the PI again. + * Therefore, the owner bit of WQE MUST be updated after all fields + * and extSGEs have been written into DDR instead of cache. + */ + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) + dma_wmb(); *sge_idx = curr_idx; + roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); return 0; } @@ -591,9 +590,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); @@ -601,7 +597,18 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, &curr_idx, valid_num_sge); + /* + * The pipeline can sequentially post all valid WQEs into WQ buffer, + * including new WQEs waiting for the doorbell to update the PI again. + * Therefore, the owner bit of WQE MUST be updated after all fields + * and extSGEs have been written into DDR instead of cache. 
+ */ + if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB) + dma_wmb(); + *sge_idx = curr_idx; + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); return ret; } @@ -649,7 +656,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, unsigned int sge_idx; unsigned int wqe_idx; void *wqe = NULL; - int nreq; + u32 nreq; int ret; spin_lock_irqsave(&qp->sq.lock, flags); @@ -673,7 +680,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); if (unlikely(wr->num_sge > qp->sq.max_gs)) { - ibdev_err(ibdev, "num_sge=%d > qp->sq.max_gs=%d\n", + ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n", wr->num_sge, qp->sq.max_gs); ret = -EINVAL; *bad_wr = wr; @@ -686,7 +693,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); /* Corresponding to the QP type, wqe process separately */ - if (ibqp->qp_type == IB_QPT_GSI) + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit); else if (ibqp->qp_type == IB_QPT_RC) ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit); @@ -758,7 +765,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { - ibdev_err(ibdev, "rq:num_sge=%d >= qp->sq.max_gs=%d\n", + ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", wr->num_sge, hr_qp->rq.max_gs); ret = -EINVAL; *bad_wr = wr; @@ -827,7 +834,7 @@ static void *get_srq_wqe(struct hns_roce_srq *srq, int n) return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); } -static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n) +static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n) { return hns_roce_buf_offset(idx_que->mtr.kmem, n << idx_que->entry_shift); @@ -868,12 +875,12 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_v2_db srq_db; unsigned long flags; + unsigned int ind; __le32 *srq_idx; int ret = 0; int wqe_idx; void *wqe; int nreq; - int ind; int i; spin_lock_irqsave(&srq->lock, flags); @@ -1018,8 +1025,8 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; - unsigned long instance_stage; /* the current instance stage */ - unsigned long reset_stage; /* the current reset stage */ + unsigned long instance_stage; /* the current instance stage */ + unsigned long reset_stage; /* the current reset stage */ unsigned long reset_cnt; bool sw_resetting; bool hw_resetting; @@ -1118,7 +1125,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, upper_32_bits(dma)); roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG, - ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); + (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0); roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0); } else { @@ -1126,7 +1133,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG, upper_32_bits(dma)); roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG, - ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); + (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0); roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 
0); } @@ -1573,6 +1580,10 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) PF_RES_DATA_4_PF_SCCC_BT_NUM_M, PF_RES_DATA_4_PF_SCCC_BT_NUM_S); + hr_dev->caps.gmv_bt_num = roce_get_field(req_b->gmv_idx_num, + PF_RES_DATA_5_PF_GMV_BT_NUM_M, + PF_RES_DATA_5_PF_GMV_BT_NUM_S); + return 0; } @@ -1896,11 +1907,20 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; + caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_sz); + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; + caps->gmv_ba_pg_sz = 0; + caps->gmv_buf_pg_sz = 0; + caps->gid_table_len[0] = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE / + caps->gmv_entry_sz); } } -static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, - int *buf_page_size, int *bt_page_size, u32 hem_type) +static void calc_pg_sz(u32 obj_num, u32 obj_size, u32 hop_num, u32 ctx_bt_num, + u32 *buf_page_size, u32 *bt_page_size, u32 hem_type) { u64 obj_per_chunk; u64 bt_chunk_size = PAGE_SIZE; @@ -1930,8 +1950,8 @@ static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, obj_per_chunk = ctx_bt_num * obj_per_chunk_default; break; default: - pr_err("Table %d not support hop_num = %d!\n", hem_type, - hop_num); + pr_err("table %u not support hop_num = %u!\n", hem_type, + hop_num); return; } @@ -2122,6 +2142,14 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; + caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / + caps->gmv_entry_sz); + caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; + caps->gmv_ba_pg_sz = 0; + caps->gmv_buf_pg_sz = 0; + caps->gid_table_len[0] = caps->gmv_bt_num * + (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); } calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num, @@ -2371,10 +2399,10 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, u32 buf_chk_sz; dma_addr_t t; int func_num = 1; - int pg_num_a; - int pg_num_b; - int pg_num; - int size; + u32 pg_num_a; + u32 pg_num_b; + u32 pg_num; + u32 size; int i; switch (type) { @@ -2423,7 +2451,6 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, if (i < (pg_num - 1)) entry[i].blk_ba1_nxt_ptr |= (i + 1) << HNS_ROCE_LINK_TABLE_NXT_PTR_S; - } link_tbl->npages = pg_num; link_tbl->pg_sz = buf_chk_sz; @@ -2465,24 +2492,13 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, link_tbl->table.map); } -static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +static int get_hem_table(struct hns_roce_dev *hr_dev) { - struct hns_roce_v2_priv *priv = hr_dev->priv; - int qpc_count, cqc_count; - int ret, i; - - /* TSQ includes SQ doorbell and ack doorbell */ - ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); - if (ret) { - dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret); - return ret; - } - - ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); - if (ret) { - dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret); - goto err_tpq_init_failed; - } + unsigned int qpc_count; + unsigned int cqc_count; + unsigned int gmv_count; + int ret; + int i; /* Alloc memory for QPC Timer buffer space chunk */ for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num; @@ -2506,8 +2522,23 @@ static int hns_roce_v2_init(struct hns_roce_dev 
*hr_dev) } } + /* Alloc memory for GMV(GID/MAC/VLAN) table buffer space chunk */ + for (gmv_count = 0; gmv_count < hr_dev->caps.gmv_entry_num; + gmv_count++) { + ret = hns_roce_table_get(hr_dev, &hr_dev->gmv_table, gmv_count); + if (ret) { + dev_err(hr_dev->dev, + "failed to get gmv table, ret = %d.\n", ret); + goto err_gmv_failed; + } + } + return 0; +err_gmv_failed: + for (i = 0; i < gmv_count; i++) + hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i); + err_cqc_timer_failed: for (i = 0; i < cqc_count; i++) hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i); @@ -2516,6 +2547,34 @@ err_qpc_timer_failed: for (i = 0; i < qpc_count; i++) hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i); + return ret; +} + +static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + int ret; + + /* TSQ includes SQ doorbell and ack doorbell */ + ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "failed to init TSQ, ret = %d.\n", ret); + return ret; + } + + ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "failed to init TPQ, ret = %d.\n", ret); + goto err_tpq_init_failed; + } + + ret = get_hem_table(hr_dev); + if (ret) + goto err_get_hem_table_failed; + + return 0; + +err_get_hem_table_failed: hns_roce_free_link_table(hr_dev, &priv->tpq); err_tpq_init_failed: @@ -2539,7 +2598,7 @@ static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev) struct hns_roce_cmq_desc desc; struct hns_roce_mbox_status *mb_st = (struct hns_roce_mbox_status *)desc.data; - enum hns_roce_cmd_return_status status; + int status; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); @@ -2610,7 +2669,7 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, } static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, - unsigned long timeout) + unsigned int timeout) { struct device *dev = hr_dev->dev; unsigned long end; @@ -2637,14 +2696,27 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } -static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, - int gid_index, const union ib_gid *gid, - enum hns_roce_sgid_type sgid_type) +static void copy_gid(void *dest, const union ib_gid *gid) +{ +#define GID_SIZE 4 + const union ib_gid *src = gid; + __le32 (*p)[GID_SIZE] = dest; + int i; + + if (!gid) + src = &zgid; + + for (i = 0; i < GID_SIZE; i++) + (*p)[i] = cpu_to_le32(*(u32 *)&src->raw[i * sizeof(u32)]); +} + +static int config_sgid_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type) { struct hns_roce_cmq_desc desc; struct hns_roce_cfg_sgid_tb *sgid_tb = (struct hns_roce_cfg_sgid_tb *)desc.data; - u32 *p; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); @@ -2653,19 +2725,54 @@ static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M, CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); - p = (u32 *)&gid->raw[0]; - sgid_tb->vf_sgid_l = cpu_to_le32(*p); + copy_gid(&sgid_tb->vf_sgid_l, gid); - p = (u32 *)&gid->raw[4]; - sgid_tb->vf_sgid_ml = cpu_to_le32(*p); + return hns_roce_cmq_send(hr_dev, &desc, 1); +} - p = (u32 *)&gid->raw[8]; - sgid_tb->vf_sgid_mh = cpu_to_le32(*p); +static int config_gmv_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type, + const struct ib_gid_attr *attr) +{ + struct hns_roce_cmq_desc 
desc[2]; + struct hns_roce_cfg_gmv_tb_a *tb_a = + (struct hns_roce_cfg_gmv_tb_a *)desc[0].data; + struct hns_roce_cfg_gmv_tb_b *tb_b = + (struct hns_roce_cfg_gmv_tb_b *)desc[1].data; - p = (u32 *)&gid->raw[0xc]; - sgid_tb->vf_sgid_h = cpu_to_le32(*p); + u16 vlan_id = VLAN_CFI_MASK; + u8 mac[ETH_ALEN] = {}; + int ret; - return hns_roce_cmq_send(hr_dev, &desc, 1); + if (gid) { + ret = rdma_read_gid_l2_fields(attr, &vlan_id, mac); + if (ret) + return ret; + } + + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_CFG_GMV_TBL, false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_CFG_GMV_TBL, false); + + copy_gid(&tb_a->vf_sgid_l, gid); + + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M, + CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type); + roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S, + vlan_id < VLAN_CFI_MASK); + roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M, + CFG_GMV_TB_VF_VLAN_ID_S, vlan_id); + + tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac); + roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M, + CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]); + + roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M, + CFG_GMV_TB_SGID_IDX_S, gid_index); + + return hns_roce_cmq_send(hr_dev, desc, 2); } static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, @@ -2675,23 +2782,24 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; int ret; - if (!gid || !attr) - return -EINVAL; - - if (attr->gid_type == IB_GID_TYPE_ROCE) - sgid_type = GID_TYPE_FLAG_ROCE_V1; - - if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { - if (ipv6_addr_v4mapped((void *)gid)) - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; - else - sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; + if (gid) { + if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + if (ipv6_addr_v4mapped((void *)gid)) + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4; + else + sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; + } else if (attr->gid_type == IB_GID_TYPE_ROCE) { + sgid_type = GID_TYPE_FLAG_ROCE_V1; + } } - ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + ret = config_gmv_table(hr_dev, gid_index, gid, sgid_type, attr); + else + ret = config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (ret) - ibdev_err(&hr_dev->ib_dev, - "failed to configure sgid table, ret = %d!\n", + ibdev_err(&hr_dev->ib_dev, "failed to set gid, ret = %d!\n", ret); return ret; @@ -2959,7 +3067,7 @@ static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size); } -static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) +static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, unsigned int n) { struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe); @@ -3060,6 +3168,9 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == HNS_ROCE_V3_CQE_SIZE ? 
1 : 0); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(cq_context, CQC_STASH); + cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); roce_set_field(cq_context->byte_16_hop_addr, @@ -3303,7 +3414,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, int is_send; u16 wqe_ctr; u32 opcode; - int qpn; + u32 qpn; int ret; /* Find cqe according to consumer index */ @@ -3572,7 +3683,7 @@ static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type, break; default: dev_warn(hr_dev->dev, - "Table %d not to be written by mailbox!\n", type); + "table %u not to be written by mailbox!\n", type); return -EINVAL; } @@ -3583,9 +3694,25 @@ static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba, u32 hem_type, int step_idx) { struct hns_roce_cmd_mailbox *mailbox; + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_gmv_bt *gmv_bt = + (struct hns_roce_cfg_gmv_bt *)desc.data; int ret; int op; + if (hem_type == HEM_TYPE_GMV) { + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, + false); + + gmv_bt->gmv_ba_l = cpu_to_le32(bt_ba >> HNS_HW_PAGE_SHIFT); + gmv_bt->gmv_ba_h = cpu_to_le32(bt_ba >> (HNS_HW_PAGE_SHIFT + + 32)); + gmv_bt->gmv_bt_idx = cpu_to_le32(obj / + (HNS_HW_PAGE_SIZE / hr_dev->caps.gmv_entry_sz)); + + return hns_roce_cmq_send(hr_dev, &desc, 1); + } + op = get_op_for_set_hem(hr_dev, hem_type, step_idx); if (op < 0) return 0; @@ -3683,24 +3810,20 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, case HEM_TYPE_CQC: op = HNS_ROCE_CMD_DESTROY_CQC_BT0; break; - case HEM_TYPE_SCCC: - case HEM_TYPE_QPC_TIMER: - case HEM_TYPE_CQC_TIMER: - break; case HEM_TYPE_SRQC: op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; break; + case HEM_TYPE_SCCC: + case HEM_TYPE_QPC_TIMER: + case HEM_TYPE_CQC_TIMER: + case HEM_TYPE_GMV: + return 0; default: - dev_warn(dev, "Table %d not to be destroyed by mailbox!\n", + dev_warn(dev, "table %u not to be destroyed by mailbox!\n", table->type); return 0; } - if (table->type == HEM_TYPE_SCCC || - table->type == HEM_TYPE_QPC_TIMER || - table->type == HEM_TYPE_CQC_TIMER) - return 0; - op += step_idx; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -3851,9 +3974,14 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1); - hr_qp->access_flags = attr->qp_access_flags; roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn); + + if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ) + return; + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH) + hr_reg_enable(&context->ext, QPCEX_STASH); } static void modify_qp_init_to_init(struct ib_qp *ibqp, @@ -3874,51 +4002,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, V2_QPC_BYTE_4_TST_S, 0); - if (attr_mask & IB_QP_ACCESS_FLAGS) { - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_WRITE)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - 0); - roce_set_bit(context->byte_76_srqn_op_en, 
- V2_QPC_BYTE_76_EXT_ATE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, 0); - } else { - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_WRITE)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, - 0); - - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, - 0); - roce_set_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, - !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC)); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, - V2_QPC_BYTE_76_EXT_ATE_S, 0); - } - roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, @@ -4328,7 +4411,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask); if (ret) { - ibdev_err(ibdev, "failed to config sq buf, ret %d\n", ret); + ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret); return ret; } @@ -4421,7 +4504,9 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, IB_GID_TYPE_ROCE_UDP_ENCAP); } - if (vlan_id < VLAN_N_VID) { + /* Only HIP08 needs to set the vlan_en bits in QPC */ + if (vlan_id < VLAN_N_VID && + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, @@ -4468,15 +4553,11 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - if (is_udp) - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); - else - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class); - + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, get_tclass(&attr->ah_attr.grh)); roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, 0); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, V2_QPC_BYTE_28_FL_S, grh->flow_label); roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, @@ -4758,6 +4839,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, unsigned long rq_flag = 0; int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* * In v2 engine, software pass context and context mask to hardware * when modifying qp. 
If software need modify some fields in context, @@ -4818,7 +4902,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, /* SW pass context to HW */ ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp); if (ret) { - ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret); + ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret); goto out; } @@ -4911,7 +4995,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context); if (ret) { - ibdev_err(ibdev, "failed to query QPC, ret = %d\n", ret); + ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret); ret = -EINVAL; goto out; } @@ -5026,13 +5110,15 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, unsigned long flags; int ret = 0; - if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { + if ((hr_qp->ibqp.qp_type == IB_QPT_RC || + hr_qp->ibqp.qp_type == IB_QPT_UD) && + hr_qp->state != IB_QPS_RESET) { /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); if (ret) ibdev_err(ibdev, - "failed to modify QP to RST, ret = %d\n", + "failed to modify QP to RST, ret = %d.\n", ret); } @@ -5071,7 +5157,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) ibdev_err(&hr_dev->ib_dev, - "failed to destroy QP 0x%06lx, ret = %d\n", + "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", hr_qp->qpn, ret); hns_roce_qp_destroy(hr_dev, hr_qp, udata); @@ -5094,7 +5180,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d\n", ret); + ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d.\n", ret); goto out; } @@ -5104,7 +5190,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, clr->qpn = cpu_to_le32(hr_qp->qpn); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d\n", ret); + ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d.\n", ret); goto out; } @@ -5353,7 +5439,7 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when modifying CQ, ret = %d\n", + "failed to process cmd when modifying CQ, ret = %d.\n", ret); return ret; @@ -5364,8 +5450,6 @@ static void hns_roce_irq_work_handle(struct work_struct *work) struct hns_roce_work *irq_work = container_of(work, struct hns_roce_work, work); struct ib_device *ibdev = &irq_work->hr_dev->ib_dev; - u32 qpn = irq_work->qpn; - u32 cqn = irq_work->cqn; switch (irq_work->event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -5381,15 +5465,15 @@ static void hns_roce_irq_work_handle(struct work_struct *work) break; case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: ibdev_err(ibdev, "Local work queue 0x%x catast error, sub_event type is: %d\n", - qpn, irq_work->sub_type); + irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: ibdev_err(ibdev, "Invalid request local work queue 0x%x error.\n", - qpn); + irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: ibdev_err(ibdev, "Local access violation work queue 0x%x error, sub_event type is: %d\n", - qpn, irq_work->sub_type); + 
irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: ibdev_warn(ibdev, "SRQ limit reach.\n"); @@ -5401,10 +5485,10 @@ static void hns_roce_irq_work_handle(struct work_struct *work) ibdev_err(ibdev, "SRQ catas error.\n"); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: - ibdev_err(ibdev, "CQ 0x%x access err.\n", cqn); + ibdev_err(ibdev, "CQ 0x%x access err.\n", irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - ibdev_warn(ibdev, "CQ 0x%x overflow\n", cqn); + ibdev_warn(ibdev, "CQ 0x%x overflow\n", irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: ibdev_warn(ibdev, "DB overflow.\n"); @@ -5420,8 +5504,7 @@ static void hns_roce_irq_work_handle(struct work_struct *work) } static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq, - u32 qpn, u32 cqn) + struct hns_roce_eq *eq, u32 queue_num) { struct hns_roce_work *irq_work; @@ -5431,10 +5514,9 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); irq_work->hr_dev = hr_dev; - irq_work->qpn = qpn; - irq_work->cqn = cqn; irq_work->event_type = eq->event_type; irq_work->sub_type = eq->sub_type; + irq_work->queue_num = queue_num; queue_work(hr_dev->irq_workq, &(irq_work->work)); } @@ -5486,10 +5568,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq); int aeqe_found = 0; int event_type; + u32 queue_num; int sub_type; - u32 srqn; - u32 qpn; - u32 cqn; while (aeqe) { /* Make sure we read AEQ entry after we have checked the @@ -5503,15 +5583,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M, HNS_ROCE_V2_AEQE_SUB_TYPE_S); - qpn = roce_get_field(aeqe->event.qp_event.qp, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); - cqn = roce_get_field(aeqe->event.cq_event.cq, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); - srqn = roce_get_field(aeqe->event.srq_event.srq, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + queue_num = roce_get_field(aeqe->event.queue_event.num, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -5522,17 +5596,15 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - hns_roce_qp_event(hr_dev, qpn, event_type); + hns_roce_qp_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: - hns_roce_srq_event(hr_dev, srqn, event_type); + hns_roce_srq_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - hns_roce_cq_event(hr_dev, cqn, event_type); - break; - case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: + hns_roce_cq_event(hr_dev, queue_num, event_type); break; case HNS_ROCE_EVENT_TYPE_MB: hns_roce_cmd_event(hr_dev, @@ -5540,8 +5612,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, aeqe->event.cmd.status, le64_to_cpu(aeqe->event.cmd.out_param)); break; - case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: - break; + case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: case HNS_ROCE_EVENT_TYPE_FLR: break; default: @@ -5558,7 +5629,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev 
*hr_dev, if (eq->cons_index > (2 * eq->entries - 1)) eq->cons_index = 0; - hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn); + hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); aeqe = next_aeqe_sw_v2(eq); } @@ -6193,6 +6264,7 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), 0}, /* required last entry */ {0, } }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index be7f2fe1e883..bdaccf86460d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -44,6 +44,7 @@ #define HNS_ROCE_VF_SMAC_NUM 32 #define HNS_ROCE_VF_SGID_NUM 32 #define HNS_ROCE_VF_SL_NUM 8 +#define HNS_ROCE_VF_GMV_BT_NUM 256 #define HNS_ROCE_V2_MAX_QP_NUM 0x100000 #define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200 @@ -89,6 +90,7 @@ #define HNS_ROCE_V2_SCCC_SZ 32 #define HNS_ROCE_V3_SCCC_SZ 64 +#define HNS_ROCE_V3_GMV_ENTRY_SZ 32 #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE @@ -241,6 +243,8 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CLR_SCCC = 0x8509, HNS_ROCE_OPC_QUERY_SCCC = 0x850a, HNS_ROCE_OPC_RESET_SCCC = 0x850b, + HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f, + HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, HNS_SWITCH_PARAMETER_CFG = 0x1033, }; @@ -263,23 +267,24 @@ enum hns_roce_sgid_type { }; struct hns_roce_v2_cq_context { - __le32 byte_4_pg_ceqn; - __le32 byte_8_cqn; - __le32 cqe_cur_blk_addr; - __le32 byte_16_hop_addr; - __le32 cqe_nxt_blk_addr; - __le32 byte_24_pgsz_addr; - __le32 byte_28_cq_pi; - __le32 byte_32_cq_ci; - __le32 cqe_ba; - __le32 byte_40_cqe_ba; - __le32 byte_44_db_record; - __le32 db_record_addr; - __le32 byte_52_cqe_cnt; - __le32 byte_56_cqe_period_maxcnt; - __le32 cqe_report_timer; - __le32 byte_64_se_cqe_idx; + __le32 byte_4_pg_ceqn; + __le32 byte_8_cqn; + __le32 cqe_cur_blk_addr; + __le32 byte_16_hop_addr; + __le32 cqe_nxt_blk_addr; + __le32 byte_24_pgsz_addr; + __le32 byte_28_cq_pi; + __le32 byte_32_cq_ci; + __le32 cqe_ba; + __le32 byte_40_cqe_ba; + __le32 byte_44_db_record; + __le32 db_record_addr; + __le32 byte_52_cqe_cnt; + __le32 byte_56_cqe_period_maxcnt; + __le32 cqe_report_timer; + __le32 byte_64_se_cqe_idx; }; + #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 @@ -356,6 +361,10 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) +#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l) + +#define CQC_STASH CQC_FIELD_LOC(63, 63) + struct hns_roce_srq_context { __le32 byte_4_srqn_srqst; __le32 byte_8_limit_wl; @@ -440,7 +449,7 @@ struct hns_roce_srq_context { #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1 #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1) -enum{ +enum { V2_MPT_ST_VALID = 0x1, V2_MPT_ST_FREE = 0x2, }; @@ -457,68 +466,72 @@ enum hns_roce_v2_qp_state { HNS_ROCE_QP_NUM_ST }; +struct hns_roce_v2_qp_context_ex { + __le32 data[64]; +}; struct hns_roce_v2_qp_context { - __le32 byte_4_sqpn_tst; - __le32 wqe_sge_ba; - __le32 byte_12_sq_hop; - __le32 byte_16_buf_ba_pg_sz; - __le32 byte_20_smac_sgid_idx; - __le32 byte_24_mtu_tc; - __le32 byte_28_at_fl; - u8 dgid[GID_LEN_V2]; - __le32 dmac; - __le32 byte_52_udpspn_dmac; - __le32 byte_56_dqpn_err; - __le32 byte_60_qpst_tempid; - __le32 qkey_xrcd; - __le32 
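The AEQ rework in the hunks above leans on the fact that, for this hardware, the QP, SRQ and CQ numbers in an async event entry all live in the same bit field, so one queue_num extraction can feed every per-queue handler. A minimal userspace sketch of that pattern follows; the 24-bit field position and the event codes are illustrative stand-ins, not the real register layout.

#include <stdio.h>

/* Illustrative layout: low 24 bits of the event word hold the queue number,
 * whichever kind of queue (QP, SRQ or CQ) the event refers to.
 */
#define EVT_QUEUE_NUM_MASK 0x00ffffffu

enum evt_type { EVT_QP_ERROR, EVT_SRQ_LIMIT, EVT_CQ_OVERFLOW };

static void qp_event(unsigned int qpn)   { printf("QP  0x%x event\n", qpn); }
static void srq_event(unsigned int srqn) { printf("SRQ 0x%x event\n", srqn); }
static void cq_event(unsigned int cqn)   { printf("CQ  0x%x event\n", cqn); }

/* One extraction serves all event types, mirroring the single queue_num
 * field that replaced the separate qpn/cqn/srqn locals.
 */
static void dispatch(enum evt_type type, unsigned int event_word)
{
	unsigned int queue_num = event_word & EVT_QUEUE_NUM_MASK;

	switch (type) {
	case EVT_QP_ERROR:
		qp_event(queue_num);
		break;
	case EVT_SRQ_LIMIT:
		srq_event(queue_num);
		break;
	case EVT_CQ_OVERFLOW:
		cq_event(queue_num);
		break;
	}
}

int main(void)
{
	dispatch(EVT_QP_ERROR, 0x12);
	dispatch(EVT_CQ_OVERFLOW, 0x34);
	return 0;
}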
byte_68_rq_db; - __le32 rq_db_record_addr; - __le32 byte_76_srqn_op_en; - __le32 byte_80_rnr_rx_cqn; - __le32 byte_84_rq_ci_pi; - __le32 rq_cur_blk_addr; - __le32 byte_92_srq_info; - __le32 byte_96_rx_reqmsn; - __le32 rq_nxt_blk_addr; - __le32 byte_104_rq_sge; - __le32 byte_108_rx_reqepsn; - __le32 rq_rnr_timer; - __le32 rx_msg_len; - __le32 rx_rkey_pkt_info; - __le64 rx_va; - __le32 byte_132_trrl; - __le32 trrl_ba; - __le32 byte_140_raq; - __le32 byte_144_raq; - __le32 byte_148_raq; - __le32 byte_152_raq; - __le32 byte_156_raq; - __le32 byte_160_sq_ci_pi; - __le32 sq_cur_blk_addr; - __le32 byte_168_irrl_idx; - __le32 byte_172_sq_psn; - __le32 byte_176_msg_pktn; - __le32 sq_cur_sge_blk_addr; - __le32 byte_184_irrl_idx; - __le32 cur_sge_offset; - __le32 byte_192_ext_sge; - __le32 byte_196_sq_psn; - __le32 byte_200_sq_max; - __le32 irrl_ba; - __le32 byte_208_irrl; - __le32 byte_212_lsn; - __le32 sq_timer; - __le32 byte_220_retry_psn_msn; - __le32 byte_224_retry_msg; - __le32 rx_sq_cur_blk_addr; - __le32 byte_232_irrl_sge; - __le32 irrl_cur_sge_offset; - __le32 byte_240_irrl_tail; - __le32 byte_244_rnr_rxack; - __le32 byte_248_ack_psn; - __le32 byte_252_err_txcqn; - __le32 byte_256_sqflush_rqcqe; - __le32 ext[64]; + __le32 byte_4_sqpn_tst; + __le32 wqe_sge_ba; + __le32 byte_12_sq_hop; + __le32 byte_16_buf_ba_pg_sz; + __le32 byte_20_smac_sgid_idx; + __le32 byte_24_mtu_tc; + __le32 byte_28_at_fl; + u8 dgid[GID_LEN_V2]; + __le32 dmac; + __le32 byte_52_udpspn_dmac; + __le32 byte_56_dqpn_err; + __le32 byte_60_qpst_tempid; + __le32 qkey_xrcd; + __le32 byte_68_rq_db; + __le32 rq_db_record_addr; + __le32 byte_76_srqn_op_en; + __le32 byte_80_rnr_rx_cqn; + __le32 byte_84_rq_ci_pi; + __le32 rq_cur_blk_addr; + __le32 byte_92_srq_info; + __le32 byte_96_rx_reqmsn; + __le32 rq_nxt_blk_addr; + __le32 byte_104_rq_sge; + __le32 byte_108_rx_reqepsn; + __le32 rq_rnr_timer; + __le32 rx_msg_len; + __le32 rx_rkey_pkt_info; + __le64 rx_va; + __le32 byte_132_trrl; + __le32 trrl_ba; + __le32 byte_140_raq; + __le32 byte_144_raq; + __le32 byte_148_raq; + __le32 byte_152_raq; + __le32 byte_156_raq; + __le32 byte_160_sq_ci_pi; + __le32 sq_cur_blk_addr; + __le32 byte_168_irrl_idx; + __le32 byte_172_sq_psn; + __le32 byte_176_msg_pktn; + __le32 sq_cur_sge_blk_addr; + __le32 byte_184_irrl_idx; + __le32 cur_sge_offset; + __le32 byte_192_ext_sge; + __le32 byte_196_sq_psn; + __le32 byte_200_sq_max; + __le32 irrl_ba; + __le32 byte_208_irrl; + __le32 byte_212_lsn; + __le32 sq_timer; + __le32 byte_220_retry_psn_msn; + __le32 byte_224_retry_msg; + __le32 rx_sq_cur_blk_addr; + __le32 byte_232_irrl_sge; + __le32 irrl_cur_sge_offset; + __le32 byte_240_irrl_tail; + __le32 byte_244_rnr_rxack; + __le32 byte_248_ack_psn; + __le32 byte_252_err_txcqn; + __le32 byte_256_sqflush_rqcqe; + + struct hns_roce_v2_qp_context_ex ext; }; #define V2_QPC_BYTE_4_TST_S 0 @@ -887,6 +900,10 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16 #define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16) +#define QPCEX_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context_ex, h, l) + +#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82) + #define V2_QP_RWE_S 1 /* rdma write enable */ #define V2_QP_RRE_S 2 /* rdma read enable */ #define V2_QP_ATE_S 3 /* rdma atomic enable */ @@ -1073,12 +1090,13 @@ struct hns_roce_v2_ud_send_wqe { __le32 byte_32; __le32 byte_36; __le32 byte_40; - __le32 dmac; - __le32 byte_48; + u8 dmac[ETH_ALEN]; + u8 sgid_index; + u8 smac_index; u8 dgid[GID_LEN_V2]; - }; -#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 + +#define 
V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 #define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) #define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7 @@ -1117,37 +1135,10 @@ struct hns_roce_v2_ud_send_wqe { #define V2_UD_SEND_WQE_BYTE_40_SL_S 20 #define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20) -#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24 -#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24) - #define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30 #define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 -#define V2_UD_SEND_WQE_DMAC_0_S 0 -#define V2_UD_SEND_WQE_DMAC_0_M GENMASK(7, 0) - -#define V2_UD_SEND_WQE_DMAC_1_S 8 -#define V2_UD_SEND_WQE_DMAC_1_M GENMASK(15, 8) - -#define V2_UD_SEND_WQE_DMAC_2_S 16 -#define V2_UD_SEND_WQE_DMAC_2_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_DMAC_3_S 24 -#define V2_UD_SEND_WQE_DMAC_3_M GENMASK(31, 24) - -#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_S 0 -#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_M GENMASK(7, 0) - -#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_S 8 -#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_M GENMASK(15, 8) - -#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S 16 -#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_S 24 -#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_M GENMASK(31, 24) - struct hns_roce_v2_rc_send_wqe { __le32 byte_4; __le32 msg_len; @@ -1334,7 +1325,7 @@ struct hns_roce_pf_res_b { __le32 sgid_idx_num; __le32 qid_idx_sl_num; __le32 sccc_bt_idx_num; - __le32 rsv; + __le32 gmv_idx_num; }; #define PF_RES_DATA_1_PF_SMAC_IDX_S 0 @@ -1361,6 +1352,12 @@ struct hns_roce_pf_res_b { #define PF_RES_DATA_4_PF_SCCC_BT_NUM_S 9 #define PF_RES_DATA_4_PF_SCCC_BT_NUM_M GENMASK(17, 9) +#define PF_RES_DATA_5_PF_GMV_BT_IDX_S 0 +#define PF_RES_DATA_5_PF_GMV_BT_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_5_PF_GMV_BT_NUM_S 8 +#define PF_RES_DATA_5_PF_GMV_BT_NUM_M GENMASK(16, 8) + struct hns_roce_pf_timer_res_a { __le32 rsv0; __le32 qpc_timer_bt_idx_num; @@ -1425,7 +1422,7 @@ struct hns_roce_vf_res_b { __le32 vf_sgid_idx_num; __le32 vf_qid_idx_sl_num; __le32 vf_sccc_idx_num; - __le32 rsv1; + __le32 vf_gmv_idx_num; }; #define VF_RES_B_DATA_0_VF_ID_S 0 @@ -1455,6 +1452,12 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S 9 #define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M GENMASK(17, 9) +#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_S 0 +#define VF_RES_B_DATA_5_VF_GMV_BT_IDX_M GENMASK(7, 0) + +#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_S 16 +#define VF_RES_B_DATA_5_VF_GMV_BT_NUM_M GENMASK(24, 16) + struct hns_roce_vf_switch { __le32 rocee_sel; __le32 fun_id; @@ -1577,6 +1580,46 @@ struct hns_roce_cfg_smac_tb { #define CFG_SMAC_TB_VF_SMAC_H_S 0 #define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) +struct hns_roce_cfg_gmv_bt { + __le32 gmv_ba_l; + __le32 gmv_ba_h; + __le32 gmv_bt_idx; + __le32 rsv[3]; +}; + +#define CFG_GMV_BA_H_S 0 +#define CFG_GMV_BA_H_M GENMASK(19, 0) + +struct hns_roce_cfg_gmv_tb_a { + __le32 vf_sgid_l; + __le32 vf_sgid_ml; + __le32 vf_sgid_mh; + __le32 vf_sgid_h; + __le32 vf_sgid_type_vlan; + __le32 resv; +}; + +#define CFG_GMV_TB_SGID_IDX_S 0 +#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0) + +#define CFG_GMV_TB_VF_SGID_TYPE_S 0 +#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0) + +#define CFG_GMV_TB_VF_VLAN_EN_S 2 + +#define CFG_GMV_TB_VF_VLAN_ID_S 16 +#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16) + +struct hns_roce_cfg_gmv_tb_b { + __le32 vf_smac_l; + __le32 vf_smac_h; + __le32 table_idx_rsv; + __le32 resv[3]; +}; + +#define CFG_GMV_TB_SMAC_H_S 0 +#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0) + #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 struct hns_roce_query_pf_caps_a { u8 
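The new CFG_GMV_BT/CFG_GMV_TBL commands and the 32-byte HNS_ROCE_V3_GMV_ENTRY_SZ above give HIP09 a combined GID/MAC/VLAN table: one index carries all three attributes in a single entry instead of separate SGID and SMAC tables. A rough sketch of that "one index, one combined entry" idea; the field layout here is invented for illustration and is not the hardware format.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Invented combined entry: 16-byte GID + 6-byte MAC + VLAN + flags. */
struct gmv_entry {
	uint8_t  gid[16];
	uint8_t  mac[6];
	uint16_t vlan_id;
	uint8_t  vlan_en;
	uint8_t  gid_type;
	uint8_t  rsv[6];
};

static struct gmv_entry gmv_table[256];

/* Programming one index updates address, MAC and VLAN together, so there
 * is no separate "set MAC" step to keep in sync with the GID table.
 */
static void gmv_set(unsigned int index, const uint8_t *gid,
		    const uint8_t *mac, uint16_t vlan_id, int vlan_en)
{
	struct gmv_entry *e = &gmv_table[index];

	memcpy(e->gid, gid, sizeof(e->gid));
	memcpy(e->mac, mac, sizeof(e->mac));
	e->vlan_id = vlan_id;
	e->vlan_en = vlan_en ? 1 : 0;
}

int main(void)
{
	uint8_t gid[16] = { 0xfe, 0x80 };
	uint8_t mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };

	gmv_set(0, gid, mac, 100, 1);
	printf("entry size: %zu bytes\n", sizeof(struct gmv_entry));
	return 0;
}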
number_ports; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index afeffafc59f9..d9179bae4989 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -33,13 +33,13 @@ #include <linux/acpi.h> #include <linux/of_platform.h> #include <linux/module.h> +#include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> #include "hns_roce_common.h" #include "hns_roce_device.h" -#include <rdma/hns-abi.h> #include "hns_roce_hem.h" /** @@ -53,7 +53,7 @@ * GID[0][0], GID[1][0],.....GID[N - 1][0], * And so on */ -int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) { return gid_index * hr_dev->caps.num_ports + port; } @@ -61,7 +61,10 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; - u32 i = 0; + u32 i; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN)) return 0; @@ -90,14 +93,13 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); - struct ib_gid_attr zattr = {}; u8 port = attr->port_num - 1; int ret; if (port >= hr_dev->caps.num_ports) return -EINVAL; - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr); + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, NULL, NULL); return ret; } @@ -325,7 +327,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, resp.cqe_size = hr_dev->caps.cqe_sz; - ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); if (ret) goto error_fail_copy_to_udata; @@ -421,6 +424,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .alloc_pd = hns_roce_alloc_pd, .alloc_ucontext = hns_roce_alloc_ucontext, .create_ah = hns_roce_create_ah, + .create_user_ah = hns_roce_create_ah, .create_cq = hns_roce_create_cq, .create_qp = hns_roce_create_qp, .dealloc_pd = hns_roce_dealloc_pd, @@ -491,36 +495,13 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->phys_port_cnt = hr_dev->caps.num_ports; ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; - ib_dev->uverbs_cmd_mask = - (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ULL << IB_USER_VERBS_CMD_REG_MR) | - (1ULL << IB_USER_VERBS_CMD_DEREG_MR) | - (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ULL << IB_USER_VERBS_CMD_CREATE_QP) | - (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); - - ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { - ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); - } /* MW */ - if 
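The ib_copy_to_udata() call in hns_roce_alloc_ucontext() above is now bounded by min(udata->outlen, sizeof(resp)), and the same pattern recurs for the PD, QP and SRQ paths further down with udata->inlen on the command side. The point is ABI tolerance: an older userspace with a shorter response or command struct keeps working instead of failing the copy. A rough standalone sketch of that truncating copy, with made-up struct and field names:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical v1 and v2 of a driver/userspace response struct. */
struct resp_v1 { unsigned int pdn; };
struct resp_v2 { unsigned int pdn; unsigned int cap_flags; };

/* Copy only as many bytes as the consumer's buffer declares it can hold,
 * mirroring min(udata->outlen, sizeof(resp)) in the hunks above.
 */
static void copy_to_user_buf(void *dst, size_t dst_len,
			     const void *src, size_t src_len)
{
	memcpy(dst, src, dst_len < src_len ? dst_len : src_len);
}

int main(void)
{
	struct resp_v2 kernel_resp = { .pdn = 7, .cap_flags = 0x3 };
	struct resp_v1 old_user = { 0 };

	/* Old userspace only knows resp_v1; the extra field is simply not copied. */
	copy_to_user_buf(&old_user, sizeof(old_user),
			 &kernel_resp, sizeof(kernel_resp));
	printf("pdn seen by old userspace: %u\n", old_user.pdn);
	return 0;
}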
(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { - ib_dev->uverbs_cmd_mask |= - (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops); - } /* FRMR */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) @@ -528,12 +509,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) /* SRQ */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { - ib_dev->uverbs_cmd_mask |= - (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV); ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops); ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); } @@ -580,8 +555,8 @@ error_failed_setup_mtu_mac: static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, @@ -631,7 +606,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) goto err_unmap_trrl; } - if (hr_dev->caps.srqc_entry_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table, HEM_TYPE_SRQC, hr_dev->caps.srqc_entry_sz, @@ -643,7 +618,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } - if (hr_dev->caps.sccc_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.sccc_table, HEM_TYPE_SCCC, @@ -680,18 +655,35 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } + if (hr_dev->caps.gmv_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table, + HEM_TYPE_GMV, + hr_dev->caps.gmv_entry_sz, + hr_dev->caps.gmv_entry_num, 1); + if (ret) { + dev_err(dev, + "failed to init gmv table memory, ret = %d\n", + ret); + goto err_unmap_cqc_timer; + } + } + return 0; +err_unmap_cqc_timer: + if (hr_dev->caps.cqc_timer_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); + err_unmap_qpc_timer: if (hr_dev->caps.qpc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table); err_unmap_ctx: - if (hr_dev->caps.sccc_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); err_unmap_srq: - if (hr_dev->caps.srqc_entry_sz) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); err_unmap_cq: @@ -721,8 +713,8 @@ err_unmap_dmpt: */ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; spin_lock_init(&hr_dev->sm_lock); spin_lock_init(&hr_dev->bt_cmd_lock); @@ -846,8 +838,8 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) int hns_roce_init(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; if (hr_dev->hw->reset) { ret = hr_dev->hw->reset(hr_dev, true); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 7f81a695e9af..1bcffd93ff3e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -167,10 +167,10 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - int ret; unsigned long mtpt_idx = 
key_to_hw_index(mr->key); - struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; + struct device *dev = hr_dev->dev; + int ret; /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -185,14 +185,14 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, else ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr); if (ret) { - dev_err(dev, "Write mtpt fail!\n"); + dev_err(dev, "failed to write mtpt, ret = %d.\n", ret); goto err_page; } ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "CREATE_MPT failed (%d)\n", ret); + dev_err(dev, "failed to create mpt, ret = %d.\n", ret); goto err_page; } @@ -328,9 +328,10 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, return ret; } -int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata) +struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct ib_device *ib_dev = &hr_dev->ib_dev; @@ -341,11 +342,11 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, int ret; if (!mr->enabled) - return -EINVAL; + return ERR_PTR(-EINVAL); mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + return ERR_CAST(mailbox); mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1); ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0, @@ -390,12 +391,12 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return 0; + return NULL; free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return ret; + return ERR_PTR(ret); } int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) @@ -495,7 +496,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); if (ret < 1) { - ibdev_err(ibdev, "failed to store sg pages %d %d, cnt = %d.\n", + ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n", mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret); goto err_page_list; } @@ -509,7 +510,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); ret = 0; } else { - mr->pbl_mtr.hem_cfg.buf_pg_shift = ilog2(ibmr->page_size); + mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size); ret = mr->npages; } @@ -695,15 +696,6 @@ static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) return size; } -static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, - unsigned int page_shift) -{ - if (is_direct) - return ALIGN(alloc_size, 1 << page_shift); - else - return HNS_HW_DIRECT_PAGE_COUNT << page_shift; -} - /* * check the given pages in continuous address space * Returns 0 on success, or the error page num. 
@@ -732,7 +724,6 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) /* release kernel buffers */ if (mtr->kmem) { hns_roce_buf_free(hr_dev, mtr->kmem); - kfree(mtr->kmem); mtr->kmem = NULL; } } @@ -744,13 +735,12 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct ib_device *ibdev = &hr_dev->ib_dev; unsigned int best_pg_shift; int all_pg_count = 0; - size_t direct_size; size_t total_size; int ret; total_size = mtr_bufs_size(buf_attr); if (total_size < 1) { - ibdev_err(ibdev, "Failed to check mtr size\n"); + ibdev_err(ibdev, "failed to check mtr size\n."); return -EINVAL; } @@ -762,7 +752,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); if (IS_ERR_OR_NULL(mtr->umem)) { - ibdev_err(ibdev, "Failed to get umem, ret %ld\n", + ibdev_err(ibdev, "failed to get umem, ret = %ld.\n", PTR_ERR(mtr->umem)); return -ENOMEM; } @@ -780,19 +770,16 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, ret = 0; } else { mtr->umem = NULL; - mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL); - if (!mtr->kmem) { - ibdev_err(ibdev, "Failed to alloc kmem\n"); - return -ENOMEM; - } - direct_size = mtr_kmem_direct_size(is_direct, total_size, - buf_attr->page_shift); - ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, - mtr->kmem, buf_attr->page_shift); - if (ret) { - ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); - goto err_alloc_mem; + mtr->kmem = + hns_roce_buf_alloc(hr_dev, total_size, + buf_attr->page_shift, + is_direct ? HNS_ROCE_BUF_DIRECT : 0); + if (IS_ERR(mtr->kmem)) { + ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", + PTR_ERR(mtr->kmem)); + return PTR_ERR(mtr->kmem); } + best_pg_shift = buf_attr->page_shift; all_pg_count = mtr->kmem->npages; } @@ -800,7 +787,8 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, /* must bigger than minimum hardware page shift */ if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { ret = -EINVAL; - ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n", + ibdev_err(ibdev, + "failed to check mtr, page shift = %u count = %d.\n", best_pg_shift, all_pg_count); goto err_alloc_mem; } @@ -841,12 +829,12 @@ static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, } int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int page_cnt) + dma_addr_t *pages, unsigned int page_cnt) { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_region *r; + unsigned int i; int err; - int i; /* * Only use the first page address as root ba when hopnum is 0, this @@ -862,7 +850,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, if (r->offset + r->count > page_cnt) { err = -EINVAL; ibdev_err(ibdev, - "Failed to check mtr%d end %d + %d, max %d\n", + "failed to check mtr%u end %u + %u, max %u.\n", i, r->offset, r->count, page_cnt); return err; } @@ -870,7 +858,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); if (err) { ibdev_err(ibdev, - "Failed to map mtr%d offset %d, err %d\n", + "failed to map mtr%u offset %u, ret = %d.\n", i, r->offset, err); return err; } @@ -883,13 +871,12 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { struct 
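Several hunks above move from "fill a caller-provided struct and return int" to returning the object itself with failures encoded in the pointer: hns_roce_buf_alloc() is now checked with IS_ERR()/PTR_ERR(), and hns_roce_rereg_user_mr() returns a struct ib_mr * via ERR_PTR()/ERR_CAST(). A compact userspace imitation of that error-pointer convention, with locally defined helpers standing in for the kernel's ERR_PTR machinery:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Local stand-ins for the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() helpers. */
static inline void *err_ptr(long err)      { return (void *)(intptr_t)err; }
static inline long  ptr_err(const void *p) { return (long)(intptr_t)p; }
static inline int   is_err(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-4095;
}

struct buf { size_t size; };

/* Allocator that returns either a valid object or an encoded errno,
 * so the caller needs no separate output parameter or status int.
 */
static struct buf *buf_alloc(size_t size)
{
	struct buf *b;

	if (!size)
		return err_ptr(-EINVAL);

	b = malloc(sizeof(*b));
	if (!b)
		return err_ptr(-ENOMEM);

	b->size = size;
	return b;
}

int main(void)
{
	struct buf *b = buf_alloc(0);

	if (is_err(b))
		printf("alloc failed: %ld\n", ptr_err(b));

	b = buf_alloc(4096);
	if (!is_err(b)) {
		printf("allocated descriptor for %zu bytes\n", b->size);
		free(b);
	}
	return 0;
}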
hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int mtt_count, left; int start_index; - int mtt_count; int total = 0; __le64 *mtts; - int npage; + u32 npage; u64 addr; - int left; if (!mtt_buf || mtt_max < 1) goto done; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 98f69496adb4..cca818d05a8f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -32,7 +32,6 @@ #include <linux/platform_device.h> #include <linux/pci.h> -#include <uapi/rdma/hns-abi.h> #include "hns_roce_device.h" static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) @@ -65,21 +64,22 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn); if (ret) { - ibdev_err(ib_dev, "failed to alloc pd, ret = %d\n", ret); + ibdev_err(ib_dev, "failed to alloc pd, ret = %d.\n", ret); return ret; } if (udata) { - struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn}; + struct hns_roce_ib_alloc_pd_resp resp = {.pdn = pd->pdn}; - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) { hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); - ibdev_err(ib_dev, "failed to copy to udata\n"); - return -EFAULT; + ibdev_err(ib_dev, "failed to copy to udata, ret = %d\n", ret); } } - return 0; + return ret; } int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6c081dd985fc..d8e2fe5558d2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -39,7 +39,6 @@ #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" -#include <rdma/hns-abi.h> static void flush_work_handle(struct work_struct *work) { @@ -114,8 +113,8 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, enum hns_roce_event type) { - struct ib_event event; struct ib_qp *ibqp = &hr_qp->ibqp; + struct ib_event event; if (ibqp->event_handler) { event.device = ibqp->device; @@ -154,9 +153,50 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, } } +static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank) +{ + u32 least_load = bank[0].inuse; + u8 bankid = 0; + u32 bankcnt; + u8 i; + + for (i = 1; i < HNS_ROCE_QP_BANK_NUM; i++) { + bankcnt = bank[i].inuse; + if (bankcnt < least_load) { + least_load = bankcnt; + bankid = i; + } + } + + return bankid; +} + +static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid, + unsigned long *qpn) +{ + int id; + + id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL); + if (id < 0) { + id = ida_alloc_range(&bank->ida, bank->min, bank->max, + GFP_KERNEL); + if (id < 0) + return id; + } + + /* the QPN should keep increasing until the max value is reached. */ + bank->next = (id + 1) > bank->max ? 
bank->min : id + 1; + + /* the lower 3 bits is bankid */ + *qpn = (id << 3) | bankid; + + return 0; +} static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; unsigned long num = 0; + u8 bankid; int ret; if (hr_qp->ibqp.qp_type == IB_QPT_GSI) { @@ -169,13 +209,21 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hr_qp->doorbell_qpn = 1; } else { - ret = hns_roce_bitmap_alloc_range(&hr_dev->qp_table.bitmap, - 1, 1, &num); + spin_lock(&qp_table->bank_lock); + bankid = get_least_load_bankid_for_qp(qp_table->bank); + + ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, + &num); if (ret) { - ibdev_err(&hr_dev->ib_dev, "Failed to alloc bitmap\n"); - return -ENOMEM; + ibdev_err(&hr_dev->ib_dev, + "failed to alloc QPN, ret = %d\n", ret); + spin_unlock(&qp_table->bank_lock); + return ret; } + qp_table->bank[bankid].inuse++; + spin_unlock(&qp_table->bank_lock); + hr_qp->doorbell_qpn = (u32)num; } @@ -286,7 +334,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) } } - if (hr_dev->caps.sccc_sz) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { /* Alloc memory for SCC CTX */ ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, hr_qp->qpn); @@ -340,9 +388,15 @@ static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); } +static inline u8 get_qp_bankid(unsigned long qpn) +{ + /* The lower 3 bits of QPN are used to hash to different banks */ + return (u8)(qpn & GENMASK(2, 0)); +} + static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + u8 bankid; if (hr_qp->ibqp.qp_type == IB_QPT_GSI) return; @@ -350,7 +404,13 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (hr_qp->qpn < hr_dev->caps.reserved_qps) return; - hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR); + bankid = get_qp_bankid(hr_qp->qpn); + + ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3); + + spin_lock(&hr_dev->qp_table.bank_lock); + hr_dev->qp_table.bank[bankid].inuse--; + spin_unlock(&hr_dev->qp_table.bank_lock); } static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, @@ -404,37 +464,43 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return 0; } -static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, - struct hns_roce_qp *hr_qp, - struct ib_qp_cap *cap) +static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) { - u32 cnt; + /* GSI/UD QP only has extended sge */ + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) + return qp->sq.max_gs; - cnt = max(1U, cap->max_send_sge); - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { - hr_qp->sq.max_gs = roundup_pow_of_two(cnt); - hr_qp->sge.sge_cnt = 0; + if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) + return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; - return 0; - } + return 0; +} - hr_qp->sq.max_gs = cnt; +static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, + struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) +{ + u32 total_sge_cnt; + u32 wqe_sge_cnt; - /* UD sqwqe's sge use extend sge */ - if (hr_qp->ibqp.qp_type == IB_QPT_GSI || - hr_qp->ibqp.qp_type == IB_QPT_UD) { - cnt = roundup_pow_of_two(sq_wqe_cnt * hr_qp->sq.max_gs); - } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { - cnt = 
roundup_pow_of_two(sq_wqe_cnt * - (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); - } else { - cnt = 0; + hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; + + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + hr_qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; + return; } - hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - hr_qp->sge.sge_cnt = cnt; + hr_qp->sq.max_gs = max(1U, cap->max_send_sge); - return 0; + wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp); + + /* If the number of extended sge is not zero, they MUST use the + * space of HNS_HW_PAGE_SIZE at least. + */ + if (wqe_sge_cnt) { + total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt); + hr_qp->sge.sge_cnt = max(total_sge_cnt, + (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); + } } static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, @@ -447,12 +513,12 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, /* Sanity check SQ size before proceeding */ if (ucmd->log_sq_stride > max_sq_stride || ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ stride size\n"); + ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n"); return -EINVAL; } if (cap->max_send_sge > hr_dev->caps.max_sq_sg) { - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ SGE size %d\n", + ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n", cap->max_send_sge); return -EINVAL; } @@ -479,9 +545,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, return ret; } - ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); - if (ret) - return ret; + set_ext_sge_param(hr_dev, cnt, hr_qp, cap); hr_qp->sq.wqe_shift = ucmd->log_sq_stride; hr_qp->sq.wqe_cnt = cnt; @@ -546,7 +610,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, { struct ib_device *ibdev = &hr_dev->ib_dev; u32 cnt; - int ret; if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || cap->max_send_sge > hr_dev->caps.max_sq_sg) { @@ -558,7 +621,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes)); if (cnt > hr_dev->caps.max_wqes) { - ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n", + ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n", cnt); return -EINVAL; } @@ -566,9 +629,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); hr_qp->sq.wqe_cnt = cnt; - ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); - if (ret) - return ret; + set_ext_sge_param(hr_dev, cnt, hr_qp, cap); /* sync the parameters of kernel QP to user's configuration */ cap->max_send_wr = cnt; @@ -725,13 +786,17 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_device *ibdev = &hr_dev->ib_dev; int ret; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE) + hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB; + if (udata) { if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { ret = hns_roce_db_map_user(uctx, udata, ucmd->sdb_addr, &hr_qp->sdb); if (ret) { ibdev_err(ibdev, - "Failed to map user SQ doorbell\n"); + "failed to map user SQ doorbell, ret = %d.\n", + ret); goto err_out; } hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; @@ -743,7 +808,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, &hr_qp->rdb); if (ret) { ibdev_err(ibdev, - "Failed to map user RQ doorbell\n"); + "failed to map user RQ doorbell, ret = %d.\n", + ret); goto err_sdb; } hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; @@ -760,7 +826,8 @@ 
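The set_ext_sge_param() rework above sizes the extended-SGE area as a power of two of (SQ WQE count times the per-WQE extended SGE count) and, whenever any extended SGEs exist, never below one hardware page worth of SGE slots. A small arithmetic sketch of that sizing rule, with the page and SGE sizes written out as assumed constants (4 KB pages, 16-byte SGEs, 2 inline SGEs per WQE):

#include <stdio.h>

#define HW_PAGE_SIZE 4096u   /* assumed hardware page size */
#define SGE_SIZE       16u   /* assumed size of one SGE entry */
#define SGE_IN_WQE      2u   /* SGEs that fit inline in a standard WQE */

static unsigned int roundup_pow_of_two_u32(unsigned int v)
{
	unsigned int r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

/* ext_sge_per_wqe: SGEs beyond what the WQE itself can hold (0 for small
 * RC WQEs, the full max_gs for UD/GSI QPs, which keep every SGE in the
 * extended area).
 */
static unsigned int ext_sge_cnt(unsigned int sq_wqe_cnt,
				unsigned int ext_sge_per_wqe)
{
	unsigned int total;

	if (!ext_sge_per_wqe)
		return 0;

	total = roundup_pow_of_two_u32(sq_wqe_cnt * ext_sge_per_wqe);
	if (total < HW_PAGE_SIZE / SGE_SIZE)
		total = HW_PAGE_SIZE / SGE_SIZE;
	return total;
}

int main(void)
{
	/* RC QP: 128 WQEs, 5 SGEs each, so 3 spill into the extended area. */
	printf("RC ext sge cnt: %u\n", ext_sge_cnt(128, 5 - SGE_IN_WQE));
	/* UD QP: all SGEs are extended. */
	printf("UD ext sge cnt: %u\n", ext_sge_cnt(8, 4));
	return 0;
}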
static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0); if (ret) { ibdev_err(ibdev, - "Failed to alloc kernel RQ doorbell\n"); + "failed to alloc kernel RQ doorbell, ret = %d.\n", + ret); goto err_out; } *hr_qp->rdb.db_record = 0; @@ -803,14 +870,14 @@ static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev, sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL); if (ZERO_OR_NULL_PTR(sq_wrid)) { - ibdev_err(ibdev, "Failed to alloc SQ wrid\n"); + ibdev_err(ibdev, "failed to alloc SQ wrid.\n"); return -ENOMEM; } if (hr_qp->rq.wqe_cnt) { rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL); if (ZERO_OR_NULL_PTR(rq_wrid)) { - ibdev_err(ibdev, "Failed to alloc RQ wrid\n"); + ibdev_err(ibdev, "failed to alloc RQ wrid.\n"); ret = -ENOMEM; goto err_sq; } @@ -860,29 +927,25 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } if (udata) { - if (ib_copy_from_udata(ucmd, udata, sizeof(*ucmd))) { - ibdev_err(ibdev, "Failed to copy QP ucmd\n"); - return -EFAULT; + ret = ib_copy_from_udata(ucmd, udata, + min(udata->inlen, sizeof(*ucmd))); + if (ret) { + ibdev_err(ibdev, + "failed to copy QP ucmd, ret = %d\n", ret); + return ret; } ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); if (ret) - ibdev_err(ibdev, "Failed to set user SQ size\n"); + ibdev_err(ibdev, + "failed to set user SQ size, ret = %d.\n", + ret); } else { - if (init_attr->create_flags & - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - ibdev_err(ibdev, "Failed to check multicast loopback\n"); - return -EINVAL; - } - - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { - ibdev_err(ibdev, "Failed to check ipoib ud lso\n"); - return -EINVAL; - } - ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) - ibdev_err(ibdev, "Failed to set kernel SQ size\n"); + ibdev_err(ibdev, + "failed to set kernel SQ size, ret = %d.\n", + ret); } return ret; @@ -906,47 +969,53 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->state = IB_QPS_RESET; hr_qp->flush_flag = 0; + if (init_attr->create_flags) + return -EOPNOTSUPP; + ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) { - ibdev_err(ibdev, "Failed to set QP param\n"); + ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret); return ret; } if (!udata) { ret = alloc_kernel_wrid(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc wrid\n"); + ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n", + ret); return ret; } } ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP doorbell\n"); + ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n", + ret); goto err_wrid; } ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP buffer\n"); + ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret); goto err_db; } ret = alloc_qpn(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QPN\n"); + ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret); goto err_buf; } ret = alloc_qpc(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP context\n"); + ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n", + ret); goto err_qpn; } ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr); if (ret) { - ibdev_err(ibdev, "Failed to store QP\n"); + ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret); goto err_qpc; } @@ -1003,6 +1072,30 @@ void 
hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, kfree(hr_qp); } +static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type, + bool is_user) +{ + switch (type) { + case IB_QPT_UD: + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && + is_user) + goto out; + fallthrough; + case IB_QPT_RC: + case IB_QPT_GSI: + break; + default: + goto out; + } + + return 0; + +out: + ibdev_err(&hr_dev->ib_dev, "not support QP type %d\n", type); + + return -EOPNOTSUPP; +} + struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) @@ -1012,15 +1105,9 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct hns_roce_qp *hr_qp; int ret; - switch (init_attr->qp_type) { - case IB_QPT_RC: - case IB_QPT_GSI: - break; - default: - ibdev_err(ibdev, "not support QP type %d\n", - init_attr->qp_type); - return ERR_PTR(-EOPNOTSUPP); - } + ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata); + if (ret) + return ERR_PTR(ret); hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); if (!hr_qp) @@ -1035,10 +1122,11 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, if (ret) { ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n", init_attr->qp_type, ret); - ibdev_err(ibdev, "Create GSI QP failed!\n"); + kfree(hr_qp); return ERR_PTR(ret); } + return &hr_qp->ibqp; } @@ -1091,9 +1179,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) { - ibdev_err(&hr_dev->ib_dev, - "attr port_num invalid.attr->port_num=%d\n", - attr->port_num); + ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n", + attr->port_num); return -EINVAL; } @@ -1101,8 +1188,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, p = attr_mask & IB_QP_PORT ? 
(attr->port_num - 1) : hr_qp->port; if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) { ibdev_err(&hr_dev->ib_dev, - "attr pkey_index invalid.attr->pkey_index=%d\n", - attr->pkey_index); + "invalid attr, pkey_index = %u.\n", + attr->pkey_index); return -EINVAL; } } @@ -1110,16 +1197,16 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) { ibdev_err(&hr_dev->ib_dev, - "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n", - attr->max_rd_atomic); + "invalid attr, max_rd_atomic = %u.\n", + attr->max_rd_atomic); return -EINVAL; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) { ibdev_err(&hr_dev->ib_dev, - "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n", - attr->max_dest_rd_atomic); + "invalid attr, max_dest_rd_atomic = %u.\n", + attr->max_dest_rd_atomic); return -EINVAL; } @@ -1244,22 +1331,22 @@ static inline void *get_wqe(struct hns_roce_qp *hr_qp, int offset) return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); } -void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n) { return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift)); } -void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n) { return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift)); } -void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n) { return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); } -bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, struct ib_cq *ib_cq) { struct hns_roce_cq *hr_cq; @@ -1280,22 +1367,24 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - int reserved_from_top = 0; - int reserved_from_bot; - int ret; + unsigned int reserved_from_bot; + unsigned int i; mutex_init(&qp_table->scc_mutex); xa_init(&hr_dev->qp_table_xa); reserved_from_bot = hr_dev->caps.reserved_qps; - ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, - hr_dev->caps.num_qps - 1, reserved_from_bot, - reserved_from_top); - if (ret) { - dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n", - ret); - return ret; + for (i = 0; i < reserved_from_bot; i++) { + hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++; + hr_dev->qp_table.bank[get_qp_bankid(i)].min++; + } + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { + ida_init(&hr_dev->qp_table.bank[i].ida); + hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps / + HNS_ROCE_QP_BANK_NUM - 1; + hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min; } return 0; @@ -1303,5 +1392,8 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) { - hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap); + int i; + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) + ida_destroy(&hr_dev->qp_table.bank[i].ida); } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 8caf74e44efd..c4ae57e4173a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -4,7 +4,6 @@ */ 
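The QPN changes above replace the single bitmap allocator with eight per-bank IDAs: the low three bits of a QPN select the bank, a new QP goes to the least-loaded bank, and within a bank the ID keeps increasing (wrapping back to the bank minimum) so a just-freed QPN is not handed out again immediately. A self-contained sketch of that scheme, with a trivial "next free slot" search standing in for the kernel IDA:

#include <stdbool.h>
#include <stdio.h>

#define QP_BANK_NUM   8   /* low 3 bits of the QPN select the bank */
#define BANK_CAPACITY 16  /* toy per-bank capacity */

struct bank {
	bool used[BANK_CAPACITY];
	unsigned int next;   /* where to start searching, for delayed reuse */
	unsigned int inuse;  /* load, used to pick the emptiest bank */
};

static struct bank banks[QP_BANK_NUM];

static int alloc_id(struct bank *b)
{
	for (unsigned int n = 0; n < BANK_CAPACITY; n++) {
		unsigned int id = (b->next + n) % BANK_CAPACITY;

		if (!b->used[id]) {
			b->used[id] = true;
			b->next = (id + 1) % BANK_CAPACITY;
			b->inuse++;
			return (int)id;
		}
	}
	return -1;
}

static unsigned int least_loaded_bank(void)
{
	unsigned int best = 0;

	for (unsigned int i = 1; i < QP_BANK_NUM; i++)
		if (banks[i].inuse < banks[best].inuse)
			best = i;
	return best;
}

static long alloc_qpn(void)
{
	unsigned int bankid = least_loaded_bank();
	int id = alloc_id(&banks[bankid]);

	if (id < 0)
		return -1;
	return ((long)id << 3) | bankid;  /* bank id lives in the low 3 bits */
}

static void free_qpn(long qpn)
{
	struct bank *b = &banks[qpn & 0x7];

	b->used[qpn >> 3] = false;
	b->inuse--;
}

int main(void)
{
	long a = alloc_qpn(), b = alloc_qpn(), c = alloc_qpn();

	printf("qpns: 0x%lx 0x%lx 0x%lx\n", a, b, c);
	free_qpn(b);
	/* The freed QPN is not reused right away; a later ID in its bank wins. */
	printf("after free+realloc: 0x%lx\n", alloc_qpn());
	return 0;
}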
#include <rdma/ib_umem.h> -#include <rdma/hns-abi.h> #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" @@ -93,7 +92,8 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); if (ret < 1) { - ibdev_err(ibdev, "Failed to find mtr for SRQ WQE\n"); + ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", + ret); return -ENOBUFS; } @@ -101,32 +101,34 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, ARRAY_SIZE(mtts_idx), &dma_handle_idx); if (ret < 1) { - ibdev_err(ibdev, "Failed to find mtr for SRQ idx\n"); + ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", + ret); return -ENOBUFS; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ number, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ number, ret = %d.\n", ret); return -ENOMEM; } ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); if (ret) { - ibdev_err(ibdev, "Failed to get SRQC table, err %d\n", ret); + ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret); goto err_out; } ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); if (ret) { - ibdev_err(ibdev, "Failed to store SRQC, err %d\n", ret); + ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); goto err_put; } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR_OR_NULL(mailbox)) { ret = -ENOMEM; - ibdev_err(ibdev, "Failed to alloc mailbox for SRQC\n"); + ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); goto err_xa; } @@ -137,7 +139,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - ibdev_err(ibdev, "Failed to config SRQC, err %d\n", ret); + ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); goto err_xa; } @@ -198,7 +200,8 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, hr_dev->caps.srqwqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) - ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err); + ibdev_err(ibdev, + "failed to alloc SRQ buf mtr, ret = %d.\n", err); return err; } @@ -229,18 +232,18 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) { - ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err); + ibdev_err(ibdev, + "failed to alloc SRQ idx mtr, ret = %d.\n", err); return err; } if (!udata) { idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) { - ibdev_err(ibdev, "Failed to alloc SRQ idx bitmap\n"); + ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n"); err = -ENOMEM; goto err_idx_mtr; } - } return 0; @@ -288,6 +291,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, int ret; u32 cqn; + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + /* Check the actual SRQ wqe and SRQ sge num */ if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) @@ -300,9 +307,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->max_gs = init_attr->attr.max_sge; if (udata) { - ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + ret = 
ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n", + ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", ret); return ret; } @@ -310,20 +318,21 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ buffer, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ buffer, ret = %d.\n", ret); return ret; } ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ idx, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret); goto err_buf_alloc; } if (!udata) { ret = alloc_srq_wrid(hr_dev, srq); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ wrid, err %d\n", + ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n", ret); goto err_idx_alloc; } @@ -335,7 +344,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ context, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ context, ret = %d.\n", ret); goto err_wrid_alloc; } @@ -343,11 +353,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, resp.srqn = srq->srqn; if (udata) { - if (ib_copy_to_udata(udata, &resp, - min(udata->outlen, sizeof(resp)))) { - ret = -EFAULT; + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + if (ret) goto err_srqc_alloc; - } } return 0; diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 832b80de004f..6a79502c8b53 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -274,7 +274,6 @@ struct i40iw_device { u8 max_sge; u8 iw_status; u8 send_term_ok; - bool push_mode; /* Initialized from parameter passed to driver */ /* x710 specific */ struct mutex pbl_mutex; diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 3053c345a5a3..9acc0ecc9a43 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2426,7 +2426,7 @@ static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node, } break; case I40IW_CM_STATE_MPAREQ_RCVD: - atomic_add_return(1, &cm_node->passive_state); + atomic_inc(&cm_node->passive_state); break; case I40IW_CM_STATE_ESTABLISHED: case I40IW_CM_STATE_SYN_RCVD: @@ -3020,7 +3020,7 @@ static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 i40iw_cleanup_retrans_entry(cm_node); if (!loopback) { - passive_state = atomic_add_return(1, &cm_node->passive_state); + passive_state = atomic_inc_return(&cm_node->passive_state); if (passive_state == I40IW_SEND_RESET_EVENT) { cm_node->state = I40IW_CM_STATE_CLOSED; i40iw_rem_ref_cm_node(cm_node); @@ -3678,7 +3678,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -EINVAL; } - passive_state = atomic_add_return(1, &cm_node->passive_state); + passive_state = atomic_inc_return(&cm_node->passive_state); if (passive_state == I40IW_SEND_RESET_EVENT) { i40iw_rem_ref_cm_node(cm_node); return -ECONNRESET; diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 86d3f8aff329..c943d491b72b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -820,46 +820,6 @@ static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done( } /** - * 
i40iw_sc_manage_push_page - Handle push page - * @cqp: struct for cqp hw - * @info: push page info - * @scratch: u64 saved to be used during cqp completion - * @post_sq: flag for cqp db to ring - */ -static enum i40iw_status_code i40iw_sc_manage_push_page( - struct i40iw_sc_cqp *cqp, - struct i40iw_cqp_manage_push_page_info *info, - u64 scratch, - bool post_sq) -{ - u64 *wqe; - u64 header; - - if (info->push_idx >= I40IW_MAX_PUSH_PAGE_COUNT) - return I40IW_ERR_INVALID_PUSH_PAGE_INDEX; - - wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); - if (!wqe) - return I40IW_ERR_RING_FULL; - - set_64bit_val(wqe, 16, info->qs_handle); - - header = LS_64(info->push_idx, I40IW_CQPSQ_MPP_PPIDX) | - LS_64(I40IW_CQP_OP_MANAGE_PUSH_PAGES, I40IW_CQPSQ_OPCODE) | - LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) | - LS_64(info->free_page, I40IW_CQPSQ_MPP_FREE_PAGE); - - i40iw_insert_wqe_hdr(wqe, header); - - i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE", - wqe, I40IW_CQP_WQE_SIZE * 8); - - if (post_sq) - i40iw_sc_cqp_post_sq(cqp); - return 0; -} - -/** * i40iw_sc_manage_hmc_pm_func_table - manage of function table * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion @@ -2859,9 +2819,7 @@ static enum i40iw_status_code i40iw_sc_qp_setctx( LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) | LS_64(qp->xmit_tph_en, I40IWQPC_XMITTPHEN) | LS_64(qp->rq_tph_en, I40IWQPC_RQTPHEN) | - LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN) | - LS_64(info->push_idx, I40IWQPC_PPIDX) | - LS_64(info->push_mode_en, I40IWQPC_PMENA); + LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN); set_64bit_val(qp_ctx, 8, qp->sq_pa); set_64bit_val(qp_ctx, 16, qp->rq_pa); @@ -4291,13 +4249,6 @@ static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev, pcmdinfo->in.u.add_arp_cache_entry.scratch, pcmdinfo->post_sq); break; - case OP_MANAGE_PUSH_PAGE: - status = i40iw_sc_manage_push_page( - pcmdinfo->in.u.manage_push_page.cqp, - &pcmdinfo->in.u.manage_push_page.info, - pcmdinfo->in.u.manage_push_page.scratch, - pcmdinfo->post_sq); - break; case OP_UPDATE_PE_SDS: /* case I40IW_CQP_OP_UPDATE_PE_SDS */ status = i40iw_update_pe_sds( @@ -5098,7 +5049,7 @@ void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi) i40iw_hw_stats_stop_timer(vsi); } -static struct i40iw_cqp_ops iw_cqp_ops = { +static const struct i40iw_cqp_ops iw_cqp_ops = { .cqp_init = i40iw_sc_cqp_init, .cqp_create = i40iw_sc_cqp_create, .cqp_post_sq = i40iw_sc_cqp_post_sq, @@ -5107,7 +5058,7 @@ static struct i40iw_cqp_ops iw_cqp_ops = { .poll_for_cqp_op_done = i40iw_sc_poll_for_cqp_op_done }; -static struct i40iw_ccq_ops iw_ccq_ops = { +static const struct i40iw_ccq_ops iw_ccq_ops = { .ccq_init = i40iw_sc_ccq_init, .ccq_create = i40iw_sc_ccq_create, .ccq_destroy = i40iw_sc_ccq_destroy, @@ -5116,7 +5067,7 @@ static struct i40iw_ccq_ops iw_ccq_ops = { .ccq_arm = i40iw_sc_ccq_arm }; -static struct i40iw_ceq_ops iw_ceq_ops = { +static const struct i40iw_ceq_ops iw_ceq_ops = { .ceq_init = i40iw_sc_ceq_init, .ceq_create = i40iw_sc_ceq_create, .cceq_create_done = i40iw_sc_cceq_create_done, @@ -5126,7 +5077,7 @@ static struct i40iw_ceq_ops iw_ceq_ops = { .process_ceq = i40iw_sc_process_ceq }; -static struct i40iw_aeq_ops iw_aeq_ops = { +static const struct i40iw_aeq_ops iw_aeq_ops = { .aeq_init = i40iw_sc_aeq_init, .aeq_create = i40iw_sc_aeq_create, .aeq_destroy = i40iw_sc_aeq_destroy, @@ -5137,11 +5088,11 @@ static struct i40iw_aeq_ops iw_aeq_ops = { }; /* iwarp pd ops */ -static struct i40iw_pd_ops iw_pd_ops = { +static const struct i40iw_pd_ops iw_pd_ops = { .pd_init 
= i40iw_sc_pd_init, }; -static struct i40iw_priv_qp_ops iw_priv_qp_ops = { +static const struct i40iw_priv_qp_ops iw_priv_qp_ops = { .qp_init = i40iw_sc_qp_init, .qp_create = i40iw_sc_qp_create, .qp_modify = i40iw_sc_qp_modify, @@ -5156,14 +5107,14 @@ static struct i40iw_priv_qp_ops iw_priv_qp_ops = { .iw_mr_fast_register = i40iw_sc_mr_fast_register }; -static struct i40iw_priv_cq_ops iw_priv_cq_ops = { +static const struct i40iw_priv_cq_ops iw_priv_cq_ops = { .cq_init = i40iw_sc_cq_init, .cq_create = i40iw_sc_cq_create, .cq_destroy = i40iw_sc_cq_destroy, .cq_modify = i40iw_sc_cq_modify, }; -static struct i40iw_mr_ops iw_mr_ops = { +static const struct i40iw_mr_ops iw_mr_ops = { .alloc_stag = i40iw_sc_alloc_stag, .mr_reg_non_shared = i40iw_sc_mr_reg_non_shared, .mr_reg_shared = i40iw_sc_mr_reg_shared, @@ -5172,8 +5123,7 @@ static struct i40iw_mr_ops iw_mr_ops = { .mw_alloc = i40iw_sc_mw_alloc }; -static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { - .manage_push_page = i40iw_sc_manage_push_page, +static const struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { .manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table, .set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile, .commit_fpm_values = i40iw_sc_commit_fpm_values, @@ -5195,7 +5145,7 @@ static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = { .update_resume_qp = i40iw_sc_resume_qp }; -static struct i40iw_hmc_ops iw_hmc_ops = { +static const struct i40iw_hmc_ops iw_hmc_ops = { .init_iw_hmc = i40iw_sc_init_iw_hmc, .parse_fpm_query_buf = i40iw_sc_parse_fpm_query_buf, .configure_iw_fpm = i40iw_sc_configure_iw_fpm, diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index e8367d67575d..86d5a33c57cc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -40,11 +40,6 @@ #define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024) #define I40IW_VF_DB_ADDR_OFFSET (64 * 1024) -#define I40IW_PUSH_OFFSET (4 * 1024 * 1024) -#define I40IW_PF_FIRST_PUSH_PAGE_INDEX 16 -#define I40IW_VF_PUSH_OFFSET ((8 + 64) * 1024) -#define I40IW_VF_FIRST_PUSH_PAGE_INDEX 2 - #define I40IW_PE_DB_SIZE_4M 1 #define I40IW_PE_DB_SIZE_8M 2 @@ -402,7 +397,6 @@ #define I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE 0x0e #define I40IW_CQP_OP_MANAGE_ARP 0x0f #define I40IW_CQP_OP_MANAGE_VF_PBLE_BP 0x10 -#define I40IW_CQP_OP_MANAGE_PUSH_PAGES 0x11 #define I40IW_CQP_OP_QUERY_RDMA_FEATURES 0x12 #define I40IW_CQP_OP_UPLOAD_CONTEXT 0x13 #define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14 @@ -843,7 +837,6 @@ #define I40IW_CQPSQ_MVPBP_PD_PLPBA_MASK \ (0x1fffffffffffffffULL << I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT) -/* Manage Push Page - MPP */ #define I40IW_INVALID_PUSH_PAGE_INDEX 0xffff #define I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT 0 @@ -1352,9 +1345,6 @@ #define I40IWQPSQ_ADDFRAGCNT_SHIFT 38 #define I40IWQPSQ_ADDFRAGCNT_MASK (0x7ULL << I40IWQPSQ_ADDFRAGCNT_SHIFT) -#define I40IWQPSQ_PUSHWQE_SHIFT 56 -#define I40IWQPSQ_PUSHWQE_MASK (1ULL << I40IWQPSQ_PUSHWQE_SHIFT) - #define I40IWQPSQ_STREAMMODE_SHIFT 58 #define I40IWQPSQ_STREAMMODE_MASK (1ULL << I40IWQPSQ_STREAMMODE_SHIFT) @@ -1740,18 +1730,17 @@ enum i40iw_alignment { #define OP_MW_ALLOC 20 #define OP_QP_FLUSH_WQES 21 #define OP_ADD_ARP_CACHE_ENTRY 22 -#define OP_MANAGE_PUSH_PAGE 23 -#define OP_UPDATE_PE_SDS 24 -#define OP_MANAGE_HMC_PM_FUNC_TABLE 25 -#define OP_SUSPEND 26 -#define OP_RESUME 27 -#define OP_MANAGE_VF_PBLE_BP 28 -#define OP_QUERY_FPM_VALUES 29 -#define OP_COMMIT_FPM_VALUES 30 -#define OP_REQUESTED_COMMANDS 31 -#define OP_COMPLETED_COMMANDS 32 -#define 
OP_GEN_AE 33 -#define OP_QUERY_RDMA_FEATURES 34 -#define OP_SIZE_CQP_STAT_ARRAY 35 +#define OP_UPDATE_PE_SDS 23 +#define OP_MANAGE_HMC_PM_FUNC_TABLE 24 +#define OP_SUSPEND 25 +#define OP_RESUME 26 +#define OP_MANAGE_VF_PBLE_BP 27 +#define OP_QUERY_FPM_VALUES 28 +#define OP_COMMIT_FPM_VALUES 29 +#define OP_REQUESTED_COMMANDS 30 +#define OP_COMPLETED_COMMANDS 31 +#define OP_GEN_AE 32 +#define OP_QUERY_RDMA_FEATURES 33 +#define OP_SIZE_CQP_STAT_ARRAY 34 #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h index d1c5855bd8c3..36a19c4e5bba 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_status.h +++ b/drivers/infiniband/hw/i40iw/i40iw_status.h @@ -61,7 +61,6 @@ enum i40iw_status_code { I40IW_ERR_QUEUE_EMPTY = -22, I40IW_ERR_INVALID_ALIGNMENT = -23, I40IW_ERR_FLUSHED_QUEUE = -24, - I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25, I40IW_ERR_INVALID_INLINE_DATA_SIZE = -26, I40IW_ERR_TIMEOUT = -27, I40IW_ERR_OPCODE_MISMATCH = -28, diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index c3babf3cbb8e..394e182686cf 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -387,7 +387,6 @@ struct i40iw_sc_qp { u8 *q2_buf; u64 qp_compl_ctx; u16 qs_handle; - u16 push_idx; u8 sq_tph_val; u8 rq_tph_val; u8 qp_state; @@ -493,16 +492,16 @@ struct i40iw_sc_dev { struct i40iw_sc_aeq *aeq; struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT]; struct i40iw_sc_cq *ccq; - struct i40iw_cqp_ops *cqp_ops; - struct i40iw_ccq_ops *ccq_ops; - struct i40iw_ceq_ops *ceq_ops; - struct i40iw_aeq_ops *aeq_ops; - struct i40iw_pd_ops *iw_pd_ops; - struct i40iw_priv_qp_ops *iw_priv_qp_ops; - struct i40iw_priv_cq_ops *iw_priv_cq_ops; - struct i40iw_mr_ops *mr_ops; - struct i40iw_cqp_misc_ops *cqp_misc_ops; - struct i40iw_hmc_ops *hmc_ops; + const struct i40iw_cqp_ops *cqp_ops; + const struct i40iw_ccq_ops *ccq_ops; + const struct i40iw_ceq_ops *ceq_ops; + const struct i40iw_aeq_ops *aeq_ops; + const struct i40iw_pd_ops *iw_pd_ops; + const struct i40iw_priv_qp_ops *iw_priv_qp_ops; + const struct i40iw_priv_cq_ops *iw_priv_cq_ops; + const struct i40iw_mr_ops *mr_ops; + const struct i40iw_cqp_misc_ops *cqp_misc_ops; + const struct i40iw_hmc_ops *hmc_ops; struct i40iw_vchnl_if vchnl_if; const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; @@ -749,8 +748,6 @@ struct i40iw_qp_host_ctx_info { struct i40iwarp_offload_info *iwarp_info; u32 send_cq_num; u32 rcv_cq_num; - u16 push_idx; - bool push_mode_en; bool tcp_info_valid; bool iwarp_info_valid; bool err_rq_idx_valid; @@ -937,12 +934,6 @@ struct i40iw_local_mac_ipaddr_entry_info { u8 entry_idx; }; -struct i40iw_cqp_manage_push_page_info { - u32 push_idx; - u16 qs_handle; - u8 free_page; -}; - struct i40iw_qp_flush_info { u16 sq_minor_code; u16 sq_major_code; @@ -1114,9 +1105,6 @@ struct i40iw_mr_ops { }; struct i40iw_cqp_misc_ops { - enum i40iw_status_code (*manage_push_page)(struct i40iw_sc_cqp *, - struct i40iw_cqp_manage_push_page_info *, - u64, bool); enum i40iw_status_code (*manage_hmc_pm_func_table)(struct i40iw_sc_cqp *, u64, u8, bool, bool); enum i40iw_status_code (*set_hmc_resource_profile)(struct i40iw_sc_cqp *, @@ -1254,12 +1242,6 @@ struct cqp_info { } manage_vf_pble_bp; struct { - struct i40iw_sc_cqp *cqp; - struct i40iw_cqp_manage_push_page_info info; - u64 scratch; - } manage_push_page; - - struct { struct i40iw_sc_dev *dev; struct i40iw_upload_context_info info; u64 scratch; diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c 
b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 8afa5a67a86b..c3633c9944db 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -115,17 +115,6 @@ void i40iw_qp_post_wr(struct i40iw_qp_uk *qp) } /** - * i40iw_qp_ring_push_db - ring qp doorbell - * @qp: hw qp ptr - * @wqe_idx: wqe index - */ -static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx) -{ - set_32bit_val(qp->push_db, 0, LS_32((wqe_idx >> 2), I40E_PFPE_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id); - qp->initial_ring.head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); -} - -/** * i40iw_qp_get_next_send_wqe - return next wqe ptr * @qp: hw qp ptr * @wqe_idx: return wqe index @@ -426,7 +415,6 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, u64 *wqe; u8 *dest, *src; struct i40iw_inline_rdma_write *op_info; - u64 *push; u64 header = 0; u32 wqe_idx; enum i40iw_status_code ret_code; @@ -453,7 +441,6 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) | LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | LS_64(1, I40IWQPSQ_INLINEDATAFLAG) | - LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) | LS_64(read_fence, I40IWQPSQ_READFENCE) | LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | @@ -475,14 +462,8 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, set_64bit_val(wqe, 24, header); - if (qp->push_db) { - push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20); - memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32); - i40iw_qp_ring_push_db(qp, wqe_idx); - } else { - if (post_sq) - i40iw_qp_post_wr(qp); - } + if (post_sq) + i40iw_qp_post_wr(qp); return 0; } @@ -507,7 +488,6 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, enum i40iw_status_code ret_code; bool read_fence = false; u8 wqe_size; - u64 *push; op_info = &info->op.inline_send; if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE) @@ -526,7 +506,6 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, LS_64(info->op_type, I40IWQPSQ_OPCODE) | LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | LS_64(1, I40IWQPSQ_INLINEDATAFLAG) | - LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) | LS_64(read_fence, I40IWQPSQ_READFENCE) | LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | @@ -548,14 +527,8 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, set_64bit_val(wqe, 24, header); - if (qp->push_db) { - push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20); - memcpy(push, wqe, (op_info->len > 16) ? 
op_info->len + 16 : 32); - i40iw_qp_ring_push_db(qp, wqe_idx); - } else { - if (post_sq) - i40iw_qp_post_wr(qp); - } + if (post_sq) + i40iw_qp_post_wr(qp); return 0; } @@ -772,7 +745,6 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, q_type = (u8)RS_64(qword3, I40IW_CQ_SQ); info->error = (bool)RS_64(qword3, I40IW_CQ_ERROR); - info->push_dropped = (bool)RS_64(qword3, I40IWCQ_PSHDROP); if (info->error) { info->comp_status = I40IW_COMPL_STATUS_FLUSHED; info->major_err = (bool)RS_64(qword3, I40IW_CQ_MAJERR); @@ -951,7 +923,6 @@ enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth) static const struct i40iw_qp_uk_ops iw_qp_uk_ops = { .iw_qp_post_wr = i40iw_qp_post_wr, - .iw_qp_ring_push_db = i40iw_qp_ring_push_db, .iw_rdma_write = i40iw_rdma_write, .iw_rdma_read = i40iw_rdma_read, .iw_send = i40iw_send, @@ -1009,11 +980,7 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp, qp->wqe_alloc_reg = info->wqe_alloc_reg; qp->qp_id = info->qp_id; - qp->sq_size = info->sq_size; - qp->push_db = info->push_db; - qp->push_wqe = info->push_wqe; - qp->max_sq_frag_cnt = info->max_sq_frag_cnt; sq_ring_size = qp->sq_size << sqshift; diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index b125925641e0..93fc3081dd65 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -64,13 +64,11 @@ enum i40iw_device_capabilities_const { I40IW_MAX_SGE_RD = 1, I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647, I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647, - I40IW_MAX_PUSH_PAGE_COUNT = 4096, I40IW_MAX_PE_ENABLED_VF_COUNT = 32, I40IW_MAX_VF_FPM_ID = 47, I40IW_MAX_VF_PER_PF = 127, I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496, I40IW_MAX_INLINE_DATA_SIZE = 48, - I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48, I40IW_MAX_IRD_SIZE = 64, I40IW_MAX_ORD_SIZE = 127, I40IW_MAX_WQ_ENTRIES = 2048, @@ -272,7 +270,6 @@ struct i40iw_cq_poll_info { u16 minor_err; u8 op_type; bool stag_invalid_set; - bool push_dropped; bool error; bool is_srq; bool solicited_event; @@ -280,7 +277,6 @@ struct i40iw_cq_poll_info { struct i40iw_qp_uk_ops { void (*iw_qp_post_wr)(struct i40iw_qp_uk *); - void (*iw_qp_ring_push_db)(struct i40iw_qp_uk *, u32); enum i40iw_status_code (*iw_rdma_write)(struct i40iw_qp_uk *, struct i40iw_post_sq_info *, bool); enum i40iw_status_code (*iw_rdma_read)(struct i40iw_qp_uk *, @@ -340,8 +336,6 @@ struct i40iw_qp_uk { struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array; u64 *rq_wrid_array; u64 *shadow_area; - u32 *push_db; - u64 *push_wqe; struct i40iw_ring sq_ring; struct i40iw_ring rq_ring; struct i40iw_ring initial_ring; @@ -381,8 +375,6 @@ struct i40iw_qp_uk_init_info { u64 *shadow_area; struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array; u64 *rq_wrid_array; - u32 *push_db; - u64 *push_wqe; u32 qp_id; u32 sq_size; u32 rq_size; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 533f3caecb7a..65aedfe57e77 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -180,78 +180,6 @@ static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) } /** - * i40iw_alloc_push_page - allocate a push page for qp - * @iwdev: iwarp device - * @qp: hardware control qp - */ -static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp) -{ - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; - enum i40iw_status_code status; - - if 
(qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX) - return; - - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true); - if (!cqp_request) - return; - - atomic_inc(&cqp_request->refcount); - - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; - cqp_info->post_sq = 1; - - cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; - cqp_info->in.u.manage_push_page.info.free_page = 0; - cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; - cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; - - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - qp->push_idx = cqp_request->compl_info.op_ret_val; - else - i40iw_pr_err("CQP-OP Push page fail"); - i40iw_put_cqp_request(&iwdev->cqp, cqp_request); -} - -/** - * i40iw_dealloc_push_page - free a push page for qp - * @iwdev: iwarp device - * @qp: hardware control qp - */ -static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp) -{ - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; - enum i40iw_status_code status; - - if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) - return; - - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); - if (!cqp_request) - return; - - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE; - cqp_info->post_sq = 1; - - cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx; - cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle; - cqp_info->in.u.manage_push_page.info.free_page = 1; - cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp; - cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request; - - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; - else - i40iw_pr_err("CQP-OP Push page fail"); -} - -/** * i40iw_alloc_pd - allocate protection domain * @pd: PD pointer * @udata: user data @@ -348,7 +276,6 @@ void i40iw_free_qp_resources(struct i40iw_qp *iwqp) u32 qp_num = iwqp->ibqp.qp_num; i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); - i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp); if (qp_num) i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num); if (iwpbl->pbl_allocated) @@ -533,7 +460,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(-ENODEV); if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE) init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; @@ -561,8 +488,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; - qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; - iwqp->iwdev = iwdev; iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info; @@ -606,8 +531,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, err_code = -EOPNOTSUPP; goto error; } - if (iwdev->push_mode) - i40iw_alloc_push_page(iwdev, qp); if (udata) { err_code = ib_copy_from_udata(&req, udata, sizeof(req)); if (err_code) { @@ -666,13 +589,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, ctx_info->iwarp_info_valid = true; ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id; ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id; - if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) { - ctx_info->push_mode_en = false; - } else { - ctx_info->push_mode_en = true; - ctx_info->push_idx = qp->push_idx; - } - ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)iwqp->host_ctx.va, ctx_info); @@ -712,7 +628,7 @@ static 
struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, uresp.actual_sq_size = sq_size; uresp.actual_rq_size = rq_size; uresp.qp_id = qp_num; - uresp.push_idx = qp->push_idx; + uresp.push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (err_code) { i40iw_pr_err("copy_to_udata failed\n"); @@ -832,6 +748,9 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, u32 err; unsigned long flags; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + memset(&info, 0, sizeof(info)); ctx_info = &iwqp->ctx_info; iwarp_info = &iwqp->iwarp_info; @@ -1081,6 +1000,9 @@ static int i40iw_create_cq(struct ib_cq *ibcq, int err_code; int entries = attr->cqe; + if (attr->flags) + return -EOPNOTSUPP; + if (iwdev->closing) return -ENODEV; @@ -2033,7 +1955,7 @@ static ssize_t hw_rev_show(struct device *dev, rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev); u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev; - return sprintf(buf, "%x\n", hw_rev); + return sysfs_emit(buf, "%x\n", hw_rev); } static DEVICE_ATTR_RO(hw_rev); @@ -2043,7 +1965,7 @@ static DEVICE_ATTR_RO(hw_rev); static ssize_t hca_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "I40IW\n"); + return sysfs_emit(buf, "I40IW\n"); } static DEVICE_ATTR_RO(hca_type); @@ -2053,7 +1975,7 @@ static DEVICE_ATTR_RO(hca_type); static ssize_t board_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%.*s\n", 32, "I40IW Board ID"); + return sysfs_emit(buf, "%.*s\n", 32, "I40IW Board ID"); } static DEVICE_ATTR_RO(board_id); @@ -2661,27 +2583,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.node_type = RDMA_NODE_RNIC; ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr); - iwibdev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_POST_SEND); iwibdev->ibdev.phys_port_cnt = 1; iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 8bd16474708f..f3ace85552f3 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1523,6 +1523,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc return; } else *slave_id = slave; + break; default: /* nothing */; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index cd0fba6b0964..e3cd402c079a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2024,7 +2024,8 @@ static ssize_t hca_type_show(struct device *device, { struct mlx4_ib_dev *dev = 
rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); + + return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2033,7 +2034,8 @@ static ssize_t hw_rev_show(struct device *device, { struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->dev->rev_id); + + return sysfs_emit(buf, "%x\n", dev->dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2043,8 +2045,7 @@ static ssize_t board_id_show(struct device *device, struct mlx4_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, - dev->dev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -2264,10 +2265,7 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, u64 release_mac = MLX4_IB_INVALID_MAC; struct mlx4_ib_qp *qp; - read_lock(&dev_base_lock); new_smac = mlx4_mac_to_u64(dev->dev_addr); - read_unlock(&dev_base_lock); - atomic64_set(&ibdev->iboe.mac[port - 1], new_smac); /* no need for update QP1 and mac registration in non-SRIOV */ @@ -2657,73 +2655,25 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; - ibdev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == - IB_LINK_LAYER_ETHERNET))) { - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + IB_LINK_LAYER_ETHERNET))) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); - } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || - dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { - ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << 
IB_USER_VERBS_CMD_DEALLOC_MW); + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops); - } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { - ibdev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops); } if (check_flow_steering_support(dev)) { ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); } diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 5e4ec9786081..33f525b744f2 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -988,53 +988,63 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, } static ssize_t sysfs_show_group(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { struct mcast_group *group = container_of(attr, struct mcast_group, dentry); struct mcast_req *req = NULL; - char pending_str[40]; char state_str[40]; - ssize_t len = 0; - int f; + char pending_str[40]; + int len; + int i; + u32 hoplimit; if (group->state == MCAST_IDLE) - sprintf(state_str, "%s", get_state_string(group->state)); + scnprintf(state_str, sizeof(state_str), "%s", + get_state_string(group->state)); else - sprintf(state_str, "%s(TID=0x%llx)", - get_state_string(group->state), - be64_to_cpu(group->last_req_tid)); + scnprintf(state_str, sizeof(state_str), "%s(TID=0x%llx)", + get_state_string(group->state), + be64_to_cpu(group->last_req_tid)); + if (list_empty(&group->pending_list)) { - sprintf(pending_str, "No"); + scnprintf(pending_str, sizeof(pending_str), "No"); } else { - req = list_first_entry(&group->pending_list, struct mcast_req, group_list); - sprintf(pending_str, "Yes(TID=0x%llx)", - be64_to_cpu(req->sa_mad.mad_hdr.tid)); + req = list_first_entry(&group->pending_list, struct mcast_req, + group_list); + scnprintf(pending_str, sizeof(pending_str), "Yes(TID=0x%llx)", + be64_to_cpu(req->sa_mad.mad_hdr.tid)); } - len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ", - group->rec.scope_join_state & 0xf, - group->members[2], group->members[1], group->members[0], - atomic_read(&group->refcount), - pending_str, - state_str); - for (f = 0; f < MAX_VFS; ++f) - if (group->func[f].state == MCAST_MEMBER) - len += sprintf(buf + len, "%d[%1x] ", - f, group->func[f].join_state); - - len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x " - "%4x %4x %2x %2x)\n", - be16_to_cpu(group->rec.pkey), - be32_to_cpu(group->rec.qkey), - (group->rec.mtusel_mtu & 0xc0) >> 6, - group->rec.mtusel_mtu & 0x3f, - group->rec.tclass, - (group->rec.ratesel_rate & 0xc0) >> 6, - group->rec.ratesel_rate & 0x3f, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28, - (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8, - be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff, - group->rec.proxy_join); + + len = sysfs_emit(buf, "%1d [%02d,%02d,%02d] %4d %4s %5s ", + group->rec.scope_join_state & 0xf, + group->members[2], + group->members[1], + group->members[0], + atomic_read(&group->refcount), + pending_str, + state_str); + + for (i = 0; i < MAX_VFS; i++) { + if (group->func[i].state == MCAST_MEMBER) + len += sysfs_emit_at(buf, len, "%d[%1x] ", i, + group->func[i].join_state); + } + + 
hoplimit = be32_to_cpu(group->rec.sl_flowlabel_hoplimit); + len += sysfs_emit_at(buf, len, + "\t\t(%4hx %4x %2x %2x %2x %2x %2x %4x %4x %2x %2x)\n", + be16_to_cpu(group->rec.pkey), + be32_to_cpu(group->rec.qkey), + (group->rec.mtusel_mtu & 0xc0) >> 6, + (group->rec.mtusel_mtu & 0x3f), + group->rec.tclass, + (group->rec.ratesel_rate & 0xc0) >> 6, + (group->rec.ratesel_rate & 0x3f), + (hoplimit & 0xf0000000) >> 28, + (hoplimit & 0x0fffff00) >> 8, + (hoplimit & 0x000000ff), + group->rec.proxy_join); return len; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 58df06492d69..78c9bb79ec75 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -908,10 +908,10 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach); -int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, - u64 start, u64 length, u64 virt_addr, - int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata); +struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, const struct ib_gid_attr *attr); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 426fed005d53..50becc0e4b62 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -456,10 +456,10 @@ err_free: return ERR_PTR(err); } -int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, - u64 start, u64 length, u64 virt_addr, - int mr_access_flags, struct ib_pd *pd, - struct ib_udata *udata) +struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(mr->device); struct mlx4_ib_mr *mmr = to_mmr(mr); @@ -472,9 +472,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, * race exists. */ err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry); - if (err) - return err; + return ERR_PTR(err); if (flags & IB_MR_REREG_PD) { err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry, @@ -542,8 +541,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, release_mpt_entry: mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry); - - return err; + if (err) + return ERR_PTR(err); + return NULL; } static int diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 5cb8e602294c..651785bd57f2 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1493,7 +1493,7 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | MLX4_IB_QP_CREATE_ROCE_V2_GSI)) - return -EINVAL; + return -EOPNOTSUPP; if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { if (init_attr->qp_type != IB_QPT_UD) @@ -1561,6 +1561,11 @@ static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, if (err) return err; + if (init_attr->create_flags & + (MLX4_IB_SRIOV_SQP | MLX4_IB_SRIOV_TUNNEL_QP)) + /* Internal QP created with ib_create_qp */ + rdma_restrack_no_track(&qp->ibqp.res); + qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? 
sqpn : 1; @@ -2787,6 +2792,9 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx4_ib_qp *mqp = to_mqp(ibqp); int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { @@ -4007,7 +4015,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context.params2)); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index bf618529e734..6a381751c0d8 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -86,6 +86,10 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, int err; int i; + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index ea1f3a081b05..1b5891130aab 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -56,7 +56,7 @@ static ssize_t show_admin_alias_guid(struct device *dev, mlx4_ib_iov_dentry->entry_num, port->num); - return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); + return sysfs_emit(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val)); } /* store_admin_alias_guid stores the (new) administratively assigned value of that GUID. 
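The sysfs hunks in this file, like the hw_rev/hca_type/board_id and mcg group changes above, all follow one pattern: sprintf() and open-coded length tracking in show callbacks are replaced with sysfs_emit() and sysfs_emit_at(), which clamp output to PAGE_SIZE and warn if the buffer is not the page-aligned one sysfs handed in. A minimal sketch of the resulting pattern is below; the attribute and the value it reports are made up for illustration and are not code from this series:

#include <linux/device.h>
#include <linux/sysfs.h>

/* Hypothetical read-only attribute; stands in for real driver state. */
static unsigned int example_hw_rev = 0x42;

static ssize_t example_hw_rev_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
{
        /* sysfs_emit() writes at most PAGE_SIZE bytes and returns the length. */
        return sysfs_emit(buf, "%x\n", example_hw_rev);
}
static DEVICE_ATTR_RO(example_hw_rev);

Multi-line output, as in sysfs_show_group() above, is built the same way by accumulating len += sysfs_emit_at(buf, len, ...) in place of sprintf(buf + len, ...).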
@@ -117,22 +117,24 @@ static ssize_t show_port_gid(struct device *dev, struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; struct mlx4_ib_dev *mdev = port->dev; union ib_gid gid; - ssize_t ret; + int ret; + __be16 *raw; ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num, mlx4_ib_iov_dentry->entry_num, &gid, 1); if (ret) return ret; - ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - be16_to_cpu(((__be16 *) gid.raw)[0]), - be16_to_cpu(((__be16 *) gid.raw)[1]), - be16_to_cpu(((__be16 *) gid.raw)[2]), - be16_to_cpu(((__be16 *) gid.raw)[3]), - be16_to_cpu(((__be16 *) gid.raw)[4]), - be16_to_cpu(((__be16 *) gid.raw)[5]), - be16_to_cpu(((__be16 *) gid.raw)[6]), - be16_to_cpu(((__be16 *) gid.raw)[7])); - return ret; + + raw = (__be16 *)gid.raw; + return sysfs_emit(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + be16_to_cpu(raw[0]), + be16_to_cpu(raw[1]), + be16_to_cpu(raw[2]), + be16_to_cpu(raw[3]), + be16_to_cpu(raw[4]), + be16_to_cpu(raw[5]), + be16_to_cpu(raw[6]), + be16_to_cpu(raw[7])); } static ssize_t show_phys_port_pkey(struct device *dev, @@ -151,7 +153,7 @@ static ssize_t show_phys_port_pkey(struct device *dev, if (ret) return ret; - return sprintf(buf, "0x%04x\n", pkey); + return sysfs_emit(buf, "0x%04x\n", pkey); } #define DENTRY_REMOVE(_dentry) \ @@ -441,16 +443,12 @@ static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr, { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); - ssize_t ret = -ENODEV; - - if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >= - (p->dev->dev->caps.pkey_table_len[p->port_num])) - ret = sprintf(buf, "none\n"); - else - ret = sprintf(buf, "%d\n", - p->dev->pkeys.virt2phys_pkey[p->slave] - [p->port_num - 1][tab_attr->index]); - return ret; + struct pkey_mgt *m = &p->dev->pkeys; + u8 key = m->virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index]; + + if (key >= p->dev->dev->caps.pkey_table_len[p->port_num]) + return sysfs_emit(buf, "none\n"); + return sysfs_emit(buf, "%d\n", key); } static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, @@ -488,7 +486,7 @@ static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, static ssize_t show_port_gid_idx(struct mlx4_port *p, struct port_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", p->slave); + return sysfs_emit(buf, "%d\n", p->slave); } static struct attribute ** @@ -542,14 +540,10 @@ static ssize_t sysfs_show_smi_enabled(struct device *dev, { struct mlx4_port *p = container_of(attr, struct mlx4_port, smi_enabled); - ssize_t len = 0; - if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) - len = sprintf(buf, "%d\n", 1); - else - len = sprintf(buf, "%d\n", 0); - - return len; + return sysfs_emit(buf, "%d\n", + !!mlx4_vf_smi_enabled(p->dev->dev, p->slave, + p->port_num)); } static ssize_t sysfs_show_enable_smi_admin(struct device *dev, @@ -558,14 +552,10 @@ static ssize_t sysfs_show_enable_smi_admin(struct device *dev, { struct mlx4_port *p = container_of(attr, struct mlx4_port, enable_smi_admin); - ssize_t len = 0; - - if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num)) - len = sprintf(buf, "%d\n", 1); - else - len = sprintf(buf, "%d\n", 0); - return len; + return sysfs_emit(buf, "%d\n", + !!mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, + p->port_num)); } static ssize_t sysfs_store_enable_smi_admin(struct device *dev, diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 
fb62f1d04afa..eb92cefffd77 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -707,10 +707,10 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd = {}; + unsigned long page_size; + unsigned int page_offset_quantized; size_t ucmdlen; - int page_shift; __be64 *pas; - int npages; int ncont; void *cqc; int err; @@ -742,14 +742,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, return err; } + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift, - &ncont, NULL); - mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", - ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); + ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n", + ucmd.buf_addr, entries * ucmd.cqe_size, + ib_umem_num_pages(cq->buf.umem), page_size, ncont); *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; @@ -760,11 +770,12 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); - mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, - page_shift - MLX5_ADAPTER_PAGE_SHIFT); + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { *index = ucmd.uar_page_index; @@ -1128,13 +1139,12 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) } static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, - int entries, struct ib_udata *udata, int *npas, - int *page_shift, int *cqe_size) + int entries, struct ib_udata *udata, + int *cqe_size) { struct mlx5_ib_resize_cq ucmd; struct ib_umem *umem; int err; - int npages; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) @@ -1155,9 +1165,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, - npas, NULL); - cq->resize_umem = umem; *cqe_size = ucmd.cqe_size; @@ -1250,7 +1257,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int err; int npas; __be64 *pas; - int page_shift; + unsigned int page_offset_quantized = 0; + unsigned int page_shift; int inlen; int cqe_size; unsigned long flags; @@ -1277,22 +1285,34 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) mutex_lock(&cq->resize_mutex); if (udata) { - err = resize_user(dev, cq, entries, udata, &npas, &page_shift, - &cqe_size); + unsigned long page_size; + + err = resize_user(dev, cq, entries, udata, &cqe_size); + if (err) + goto ex; + + page_size = mlx5_umem_find_best_cq_quantized_pgoff( + cq->resize_umem, cqc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto ex_resize; + } + 
npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size); + page_shift = order_base_2(page_size); } else { + struct mlx5_frag_buf *frag_buf; + cqe_size = 64; err = resize_kernel(dev, cq, entries, cqe_size); - if (!err) { - struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf; - - npas = frag_buf->npages; - page_shift = frag_buf->page_shift; - } + if (err) + goto ex; + frag_buf = &cq->resize_buf->frag_buf; + npas = frag_buf->npages; + page_shift = frag_buf->page_shift; } - if (err) - goto ex; - inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; @@ -1304,8 +1324,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); if (udata) - mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, - pas, 0); + mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas, + 0); else mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); @@ -1319,6 +1339,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, page_offset, page_offset_quantized); MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size, cq->private_flags & diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9e3d8b826498..819c142857d6 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -93,9 +93,6 @@ struct devx_async_event_file { struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; - u32 page_offset; - int page_shift; - int ncont; u32 dinlen; u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; }; @@ -1311,7 +1308,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, else ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(ret, why, uobject)) + if (ret) return ret; devx_event_table = &dev->devx_event_table; @@ -2057,9 +2054,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, u64 addr; size_t size; u32 access; - int npages; int err; - u32 page_mask; if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) @@ -2073,57 +2068,62 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - err = ib_check_mr_access(access); + err = ib_check_mr_access(&dev->ib_dev, access); if (err) return err; obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); - - mlx5_ib_cont_pages(obj->umem, obj->umem->address, - MLX5_MKEY_PAGE_SHIFT_MASK, &npages, - &obj->page_shift, &obj->ncont, NULL); - - if (!npages) { - ib_umem_release(obj->umem); - return -EINVAL; - } - - page_mask = (1 << obj->page_shift) - 1; - obj->page_offset = obj->umem->address & page_mask; - return 0; } -static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs, +static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, + struct uverbs_attr_bundle *attrs, struct devx_umem *obj, struct devx_umem_reg_cmd *cmd) { + unsigned int page_size; + __be64 *mtt; + void *umem; + + /* + * We don't know what the user intends to use this umem for, but the HW + * restrictions must be met. MR, doorbell records, QP, WQ and CQ all + * have different requirements. 
Since we have no idea how to sort this + * out, only support PAGE_SIZE with the expectation that userspace will + * provide the necessary alignments inside the known PAGE_SIZE and that + * FW will check everything. + */ + page_size = ib_umem_find_best_pgoff( + obj->umem, PAGE_SIZE, + __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset), + 0)); + if (!page_size) + return -EINVAL; + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + - (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); + (MLX5_ST_SZ_BYTES(mtt) * + ib_umem_num_dma_blocks(obj->umem, page_size)); cmd->in = uverbs_zalloc(attrs, cmd->inlen); - return PTR_ERR_OR_ZERO(cmd->in); -} - -static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, - struct devx_umem *obj, - struct devx_umem_reg_cmd *cmd) -{ - void *umem; - __be64 *mtt; + if (IS_ERR(cmd->in)) + return PTR_ERR(cmd->in); umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); - MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); - MLX5_SET(umem, umem, log_page_size, obj->page_shift - - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(umem, umem, page_offset, obj->page_offset); - mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, + MLX5_SET64(umem, umem, num_of_mtt, + ib_umem_num_dma_blocks(obj->umem, page_size)); + MLX5_SET(umem, umem, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(umem, umem, page_offset, + ib_umem_dma_offset(obj->umem, page_size)); + + mlx5_ib_populate_pas(obj->umem, page_size, mtt, (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | - MLX5_IB_MTT_READ); + MLX5_IB_MTT_READ); + return 0; } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( @@ -2150,12 +2150,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (err) goto err_obj_free; - err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd); + err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd); if (err) goto err_umem_release; - devx_umem_reg_cmd_build(dev, obj, &cmd); - MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, sizeof(cmd.out)); @@ -2187,7 +2185,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject, int err; err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); - if (ib_is_destroy_retryable(err, why, uobject)) + if (err) return err; ib_umem_release(obj->umem); @@ -2600,8 +2598,8 @@ static const struct file_operations devx_async_event_fops = { .llseek = no_llseek, }; -static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_cmd_event_file *comp_ev_file = container_of(uobj, struct devx_async_cmd_event_file, @@ -2623,11 +2621,10 @@ static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, kvfree(entry); } spin_unlock_irq(&comp_ev_file->ev_queue.lock); - return 0; }; -static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static void devx_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_event_file *ev_file = container_of(uobj, struct devx_async_event_file, @@ -2671,7 +2668,6 @@ static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, mutex_unlock(&dev->devx_event_table.event_xa_lock); put_device(&dev->ib_dev.dev); - return 0; }; DECLARE_UVERBS_NAMED_METHOD( diff --git 
a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 492cfe063bca..25da0b05b4e2 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2035,11 +2035,9 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_flow_matcher *obj = uobject->object; - int ret; - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); - if (ret) - return ret; + if (atomic_read(&obj->usecnt)) + return -EBUSY; kfree(obj); return 0; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 55545f1286e5..3bae9ba0ead8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -75,12 +75,6 @@ static LIST_HEAD(mlx5_ib_dev_list); */ static DEFINE_MUTEX(mlx5_ib_multiport_mutex); -/* We can't use an array for xlt_emergency_page because dma_map_single - * doesn't work on kernel modules memory - */ -static unsigned long xlt_emergency_page; -static struct mutex xlt_emergency_page_mutex; - struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) { struct mlx5_ib_dev *dev; @@ -425,10 +419,22 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR): + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_NDR; + break; case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_NDR; + break; + case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_NDR; + break; default: return -EINVAL; } @@ -2628,7 +2634,7 @@ static ssize_t fw_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); + return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages); } static DEVICE_ATTR_RO(fw_pages); @@ -2638,7 +2644,7 @@ static ssize_t reg_pages_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); + return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); } static DEVICE_ATTR_RO(reg_pages); @@ -2648,7 +2654,7 @@ static ssize_t hca_type_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); + return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device); } static DEVICE_ATTR_RO(hca_type); @@ -2658,7 +2664,7 @@ static ssize_t hw_rev_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%x\n", dev->mdev->rev_id); + return sysfs_emit(buf, "%x\n", dev->mdev->rev_id); } static DEVICE_ATTR_RO(hw_rev); @@ -2668,8 +2674,8 @@ static ssize_t board_id_show(struct device *device, struct mlx5_ib_dev *dev = rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); - return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, - dev->mdev->board_id); + return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN, + dev->mdev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -4024,6 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { 
.create_cq = mlx5_ib_create_cq, .create_qp = mlx5_ib_create_qp, .create_srq = mlx5_ib_create_srq, + .create_user_ah = mlx5_ib_create_ah, .dealloc_pd = mlx5_ib_dealloc_pd, .dealloc_ucontext = mlx5_ib_dealloc_ucontext, .del_gid = mlx5_ib_del_gid, @@ -4141,42 +4148,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; int err; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_REREG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - dev->ib_dev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); - if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) ib_set_device_ops(&dev->ib_dev, @@ -4187,19 +4158,11 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); - if (MLX5_CAP_GEN(mdev, imaicl)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + if (MLX5_CAP_GEN(mdev, imaicl)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); - } - if (MLX5_CAP_GEN(mdev, xrc)) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | - (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + if (MLX5_CAP_GEN(mdev, xrc)) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); - } if (MLX5_CAP_DEV_MEM(mdev, memic) || MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & @@ -4278,12 +4241,6 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); port_num = mlx5_core_native_port_num(dev->mdev) - 1; @@ -4878,30 +4835,17 @@ static struct auxiliary_driver mlx5r_driver = { .id_table = mlx5r_id_table, }; -unsigned long mlx5_ib_get_xlt_emergency_page(void) -{ - mutex_lock(&xlt_emergency_page_mutex); - return xlt_emergency_page; -} - -void 
mlx5_ib_put_xlt_emergency_page(void) -{ - mutex_unlock(&xlt_emergency_page_mutex); -} - static int __init mlx5_ib_init(void) { int ret; - xlt_emergency_page = __get_free_page(GFP_KERNEL); + xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL); if (!xlt_emergency_page) return -ENOMEM; - mutex_init(&xlt_emergency_page_mutex); - mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); if (!mlx5_ib_event_wq) { - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); return -ENOMEM; } @@ -4934,8 +4878,7 @@ static void __exit mlx5_ib_cleanup(void) mlx5r_rep_cleanup(); destroy_workqueue(mlx5_ib_event_wq); - mutex_destroy(&xlt_emergency_page_mutex); - free_page(xlt_emergency_page); + free_page((unsigned long)xlt_emergency_page); } module_init(mlx5_ib_init); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 13de3d2edd34..844545064c9e 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -36,161 +36,65 @@ #include "mlx5_ib.h" #include <linux/jiffies.h> -/* @umem: umem object to scan - * @addr: ib virtual address requested by the user - * @max_page_shift: high limit for page_shift - 0 means no limit - * @count: number of PAGE_SIZE pages covered by umem - * @shift: page shift for the compound pages found in the region - * @ncont: number of compund pages - * @order: log2 of the number of compound pages +/* + * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be + * filled in the pas array. */ -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, int *shift, - int *ncont, int *order) +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags) { - unsigned long tmp; - unsigned long m; - u64 base = ~0, p = 0; - u64 len, pfn; - int i = 0; - struct scatterlist *sg; - int entry; - - addr = addr >> PAGE_SHIFT; - tmp = (unsigned long)addr; - m = find_first_bit(&tmp, BITS_PER_LONG); - if (max_page_shift) - m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); - - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - pfn = sg_dma_address(sg) >> PAGE_SHIFT; - if (base + p != pfn) { - /* If either the offset or the new - * base are unaligned update m - */ - tmp = (unsigned long)(pfn | p); - if (!IS_ALIGNED(tmp, 1 << m)) - m = find_first_bit(&tmp, BITS_PER_LONG); - - base = pfn; - p = 0; - } - - p += len; - i += len; - } - - if (i) { - m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); - - if (order) - *order = ilog2(roundup_pow_of_two(i) >> m); - - *ncont = DIV_ROUND_UP(i, (1 << m)); - } else { - m = 0; + struct ib_block_iter biter; - if (order) - *order = 0; - - *ncont = 0; + rdma_umem_for_each_dma_block (umem, &biter, page_size) { + *pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) | + access_flags); + pas++; } - *shift = PAGE_SHIFT + m; - *count = i; } /* - * Populate the given array with bus addresses from the umem. - * - * dev - mlx5_ib device - * umem - umem to use to fill the pages - * page_shift - determines the page size used in the resulting array - * offset - offset into the umem to start from, - * only implemented for ODP umems - * num_pages - total number of pages to fill - * pas - bus addresses array to fill - * access_flags - access flags to set on all present pages. - use enum mlx5_ib_mtt_access_flags for this. + * Compute the page shift and page_offset for mailboxes that use a quantized + * page_offset. 
The granularity of the page offset scales according to page + * size. */ -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags) +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized) { - int shift = page_shift - PAGE_SHIFT; - int mask = (1 << shift) - 1; - int i, k, idx; - u64 cur = 0; - u64 base; - int len; - struct scatterlist *sg; - int entry; - - i = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - base = sg_dma_address(sg); - - /* Skip elements below offset */ - if (i + len < offset << shift) { - i += len; - continue; - } - - /* Skip pages below offset */ - if (i < offset << shift) { - k = (offset << shift) - i; - i = offset << shift; - } else { - k = 0; - } - - for (; k < len; k++) { - if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - cur |= access_flags; - idx = (i >> shift) - offset; - - pas[idx] = cpu_to_be64(cur); - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", - i >> shift, be64_to_cpu(pas[idx])); - } - i++; - - /* Stop after num_pages reached */ - if (i >> shift >= offset + num_pages) - return; - } + const u64 page_offset_mask = (1UL << page_offset_bits) - 1; + unsigned long page_size; + u64 page_offset; + + page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask); + if (!page_size) + return 0; + + /* + * page size is the largest possible page size. + * + * Reduce the page_size, and thus the page_offset and quanta, until the + * page_offset fits into the mailbox field. Once page_size < scale this + * loop is guaranteed to terminate. + */ + page_offset = ib_umem_dma_offset(umem, page_size); + while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) { + page_size /= 2; + page_offset = ib_umem_dma_offset(umem, page_size); } -} -void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags) -{ - return __mlx5_ib_populate_pas(dev, umem, page_shift, 0, - ib_umem_num_dma_blocks(umem, PAGE_SIZE), - pas, access_flags); -} -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) -{ - u64 page_size; - u64 page_mask; - u64 off_size; - u64 off_mask; - u64 buf_off; - - page_size = (u64)1 << page_shift; - page_mask = page_size - 1; - buf_off = addr & page_mask; - off_size = page_size >> 6; - off_mask = off_size - 1; - - if (buf_off & off_mask) - return -EINVAL; - - *offset = buf_off >> ilog2(off_size); - return 0; + /* + * The address is not aligned, or otherwise cannot be represented by the + * page_offset.
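A userspace model of the quantization loop above, assuming ib_umem_dma_offset() is simply the IOVA masked by page_size - 1; find_quantized_pgoff() and the numbers in main() are illustrative, not the kernel API:

#include <stdint.h>
#include <stdio.h>

static uint64_t find_quantized_pgoff(uint64_t iova, uint64_t page_size,
				     unsigned int page_offset_bits,
				     uint64_t scale, uint64_t min_page_size,
				     uint64_t *quantized)
{
	const uint64_t field_mask = (1ULL << page_offset_bits) - 1;
	uint64_t offset = iova & (page_size - 1);

	/* shrink page_size until offset is a multiple of page_size/scale
	 * and the quantized value fits in the field */
	while (offset & ~(field_mask * (page_size / scale))) {
		page_size /= 2;
		offset = iova & (page_size - 1);
	}
	if (page_size < min_page_size)	/* models the pgsz_bitmap floor check */
		return 0;
	*quantized = offset / (page_size / scale);
	return page_size;
}

int main(void)
{
	uint64_t q;
	/* 6-bit page_offset field, quanta of page_size/64, 4 KiB floor */
	uint64_t ps = find_quantized_pgoff(0x231000, 0x200000, 6, 64, 4096, &q);

	/* prints page_size=0x40000 page_offset_quantized=49 */
	printf("page_size=0x%llx page_offset_quantized=%llu\n",
	       (unsigned long long)ps, (unsigned long long)q);
	return 0;
}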
+ */ + if (!(pgsz_bitmap & page_size)) + return 0; + + *page_offset_quantized = + (unsigned long)page_offset / (page_size / scale); + if (WARN_ON(*page_offset_quantized > page_offset_mask)) + return 0; + return page_size; } #define WR_ID_BF 0xBF diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index ea5243815cf6..b0fdc1b08e06 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -40,7 +40,73 @@ #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) -#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) +static __always_inline unsigned long +__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits, + unsigned int pgsz_shift) +{ + unsigned int largest_pg_shift = + min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift, + BITS_PER_LONG - 1); + + /* + * Despite a command allowing it, the device does not support lower than + * 4k page size. + */ + pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift); + return GENMASK(largest_pg_shift, pgsz_shift); + } + +/* + * For mkc users, instead of a page_offset the command has a start_iova which + * specifies both the page_offset and the on-the-wire IOVA + */ +#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova) \ + ib_umem_find_best_pgsz(umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), \ + pgsz_shift), \ + iova) + +static __always_inline unsigned long +__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits, + unsigned int offset_shift) +{ + unsigned int largest_offset_shift = + min_t(unsigned long, page_offset_bits - 1 + offset_shift, + BITS_PER_LONG - 1); + + return GENMASK(largest_offset_shift, offset_shift); +} + +/* + * QP/CQ/WQ/etc type commands take a page offset that satisfies: + * page_offset_quantized * (page_size/scale) = page_offset + * Which restricts allowed page sizes to ones that satisfy the above.
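A sketch of how __mlx5_log_page_size_to_bitmap() above turns a field width into a page-size bitmap with a 4 KiB floor; genmask() mimics GENMASK(), and the 5-bit field width used in main() is only an example, not a statement about any particular mailbox layout:

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG ((unsigned int)(8 * sizeof(unsigned long)))
#define ADAPTER_PAGE_SHIFT 12	/* device floor: 4 KiB */

static unsigned long genmask(unsigned int hi, unsigned int lo)
{
	return (~0UL >> (BITS_PER_LONG - 1 - hi)) & (~0UL << lo);
}

static unsigned long log_page_size_to_bitmap(unsigned int log_pgsz_bits,
					     unsigned int pgsz_shift)
{
	unsigned long largest = (1UL << log_pgsz_bits) - 1 + pgsz_shift;

	if (largest > BITS_PER_LONG - 1)
		largest = BITS_PER_LONG - 1;
	if (pgsz_shift < ADAPTER_PAGE_SHIFT)
		pgsz_shift = ADAPTER_PAGE_SHIFT;
	return genmask(largest, pgsz_shift);
}

int main(void)
{
	/* a 5-bit log-page-size field with no extra shift: bit k set means a
	 * 2^k byte page is representable, here 4 KiB (bit 12) .. 2 GiB (bit 31) */
	printf("pgsz bitmap = %#lx\n", log_page_size_to_bitmap(5, 0));
	return 0;
}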
+ */ +unsigned long __mlx5_umem_find_best_quantized_pgoff( + struct ib_umem *umem, unsigned long pgsz_bitmap, + unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale, + unsigned int *page_offset_quantized); +#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), \ + GENMASK(31, order_base_2(scale)), scale, \ + page_offset_quantized) + +#define mlx5_umem_find_best_cq_quantized_pgoff(umem, typ, log_pgsz_fld, \ + pgsz_shift, page_offset_fld, \ + scale, page_offset_quantized) \ + __mlx5_umem_find_best_quantized_pgoff( \ + umem, \ + __mlx5_log_page_size_to_bitmap( \ + __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \ + __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \ + page_offset_quantized) enum { MLX5_IB_MMAP_OFFSET_START = 9, @@ -597,14 +663,12 @@ struct mlx5_ib_mr { int max_descs; int desc_size; int access_mode; + unsigned int page_shift; struct mlx5_core_mkey mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; - unsigned int order; struct mlx5_cache_ent *cache_ent; - int npages; - struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; void *descs_alloc; @@ -1042,6 +1106,11 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } +static inline struct mlx5_ib_dev *mr_to_mdev(struct mlx5_ib_mr *mr) +{ + return to_mdev(mr->ibmr.device); +} + static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata) { struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( @@ -1189,9 +1258,9 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int access_flags, - struct ib_pd *pd, struct ib_udata *udata); +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 virt_addr, int access_flags, + struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); @@ -1210,7 +1279,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, struct ib_smp *out_mad); @@ -1230,15 +1298,8 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, int *shift, - int *ncont, int *order); -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags); -void 
mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, __be64 *pas, int access_flags); +void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, + u64 access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); @@ -1283,7 +1344,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); -int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable); +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1305,7 +1366,7 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } -static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { return -EOPNOTSUPP; } @@ -1456,8 +1517,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; } -unsigned long mlx5_ib_get_xlt_emergency_page(void); -void mlx5_ib_put_xlt_emergency_page(void); +extern void *xlt_emergency_page; int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b261797b258f..24f8d59a42ea 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -41,6 +41,13 @@ #include <rdma/ib_verbs.h> #include "mlx5_ib.h" +/* + * We can't use an array for xlt_emergency_page because dma_map_single doesn't + * work on kernel modules memory + */ +void *xlt_emergency_page; +static DEFINE_MUTEX(xlt_emergency_page_mutex); + enum { MAX_PENDING_REG_MR = 8, }; @@ -49,6 +56,9 @@ enum { static void create_mkey_callback(int status, struct mlx5_async_work *context); +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags, + unsigned int page_size, bool populate); static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, struct ib_pd *pd) @@ -123,19 +133,12 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start, - u64 length) -{ - return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= - length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); -} - static void create_mkey_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = container_of(context, struct mlx5_ib_mr, cb_work); - struct mlx5_ib_dev *dev = mr->dev; struct mlx5_cache_ent *ent = mr->cache_ent; + struct mlx5_ib_dev *dev = ent->dev; unsigned long flags; if (status) { @@ -172,9 +175,7 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return NULL; - mr->order = ent->order; mr->cache_ent = ent; - mr->dev = ent->dev; set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); @@ -642,6 +643,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) if (mlx5_mr_cache_invalidate(mr)) { detach_mr_from_cache(mr); destroy_mkey(dev, mr); + kfree(mr); return; } @@ -867,57 +869,6 @@ static int 
mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length, - int access_flags, struct ib_umem **umem, int *npages, - int *page_shift, int *ncont, int *order) -{ - struct ib_umem *u; - - *umem = NULL; - - if (access_flags & IB_ACCESS_ON_DEMAND) { - struct ib_umem_odp *odp; - - odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, - &mlx5_mn_ops); - if (IS_ERR(odp)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", - PTR_ERR(odp)); - return PTR_ERR(odp); - } - - u = &odp->umem; - - *page_shift = odp->page_shift; - *ncont = ib_umem_odp_num_pages(odp); - *npages = *ncont << (*page_shift - PAGE_SHIFT); - if (order) - *order = ilog2(roundup_pow_of_two(*ncont)); - } else { - u = ib_umem_get(&dev->ib_dev, start, length, access_flags); - if (IS_ERR(u)) { - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); - return PTR_ERR(u); - } - - mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, - page_shift, ncont, order); - } - - if (!*npages) { - mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(u); - return -EINVAL; - } - - *umem = u; - - mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", - *npages, *ncont, *order, *page_shift); - - return 0; -} - static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_umr_context *context = @@ -974,25 +925,49 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, return &cache->ent[order]; } -static struct mlx5_ib_mr * -alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, - u64 len, int npages, int page_shift, unsigned int order, - int access_flags) +static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, + u64 length, int access_flags) +{ + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + mr->ibmr.length = length; + mr->ibmr.device = &dev->ib_dev; + mr->access_flags = access_flags; +} + +static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, + struct ib_umem *umem, u64 iova, + int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); + struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; + unsigned int page_size; - if (!ent) - return ERR_PTR(-E2BIG); - - /* Matches access in alloc_cache_mr() */ - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) - return ERR_PTR(-EOPNOTSUPP); + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!page_size)) + return ERR_PTR(-EINVAL); + ent = mr_cache_ent_from_order( + dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); + /* + * Matches access in alloc_cache_mr(). If the MR can't come from the + * cache then synchronously create an uncached one. + */ + if (!ent || ent->limit == 0 || + !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { + mutex_lock(&dev->slow_path_mutex); + mr = reg_create(pd, umem, iova, access_flags, page_size, false); + mutex_unlock(&dev->slow_path_mutex); + return mr; + } mr = get_cache_mr(ent); if (!mr) { mr = create_cache_mr(ent); + /* + * The above already tried to do the same stuff as reg_create(), + * no reason to try it again. 
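alloc_cacheable_mr() above picks the cache bucket from order_base_2() of the umem's block count at the chosen page size. A minimal model of that arithmetic; num_dma_blocks() assumes the usual align-down/align-up rounding of ib_umem_num_dma_blocks(), and all names here are illustrative:

#include <stdint.h>
#include <stdio.h>

static unsigned int order_base_2(uint64_t n)	/* smallest k with 2^k >= n */
{
	unsigned int k = 0;

	while ((1ULL << k) < n)
		k++;
	return k;
}

static uint64_t num_dma_blocks(uint64_t iova, uint64_t length, uint64_t page_size)
{
	/* align the start down and the end up, then count whole blocks */
	uint64_t start = iova & ~(page_size - 1);
	uint64_t end = (iova + length + page_size - 1) & ~(page_size - 1);

	return (end - start) / page_size;
}

int main(void)
{
	uint64_t blocks = num_dma_blocks(0x7f32a1001000, 6 << 20, 4096);

	/* prints blocks=1536 cache order=11 */
	printf("blocks=%llu cache order=%u\n", (unsigned long long)blocks,
	       order_base_2(blocks));
	return 0;
}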
+ */ if (IS_ERR(mr)) return mr; } @@ -1001,9 +976,12 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, mr->umem = umem; mr->access_flags = access_flags; mr->desc_size = sizeof(struct mlx5_mtt); - mr->mmkey.iova = virt_addr; - mr->mmkey.size = len; + mr->mmkey.iova = iova; + mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; + mr->page_shift = order_base_2(page_size); + mr->umem = umem; + set_mr_fields(dev, mr, umem->length, access_flags); return mr; } @@ -1012,14 +990,144 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, MLX5_UMR_MTT_ALIGNMENT) #define MLX5_SPARE_UMR_CHUNK 0x10000 +/* + * Allocate a temporary buffer to hold the per-page information to transfer to + * HW. For efficiency this should be as large as it can be, but buffer + * allocation failure is not allowed, so try smaller sizes. + */ +static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) +{ + const size_t xlt_chunk_align = + MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size); + size_t size; + void *res = NULL; + + static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); + + /* + * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the + * allocation can't trigger any kind of reclaim. + */ + might_sleep(); + + gfp_mask |= __GFP_ZERO; + + /* + * If the system already has a suitable high order page then just use + * that, but don't try hard to create one. This max is about 1M, so a + * free x86 huge page will satisfy it. + */ + size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), + MLX5_MAX_UMR_CHUNK); + *nents = size / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + + if (size > MLX5_SPARE_UMR_CHUNK) { + size = MLX5_SPARE_UMR_CHUNK; + *nents = get_order(size) / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + } + + *nents = PAGE_SIZE / ent_size; + res = (void *)__get_free_page(gfp_mask); + if (res) + return res; + + mutex_lock(&xlt_emergency_page_mutex); + memset(xlt_emergency_page, 0, PAGE_SIZE); + return xlt_emergency_page; +} + +static void mlx5_ib_free_xlt(void *xlt, size_t length) +{ + if (xlt == xlt_emergency_page) { + mutex_unlock(&xlt_emergency_page_mutex); + return; + } + + free_pages((unsigned long)xlt, get_order(length)); +} + +/* + * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for + * submission. + */ +static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, + struct mlx5_umr_wr *wr, struct ib_sge *sg, + size_t nents, size_t ent_size, + unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + dma_addr_t dma; + void *xlt; + + xlt = mlx5_ib_alloc_xlt(&nents, ent_size, + flags & MLX5_IB_UPD_XLT_ATOMIC ? 
GFP_ATOMIC : + GFP_KERNEL); + sg->length = nents * ent_size; + dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); + mlx5_ib_free_xlt(xlt, sg->length); + return NULL; + } + sg->addr = dma; + sg->lkey = dev->umrc.pd->local_dma_lkey; + + memset(wr, 0, sizeof(*wr)); + wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; + if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) + wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; + wr->wr.sg_list = sg; + wr->wr.num_sge = 1; + wr->wr.opcode = MLX5_IB_WR_UMR; + wr->pd = mr->ibmr.pd; + wr->mkey = mr->mmkey.key; + wr->length = mr->mmkey.size; + wr->virt_addr = mr->mmkey.iova; + wr->access_flags = mr->access_flags; + wr->page_shift = mr->page_shift; + wr->xlt_size = sg->length; + return xlt; +} + +static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, + struct ib_sge *sg) +{ + struct device *ddev = &dev->mdev->pdev->dev; + + dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); + mlx5_ib_free_xlt(xlt, sg->length); +} + +static unsigned int xlt_wr_final_send_flags(unsigned int flags) +{ + unsigned int res = 0; + + if (flags & MLX5_IB_UPD_XLT_ENABLE) + res |= MLX5_IB_SEND_UMR_ENABLE_MR | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | + MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) + res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; + if (flags & MLX5_IB_UPD_XLT_ADDR) + res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + return res; +} + int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags) { - struct mlx5_ib_dev *dev = mr->dev; - struct device *ddev = dev->ib_dev.dev.parent; - int size; + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; void *xlt; - dma_addr_t dma; struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; @@ -1030,15 +1138,17 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, const int page_mask = page_align - 1; size_t pages_mapped = 0; size_t pages_to_map = 0; - size_t pages_iter = 0; + size_t pages_iter; size_t size_to_map = 0; - gfp_t gfp; - bool use_emergency_page = false; + size_t orig_sg_length; if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && !umr_can_use_indirect_mkey(dev)) return -EPERM; + if (WARN_ON(!mr->umem->is_odp)) + return -EINVAL; + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, * so we need to align the offset and length accordingly */ @@ -1046,63 +1156,21 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, npages += idx & page_mask; idx &= ~page_mask; } - - gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL; - gfp |= __GFP_ZERO | __GFP_NOWARN; - pages_to_map = ALIGN(npages, page_align); - size = desc_size * pages_to_map; - size = min_t(int, size, MLX5_MAX_UMR_CHUNK); - - xlt = (void *)__get_free_pages(gfp, get_order(size)); - if (!xlt && size > MLX5_SPARE_UMR_CHUNK) { - mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. 
fallback to spare UMR allocation od %d bytes\n", - size, get_order(size), MLX5_SPARE_UMR_CHUNK); - - size = MLX5_SPARE_UMR_CHUNK; - xlt = (void *)__get_free_pages(gfp, get_order(size)); - } - if (!xlt) { - mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); - xlt = (void *)mlx5_ib_get_xlt_emergency_page(); - size = PAGE_SIZE; - memset(xlt, 0, size); - use_emergency_page = true; - } - pages_iter = size / desc_size; - dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); - err = -ENOMEM; - goto free_xlt; - } + xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); + if (!xlt) + return -ENOMEM; + pages_iter = sg.length / desc_size; + orig_sg_length = sg.length; - if (mr->umem->is_odp) { - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; - pages_to_map = min_t(size_t, pages_to_map, max_pages); - } + pages_to_map = min_t(size_t, pages_to_map, max_pages); } - sg.addr = dma; - sg.lkey = dev->umrc.pd->local_dma_lkey; - - memset(&wr, 0, sizeof(wr)); - wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; - if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) - wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; - wr.wr.sg_list = &sg; - wr.wr.num_sge = 1; - wr.wr.opcode = MLX5_IB_WR_UMR; - - wr.pd = mr->ibmr.pd; - wr.mkey = mr->mmkey.key; - wr.length = mr->mmkey.size; - wr.virt_addr = mr->mmkey.iova; - wr.access_flags = mr->access_flags; wr.page_shift = page_shift; for (pages_mapped = 0; @@ -1110,50 +1178,87 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, pages_mapped += pages_iter, idx += pages_iter) { npages = min_t(int, pages_iter, pages_to_map - pages_mapped); size_to_map = npages * desc_size; - dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); - if (mr->umem->is_odp) { - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); - } else { - __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx, - npages, xlt, - MLX5_IB_MTT_PRESENT); - /* Clear padding after the pages - * brought from the umem. - */ - memset(xlt + size_to_map, 0, size - size_to_map); - } - dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); - if (pages_mapped + pages_iter >= pages_to_map) { - if (flags & MLX5_IB_UPD_XLT_ENABLE) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_ENABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - if (flags & MLX5_IB_UPD_XLT_PD || - flags & MLX5_IB_UPD_XLT_ACCESS) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - if (flags & MLX5_IB_UPD_XLT_ADDR) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - } + if (pages_mapped + pages_iter >= pages_to_map) + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); wr.offset = idx * desc_size; wr.xlt_size = sg.length; err = mlx5_ib_post_send_wait(dev, &wr); } - dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, xlt, &sg); + return err; +} + +/* + * Send the DMA list to the HW for a normal MR using UMR. 
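The refactored update path above stages translation entries in a bounded buffer, posts a work request each time it fills, and zero-pads the tail chunk to the UMR alignment. A self-contained sketch of that fill/flush/pad pattern; CHUNK_ENTRIES, ALIGN_BYTES and post() are stand-ins, not kernel symbols:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CHUNK_ENTRIES 4		/* stands in for sg.length / sizeof(mtt) */
#define ALIGN_BYTES 16		/* stands in for MLX5_UMR_MTT_ALIGNMENT */

static void post(const uint64_t *buf, size_t nbytes, size_t offset)
{
	printf("post %zu bytes at xlt offset %zu\n", nbytes, offset);
}

int main(void)
{
	uint64_t staging[CHUNK_ENTRIES];
	uint64_t *cur = staging;
	size_t offset = 0, total_blocks = 10;

	for (uint64_t blk = 0; blk < total_blocks; blk++) {
		if (cur == staging + CHUNK_ENTRIES) {	/* buffer full: flush */
			post(staging, sizeof(staging), offset);
			offset += sizeof(staging);
			cur = staging;
		}
		*cur++ = blk;	/* one entry per DMA block */
	}

	/* zero-pad the final partial chunk to the alignment and flush it */
	size_t used = (size_t)((char *)cur - (char *)staging);
	size_t padded = (used + ALIGN_BYTES - 1) & ~(size_t)(ALIGN_BYTES - 1);

	memset((char *)staging + used, 0, padded - used);
	post(staging, padded, offset);
	return 0;
}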
+ */ +static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + struct ib_block_iter biter; + struct mlx5_mtt *cur_mtt; + struct mlx5_umr_wr wr; + size_t orig_sg_length; + struct mlx5_mtt *mtt; + size_t final_size; + struct ib_sge sg; + int err = 0; + + if (WARN_ON(mr->umem->is_odp)) + return -EINVAL; + + mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, + ib_umem_num_dma_blocks(mr->umem, + 1 << mr->page_shift), + sizeof(*mtt), flags); + if (!mtt) + return -ENOMEM; + orig_sg_length = sg.length; + + cur_mtt = mtt; + rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap, + BIT(mr->page_shift)) { + if (cur_mtt == (void *)mtt + sg.length) { + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + if (err) + goto err; + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + wr.offset += sg.length; + cur_mtt = mtt; + } + + cur_mtt->ptag = + cpu_to_be64(rdma_block_iter_dma_address(&biter) | + MLX5_IB_MTT_PRESENT); + cur_mtt++; + } -free_xlt: - if (use_emergency_page) - mlx5_ib_put_xlt_emergency_page(); - else - free_pages((unsigned long)xlt, get_order(size)); + final_size = (void *)cur_mtt - (void *)mtt; + sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); + memset(cur_mtt, 0, sg.length - final_size); + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); + wr.xlt_size = sg.length; + dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + +err: + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, mtt, &sg); return err; } @@ -1161,11 +1266,9 @@ free_xlt: * If ibmr is NULL it will be allocated by reg_create. * Else, the given ibmr will be used. */ -static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, - u64 virt_addr, u64 length, - struct ib_umem *umem, int npages, - int page_shift, int access_flags, - bool populate) +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags, + unsigned int page_size, bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr; @@ -1176,16 +1279,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); - mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); + if (!page_size) + return ERR_PTR(-EINVAL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); mr->ibmr.pd = pd; mr->access_flags = access_flags; + mr->page_shift = order_base_2(page_size); inlen = MLX5_ST_SZ_BYTES(create_mkey_in); if (populate) - inlen += sizeof(*pas) * roundup(npages, 2); + inlen += sizeof(*pas) * + roundup(ib_umem_num_dma_blocks(umem, page_size), 2); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1197,7 +1304,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, err = -EINVAL; goto err_2; } - mlx5_ib_populate_pas(dev, umem, page_shift, pas, + mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); } @@ -1206,20 +1313,20 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr, + set_mkc_access_pd_addr_fields(mkc, access_flags, iova, populate ? 
pd : dev->umrc.pd); MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET64(mkc, mkc, len, length); + MLX5_SET64(mkc, mkc, len, umem->length); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, - get_octo_len(virt_addr, length, page_shift)); - MLX5_SET(mkc, mkc, log_page_size, page_shift); + get_octo_len(iova, umem->length, mr->page_shift)); + MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(virt_addr, length, page_shift)); + get_octo_len(iova, umem->length, mr->page_shift)); } err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); @@ -1229,7 +1336,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, } mr->mmkey.type = MLX5_MKEY_MR; mr->desc_size = sizeof(struct mlx5_mtt); - mr->dev = dev; + mr->umem = umem; + set_mr_fields(dev, mr, umem->length, access_flags); kvfree(in); mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); @@ -1238,25 +1346,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, err_2: kvfree(in); - err_1: - if (!ibmr) - kfree(mr); - + kfree(mr); return ERR_PTR(err); } -static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - int npages, u64 length, int access_flags) -{ - mr->npages = npages; - atomic_add(npages, &dev->mdev->priv.reg_pages); - mr->ibmr.lkey = mr->mmkey.key; - mr->ibmr.rkey = mr->mmkey.key; - mr->ibmr.length = length; - mr->access_flags = access_flags; -} - static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, u64 length, int acc, int mode) { @@ -1290,8 +1384,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, kfree(in); - mr->umem = NULL; - set_mr_fields(dev, mr, 0, length, acc); + set_mr_fields(dev, mr, length, acc); return &mr->ibmr; @@ -1352,116 +1445,128 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, attr->access_flags, mode); } -struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt_addr, int access_flags, - struct ib_udata *udata) +static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, + u64 iova, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; bool xlt_with_umr; - struct ib_umem *umem; - int page_shift; - int npages; - int ncont; - int order; int err; - if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) - return ERR_PTR(-EOPNOTSUPP); - - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", - start, virt_addr, length, access_flags); - - xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length); - /* ODP requires xlt update via umr to work. 
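reg_create() above sizes the translation list in 16-byte octwords via get_octo_len(). A hedged model of that arithmetic, assuming get_octo_len() keeps its usual shape (offset-adjusted page count, two 8-byte entries per octword); this is a sketch, not the driver's definition:

#include <stdint.h>
#include <stdio.h>

static uint64_t get_octo_len(uint64_t iova, uint64_t len, unsigned int page_shift)
{
	uint64_t page_size = 1ULL << page_shift;
	uint64_t offset = iova & (page_size - 1);
	uint64_t npages = (len + offset + page_size - 1) >> page_shift;

	return (npages + 1) / 2;	/* two 8-byte entries per 16-byte octword */
}

int main(void)
{
	/* 1 MiB starting 0x800 into a 4 KiB page: 257 pages -> 129 octwords */
	printf("octwords=%llu\n",
	       (unsigned long long)get_octo_len(0x7f0000001800, 1 << 20, 12));
	return 0;
}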
*/ - if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND)) - return ERR_PTR(-EINVAL); - - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && - length == U64_MAX) { - if (virt_addr != start) - return ERR_PTR(-EINVAL); - if (!(access_flags & IB_ACCESS_ON_DEMAND) || - !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) - return ERR_PTR(-EINVAL); - - mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); - if (IS_ERR(mr)) - return ERR_CAST(mr); - return &mr->ibmr; - } - - err = mr_umem_get(dev, start, length, access_flags, &umem, - &npages, &page_shift, &ncont, &order); - - if (err < 0) - return ERR_PTR(err); - + xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length); if (xlt_with_umr) { - mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, - page_shift, order, access_flags); - if (IS_ERR(mr)) - mr = NULL; - } + mr = alloc_cacheable_mr(pd, umem, iova, access_flags); + } else { + unsigned int page_size = mlx5_umem_find_best_pgsz( + umem, mkc, log_page_size, 0, iova); - if (!mr) { mutex_lock(&dev->slow_path_mutex); - mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, !xlt_with_umr); + mr = reg_create(pd, umem, iova, access_flags, page_size, true); mutex_unlock(&dev->slow_path_mutex); } - if (IS_ERR(mr)) { - err = PTR_ERR(mr); - goto error; + ib_umem_release(umem); + return ERR_CAST(mr); } mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); - mr->umem = umem; - set_mr_fields(dev, mr, npages, length, access_flags); + atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); - if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) { + if (xlt_with_umr) { /* * If the MR was created with reg_create then it will be * configured properly but left disabled. It is safe to go ahead * and configure it again via UMR while enabling it. */ - int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; - - err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, - update_xlt_flags); + err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); if (err) { dereg_mr(dev, mr); return ERR_PTR(err); } } + return &mr->ibmr; +} - if (is_odp_mr(mr)) { - to_ib_umem_odp(mr->umem)->private = mr; - init_waitqueue_head(&mr->q_deferred_work); - atomic_set(&mr->num_deferred_work, 0); - err = xa_err(xa_store(&dev->odp_mkeys, - mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, - GFP_KERNEL)); - if (err) { - dereg_mr(dev, mr); - return ERR_PTR(err); - } +static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem_odp *odp; + struct mlx5_ib_mr *mr; + int err; - err = mlx5_ib_init_odp_mr(mr, xlt_with_umr); - if (err) { - dereg_mr(dev, mr); - return ERR_PTR(err); - } + if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + return ERR_PTR(-EOPNOTSUPP); + + if (!start && length == U64_MAX) { + if (iova != 0) + return ERR_PTR(-EINVAL); + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) + return ERR_PTR(-EINVAL); + + mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); + if (IS_ERR(mr)) + return ERR_CAST(mr); + return &mr->ibmr; + } + + /* ODP requires xlt update via umr to work. 
*/ + if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + return ERR_PTR(-EINVAL); + + odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, + &mlx5_mn_ops); + if (IS_ERR(odp)) + return ERR_CAST(odp); + + mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags); + if (IS_ERR(mr)) { + ib_umem_release(&odp->umem); + return ERR_CAST(mr); } + odp->private = mr; + init_waitqueue_head(&mr->q_deferred_work); + atomic_set(&mr->num_deferred_work, 0); + err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL)); + if (err) + goto err_dereg_mr; + + err = mlx5_ib_init_odp_mr(mr); + if (err) + goto err_dereg_mr; return &mr->ibmr; -error: - ib_umem_release(umem); + +err_dereg_mr: + dereg_mr(dev, mr); return ERR_PTR(err); } +struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem *umem; + + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) + return ERR_PTR(-EOPNOTSUPP); + + mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, iova, length, access_flags); + + if (access_flags & IB_ACCESS_ON_DEMAND) + return create_user_odp_mr(pd, start, length, iova, access_flags, + udata); + umem = ib_umem_get(&dev->ib_dev, start, length, access_flags); + if (IS_ERR(umem)) + return ERR_CAST(umem); + return create_real_mr(pd, umem, iova, access_flags); +} + /** * mlx5_mr_cache_invalidate - Fence all DMA on the MR * @mr: The MR to fence @@ -1474,151 +1579,224 @@ int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) { struct mlx5_umr_wr umrwr = {}; - if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = mr->dev->umrc.pd; + umrwr.pd = mr_to_mdev(mr)->umrc.pd; umrwr.mkey = mr->mmkey.key; umrwr.ignore_free_state = 1; - return mlx5_ib_post_send_wait(mr->dev, &umrwr); + return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr); } -static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, - int access_flags, int flags) -{ - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_umr_wr umrwr = {}; +/* + * True if the change in access flags can be done via UMR, only some access + * flags can be updated. 
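A sketch of the access-flag test described above: XOR the current and target flag sets and require that only the UMR-rewritable bits differ. The bit values below are illustrative, not the real IB_ACCESS_* encoding:

#include <stdio.h>

#define LOCAL_WRITE	(1u << 0)
#define REMOTE_WRITE	(1u << 1)
#define REMOTE_READ	(1u << 2)
#define REMOTE_ATOMIC	(1u << 3)
#define RELAXED_ORDERING (1u << 4)

#define UMR_REWRITABLE (LOCAL_WRITE | REMOTE_WRITE | REMOTE_READ | RELAXED_ORDERING)

static int can_rereg_access(unsigned int cur, unsigned int target)
{
	/* only bits in UMR_REWRITABLE may change between cur and target */
	return !((cur ^ target) & ~UMR_REWRITABLE);
}

int main(void)
{
	printf("%d\n", can_rereg_access(LOCAL_WRITE, LOCAL_WRITE | REMOTE_READ));   /* 1 */
	printf("%d\n", can_rereg_access(LOCAL_WRITE, LOCAL_WRITE | REMOTE_ATOMIC)); /* 0 */
	return 0;
}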
+ */ +static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, + unsigned int current_access_flags, + unsigned int target_access_flags) +{ + unsigned int diffs = current_access_flags ^ target_access_flags; + + if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) + return false; + return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, + target_access_flags); +} + +static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_umr_wr umrwr = { + .wr = { + .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS, + .opcode = MLX5_IB_WR_UMR, + }, + .mkey = mr->mmkey.key, + .pd = pd, + .access_flags = access_flags, + }; int err; - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; - - umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.mkey = mr->mmkey.key; - - if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) { - umrwr.pd = pd; - umrwr.access_flags = access_flags; - umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - } - err = mlx5_ib_post_send_wait(dev, &umrwr); + if (err) + return err; - return err; + mr->access_flags = access_flags; + mr->mmkey.pd = to_mpd(pd)->pdn; + return 0; } -int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, - u64 length, u64 virt_addr, int new_access_flags, - struct ib_pd *new_pd, struct ib_udata *udata) +static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, + struct ib_umem *new_umem, + int new_access_flags, u64 iova, + unsigned long *page_size) { - struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); - struct mlx5_ib_mr *mr = to_mmr(ib_mr); - struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; - int access_flags = flags & IB_MR_REREG_ACCESS ? - new_access_flags : - mr->access_flags; - int page_shift = 0; - int upd_flags = 0; - int npages = 0; - int ncont = 0; - int order = 0; - u64 addr, len; - int err; + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", - start, virt_addr, length, access_flags); + /* We only track the allocated sizes of MRs from the cache */ + if (!mr->cache_ent) + return false; + if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) + return false; - atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + *page_size = + mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); + if (WARN_ON(!*page_size)) + return false; + return (1ULL << mr->cache_ent->order) >= + ib_umem_num_dma_blocks(new_umem, *page_size); +} - if (!mr->umem) - return -EINVAL; +static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags, int flags, struct ib_umem *new_umem, + u64 iova, unsigned long page_size) +{ + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE; + struct ib_umem *old_umem = mr->umem; + int err; - if (is_odp_mr(mr)) - return -EOPNOTSUPP; + /* + * To keep everything simple the MR is revoked before we start to mess + * with it. This ensure the change is atomic relative to any use of the + * MR. 
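A minimal model of the capacity test in can_use_umr_rereg_pas() above: a cached mkey sized for 2^order entries can only absorb a new umem whose block count at the chosen page size fits that budget. The numbers are illustrative:

#include <stdint.h>
#include <stdio.h>

static int fits_cached_mkey(unsigned int cache_order, uint64_t new_nblocks)
{
	return (1ULL << cache_order) >= new_nblocks;
}

int main(void)
{
	printf("%d\n", fits_cached_mkey(11, 1536)); /* 2048 >= 1536 -> 1 */
	printf("%d\n", fits_cached_mkey(11, 4096)); /* 2048 <  4096 -> 0 */
	return 0;
}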
+ */ + err = mlx5_mr_cache_invalidate(mr); + if (err) + return err; - if (flags & IB_MR_REREG_TRANS) { - addr = virt_addr; - len = length; - } else { - addr = mr->umem->address; - len = mr->umem->length; + if (flags & IB_MR_REREG_PD) { + mr->ibmr.pd = pd; + mr->mmkey.pd = to_mpd(pd)->pdn; + upd_flags |= MLX5_IB_UPD_XLT_PD; + } + if (flags & IB_MR_REREG_ACCESS) { + mr->access_flags = access_flags; + upd_flags |= MLX5_IB_UPD_XLT_ACCESS; } - if (flags != IB_MR_REREG_PD) { + mr->ibmr.length = new_umem->length; + mr->mmkey.iova = iova; + mr->mmkey.size = new_umem->length; + mr->page_shift = order_base_2(page_size); + mr->umem = new_umem; + err = mlx5_ib_update_mr_pas(mr, upd_flags); + if (err) { /* - * Replace umem. This needs to be done whether or not UMR is - * used. + * The MR is revoked at this point so there is no issue to free + * new_umem. */ - flags |= IB_MR_REREG_TRANS; - ib_umem_release(mr->umem); - mr->umem = NULL; - err = mr_umem_get(dev, addr, len, access_flags, &mr->umem, - &npages, &page_shift, &ncont, &order); - if (err) - goto err; + mr->umem = old_umem; + return err; } - if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags, - access_flags) || - !mlx5_ib_can_load_pas_with_umr(dev, len) || - (flags & IB_MR_REREG_TRANS && - !mlx5_ib_pas_fits_in_mr(mr, addr, len))) { - /* - * UMR can't be used - MKey needs to be replaced. - */ - if (mr->cache_ent) - detach_mr_from_cache(mr); - err = destroy_mkey(dev, mr); - if (err) - goto err; + atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages); + ib_umem_release(old_umem); + atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages); + return 0; +} - mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, - page_shift, access_flags, true); +struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, + u64 length, u64 iova, int new_access_flags, + struct ib_pd *new_pd, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); + struct mlx5_ib_mr *mr = to_mmr(ib_mr); + int err; - if (IS_ERR(mr)) { - err = PTR_ERR(mr); - mr = to_mmr(ib_mr); - goto err; - } - } else { - /* - * Send a UMR WQE - */ - mr->ibmr.pd = pd; - mr->access_flags = access_flags; - mr->mmkey.iova = addr; - mr->mmkey.size = len; - mr->mmkey.pd = to_mpd(pd)->pdn; + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) + return ERR_PTR(-EOPNOTSUPP); - if (flags & IB_MR_REREG_TRANS) { - upd_flags = MLX5_IB_UPD_XLT_ADDR; - if (flags & IB_MR_REREG_PD) - upd_flags |= MLX5_IB_UPD_XLT_PD; - if (flags & IB_MR_REREG_ACCESS) - upd_flags |= MLX5_IB_UPD_XLT_ACCESS; - err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, - upd_flags); - } else { - err = rereg_umr(pd, mr, access_flags, flags); + mlx5_ib_dbg( + dev, + "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", + start, iova, length, new_access_flags); + + if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) + return ERR_PTR(-EOPNOTSUPP); + + if (!(flags & IB_MR_REREG_ACCESS)) + new_access_flags = mr->access_flags; + if (!(flags & IB_MR_REREG_PD)) + new_pd = ib_mr->pd; + + if (!(flags & IB_MR_REREG_TRANS)) { + struct ib_umem *umem; + + /* Fast path for PD/access change */ + if (can_use_umr_rereg_access(dev, mr->access_flags, + new_access_flags)) { + err = umr_rereg_pd_access(mr, new_pd, new_access_flags); + if (err) + return ERR_PTR(err); + return NULL; } + /* DM or ODP MR's don't have a umem so we can't re-use it */ + if (!mr->umem || is_odp_mr(mr)) + goto recreate; + /* + * Only one active MR can refer to a umem at one time, revoke + * the old 
MR before assigning the umem to the new one. + */ + err = mlx5_mr_cache_invalidate(mr); if (err) - goto err; - } - - set_mr_fields(dev, mr, npages, len, access_flags); + return ERR_PTR(err); + umem = mr->umem; + mr->umem = NULL; + atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); - return 0; + return create_real_mr(new_pd, umem, mr->mmkey.iova, + new_access_flags); + } -err: - ib_umem_release(mr->umem); - mr->umem = NULL; + /* + * DM doesn't have a PAS list so we can't re-use it, odp does but the + * logic around releasing the umem is different + */ + if (!mr->umem || is_odp_mr(mr)) + goto recreate; + + if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && + can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { + struct ib_umem *new_umem; + unsigned long page_size; + + new_umem = ib_umem_get(&dev->ib_dev, start, length, + new_access_flags); + if (IS_ERR(new_umem)) + return ERR_CAST(new_umem); + + /* Fast path for PAS change */ + if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova, + &page_size)) { + err = umr_rereg_pas(mr, new_pd, new_access_flags, flags, + new_umem, iova, page_size); + if (err) { + ib_umem_release(new_umem); + return ERR_PTR(err); + } + return NULL; + } + return create_real_mr(new_pd, new_umem, iova, new_access_flags); + } - clean_mr(dev, mr); - return err; + /* + * Everything else has no state we can preserve, just create a new MR + * from scratch + */ +recreate: + return mlx5_ib_reg_user_mr(new_pd, start, length, iova, + new_access_flags, udata); } static int @@ -1627,6 +1805,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, int ndescs, int desc_size) { + struct mlx5_ib_dev *dev = to_mdev(device); + struct device *ddev = &dev->mdev->pdev->dev; int size = ndescs * desc_size; int add_size; int ret; @@ -1639,9 +1819,8 @@ mlx5_alloc_priv_descs(struct ib_device *device, mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); - mr->desc_map = dma_map_single(device->dev.parent, mr->descs, - size, DMA_TO_DEVICE); - if (dma_mapping_error(device->dev.parent, mr->desc_map)) { + mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, mr->desc_map)) { ret = -ENOMEM; goto err; } @@ -1659,9 +1838,10 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) if (mr->descs) { struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; + struct mlx5_ib_dev *dev = to_mdev(device); - dma_unmap_single(device->dev.parent, mr->desc_map, - size, DMA_TO_DEVICE); + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, + DMA_TO_DEVICE); kfree(mr->descs_alloc); mr->descs = NULL; } @@ -1691,7 +1871,6 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int npages = mr->npages; struct ib_umem *umem = mr->umem; /* Stop all DMA */ @@ -1700,14 +1879,17 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) else clean_mr(dev, mr); + if (umem) { + if (!is_odp_mr(mr)) + atomic_sub(ib_umem_num_pages(umem), + &dev->mdev->priv.reg_pages); + ib_umem_release(umem); + } + if (mr->cache_ent) mlx5_mr_cache_free(dev, mr); else kfree(mr); - - ib_umem_release(umem); - atomic_sub(npages, &dev->mdev->priv.reg_pages); - } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 5c853ec1b0d8..aa2413b50adc 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -102,7 +102,7 @@ static void 
populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, if (flags & MLX5_IB_UPD_XLT_ZAP) { for (; pklm != end; pklm++, idx++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - pklm->key = cpu_to_be32(imr->dev->null_mkey); + pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); pklm->va = 0; } return; @@ -129,7 +129,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * locking around the xarray. */ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); - lockdep_assert_held(&imr->dev->odp_srcu); + lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu); for (; pklm != end; pklm++, idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -139,7 +139,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, pklm->key = cpu_to_be32(mtt->ibmr.lkey); pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); } else { - pklm->key = cpu_to_be32(imr->dev->null_mkey); + pklm->key = cpu_to_be32(mr_to_mdev(imr)->null_mkey); pklm->va = 0; } } @@ -199,7 +199,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) mutex_unlock(&odp->umem_mutex); if (!mr->cache_ent) { - mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); + mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey); WARN_ON(mr->descs); } } @@ -222,19 +222,19 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) WARN_ON(atomic_read(&mr->num_deferred_work)); if (need_imr_xlt) { - srcu_key = srcu_read_lock(&mr->dev->odp_srcu); + srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); + srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key); } dma_fence_odp_mr(mr); mr->parent = NULL; - mlx5_mr_cache_free(mr->dev, mr); + mlx5_mr_cache_free(mr_to_mdev(mr), mr); ib_umem_odp_release(odp); if (atomic_dec_and_test(&imr->num_deferred_work)) wake_up(&imr->q_deferred_work); @@ -274,7 +274,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) goto out_unlock; atomic_inc(&imr->num_deferred_work); - call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, + call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu, free_implicit_child_mr_rcu); out_unlock: @@ -476,12 +476,13 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY, - imr->access_flags); + ret = mr = mlx5_mr_cache_alloc( + mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags); if (IS_ERR(mr)) goto out_umem; mr->ibmr.pd = imr->ibmr.pd; + mr->ibmr.device = &mr_to_mdev(imr)->ib_dev; mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; @@ -517,11 +518,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_mr; } - mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); + mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; out_mr: - mlx5_mr_cache_free(imr->dev, mr); + mlx5_mr_cache_free(mr_to_mdev(imr), mr); out_umem: ib_umem_odp_release(odp); return ret; @@ -536,6 +537,10 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct mlx5_ib_mr *imr; int err; + if (!mlx5_ib_can_load_pas_with_umr(dev, + MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) + return ERR_PTR(-EOPNOTSUPP); + umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags); if 
(IS_ERR(umem_odp)) return ERR_CAST(umem_odp); @@ -551,6 +556,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->umem = &umem_odp->umem; imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; + imr->ibmr.device = &dev->ib_dev; imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; atomic_set(&imr->num_deferred_work, 0); @@ -584,7 +590,7 @@ out_umem: void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); - struct mlx5_ib_dev *dev = imr->dev; + struct mlx5_ib_dev *dev = mr_to_mdev(imr); struct list_head destroy_list; struct mlx5_ib_mr *mtt; struct mlx5_ib_mr *tmp; @@ -654,10 +660,10 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) { /* Prevent new page faults and prefetch requests from succeeding */ - xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&mr->dev->odp_srcu); + synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); @@ -701,7 +707,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, if (ret < 0) { if (ret != -EAGAIN) - mlx5_ib_err(mr->dev, + mlx5_ib_err(mr_to_mdev(mr), "Failed to update mkey page tables\n"); goto out; } @@ -791,7 +797,7 @@ out: MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); if (err) { - mlx5_ib_err(imr->dev, "Failed to update PAS\n"); + mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n"); return err; } return ret; @@ -811,7 +817,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - lockdep_assert_held(&mr->dev->odp_srcu); + lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; @@ -831,17 +837,13 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, flags); } -int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { - u32 flags = MLX5_PF_FLAGS_SNAPSHOT; int ret; - if (enable) - flags |= MLX5_PF_FLAGS_ENABLE; - - ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), - mr->umem->address, mr->umem->length, NULL, - flags); + ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address, + mr->umem->length, NULL, + MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE); return ret >= 0 ? 0 : ret; } @@ -1783,7 +1785,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) /* We rely on IB/core that work is executed if we have num_sge != 0 only. 
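The mr->dev back-pointer removal above relies on recovering the device from the embedded ib_mr, as the new mr_to_mdev() helper does. A self-contained illustration of that container_of() pattern; the struct layouts are trimmed to just the fields needed here:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ib_device { const char *name; };
struct ib_mr { struct ib_device *device; };

struct mlx5_ib_dev { struct ib_device ib_dev; };
struct mlx5_ib_mr { struct ib_mr ibmr; };

static struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
{
	return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
}

static struct mlx5_ib_dev *mr_to_mdev(struct mlx5_ib_mr *mr)
{
	/* no cached back-pointer: walk ibmr.device back to the mlx5 device */
	return to_mdev(mr->ibmr.device);
}

int main(void)
{
	struct mlx5_ib_dev dev = { .ib_dev = { .name = "mlx5_0" } };
	struct mlx5_ib_mr mr = { .ibmr = { .device = &dev.ib_dev } };

	printf("%s\n", mr_to_mdev(&mr)->ib_dev.name);	/* prints mlx5_0 */
	return 0;
}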
*/ WARN_ON(!work->num_sge); - dev = work->frags[0].mr->dev; + dev = mr_to_mdev(work->frags[0].mr); /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < work->num_sge; ++i) { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 600e056798c0..0cb7cc642d87 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -778,39 +778,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, return bfregi->sys_pages[index_of_sys_page] + offset; } -static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, - unsigned long addr, size_t size, - struct ib_umem **umem, int *npages, int *page_shift, - int *ncont, u32 *offset) -{ - int err; - - *umem = ib_umem_get(&dev->ib_dev, addr, size, 0); - if (IS_ERR(*umem)) { - mlx5_ib_dbg(dev, "umem_get failed\n"); - return PTR_ERR(*umem); - } - - mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL); - - err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n", - addr, size, *npages, *page_shift, *ncont, *offset); - - return 0; - -err_umem: - ib_umem_release(*umem); - *umem = NULL; - - return err; -} - static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_rwq *rwq, struct ib_udata *udata) { @@ -833,10 +800,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - int page_shift = 0; - int npages; + unsigned long page_size = 0; u32 offset = 0; - int ncont = 0; int err; if (!ucmd->buf_addr) @@ -849,23 +814,26 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, - &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, - &rwq->rq_page_offset); - if (err) { + page_size = mlx5_umem_find_best_quantized_pgoff( + rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &rwq->rq_page_offset); + if (!page_size) { mlx5_ib_warn(dev, "bad offset\n"); + err = -EINVAL; goto err_umem; } - rwq->rq_num_pas = ncont; - rwq->page_shift = page_shift; - rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size); + rwq->page_shift = order_base_2(page_size); + rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT; rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE); - mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n", - (unsigned long long)ucmd->buf_addr, rwq->buf_size, - npages, page_shift, ncont, offset); + mlx5_ib_dbg( + dev, + "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n", + (unsigned long long)ucmd->buf_addr, rwq->buf_size, + ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas, + offset); err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db); if (err) { @@ -896,10 +864,9 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *context; struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; - int page_shift = 0; + unsigned int page_offset_quantized = 0; + unsigned long page_size = 0; int uar_index = 0; - int npages; - u32 offset = 0; int bfregn; int ncont = 0; __be64 *pas; @@ 
-950,11 +917,21 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->buf_addr && ubuffer->buf_size) { ubuffer->buf_addr = ucmd->buf_addr; - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, - ubuffer->buf_size, &ubuffer->umem, - &npages, &page_shift, &ncont, &offset); - if (err) + ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(ubuffer->umem)) { + err = PTR_ERR(ubuffer->umem); goto err_bfreg; + } + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, qpc, log_page_size, + MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, + &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } + ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size); } else { ubuffer->umem = NULL; } @@ -969,15 +946,14 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0; MLX5_SET(create_qp_in, *in, uid, uid); - pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); - if (ubuffer->umem) - mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); - qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); - - MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(qpc, qpc, page_offset, offset); - + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); + if (ubuffer->umem) { + mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0); + MLX5_SET(qpc, qpc, log_page_size, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, page_offset, page_offset_quantized); + } MLX5_SET(qpc, qpc, uar_page, uar_index); if (bfregn != MLX5_IB_INVALID_BFREG) resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn); @@ -1209,18 +1185,24 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, void *wq; int inlen; int err; - int page_shift = 0; - int npages; - int ncont = 0; - u32 offset = 0; - - err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, - &sq->ubuffer.umem, &npages, &page_shift, &ncont, - &offset); - if (err) - return err; + unsigned int page_offset_quantized; + unsigned long page_size; + + sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, + ubuffer->buf_size, 0); + if (IS_ERR(sq->ubuffer.umem)) + return PTR_ERR(sq->ubuffer.umem); + page_size = mlx5_umem_find_best_quantized_pgoff( + ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, &page_offset_quantized); + if (!page_size) { + err = -EINVAL; + goto err_umem; + } - inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont; + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * + ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; @@ -1248,11 +1230,12 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size)); - MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET(wq, wq, page_offset, offset); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0); + mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0); err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp); @@ -1278,40 +1261,31 @@ static void 
destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, ib_umem_release(sq->ubuffer.umem); } -static size_t get_rq_pas_size(void *qpc) -{ - u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12; - u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride); - u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size); - u32 page_offset = MLX5_GET(qpc, qpc, page_offset); - u32 po_quanta = 1 << (log_page_size - 6); - u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride); - u32 page_size = 1 << log_page_size; - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; - - return rq_num_pas * sizeof(u64); -} - static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin, - size_t qpinlen, struct ib_pd *pd) + struct ib_pd *pd) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; - __be64 *qp_pas; void *in; void *rqc; void *wq; void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc); - size_t rq_pas_size = get_rq_pas_size(qpc); + struct ib_umem *umem = rq->base.ubuffer.umem; + unsigned int page_offset_quantized; + unsigned long page_size = 0; size_t inlen; int err; - if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas)) + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, + MLX5_ADAPTER_PAGE_SHIFT, + page_offset, 64, + &page_offset_quantized); + if (!page_size) return -EINVAL; - inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size; + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size); in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1333,16 +1307,16 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING) MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset)); + MLX5_SET(wq, wq, page_offset, page_offset_quantized); MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd)); MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr)); MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4); - MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size)); + MLX5_SET(wq, wq, log_wq_pg_sz, + order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size)); pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas); - memcpy(pas, qp_pas, rq_pas_size); + mlx5_ib_populate_pas(umem, page_size, pas, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp); @@ -1463,7 +1437,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd); + err = create_raw_packet_qp_rq(dev, rq, in, pd); if (err) goto err_destroy_sq; @@ -2436,7 +2410,7 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd, } qp->state = IB_QPS_RESET; - + rdma_restrack_no_track(&qp->ibqp.res); return 0; } @@ -2460,6 +2434,7 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case IB_QPT_GSI: if (dev->profile == &raw_eth_profile) goto out; + fallthrough; case IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: @@ -2712,11 +2687,12 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, process_create_flag(dev, 
&create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, true, qp); - if (create_flags) + if (create_flags) { mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n", create_flags); - - return (create_flags) ? -EINVAL : 0; + return -EOPNOTSUPP; + } + return 0; } static int process_udata_size(struct mlx5_ib_dev *dev, @@ -3102,7 +3078,7 @@ static int ib_to_mlx5_rate_map(u8 rate) return 5; default: return rate + MLX5_STAT_RATE_OFFSET; - }; + } return 0; } @@ -4247,6 +4223,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int err = -EINVAL; int port; + if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) + return -EOPNOTSUPP; + if (ibqp->rwq_ind_tbl) return -ENOSYS; @@ -4576,7 +4555,9 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) { to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path); to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path); qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index); @@ -4882,7 +4863,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET(rqc, rqc, delay_drop_en, 1); } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); - mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); + mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp); if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { err = set_delay_drop(dev); diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 887270dd3ce2..4ac429e72004 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -116,7 +116,7 @@ static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); - return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY, + return fill_res_raw(msg, mr_to_mdev(mr), MLX5_SGMT_TYPE_PRM_QUERY_MKEY, mlx5_mkey_to_idx(mr->mmkey.key)); } diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index e2f720eec1e1..fab6736e4d6a 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -51,10 +51,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata, struct mlx5_ib_ucontext, ibucontext); size_t ucmdlen; int err; - int npages; - int page_shift; - int ncont; - u32 offset; u32 uidx = MLX5_IB_DEFAULT_UIDX; ucmdlen = min(udata->inlen, sizeof(ucmd)); @@ -86,32 +82,14 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, err = PTR_ERR(srq->umem); return err; } - - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, - &page_shift, &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, - &offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; - } - - in->pas = kvcalloc(ncont, sizeof(*in->pas), GFP_KERNEL); - if (!in->pas) { - err = -ENOMEM; - goto err_umem; - } - - mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0); + in->umem = srq->umem; err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); if (err) { mlx5_ib_dbg(dev, "map doorbell failed\n"); - goto err_in; + goto err_umem; } - in->log_page_size = page_shift - 
MLX5_ADAPTER_PAGE_SHIFT; - in->page_offset = offset; in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) @@ -119,9 +97,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return 0; -err_in: - kvfree(in->pas); - err_umem: ib_umem_release(srq->umem); @@ -226,6 +201,11 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC && + init_attr->srq_type != IB_SRQT_TM) + return -EOPNOTSUPP; + /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index 2c3627b2509d..a7e3dc5564ac 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -28,6 +28,7 @@ struct mlx5_srq_attr { u32 user_index; u64 db_record; __be64 *pas; + struct ib_umem *umem; u32 tm_log_list_size; u32 tm_next_tag; u32 tm_hw_phase_cnt; diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index db889ec3fd48..8b3385396599 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -92,6 +92,25 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) return srq; } +static int __set_srq_page_size(struct mlx5_srq_attr *in, + unsigned long page_size) +{ + if (!page_size) + return -EINVAL; + in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; + + if (WARN_ON(get_pas_size(in) != + ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64))) + return -EINVAL; + return 0; +} + +#define set_srq_page_size(in, typ, log_pgsz_fld) \ + __set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff( \ + (in)->umem, typ, log_pgsz_fld, \ + MLX5_ADAPTER_PAGE_SHIFT, page_offset, \ + 64, &(in)->page_offset)) + static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in) { @@ -103,6 +122,12 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, srqc, log_page_size); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -114,7 +139,13 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); set_srqc(srqc, in); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_SRQ); @@ -194,6 +225,12 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, xrc_srqc, log_page_size); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -207,7 +244,13 @@ static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, set_srqc(xrc_srqc, in); MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); - memcpy(pas, in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + 
MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_xrc_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); @@ -289,11 +332,18 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in = NULL; void *rmpc; void *wq; + void *pas; int pas_size; int outlen; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; outlen = MLX5_ST_SZ_BYTES(create_rmp_out); @@ -309,8 +359,16 @@ static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); MLX5_SET(create_rmp_in, create_in, uid, in->uid); + pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas); + set_wq(wq, in); - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); @@ -421,10 +479,17 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, void *create_in; void *xrqc; void *wq; + void *pas; int pas_size; int inlen; int err; + if (in->umem) { + err = set_srq_page_size(in, wq, log_wq_pg_sz); + if (err) + return err; + } + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; create_in = kvzalloc(inlen, GFP_KERNEL); @@ -433,9 +498,16 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas); set_wq(wq, in); - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + if (in->umem) + mlx5_ib_populate_pas( + in->umem, + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), + pas, 0); + else + memcpy(pas, in->pas, pas_size); if (in->type == IB_SRQT_TM) { MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 119b2573c9a0..26c3408dcaca 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -604,7 +604,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; break; default: - entry->opcode = MTHCA_OPCODE_INVALID; + entry->opcode = 0xFF; break; } } else { diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 9dbbf4d16796..a445160de3e1 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -105,7 +105,6 @@ enum { MTHCA_OPCODE_ATOMIC_CS = 0x11, MTHCA_OPCODE_ATOMIC_FA = 0x12, MTHCA_OPCODE_BIND_MW = 0x18, - MTHCA_OPCODE_INVALID = 0xff }; enum { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index c4d9cdc4ee97..1a3dd07f993b 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -470,7 +470,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, int err; if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); switch (init_attr->qp_type) { case IB_QPT_RC: @@ -612,7 +612,7 @@ static int mthca_create_cq(struct ib_cq *ibcq, udata, struct 
mthca_ucontext, ibucontext); if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return -EINVAL; @@ -961,29 +961,34 @@ static ssize_t hw_rev_show(struct device *device, struct mthca_dev *dev = rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - return sprintf(buf, "%x\n", dev->rev_id); + return sysfs_emit(buf, "%x\n", dev->rev_id); } static DEVICE_ATTR_RO(hw_rev); -static ssize_t hca_type_show(struct device *device, - struct device_attribute *attr, char *buf) +static const char *hca_type_string(int hca_type) { - struct mthca_dev *dev = - rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - - switch (dev->pdev->device) { + switch (hca_type) { case PCI_DEVICE_ID_MELLANOX_TAVOR: - return sprintf(buf, "MT23108\n"); + return "MT23108"; case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT: - return sprintf(buf, "MT25208 (MT23108 compat mode)\n"); + return "MT25208 (MT23108 compat mode)"; case PCI_DEVICE_ID_MELLANOX_ARBEL: - return sprintf(buf, "MT25208\n"); + return "MT25208"; case PCI_DEVICE_ID_MELLANOX_SINAI: case PCI_DEVICE_ID_MELLANOX_SINAI_OLD: - return sprintf(buf, "MT25204\n"); - default: - return sprintf(buf, "unknown\n"); + return "MT25204"; } + + return "unknown"; +} + +static ssize_t hca_type_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mthca_dev *dev = + rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); + + return sysfs_emit(buf, "%s\n", hca_type_string(dev->pdev->device)); } static DEVICE_ATTR_RO(hca_type); @@ -993,7 +998,7 @@ static ssize_t board_id_show(struct device *device, struct mthca_dev *dev = rdma_device_to_drv_device(device, struct mthca_dev, ib_dev); - return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); + return sysfs_emit(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); } static DEVICE_ATTR_RO(board_id); @@ -1158,36 +1163,12 @@ int mthca_register_device(struct mthca_dev *dev) if (ret) return ret; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; if (dev->mthca_flags & MTHCA_FLAG_SRQ) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); - if (mthca_is_memfree(dev)) ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_srq_ops); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 08a2a7afafd3..07cfc0934b17 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -863,6 +863,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr 
*attr, int attr_mask, enum ib_qp_state cur_state, new_state; int err = -EINVAL; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + mutex_lock(&qp->mutex); if (attr_mask & IB_QP_CUR_STATE) { cur_state = attr->cur_qp_state; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 9b96661a7143..9a834a9cca0e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -119,7 +119,7 @@ static ssize_t hw_rev_show(struct device *device, struct ocrdma_dev *dev = rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); + return sysfs_emit(buf, "0x%x\n", dev->nic_info.pdev->vendor); } static DEVICE_ATTR_RO(hw_rev); @@ -129,7 +129,7 @@ static ssize_t hca_type_show(struct device *device, struct ocrdma_dev *dev = rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); + return sysfs_emit(buf, "%s\n", &dev->model_number[0]); } static DEVICE_ATTR_RO(hca_type); @@ -154,6 +154,7 @@ static const struct ib_device_ops ocrdma_dev_ops = { .create_ah = ocrdma_create_ah, .create_cq = ocrdma_create_cq, .create_qp = ocrdma_create_qp, + .create_user_ah = ocrdma_create_ah, .dealloc_pd = ocrdma_dealloc_pd, .dealloc_ucontext = ocrdma_dealloc_ucontext, .dereg_mr = ocrdma_dereg_mr, @@ -204,32 +205,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask = - OCRDMA_UVERBS(GET_CONTEXT) | - OCRDMA_UVERBS(QUERY_DEVICE) | - OCRDMA_UVERBS(QUERY_PORT) | - OCRDMA_UVERBS(ALLOC_PD) | - OCRDMA_UVERBS(DEALLOC_PD) | - OCRDMA_UVERBS(REG_MR) | - OCRDMA_UVERBS(DEREG_MR) | - OCRDMA_UVERBS(CREATE_COMP_CHANNEL) | - OCRDMA_UVERBS(CREATE_CQ) | - OCRDMA_UVERBS(RESIZE_CQ) | - OCRDMA_UVERBS(DESTROY_CQ) | - OCRDMA_UVERBS(REQ_NOTIFY_CQ) | - OCRDMA_UVERBS(CREATE_QP) | - OCRDMA_UVERBS(MODIFY_QP) | - OCRDMA_UVERBS(QUERY_QP) | - OCRDMA_UVERBS(DESTROY_QP) | - OCRDMA_UVERBS(POLL_CQ) | - OCRDMA_UVERBS(POST_SEND) | - OCRDMA_UVERBS(POST_RECV); - - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(CREATE_AH) | - OCRDMA_UVERBS(MODIFY_AH) | - OCRDMA_UVERBS(QUERY_AH) | - OCRDMA_UVERBS(DESTROY_AH); dev->ibdev.node_type = RDMA_NODE_IB_CA; dev->ibdev.phys_port_cnt = 1; @@ -240,16 +215,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops); - if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { - dev->ibdev.uverbs_cmd_mask |= - OCRDMA_UVERBS(CREATE_SRQ) | - OCRDMA_UVERBS(MODIFY_SRQ) | - OCRDMA_UVERBS(QUERY_SRQ) | - OCRDMA_UVERBS(DESTROY_SRQ) | - OCRDMA_UVERBS(POST_SRQ_RECV); - + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); - } + rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1); if (ret) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 7350fe16f164..bc98bd950d99 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -974,7 +974,7 @@ int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ocrdma_create_cq_ureq ureq; if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (udata) { if 
(ib_copy_from_udata(&ureq, udata, sizeof(ureq))) @@ -1299,6 +1299,9 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, struct ocrdma_create_qp_ureq ureq; u16 dpp_credit_lmt, dpp_offset; + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + status = ocrdma_check_qp_params(ibpd, dev, attrs, udata); if (status) goto gen_err; @@ -1391,6 +1394,9 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct ocrdma_dev *dev; enum ib_qp_state old_qps, new_qps; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp = get_ocrdma_qp(ibqp); dev = get_ocrdma_dev(ibqp->device); @@ -1770,6 +1776,9 @@ int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq); + if (init_attr->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (init_attr->attr.max_sge > dev->attr.max_recv_sge) return -EINVAL; if (init_attr->attr.max_wr > dev->attr.max_rqe) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 967641662b24..8e7c069e1a2d 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -124,7 +124,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->attr.hw_ver); + return sysfs_emit(buf, "0x%x\n", dev->attr.hw_ver); } static DEVICE_ATTR_RO(hw_rev); @@ -134,10 +134,9 @@ static ssize_t hca_type_show(struct device *device, struct qedr_dev *dev = rdma_device_to_drv_device(device, struct qedr_dev, ibdev); - return scnprintf(buf, PAGE_SIZE, "FastLinQ QL%x %s\n", - dev->pdev->device, - rdma_protocol_iwarp(&dev->ibdev, 1) ? - "iWARP" : "RoCE"); + return sysfs_emit(buf, "FastLinQ QL%x %s\n", dev->pdev->device, + rdma_protocol_iwarp(&dev->ibdev, 1) ? 
"iWARP" : + "RoCE"); } static DEVICE_ATTR_RO(hca_type); @@ -188,10 +187,6 @@ static void qedr_roce_register_device(struct qedr_dev *dev) dev->ibdev.node_type = RDMA_NODE_IB_CA; ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); - - dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(OPEN_XRCD) | - QEDR_UVERBS(CLOSE_XRCD) | - QEDR_UVERBS(CREATE_XSRQ); } static const struct ib_device_ops qedr_dev_ops = { @@ -249,31 +244,6 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.node_guid = dev->attr.node_guid; memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); - dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) | - QEDR_UVERBS(QUERY_DEVICE) | - QEDR_UVERBS(QUERY_PORT) | - QEDR_UVERBS(ALLOC_PD) | - QEDR_UVERBS(DEALLOC_PD) | - QEDR_UVERBS(CREATE_COMP_CHANNEL) | - QEDR_UVERBS(CREATE_CQ) | - QEDR_UVERBS(RESIZE_CQ) | - QEDR_UVERBS(DESTROY_CQ) | - QEDR_UVERBS(REQ_NOTIFY_CQ) | - QEDR_UVERBS(CREATE_QP) | - QEDR_UVERBS(MODIFY_QP) | - QEDR_UVERBS(QUERY_QP) | - QEDR_UVERBS(DESTROY_QP) | - QEDR_UVERBS(CREATE_SRQ) | - QEDR_UVERBS(DESTROY_SRQ) | - QEDR_UVERBS(QUERY_SRQ) | - QEDR_UVERBS(MODIFY_SRQ) | - QEDR_UVERBS(POST_SRQ_RECV) | - QEDR_UVERBS(REG_MR) | - QEDR_UVERBS(DEREG_MR) | - QEDR_UVERBS(POLL_CQ) | - QEDR_UVERBS(POST_SEND) | - QEDR_UVERBS(POST_RECV); - if (IS_IWARP(dev)) { rc = qedr_iw_register_device(dev); if (rc) @@ -796,6 +766,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) } xa_unlock_irqrestore(&dev->srqs, flags); DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq); + break; default: break; } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 511c95bb3d01..0eb6a7a618e0 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -928,6 +928,9 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, "create_cq: called from %s. entries=%d, vector=%d\n", udata ? "User Lib" : "Kernel", entries, vector); + if (attr->flags) + return -EOPNOTSUPP; + if (entries > QEDR_MAX_CQES) { DP_ERR(dev, "create cq: the number of entries %d is too high. Must be equal or below %d.\n", @@ -1546,6 +1549,10 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, "create SRQ called from %s (pd %p)\n", (udata) ? 
"User lib" : "kernel", pd); + if (init_attr->srq_type != IB_SRQT_BASIC && + init_attr->srq_type != IB_SRQT_XRC) + return -EOPNOTSUPP; + rc = qedr_check_srq_params(dev, init_attr, udata); if (rc) return -EINVAL; @@ -2241,6 +2248,9 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, struct ib_qp *ibqp; int rc = 0; + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + if (attrs->qp_type == IB_QPT_XRC_TGT) { xrcd = get_qedr_xrcd(attrs->xrcd); dev = get_qedr_dev(xrcd->ibxrcd.device); @@ -2477,6 +2487,9 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask, attr->qp_state); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + old_qp_state = qedr_get_ibqp_state(qp->state); if (attr_mask & IB_QP_STATE) new_qp_state = attr->qp_state; diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 3dc6ce033319..2e07b3749b88 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -90,25 +90,18 @@ int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) goto bail; } - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (ret) { /* * If the 64 bit setup fails, try 32 bit. Some systems * do not setup 64 bit maps on systems with 2GB or less * memory installed. */ - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret); goto bail; } - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } else - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - qib_early_err(&pdev->dev, - "Unable to set DMA consistent mask: %d\n", ret); - goto bail; } pci_set_master(pdev); diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 021df0654ba7..62c179fc764b 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -43,11 +43,8 @@ static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf) { struct qib_devdata *dd = ppd->dd; - int ret; - ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT); - ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); - return ret; + return sysfs_emit(buf, "%d\n", dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT)); } static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, @@ -106,14 +103,10 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf, static ssize_t show_status(struct qib_pportdata *ppd, char *buf) { - ssize_t ret; - if (!ppd->statusp) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", - (unsigned long long) *(ppd->statusp)); - return ret; + return -EINVAL; + + return sysfs_emit(buf, "0x%llx\n", (unsigned long long)*(ppd->statusp)); } /* @@ -392,7 +385,7 @@ static ssize_t sl2vl_attr_show(struct kobject *kobj, struct attribute *attr, container_of(kobj, struct qib_pportdata, sl2vl_kobj); struct qib_ibport *qibp = &ppd->ibport_data; - return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); + return sysfs_emit(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); } static const struct sysfs_ops qib_sl2vl_ops = { @@ -501,17 +494,18 @@ static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, diagc_kobj); struct qib_ibport *qibp = &ppd->ibport_data; + u64 val; if (!strncmp(dattr->attr.name, "rc_acks", 
7)) - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks)); + val = READ_PER_CPU_CNTR(rc_acks); else if (!strncmp(dattr->attr.name, "rc_qacks", 8)) - return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks)); + val = READ_PER_CPU_CNTR(rc_qacks); else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15)) - return sprintf(buf, "%llu\n", - READ_PER_CPU_CNTR(rc_delayed_comp)); + val = READ_PER_CPU_CNTR(rc_delayed_comp); else - return sprintf(buf, "%u\n", - *(u32 *)((char *)qibp + dattr->counter)); + val = *(u32 *)((char *)qibp + dattr->counter); + + return sysfs_emit(buf, "%llu\n", val); } static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, @@ -565,7 +559,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); - return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); + return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev); } static DEVICE_ATTR_RO(hw_rev); @@ -575,13 +569,10 @@ static ssize_t hca_type_show(struct device *device, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); - int ret; if (!dd->boardname) - ret = -EINVAL; - else - ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); - return ret; + return -EINVAL; + return sysfs_emit(buf, "%s\n", dd->boardname); } static DEVICE_ATTR_RO(hca_type); static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL); @@ -590,7 +581,7 @@ static ssize_t version_show(struct device *device, struct device_attribute *attr, char *buf) { /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version); + return sysfs_emit(buf, "%s", (char *)ib_qib_version); } static DEVICE_ATTR_RO(version); @@ -602,7 +593,7 @@ static ssize_t boardversion_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); + return sysfs_emit(buf, "%s", dd->boardversion); } static DEVICE_ATTR_RO(boardversion); @@ -614,7 +605,7 @@ static ssize_t localbus_info_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info); + return sysfs_emit(buf, "%s", dd->lbus_info); } static DEVICE_ATTR_RO(localbus_info); @@ -628,9 +619,10 @@ static ssize_t nctxts_show(struct device *device, /* Return the number of user ports (contexts) available. */ /* The calculation below deals with a special case where * cfgctxts is set to 1 on a single-port board. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", - (dd->first_user_ctxt > dd->cfgctxts) ? 0 : - (dd->cfgctxts - dd->first_user_ctxt)); + return sysfs_emit(buf, "%u\n", + (dd->first_user_ctxt > dd->cfgctxts) ? + 0 : + (dd->cfgctxts - dd->first_user_ctxt)); } static DEVICE_ATTR_RO(nctxts); @@ -642,21 +634,20 @@ static ssize_t nfreectxts_show(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. 
*/ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); + return sysfs_emit(buf, "%u\n", dd->freectxts); } static DEVICE_ATTR_RO(nfreectxts); -static ssize_t serial_show(struct device *device, - struct device_attribute *attr, char *buf) +static ssize_t serial_show(struct device *device, struct device_attribute *attr, + char *buf) { struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); + const u8 *end = memchr(dd->serial, 0, ARRAY_SIZE(dd->serial)); + int size = end ? end - dd->serial : ARRAY_SIZE(dd->serial); - buf[sizeof(dd->serial)] = '\0'; - memcpy(buf, dd->serial, sizeof(dd->serial)); - strcat(buf, "\n"); - return strlen(buf); + return sysfs_emit(buf, ".%*s\n", size, dd->serial); } static DEVICE_ATTR_RO(serial); @@ -689,27 +680,26 @@ static ssize_t tempsense_show(struct device *device, struct qib_ibdev *dev = rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev); struct qib_devdata *dd = dd_from_dev(dev); - int ret; - int idx; + int i; u8 regvals[8]; - ret = -ENXIO; - for (idx = 0; idx < 8; ++idx) { - if (idx == 6) + for (i = 0; i < 8; i++) { + int ret; + + if (i == 6) continue; - ret = dd->f_tempsense_rd(dd, idx); + ret = dd->f_tempsense_rd(dd, i); if (ret < 0) - break; - regvals[idx] = ret; + return ret; /* return error on bad read */ + regvals[i] = ret; } - if (idx == 8) - ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n", - *(signed char *)(regvals), - *(signed char *)(regvals + 1), - regvals[2], regvals[3], - *(signed char *)(regvals + 5), - *(signed char *)(regvals + 7)); - return ret; + return sysfs_emit(buf, "%d %d %02X %02X %d %d\n", + (signed char)regvals[0], + (signed char)regvals[1], + regvals[2], + regvals[3], + (signed char)regvals[5], + (signed char)regvals[7]); } static DEVICE_ATTR_RO(tempsense); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index aa2e65fc5cd6..1b63a491fa72 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -398,25 +398,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; us_ibdev->ib_dev.dev.parent = &dev->dev; - us_ibdev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops); rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index c85d48ae7442..e59615a4c9d9 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -57,7 +57,7 @@ static ssize_t board_id_show(struct device *device, subsystem_device_id = us_ibdev->pdev->subsystem_device; 
mutex_unlock(&us_ibdev->usdev_lock); - return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id); + return sysfs_emit(buf, "%u\n", subsystem_device_id); } static DEVICE_ATTR_RO(board_id); @@ -69,19 +69,13 @@ config_show(struct device *device, struct device_attribute *attr, char *buf) { struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - char *ptr; - unsigned left; - unsigned n; enum usnic_vnic_res_type res_type; - - /* Buffer space limit is 1 page */ - ptr = buf; - left = PAGE_SIZE; + int len; mutex_lock(&us_ibdev->usdev_lock); if (kref_read(&us_ibdev->vf_cnt) > 0) { char *busname; - + char *sep = ""; /* * bus name seems to come with annoying prefix. * Remove it if it is predictable @@ -90,39 +84,35 @@ config_show(struct device *device, struct device_attribute *attr, char *buf) if (strncmp(busname, "PCI Bus ", 8) == 0) busname += 8; - n = scnprintf(ptr, left, - "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:", - dev_name(&us_ibdev->ib_dev.dev), - busname, - PCI_SLOT(us_ibdev->pdev->devfn), - PCI_FUNC(us_ibdev->pdev->devfn), - netdev_name(us_ibdev->netdev), - us_ibdev->ufdev->mac, - kref_read(&us_ibdev->vf_cnt)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "%s: %s:%d.%d, %s, %pM, %u VFs\n", + dev_name(&us_ibdev->ib_dev.dev), + busname, + PCI_SLOT(us_ibdev->pdev->devfn), + PCI_FUNC(us_ibdev->pdev->devfn), + netdev_name(us_ibdev->netdev), + us_ibdev->ufdev->mac, + kref_read(&us_ibdev->vf_cnt)); + len += sysfs_emit_at(buf, len, " Per VF:"); for (res_type = USNIC_VNIC_RES_TYPE_EOL; - res_type < USNIC_VNIC_RES_TYPE_MAX; - res_type++) { + res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) { if (us_ibdev->vf_res_cnt[res_type] == 0) continue; - n = scnprintf(ptr, left, " %d %s%s", - us_ibdev->vf_res_cnt[res_type], - usnic_vnic_res_type_to_str(res_type), - (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ? 
- "," : ""); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "%s %d %s", + sep, + us_ibdev->vf_res_cnt[res_type], + usnic_vnic_res_type_to_str(res_type)); + sep = ","; } - n = scnprintf(ptr, left, "\n"); - UPDATE_PTR_LEFT(n, ptr, left); + len += sysfs_emit_at(buf, len, "\n"); } else { - n = scnprintf(ptr, left, "%s: no VFs\n", - dev_name(&us_ibdev->ib_dev.dev)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "%s: no VFs\n", + dev_name(&us_ibdev->ib_dev.dev)); } + mutex_unlock(&us_ibdev->usdev_lock); - return ptr - buf; + return len; } static DEVICE_ATTR_RO(config); @@ -132,8 +122,7 @@ iface_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", - netdev_name(us_ibdev->netdev)); + return sysfs_emit(buf, "%s\n", netdev_name(us_ibdev->netdev)); } static DEVICE_ATTR_RO(iface); @@ -143,8 +132,7 @@ max_vf_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%u\n", - kref_read(&us_ibdev->vf_cnt)); + return sysfs_emit(buf, "%u\n", kref_read(&us_ibdev->vf_cnt)); } static DEVICE_ATTR_RO(max_vf); @@ -158,8 +146,7 @@ qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); - return scnprintf(buf, PAGE_SIZE, - "%d\n", qp_per_vf); + return sysfs_emit(buf, "%d\n", qp_per_vf); } static DEVICE_ATTR_RO(qp_per_vf); @@ -169,8 +156,8 @@ cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf) struct usnic_ib_dev *us_ibdev = rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%d\n", - us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); + return sysfs_emit(buf, "%d\n", + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); } static DEVICE_ATTR_RO(cq_per_vf); @@ -217,43 +204,36 @@ struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME) static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf) { - return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx); + return sysfs_emit(buf, "0x%p\n", qp_grp->ctx); } static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) { - int i, j, n; - int left; - char *ptr; + int i, j; struct usnic_vnic_res_chunk *res_chunk; struct usnic_vnic_res *vnic_res; + int len; - left = PAGE_SIZE; - ptr = buf; - - n = scnprintf(ptr, left, - "QPN: %d State: (%s) PID: %u VF Idx: %hu ", - qp_grp->ibqp.qp_num, - usnic_ib_qp_grp_state_to_string(qp_grp->state), - qp_grp->owner_pid, - usnic_vnic_get_index(qp_grp->vf->vnic)); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit(buf, "QPN: %d State: (%s) PID: %u VF Idx: %hu ", + qp_grp->ibqp.qp_num, + usnic_ib_qp_grp_state_to_string(qp_grp->state), + qp_grp->owner_pid, + usnic_vnic_get_index(qp_grp->vf->vnic)); for (i = 0; qp_grp->res_chunk_list[i]; i++) { res_chunk = qp_grp->res_chunk_list[i]; for (j = 0; j < res_chunk->cnt; j++) { vnic_res = res_chunk->res[j]; - n = scnprintf(ptr, left, "%s[%d] ", + len += sysfs_emit_at( + buf, len, "%s[%d] ", usnic_vnic_res_type_to_str(vnic_res->type), vnic_res->vnic_idx); - UPDATE_PTR_LEFT(n, ptr, left); } } - n = scnprintf(ptr, left, "\n"); - UPDATE_PTR_LEFT(n, ptr, left); + len = sysfs_emit_at(buf, len, "\n"); - return ptr - buf; + return len; } static QPN_ATTR_RO(context); diff 
--git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 9e961f8ffa10..38a37770c016 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -474,7 +474,7 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, us_ibdev = to_usdev(pd->device); if (init_attr->create_flags) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); if (err) { @@ -557,6 +557,9 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int status; usnic_dbg("\n"); + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + qp_grp = to_uqp_grp(ibqp); mutex_lock(&qp_grp->vf->pf->usdev_lock); @@ -581,7 +584,7 @@ int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; return 0; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 319546a39a0d..a119ac3e103c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -119,6 +119,9 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); + if (attr->flags) + return -EOPNOTSUPP; + entries = roundup_pow_of_two(entries); if (entries < 1 || entries > dev->dsr->caps.max_cqe) return -EINVAL; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 6895bac53990..00a330909bb3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -68,21 +68,21 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); static ssize_t hca_type_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); + return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); } static DEVICE_ATTR_RO(hca_type); static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", PVRDMA_REV_ID); + return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID); } static DEVICE_ATTR_RO(hw_rev); static ssize_t board_id_show(struct device *device, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", PVRDMA_BOARD_ID); + return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID); } static DEVICE_ATTR_RO(board_id); @@ -205,27 +205,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->flags = 0; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; - dev->ib_dev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - 
(1ull << IB_USER_VERBS_CMD_DESTROY_AH); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; @@ -249,13 +228,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) /* Check if SRQ is supported by backend */ if (dev->dsr->caps.max_srq) { - dev->ib_dev.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 428256c55065..1d3bdd7bb51d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -209,7 +209,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, dev_warn(&dev->pdev->dev, "invalid create queuepair flags %#x\n", init_attr->create_flags); - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } if (init_attr->qp_type != IB_QPT_RC && @@ -544,6 +544,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_state cur_state, next_state; int ret; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + /* Sanity checking. Should need lock here */ mutex_lock(&qp->mutex); cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state : diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 082208f9aa90..bdc2703532c6 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -121,7 +121,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, dev_warn(&dev->pdev->dev, "shared receive queue type %d not supported\n", init_attr->srq_type); - return -EINVAL; + return -EOPNOTSUPP; } if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr || diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig index c8e268082952..0df48b3a6b56 100644 --- a/drivers/infiniband/sw/rdmavt/Kconfig +++ b/drivers/infiniband/sw/rdmavt/Kconfig @@ -4,6 +4,5 @@ config INFINIBAND_RDMAVT depends on INFINIBAND_VIRT_DMA depends on X86_64 depends on PCI - select DMA_VIRT_OPS help This is a common software verbs provider for RDMA networks. 
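Most of the sysfs hunks above (mthca, ocrdma, qedr, qib, usnic, pvrdma) converge on the same pattern: sprintf() and scnprintf(buf, PAGE_SIZE, ...) calls in show() callbacks are replaced by sysfs_emit() and sysfs_emit_at(). A minimal sketch of the resulting shape of a show() callback follows; struct demo_dev, its hw_rev field and the use of dev_get_drvdata() are illustrative stand-ins, not code from the patch.

#include <linux/device.h>
#include <linux/sysfs.h>
#include <linux/types.h>

struct demo_dev {
	u32 hw_rev;
};

static ssize_t hw_rev_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	struct demo_dev *ddev = dev_get_drvdata(device);

	/*
	 * sysfs_emit() knows sysfs always hands it a full PAGE_SIZE
	 * buffer and bounds-checks internally, so the caller no longer
	 * passes PAGE_SIZE explicitly as with scnprintf().
	 */
	return sysfs_emit(buf, "0x%x\n", ddev->hw_rev);
}
static DEVICE_ATTR_RO(hw_rev);

For multi-part output, as in the usnic config_show() and summary_show() hunks, sysfs_emit_at(buf, len, ...) formats at an offset and returns the number of bytes added, so those callers accumulate len instead of carrying a separate "space left" counter.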
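A second convention repeated throughout this section is the switch from -EINVAL to -EOPNOTSUPP when a verb is invoked with capabilities the provider does not implement: unknown attr_mask bits in modify_qp, non-zero create_flags in create_qp, non-zero flags in create_cq, and unhandled srq_type values in create_srq. The distinction lets userspace tell "not supported by this provider" apart from "malformed arguments". A minimal sketch of the modify_qp guard, using a hypothetical demo_modify_qp() rather than any driver's actual entry point:

#include <rdma/ib_verbs.h>

static int demo_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			  int attr_mask, struct ib_udata *udata)
{
	/*
	 * Bits outside the core-defined set are features this provider
	 * does not implement, not malformed input from the caller.
	 */
	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
		return -EOPNOTSUPP;

	/* Provider-specific state transition handling would go here. */
	return 0;
}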
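The qib_pcie.c hunk above folds the separate streaming and coherent mask calls (pci_set_dma_mask() plus pci_set_consistent_dma_mask()) into a single dma_set_mask_and_coherent(), keeping the 64-bit-then-32-bit fallback. A standalone sketch of that probe-time sequence, with demo_pci_probe() as a hypothetical caller:

#include <linux/dma-mapping.h>
#include <linux/pci.h>

static int demo_pci_probe(struct pci_dev *pdev)
{
	int ret;

	/* Prefer a 64-bit DMA mask; fall back to 32 bits on systems
	 * that do not set up 64-bit mappings.
	 */
	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (ret)
		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
	if (ret)
		return ret;

	pci_set_master(pdev);
	return 0;
}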
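Looking ahead, the rxe_queue.h hunks further down drop the explicit smp_wmb() in rxe_cq_post() and convert the shared producer/consumer indices to smp_store_release()/smp_load_acquire() pairs, attaching the ordering to the index accesses themselves. A stripped-down sketch of that handshake follows; struct demo_queue and its fields are illustrative and omit the kernel/user shared rxe_queue_buf layout:

#include <linux/types.h>
#include <asm/barrier.h>

struct demo_queue {
	u32 producer_index;
	u32 consumer_index;
	u32 index_mask;		/* number of slots is a power of two */
};

static bool demo_queue_empty(struct demo_queue *q)
{
	/* Acquire pairs with the release below: once the new index is
	 * observed, the element written before it is visible too.
	 */
	u32 prod = smp_load_acquire(&q->producer_index);
	u32 cons = smp_load_acquire(&q->consumer_index);

	return ((prod - cons) & q->index_mask) == 0;
}

static void demo_advance_producer(struct demo_queue *q)
{
	u32 prod = (q->producer_index + 1) & q->index_mask;

	/* Publish the element before exposing the new producer index. */
	smp_store_release(&q->producer_index, prod);
}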
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index b938c4ffa99a..a3e5b368c5e7 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -126,10 +126,9 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, } /** - * rvt_destory_ah - Destory an address handle + * rvt_destroy_ah - Destroy an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) - * * Return: 0 on success */ int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 19248be14093..20cc0799ac4b 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -211,7 +211,7 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, int err; if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; if (entries < 1 || entries > rdi->dparms.props.max_cqe) return -EINVAL; diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index dd11c6fcd060..5233a63d99a6 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -54,7 +54,7 @@ #include "mcast.h" /** - * rvt_driver_mcast - init resources for multicast + * rvt_driver_mcast_init - init resources for multicast * @rdi: rvt dev struct * * This is per device that registers with rdmavt @@ -69,7 +69,7 @@ void rvt_driver_mcast_init(struct rvt_dev_info *rdi) } /** - * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct + * rvt_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct * @qp: the QP to link */ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) @@ -98,7 +98,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) } /** - * mcast_alloc - allocate the multicast GID structure + * rvt_mcast_alloc - allocate the multicast GID structure * @mgid: the multicast GID * @lid: the muilticast LID (host order) * @@ -181,7 +181,7 @@ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, EXPORT_SYMBOL(rvt_mcast_find); /** - * mcast_add - insert mcast GID into table and attach QP struct + * rvt_mcast_add - insert mcast GID into table and attach QP struct * @mcast: the mcast GID table * @mqp: the QP to attach * @@ -426,8 +426,8 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } /** - *rvt_mast_tree_empty - determine if any qps are attached to any mcast group - *@rdi: rvt dev struct + * rvt_mcast_tree_empty - determine if any qps are attached to any mcast group + * @rdi: rvt dev struct * * Return: in use count */ diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8490fdb9c91e..90fc234f489a 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -324,8 +324,6 @@ static void __rvt_free_mr(struct rvt_mr *mr) * @acc: access flags * * Return: the memory region on success, otherwise returns an errno. - * Note that all DMA addresses should be created via the functions in - * struct dma_virt_ops. */ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) { @@ -766,7 +764,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, /* * We use LKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). 
*/ if (sge->lkey == 0) { struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); @@ -877,7 +875,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, /* * We use RKEY == zero for kernel virtual addresses - * (see rvt_get_dma_mr() and dma_virt_ops). + * (see rvt_get_dma_mr()). */ rcu_read_lock(); if (rkey == 0) { diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ee48befc8978..22fa9bde5419 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1083,10 +1083,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (!rdi) return ERR_PTR(-EINVAL); + if (init_attr->create_flags & ~IB_QP_CREATE_NETDEV_USE) + return ERR_PTR(-EOPNOTSUPP); + if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || - init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - (init_attr->create_flags && - init_attr->create_flags != IB_QP_CREATE_NETDEV_USE)) + init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr) return ERR_PTR(-EINVAL); /* Check receive queue parameters if no SRQ is specified. */ @@ -1469,6 +1470,9 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int pmtu = 0; /* for gcc warning only */ int opa_ah; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + spin_lock_irq(&qp->r_lock); spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); @@ -1823,7 +1827,7 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } /** - * rvt_post_receive - post a receive on a QP + * rvt_post_recv - post a receive on a QP * @ibqp: the QP to post the receive on * @wr: the WR to post * @bad_wr: the first bad WR is put here @@ -2245,7 +2249,7 @@ bail: } /** - * rvt_post_srq_receive - post a receive on a shared receive queue + * rvt_post_srq_recv - post a receive on a shared receive queue * @ibsrq: the SRQ to post the receive on * @wr: the list of work requests to post * @bad_wr: A pointer to the first WR to cause a problem is put here @@ -2497,7 +2501,7 @@ bail: EXPORT_SYMBOL(rvt_get_rwqe); /** - * qp_comm_est - handle trap with QP established + * rvt_comm_est - handle trap with QP established * @qp: the QP */ void rvt_comm_est(struct rvt_qp *qp) @@ -2943,7 +2947,7 @@ static enum ib_wc_status loopback_qp_drop(struct rvt_ibport *rvp, } /** - * ruc_loopback - handle UC and RC loopback requests + * rvt_ruc_loopback - handle UC and RC loopback requests * @sqp: the sending QP * * This is called from rvt_do_send() to forward a WQE addressed to the same HFI diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 670a9623b46e..49cec85a372a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -384,6 +384,7 @@ static const struct ib_device_ops rvt_dev_ops = { .create_cq = rvt_create_cq, .create_qp = rvt_create_qp, .create_srq = rvt_create_srq, + .create_user_ah = rvt_create_ah, .dealloc_pd = rvt_dealloc_pd, .dealloc_ucontext = rvt_dealloc_ucontext, .dereg_mr = rvt_dereg_mr, @@ -524,7 +525,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) int rvt_register_device(struct rvt_dev_info *rdi) { int ret = 0, i; - u64 dma_mask; if (!rdi) return -EINVAL; @@ -579,13 +579,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) /* Completion queues */ spin_lock_init(&rdi->n_cqs_lock); - /* DMA Operations */ - rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms; - dma_mask = IS_ENABLED(CONFIG_64BIT) ? 
DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask); - if (ret) - goto bail_wss; - /* Protection Domain */ spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; @@ -596,36 +589,11 @@ int rvt_register_device(struct rvt_dev_info *rdi) * exactly which functions rdmavt supports, nor do they know the ABI * version, so we do all of this sort of stuff here. */ - rdi->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | - (1ull << IB_USER_VERBS_CMD_QUERY_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + rdi->ibdev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); rdi->ibdev.node_type = RDMA_NODE_IB_CA; if (!rdi->ibdev.num_comp_vectors) diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 8810bfa68049..452149066792 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -5,7 +5,6 @@ config RDMA_RXE depends on INFINIBAND_VIRT_DMA select NET_UDP_TUNNEL select CRYPTO_CRC32 - select DMA_VIRT_OPS help This driver implements the InfiniBand RDMA transport over the Linux network stack. 
It enables a system with a diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 43394c3f29d4..b315ebf041ac 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -123,11 +123,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe)); - /* make sure all changes to the CQ are written before we update the - * producer pointer - */ - smp_wmb(); - advance_producer(cq->queue); spin_unlock_irqrestore(&cq->cq_lock, flags); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index d2ce852447c1..6e8c41567ba0 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -31,7 +31,6 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length) return 0; case RXE_MEM_TYPE_MR: - case RXE_MEM_TYPE_FMR: if (iova < mem->iova || length > mem->length || iova > mem->iova + mem->length - length) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 34bef7d8e6b4..c4b06ced30a7 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -8,7 +8,6 @@ #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/if.h> -#include <linux/if_vlan.h> #include <net/udp_tunnel.h> #include <net/sch_generic.h> #include <linux/netfilter.h> @@ -20,18 +19,6 @@ static struct rxe_recv_sockets recv_sockets; -struct device *rxe_dma_device(struct rxe_dev *rxe) -{ - struct net_device *ndev; - - ndev = rxe->ndev; - - if (is_vlan_dev(ndev)) - ndev = vlan_dev_real_dev(ndev); - - return ndev->dev.parent; -} - int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) { int err; @@ -166,14 +153,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct udphdr *udph; struct net_device *ndev = skb->dev; - struct net_device *rdev = ndev; struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - if (!rxe && is_vlan_dev(rdev)) { - rdev = vlan_dev_real_dev(ndev); - rxe = rxe_get_dev_from_net(rdev); - } if (!rxe) goto drop; diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 7d434a6837a7..2902ca7b288c 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -7,9 +7,11 @@ #ifndef RXE_QUEUE_H #define RXE_QUEUE_H +/* for definition of shared struct rxe_queue_buf */ +#include <uapi/rdma/rdma_user_rxe.h> + /* implements a simple circular buffer that can optionally be * shared between user space and the kernel and can be resized - * the requested element size is rounded up to a power of 2 * and the number of elements in the buffer is also rounded * up to a power of 2. Since the queue is empty when the @@ -17,28 +19,6 @@ * of the queue is one less than the number of element slots */ -/* this data structure is shared between user space and kernel - * space for those cases where the queue is shared. It contains - * the producer and consumer indices. 
Is also contains a copy - * of the queue size parameters for user space to use but the - * kernel must use the parameters in the rxe_queue struct - * this MUST MATCH the corresponding librxe struct - * for performance reasons arrange to have producer and consumer - * pointers in separate cache lines - * the kernel should always mask the indices to avoid accessing - * memory outside of the data area - */ -struct rxe_queue_buf { - __u32 log2_elem_size; - __u32 index_mask; - __u32 pad_1[30]; - __u32 producer_index; - __u32 pad_2[31]; - __u32 consumer_index; - __u32 pad_3[31]; - __u8 data[]; -}; - struct rxe_queue { struct rxe_dev *rxe; struct rxe_queue_buf *buf; @@ -46,7 +26,7 @@ struct rxe_queue { size_t buf_size; size_t elem_size; unsigned int log2_elem_size; - unsigned int index_mask; + u32 index_mask; }; int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, @@ -76,26 +56,56 @@ static inline int next_index(struct rxe_queue *q, int index) static inline int queue_empty(struct rxe_queue *q) { - return ((q->buf->producer_index - q->buf->consumer_index) - & q->index_mask) == 0; + u32 prod; + u32 cons; + + /* make sure all changes to queue complete before + * testing queue empty + */ + prod = smp_load_acquire(&q->buf->producer_index); + /* same */ + cons = smp_load_acquire(&q->buf->consumer_index); + + return ((prod - cons) & q->index_mask) == 0; } static inline int queue_full(struct rxe_queue *q) { - return ((q->buf->producer_index + 1 - q->buf->consumer_index) - & q->index_mask) == 0; + u32 prod; + u32 cons; + + /* make sure all changes to queue complete before + * testing queue full + */ + prod = smp_load_acquire(&q->buf->producer_index); + /* same */ + cons = smp_load_acquire(&q->buf->consumer_index); + + return ((prod + 1 - cons) & q->index_mask) == 0; } static inline void advance_producer(struct rxe_queue *q) { - q->buf->producer_index = (q->buf->producer_index + 1) - & q->index_mask; + u32 prod; + + prod = (q->buf->producer_index + 1) & q->index_mask; + + /* make sure all changes to queue complete before + * changing producer index + */ + smp_store_release(&q->buf->producer_index, prod); } static inline void advance_consumer(struct rxe_queue *q) { - q->buf->consumer_index = (q->buf->consumer_index + 1) - & q->index_mask; + u32 cons; + + cons = (q->buf->consumer_index + 1) & q->index_mask; + + /* make sure all changes to queue complete before + * changing consumer index + */ + smp_store_release(&q->buf->consumer_index, cons); } static inline void *producer_addr(struct rxe_queue *q) @@ -112,12 +122,28 @@ static inline void *consumer_addr(struct rxe_queue *q) static inline unsigned int producer_index(struct rxe_queue *q) { - return q->buf->producer_index; + u32 index; + + /* make sure all changes to queue + * complete before getting producer index + */ + index = smp_load_acquire(&q->buf->producer_index); + index &= q->index_mask; + + return index; } static inline unsigned int consumer_index(struct rxe_queue *q) { - return q->buf->consumer_index; + u32 index; + + /* make sure all changes to queue + * complete before getting consumer index + */ + index = smp_load_acquire(&q->buf->consumer_index); + index &= q->index_mask; + + return index; } static inline void *addr_from_index(struct rxe_queue *q, unsigned int index) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index af3923bf0a36..d4917646641a 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -634,7 +634,8 @@ next_wqe: } if (unlikely(qp_type(qp) 
== IB_QPT_RC && - qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) { + psn_compare(qp->req.psn, (qp->comp.psn + + RXE_MAX_UNACKED_PSNS)) > 0)) { qp->req.wait_psn = 1; goto exit; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c7e3b6a4af38..5a098083a9d2 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -872,11 +872,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, else wc->network_hdr_type = RDMA_NETWORK_IPV6; - if (is_vlan_dev(skb->dev)) { - wc->wc_flags |= IB_WC_WITH_VLAN; - wc->vlan_id = vlan_dev_vlan_id(skb->dev); - } - if (pkt->mask & RXE_IMMDT_MASK) { wc->wc_flags |= IB_WC_WITH_IMM; wc->ex.imm_data = immdt_imm(pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index f9c832e82552..a031514e2f41 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -244,11 +244,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) recv_wqe->dma.cur_sge = 0; recv_wqe->dma.sge_offset = 0; - /* make sure all changes to the work queue are written before we - * update the producer pointer - */ - smp_wmb(); - advance_producer(rq->queue); return 0; @@ -265,6 +260,9 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_create_srq_resp __user *uresp = NULL; + if (init->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (udata) { if (udata->outlen < sizeof(*uresp)) return -EINVAL; @@ -392,6 +390,9 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, uresp = udata->outbuf; } + if (init->create_flags) + return ERR_PTR(-EOPNOTSUPP); + err = rxe_qp_chk_init(rxe, init); if (err) goto err1; @@ -433,6 +434,9 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct rxe_dev *rxe = to_rdev(ibqp->device); struct rxe_qp *qp = to_rqp(ibqp); + if (mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + err = rxe_qp_chk_attr(rxe, qp, attr, mask); if (err) goto err1; @@ -624,12 +628,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, if (unlikely(err)) goto err1; - /* - * make sure all changes to the work queue are - * written before we update the producer pointer - */ - smp_wmb(); - advance_producer(sq->queue); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); @@ -765,7 +763,7 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } if (attr->flags) - return -EINVAL; + return -EOPNOTSUPP; err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector); if (err) @@ -1033,7 +1031,7 @@ static ssize_t parent_show(struct device *device, struct rxe_dev *rxe = rdma_device_to_drv_device(device, struct rxe_dev, ib_dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", rxe_parent_name(rxe, 1)); + return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1)); } static DEVICE_ATTR_RO(parent); @@ -1070,6 +1068,7 @@ static const struct ib_device_ops rxe_dev_ops = { .create_cq = rxe_create_cq, .create_qp = rxe_create_qp, .create_srq = rxe_create_srq, + .create_user_ah = rxe_create_ah, .dealloc_driver = rxe_dealloc, .dealloc_pd = rxe_dealloc_pd, .dealloc_ucontext = rxe_dealloc_ucontext, @@ -1118,56 +1117,18 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) int err; struct ib_device *dev = &rxe->ib_dev; struct crypto_shash *tfm; - u64 dma_mask; strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 
1; dev->num_comp_vectors = num_possible_cpus(); - dev->dev.parent = rxe_dma_device(rxe); dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); - dev->dev.dma_parms = &rxe->dma_parms; - dma_set_max_seg_size(&dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - err = dma_coerce_mask_and_coherent(&dev->dev, dma_mask); - if (err) - return err; - dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) - | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) - | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) - | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) - | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) - | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) - | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) - | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) - | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) - | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) - | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) - | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) - ; + dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | + BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ); ib_set_device_ops(dev, &rxe_dev_ops); err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 3414b341b709..79e0a5a878da 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -273,7 +273,6 @@ enum rxe_mem_type { RXE_MEM_TYPE_NONE, RXE_MEM_TYPE_DMA, RXE_MEM_TYPE_MR, - RXE_MEM_TYPE_FMR, RXE_MEM_TYPE_MW, }; @@ -352,7 +351,6 @@ struct rxe_port { struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; - struct device_dma_parameters dma_parms; int max_ucontext; int max_inline_data; struct mutex usdev_lock; diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index 3450ba5081df..1b5105cbabae 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -2,7 +2,6 @@ config RDMA_SIW tristate "Software RDMA over TCP/IP (iWARP) driver" depends on INET && INFINIBAND && LIBCRC32C depends on INFINIBAND_VIRT_DMA - select DMA_VIRT_OPS help This driver implements the iWARP RDMA transport over the Linux TCP/IP network stack. 
It enables a system with a diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index e9753831ac3f..adda78996219 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -69,7 +69,6 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; - struct device_dma_parameters dma_parms; struct net_device *netdev; struct siw_dev_cap attrs; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 66764f7ef072..1f9e15b71504 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1047,7 +1047,7 @@ static void siw_cm_work_handler(struct work_struct *w) cep->state); } } - if (rv && rv != EAGAIN) + if (rv && rv != -EAGAIN) release_cep = 1; break; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 181e06c1c43d..ee95cf29179d 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -305,25 +305,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) { struct siw_device *sdev = NULL; struct ib_device *base_dev; - struct device *parent = netdev->dev.parent; - u64 dma_mask; int rv; - if (!parent) { - /* - * The loopback device has no parent device, - * so it appears as a top-level device. To support - * loopback device connectivity, take this device - * as the parent device. Skip all other devices - * w/o parent device. - */ - if (netdev->type != ARPHRD_LOOPBACK) { - pr_warn("siw: device %s error: no parent device\n", - netdev->name); - return NULL; - } - parent = &netdev->dev; - } sdev = ib_alloc_device(siw_device, base_dev); if (!sdev) return NULL; @@ -347,30 +330,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, addr); } - base_dev->uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV) | - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + base_dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND); base_dev->node_type = RDMA_NODE_RNIC; memcpy(base_dev->node_desc, SIW_NODE_DESC_COMMON, @@ -382,13 +343,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) * per physical port. */ base_dev->phys_port_cnt = 1; - base_dev->dev.parent = parent; - base_dev->dev.dma_parms = &sdev->dma_parms; - dma_set_max_seg_size(&base_dev->dev, UINT_MAX); - dma_mask = IS_ENABLED(CONFIG_64BIT) ? 
DMA_BIT_MASK(64) : DMA_BIT_MASK(32); - if (dma_coerce_mask_and_coherent(&base_dev->dev, dma_mask)) - goto error; - base_dev->num_comp_vectors = num_possible_cpus(); xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); @@ -430,7 +384,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev) atomic_set(&sdev->num_mr, 0); atomic_set(&sdev->num_pd, 0); - sdev->numa_node = dev_to_node(parent); + sdev->numa_node = dev_to_node(&netdev->dev); spin_lock_init(&sdev->lock); return sdev; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 7cf3242ffb41..68fd053fc774 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -307,6 +307,9 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, siw_dbg(base_dev, "create new QP\n"); + if (attrs->create_flags) + return ERR_PTR(-EOPNOTSUPP); + if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { siw_dbg(base_dev, "too many QP's\n"); rv = -ENOMEM; @@ -544,6 +547,9 @@ int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr, if (!attr_mask) return 0; + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + memset(&new_attrs, 0, sizeof(new_attrs)); if (attr_mask & IB_QP_ACCESS_FLAGS) { @@ -1094,6 +1100,9 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct siw_cq *cq = to_siw_cq(base_cq); int rv, size = attr->cqe; + if (attr->flags) + return -EOPNOTSUPP; + if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) { siw_dbg(base_cq->device, "too many CQ's\n"); rv = -ENOMEM; @@ -1555,6 +1564,9 @@ int siw_create_srq(struct ib_srq *base_srq, base_ucontext); int rv; + if (init_attrs->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) { siw_dbg_pd(base_srq->pd, "too many SRQ's\n"); rv = -ENOMEM; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 8f0b598a46ec..d5d592bdab35 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1514,9 +1514,9 @@ static ssize_t show_mode(struct device *d, struct device_attribute *attr, struct ipoib_dev_priv *priv = ipoib_priv(dev); if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) - return sprintf(buf, "connected\n"); + return sysfs_emit(buf, "connected\n"); else - return sprintf(buf, "datagram\n"); + return sysfs_emit(buf, "datagram\n"); } static ssize_t set_mode(struct device *d, struct device_attribute *attr, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 67a21fdf5367..823f6831e7ea 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -166,6 +166,10 @@ static inline int ib_speed_enum_to_int(int speed) return SPEED_14000; case IB_SPEED_EDR: return SPEED_25000; + case IB_SPEED_HDR: + return SPEED_50000; + case IB_SPEED_NDR: + return SPEED_100000; } return SPEED_UNKNOWN; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index abfab89423f4..a6f413491321 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2266,7 +2266,7 @@ static ssize_t show_pkey(struct device *dev, struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); - return sprintf(buf, "0x%04x\n", priv->pkey); + return sysfs_emit(buf, "0x%04x\n", priv->pkey); } static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); @@ -2276,7 +2276,8 @@ 
static ssize_t show_umcast(struct device *dev, struct net_device *ndev = to_net_dev(dev); struct ipoib_dev_priv *priv = ipoib_priv(ndev); - return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); + return sysfs_emit(buf, "%d\n", + test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); } void ipoib_set_umcast(struct net_device *ndev, int umcast_val) @@ -2446,7 +2447,7 @@ static ssize_t dev_id_show(struct device *dev, "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n", current->comm); - return sprintf(buf, "%#x\n", ndev->dev_id); + return sysfs_emit(buf, "%#x\n", ndev->dev_id); } static DEVICE_ATTR_RO(dev_id); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 587252fd6f57..5a150a080ac2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -158,6 +158,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) int ret, size, req_vec; int i; + static atomic_t counter; size = ipoib_recvq_size + 1; ret = ipoib_cm_dev_init(dev); @@ -171,8 +172,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (ret != -EOPNOTSUPP) return ret; - req_vec = (priv->port - 1) * 2; - + req_vec = atomic_inc_return(&counter) * 2; cq_attr.cqe = size; cq_attr.comp_vector = req_vec % priv->ca->num_comp_vectors; priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 4c50a87ed7cc..5958840dbeed 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -46,7 +46,7 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, struct net_device *dev = to_net_dev(d); struct ipoib_dev_priv *priv = ipoib_priv(dev); - return sprintf(buf, "%s\n", priv->parent->name); + return sysfs_emit(buf, "%s\n", priv->parent->name); } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 3690e28cc7ea..4792b9bf400f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -187,23 +187,14 @@ iser_initialize_task_headers(struct iscsi_task *task, struct iser_device *device = iser_conn->ib_conn.device; struct iscsi_iser_task *iser_task = task->dd_data; u64 dma_addr; - const bool mgmt_task = !task->sc && !in_interrupt(); - int ret = 0; - if (unlikely(mgmt_task)) - mutex_lock(&iser_conn->state_mutex); - - if (unlikely(iser_conn->state != ISER_CONN_UP)) { - ret = -ENODEV; - goto out; - } + if (unlikely(iser_conn->state != ISER_CONN_UP)) + return -ENODEV; dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc, ISER_HEADERS_LEN, DMA_TO_DEVICE); - if (ib_dma_mapping_error(device->ib_device, dma_addr)) { - ret = -ENOMEM; - goto out; - } + if (ib_dma_mapping_error(device->ib_device, dma_addr)) + return -ENOMEM; tx_desc->inv_wr.next = NULL; tx_desc->reg_wr.wr.next = NULL; @@ -214,11 +205,8 @@ iser_initialize_task_headers(struct iscsi_task *task, tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey; iser_task->iser_conn = iser_conn; -out: - if (unlikely(mgmt_task)) - mutex_unlock(&iser_conn->state_mutex); - return ret; + return 0; } /** @@ -739,7 +727,7 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, } /** - * iscsi_iser_set_param() - set class connection parameter + * 
iscsi_iser_conn_get_stats() - get iscsi connection statistics * @cls_conn: iscsi class connection * @stats: iscsi stats to output * diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 67f65dcb15a6..2ba27221ea85 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -28,6 +28,18 @@ static int isert_debug_level; module_param_named(debug_level, isert_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)"); +static int isert_sg_tablesize_set(const char *val, + const struct kernel_param *kp); +static const struct kernel_param_ops sg_tablesize_ops = { + .set = isert_sg_tablesize_set, + .get = param_get_int, +}; + +static int isert_sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE; +module_param_cb(sg_tablesize, &sg_tablesize_ops, &isert_sg_tablesize, 0644); +MODULE_PARM_DESC(sg_tablesize, + "Number of gather/scatter entries in a single scsi command, should >= 128 (default: 256, max: 4096)"); + static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); static struct workqueue_struct *isert_comp_wq; @@ -47,6 +59,19 @@ static void isert_send_done(struct ib_cq *cq, struct ib_wc *wc); static void isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void isert_login_send_done(struct ib_cq *cq, struct ib_wc *wc); +static int isert_sg_tablesize_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (ret != 0 || n < ISCSI_ISER_MIN_SG_TABLESIZE || + n > ISCSI_ISER_MAX_SG_TABLESIZE) + return -EINVAL; + + return param_set_int(val, kp); +} + + static inline bool isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) { @@ -101,7 +126,7 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; factor = rdma_rw_mr_factor(device->ib_device, cma_id->port_num, - ISCSI_ISER_MAX_SG_TABLESIZE); + isert_sg_tablesize); attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX * factor; attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; attr.cap.max_recv_sge = 1; @@ -1076,7 +1101,7 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, sequence_cmd: rc = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); - if (!rc && dump_payload == false && unsol_data) + if (!rc && !dump_payload && unsol_data) iscsit_set_unsolicited_dataout(cmd); else if (dump_payload && imm_data) target_put_sess_cmd(&cmd->se_cmd); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 7fee4a65e181..6c5af13db4e0 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -65,6 +65,12 @@ */ #define ISER_RX_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 1024) +/* Default I/O size is 1MB */ +#define ISCSI_ISER_DEF_SG_TABLESIZE 256 + +/* Minimum I/O size is 512KB */ +#define ISCSI_ISER_MIN_SG_TABLESIZE 128 + /* Maximum support is 16MB I/O size */ #define ISCSI_ISER_MAX_SG_TABLESIZE 4096 diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h index f64519872297..012fc27c5c93 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -437,7 +437,7 @@ struct opa_veswport_trap { } __packed; /** - * struct opa_vnic_iface_macs_entry - single entry in the mac list + * struct opa_vnic_iface_mac_entry - single entry in the mac list * @mac_addr: MAC address */ struct 
opa_vnic_iface_mac_entry { diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c index 868b5aec1537..292c037aa239 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c @@ -74,7 +74,7 @@ void opa_vnic_vema_report_event(struct opa_vnic_adapter *adapter, u8 event) } /** - * opa_vnic_get_error_counters - get summary counters + * opa_vnic_get_summary_counters - get summary counters * @adapter: vnic port adapter * @cntrs: pointer to destination summary counters structure * diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index ac4c49cbf153..ba00f0de14ca 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -52,7 +52,8 @@ static ssize_t max_reconnect_attempts_show(struct device *dev, { struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); - return sprintf(page, "%d\n", rtrs_clt_get_max_reconnect_attempts(clt)); + return sysfs_emit(page, "%d\n", + rtrs_clt_get_max_reconnect_attempts(clt)); } static ssize_t max_reconnect_attempts_store(struct device *dev, @@ -95,11 +96,13 @@ static ssize_t mpath_policy_show(struct device *dev, switch (clt->mp_policy) { case MP_POLICY_RR: - return sprintf(page, "round-robin (RR: %d)\n", clt->mp_policy); + return sysfs_emit(page, "round-robin (RR: %d)\n", + clt->mp_policy); case MP_POLICY_MIN_INFLIGHT: - return sprintf(page, "min-inflight (MI: %d)\n", clt->mp_policy); + return sysfs_emit(page, "min-inflight (MI: %d)\n", + clt->mp_policy); default: - return sprintf(page, "Unknown (%d)\n", clt->mp_policy); + return sysfs_emit(page, "Unknown (%d)\n", clt->mp_policy); } } @@ -138,9 +141,10 @@ static DEVICE_ATTR_RW(mpath_policy); static ssize_t add_path_show(struct device *dev, struct device_attribute *attr, char *page) { - return scnprintf(page, PAGE_SIZE, - "Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n", - attr->attr.name); + return sysfs_emit( + page, + "Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n", + attr->attr.name); } static ssize_t add_path_store(struct device *dev, @@ -184,20 +188,18 @@ static ssize_t rtrs_clt_state_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_clt_sess, kobj); if (sess->state == RTRS_CLT_CONNECTED) - return sprintf(page, "connected\n"); + return sysfs_emit(page, "connected\n"); - return sprintf(page, "disconnected\n"); + return sysfs_emit(page, "disconnected\n"); } static struct kobj_attribute rtrs_clt_state_attr = __ATTR(state, 0444, rtrs_clt_state_show, NULL); static ssize_t rtrs_clt_reconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_clt_reconnect_store(struct kobject *kobj, @@ -225,11 +227,9 @@ static struct kobj_attribute rtrs_clt_reconnect_attr = rtrs_clt_reconnect_store); static ssize_t rtrs_clt_disconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_clt_disconnect_store(struct 
kobject *kobj, @@ -257,11 +257,9 @@ static struct kobj_attribute rtrs_clt_disconnect_attr = rtrs_clt_disconnect_store); static ssize_t rtrs_clt_remove_path_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_clt_remove_path_store(struct kobject *kobj, @@ -324,7 +322,7 @@ static ssize_t rtrs_clt_hca_port_show(struct kobject *kobj, sess = container_of(kobj, typeof(*sess), kobj); - return scnprintf(page, PAGE_SIZE, "%u\n", sess->hca_port); + return sysfs_emit(page, "%u\n", sess->hca_port); } static struct kobj_attribute rtrs_clt_hca_port_attr = @@ -338,7 +336,7 @@ static ssize_t rtrs_clt_hca_name_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_clt_sess, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", sess->hca_name); + return sysfs_emit(page, "%s\n", sess->hca_name); } static struct kobj_attribute rtrs_clt_hca_name_attr = @@ -349,12 +347,13 @@ static ssize_t rtrs_clt_src_addr_show(struct kobject *kobj, char *page) { struct rtrs_clt_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_clt_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_clt_src_addr_attr = @@ -365,12 +364,13 @@ static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj, char *page) { struct rtrs_clt_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_clt_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_clt_dst_addr_attr = diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index f298adc02acb..560865f65dc4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1236,8 +1236,7 @@ static void free_sess_reqs(struct rtrs_clt_sess *sess) if (req->mr) ib_dereg_mr(req->mr); kfree(req->sge); - rtrs_iu_free(req->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(req->iu, sess->s.dev->ib_dev, 1); } kfree(sess->reqs); sess->reqs = NULL; @@ -1499,6 +1498,7 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid) con->c.cid = cid; con->c.sess = &sess->s; atomic_set(&con->io_cnt, 0); + mutex_init(&con->con_mutex); sess->s.con[cid] = &con->c; @@ -1510,6 +1510,7 @@ static void destroy_con(struct rtrs_clt_con *con) struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); sess->s.con[con->c.cid] = NULL; + mutex_destroy(&con->con_mutex); kfree(con); } @@ -1520,15 +1521,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; - /* - * This function can fail, but still destroy_con_cq_qp() should - * be called, this is because create_con_cq_qp() is called on cm - * event path, thus caller/waiter never knows: have we failed before - * create_con_cq_qp() or after. 
To solve this dilemma without - * creating any additional flags just allow destroy_con_cq_qp() be - * called many times. - */ - + lockdep_assert_held(&con->con_mutex); if (con->c.cid == 0) { /* * One completion for each receive and two for each send @@ -1602,11 +1595,10 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con) * Be careful here: destroy_con_cq_qp() can be called even * create_con_cq_qp() failed, see comments there. */ - + lockdep_assert_held(&con->con_mutex); rtrs_cq_qp_destroy(&con->c); if (con->rsp_ius) { - rtrs_iu_free(con->rsp_ius, DMA_FROM_DEVICE, - sess->s.dev->ib_dev, con->queue_size); + rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_size); con->rsp_ius = NULL; con->queue_size = 0; } @@ -1634,16 +1626,16 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con) struct rtrs_sess *s = con->c.sess; int err; + mutex_lock(&con->con_mutex); err = create_con_cq_qp(con); + mutex_unlock(&con->con_mutex); if (err) { rtrs_err(s, "create_con_cq_qp(), err: %d\n", err); return err; } err = rdma_resolve_route(con->c.cm_id, RTRS_CONNECT_TIMEOUT_MS); - if (err) { + if (err) rtrs_err(s, "Resolving route failed, err: %d\n", err); - destroy_con_cq_qp(con); - } return err; } @@ -1837,8 +1829,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, cm_err = rtrs_rdma_route_resolved(con); break; case RDMA_CM_EVENT_ESTABLISHED: - con->cm_err = rtrs_rdma_conn_established(con, ev); - if (likely(!con->cm_err)) { + cm_err = rtrs_rdma_conn_established(con, ev); + if (likely(!cm_err)) { /* * Report success and wake up. Here we abuse state_wq, * i.e. wake up without state change, but we set cm_err. @@ -1851,20 +1843,22 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_REJECTED: cm_err = rtrs_rdma_conn_rejected(con, ev); break; + case RDMA_CM_EVENT_DISCONNECTED: + /* No message for disconnecting */ + cm_err = -ECONNRESET; + break; case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_TIMEWAIT_EXIT: rtrs_wrn(s, "CM error event %d\n", ev->event); cm_err = -ECONNRESET; break; case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: + rtrs_wrn(s, "CM error event %d\n", ev->event); cm_err = -EHOSTUNREACH; break; - case RDMA_CM_EVENT_DISCONNECTED: - case RDMA_CM_EVENT_ADDR_CHANGE: - case RDMA_CM_EVENT_TIMEWAIT_EXIT: - cm_err = -ECONNRESET; - break; case RDMA_CM_EVENT_DEVICE_REMOVAL: /* * Device removal is a special case. Queue close and return 0. 
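The rtrs-clt hunks around this point replace the old convention of letting destroy_con_cq_qp() be called any number of times with a per-connection con_mutex: every caller now takes the mutex around creating or destroying the CQ/QP, and the helpers merely assert that the lock is held. As a rough illustration only (not part of the patch), a userspace analogue of that caller-locks pattern, with hypothetical names and pthreads standing in for the kernel mutex and lockdep, could look like:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for struct rtrs_clt_con: the resource plus the
 * mutex that serializes its creation and destruction. */
struct conn {
	pthread_mutex_t mutex;
	bool has_cq_qp;		/* whether the CQ/QP resources currently exist */
};

/* Caller must hold conn->mutex; the kernel helpers document the same rule
 * with lockdep_assert_held(&con->con_mutex). */
static int conn_create_cq_qp(struct conn *c)
{
	if (c->has_cq_qp)
		return -1;	/* already created */
	c->has_cq_qp = true;
	return 0;
}

/* Also requires conn->mutex; safe whether or not create succeeded. */
static void conn_destroy_cq_qp(struct conn *c)
{
	c->has_cq_qp = false;
}

int main(void)
{
	struct conn c = { .mutex = PTHREAD_MUTEX_INITIALIZER, .has_cq_qp = false };

	pthread_mutex_lock(&c.mutex);
	if (conn_create_cq_qp(&c) == 0)
		printf("created\n");
	conn_destroy_cq_qp(&c);	/* error/teardown path, still under the lock */
	pthread_mutex_unlock(&c.mutex);
	return 0;
}

The point is that the lock, not call-site idempotence, is now what keeps a create on the CM event path from racing with teardown.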
@@ -1949,8 +1943,9 @@ static int create_cm(struct rtrs_clt_con *con) errr: stop_cm(con); - /* Is safe to call destroy if cq_qp is not inited */ + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm: destroy_cm(con); @@ -2057,7 +2052,9 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) if (!sess->s.con[cid]) break; con = to_clt_con(sess->s.con[cid]); + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm(con); destroy_con(con); } @@ -2164,8 +2161,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) mutex_unlock(&clt->paths_mutex); } -static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess, - struct rtrs_addr *addr) +static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess) { struct rtrs_clt *clt = sess->clt; @@ -2224,7 +2220,10 @@ destroy: struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]); stop_cm(con); + + mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); destroy_cm(con); destroy_con(con); } @@ -2245,7 +2244,7 @@ static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); if (unlikely(wc->status != IB_WC_SUCCESS)) { rtrs_err(sess->clt, "Sess info request send failed: %s\n", @@ -2264,8 +2263,12 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, int i, sgi; sg_cnt = le16_to_cpu(msg->sg_cnt); - if (unlikely(!sg_cnt)) + if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { + rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", + sg_cnt); return -EINVAL; + } + /* * Check if IB immediate data size is enough to hold the mem_id and * the offset inside the memory chunk. @@ -2278,11 +2281,6 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size); return -EINVAL; } - if (unlikely(!sg_cnt || (sess->queue_depth % sg_cnt))) { - rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", - sg_cnt); - return -EINVAL; - } total_len = 0; for (sgi = 0, i = 0; sgi < sg_cnt && i < sess->queue_depth; sgi++) { const struct rtrs_sg_desc *desc = &msg->desc[sgi]; @@ -2374,7 +2372,7 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) out: rtrs_clt_update_wc_stats(con); - rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); rtrs_clt_change_state(sess, state); } @@ -2436,9 +2434,9 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) out: if (tx_iu) - rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); if (rx_iu) - rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); if (unlikely(err)) /* If we've never taken async path because of malloc problems */ rtrs_clt_change_state(sess, RTRS_CLT_CONNECTING_ERR); @@ -2938,7 +2936,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, * IO will never grab it. Also it is very important to add * path before init, since init fires LINK_CONNECTED event. 
*/ - rtrs_clt_add_path_to_arr(sess, addr); + rtrs_clt_add_path_to_arr(sess); err = init_sess(sess); if (err) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index 167acd3c90fc..b8dbd701b3cb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -72,6 +72,7 @@ struct rtrs_clt_con { struct rtrs_iu *rsp_ius; u32 queue_size; unsigned int cpu; + struct mutex con_mutex; atomic_t io_cnt; int cm_err; }; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index b8e43dc4d95a..3f2918671dbe 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -287,8 +287,7 @@ struct rtrs_msg_rdma_hdr { struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t t, struct ib_device *dev, enum dma_data_direction, void (*done)(struct ib_cq *cq, struct ib_wc *wc)); -void rtrs_iu_free(struct rtrs_iu *iu, enum dma_data_direction dir, - struct ib_device *dev, u32 queue_size); +void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_size); int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu); int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, struct ib_send_wr *head); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 07fbb063555d..d2edff3b8f0d 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -27,11 +27,9 @@ static struct kobj_type ktype = { }; static ssize_t rtrs_srv_disconnect_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", - attr->attr.name); + return sysfs_emit(buf, "Usage: echo 1 > %s\n", attr->attr.name); } static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj, @@ -72,8 +70,7 @@ static ssize_t rtrs_srv_hca_port_show(struct kobject *kobj, sess = container_of(kobj, typeof(*sess), kobj); usr_con = sess->s.con[0]; - return scnprintf(page, PAGE_SIZE, "%u\n", - usr_con->cm_id->port_num); + return sysfs_emit(page, "%u\n", usr_con->cm_id->port_num); } static struct kobj_attribute rtrs_srv_hca_port_attr = @@ -87,8 +84,7 @@ static ssize_t rtrs_srv_hca_name_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_srv_sess, kobj); - return scnprintf(page, PAGE_SIZE, "%s\n", - sess->s.dev->ib_dev->name); + return sysfs_emit(page, "%s\n", sess->s.dev->ib_dev->name); } static struct kobj_attribute rtrs_srv_hca_name_attr = @@ -115,12 +111,13 @@ static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj, char *page) { struct rtrs_srv_sess *sess; - int cnt; + int len; sess = container_of(kobj, struct rtrs_srv_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, - page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page, + PAGE_SIZE); + len += sysfs_emit_at(page, len, "\n"); + return len; } static struct kobj_attribute rtrs_srv_dst_addr_attr = diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index d6f93601712e..c42fd470c4eb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -113,28 +113,18 @@ static bool __rtrs_srv_change_state(struct rtrs_srv_sess *sess, return changed; } -static bool rtrs_srv_change_state_get_old(struct rtrs_srv_sess *sess, - 
enum rtrs_srv_state new_state, - enum rtrs_srv_state *old_state) +static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, + enum rtrs_srv_state new_state) { bool changed; spin_lock_irq(&sess->state_lock); - *old_state = sess->state; changed = __rtrs_srv_change_state(sess, new_state); spin_unlock_irq(&sess->state_lock); return changed; } -static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, - enum rtrs_srv_state new_state) -{ - enum rtrs_srv_state old_state; - - return rtrs_srv_change_state_get_old(sess, new_state, &old_state); -} - static void free_id(struct rtrs_srv_op *id) { if (!id) @@ -471,10 +461,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, void close_sess(struct rtrs_srv_sess *sess) { - enum rtrs_srv_state old_state; - - if (rtrs_srv_change_state_get_old(sess, RTRS_SRV_CLOSING, - &old_state)) + if (rtrs_srv_change_state(sess, RTRS_SRV_CLOSING)) queue_work(rtrs_wq, &sess->close_work); WARN_ON(sess->state != RTRS_SRV_CLOSING); } @@ -577,8 +564,7 @@ static void unmap_cont_bufs(struct rtrs_srv_sess *sess) struct rtrs_srv_mr *srv_mr; srv_mr = &sess->mrs[i]; - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); ib_dereg_mr(srv_mr->mr); ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl, srv_mr->sgt.nents, DMA_BIDIRECTIONAL); @@ -682,8 +668,7 @@ err: sgt = &srv_mr->sgt; mr = srv_mr->mr; free_iu: - rtrs_iu_free(srv_mr->iu, DMA_TO_DEVICE, - sess->s.dev->ib_dev, 1); + rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); dereg_mr: ib_dereg_mr(mr); unmap_sg: @@ -735,7 +720,7 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); if (unlikely(wc->status != IB_WC_SUCCESS)) { rtrs_err(s, "Sess info response send failed: %s\n", @@ -861,7 +846,7 @@ static int process_info_req(struct rtrs_srv_con *con, if (unlikely(err)) { rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err); iu_free: - rtrs_iu_free(tx_iu, DMA_TO_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); } rwr_free: kfree(rwr); @@ -906,7 +891,7 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) goto close; out: - rtrs_iu_free(iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); return; close: close_sess(sess); @@ -929,7 +914,7 @@ static int post_recv_info_req(struct rtrs_srv_con *con) err = rtrs_iu_post_recv(&con->c, rx_iu); if (unlikely(err)) { rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err); - rtrs_iu_free(rx_iu, DMA_FROM_DEVICE, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); return err; } @@ -1328,17 +1313,42 @@ static void rtrs_srv_dev_release(struct device *dev) kfree(srv); } -static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) +static void free_srv(struct rtrs_srv *srv) +{ + int i; + + WARN_ON(refcount_read(&srv->refcount)); + for (i = 0; i < srv->queue_depth; i++) + mempool_free(srv->chunks[i], chunk_pool); + kfree(srv->chunks); + mutex_destroy(&srv->paths_mutex); + mutex_destroy(&srv->paths_ev_mutex); + /* last put to release the srv structure */ + put_device(&srv->dev); +} + +static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, + const uuid_t *paths_uuid) { struct rtrs_srv *srv; int i; + mutex_lock(&ctx->srv_mutex); + list_for_each_entry(srv, &ctx->srv_list, 
ctx_list) { + if (uuid_equal(&srv->paths_uuid, paths_uuid) && + refcount_inc_not_zero(&srv->refcount)) { + mutex_unlock(&ctx->srv_mutex); + return srv; + } + } + + /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); - if (!srv) + if (!srv) { + mutex_unlock(&ctx->srv_mutex); return NULL; + } - refcount_set(&srv->refcount, 1); INIT_LIST_HEAD(&srv->paths_list); mutex_init(&srv->paths_mutex); mutex_init(&srv->paths_ev_mutex); @@ -1347,6 +1357,8 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, srv->ctx = ctx; device_initialize(&srv->dev); srv->dev.release = rtrs_srv_dev_release; + list_add(&srv->ctx_list, &ctx->srv_list); + mutex_unlock(&ctx->srv_mutex); srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); @@ -1358,7 +1370,7 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, if (!srv->chunks[i]) goto err_free_chunks; } - list_add(&srv->ctx_list, &ctx->srv_list); + refcount_set(&srv->refcount, 1); return srv; @@ -1369,52 +1381,9 @@ err_free_chunks: err_free_srv: kfree(srv); - - return NULL; -} - -static void free_srv(struct rtrs_srv *srv) -{ - int i; - - WARN_ON(refcount_read(&srv->refcount)); - for (i = 0; i < srv->queue_depth; i++) - mempool_free(srv->chunks[i], chunk_pool); - kfree(srv->chunks); - mutex_destroy(&srv->paths_mutex); - mutex_destroy(&srv->paths_ev_mutex); - /* last put to release the srv structure */ - put_device(&srv->dev); -} - -static inline struct rtrs_srv *__find_srv_and_get(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - list_for_each_entry(srv, &ctx->srv_list, ctx_list) { - if (uuid_equal(&srv->paths_uuid, paths_uuid) && - refcount_inc_not_zero(&srv->refcount)) - return srv; - } - return NULL; } -static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) -{ - struct rtrs_srv *srv; - - mutex_lock(&ctx->srv_mutex); - srv = __find_srv_and_get(ctx, paths_uuid); - if (!srv) - srv = __alloc_srv(ctx, paths_uuid); - mutex_unlock(&ctx->srv_mutex); - - return srv; -} - static void put_srv(struct rtrs_srv *srv) { if (refcount_dec_and_test(&srv->refcount)) { @@ -1813,7 +1782,11 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, } recon_cnt = le16_to_cpu(msg->recon_cnt); srv = get_or_create_srv(ctx, &msg->paths_uuid); - if (!srv) { + /* + * "refcount == 0" happens if a previous thread calls get_or_create_srv + * allocate srv, but chunks of srv are not allocated yet. + */ + if (!srv || refcount_read(&srv->refcount) == 0) { err = -ENOMEM; goto reject_w_err; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 08b0b8a6eebe..9543ae19996c 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -62,7 +62,7 @@ struct rtrs_srv_op { /* * server side memory region context, when always_invalidate=Y, we need - * queue_depth of memory regrion to invalidate each memory region. + * queue_depth of memory region to invalidate each memory region. 
*/ struct rtrs_srv_mr { struct ib_mr *mr; diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index ff1093d6e4bc..2e3a849e0a77 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -31,6 +31,7 @@ struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask, return NULL; for (i = 0; i < queue_size; i++) { iu = &ius[i]; + iu->direction = dir; iu->buf = kzalloc(size, gfp_mask); if (!iu->buf) goto err; @@ -41,17 +42,15 @@ struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t gfp_mask, iu->cqe.done = done; iu->size = size; - iu->direction = dir; } return ius; err: - rtrs_iu_free(ius, dir, dma_dev, i); + rtrs_iu_free(ius, dma_dev, i); return NULL; } EXPORT_SYMBOL_GPL(rtrs_iu_alloc); -void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir, - struct ib_device *ibdev, u32 queue_size) +void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_size) { struct rtrs_iu *iu; int i; @@ -61,7 +60,7 @@ void rtrs_iu_free(struct rtrs_iu *ius, enum dma_data_direction dir, for (i = 0; i < queue_size; i++) { iu = &ius[i]; - ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, dir); + ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, iu->direction); kfree(iu->buf); } kfree(ius); @@ -105,6 +104,22 @@ int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe) } EXPORT_SYMBOL_GPL(rtrs_post_recv_empty); +static int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head, + struct ib_send_wr *wr) +{ + if (head) { + struct ib_send_wr *tail = head; + + while (tail->next) + tail = tail->next; + tail->next = wr; + } else { + head = wr; + } + + return ib_post_send(qp, head, NULL); +} + int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, struct ib_send_wr *head) { @@ -127,17 +142,7 @@ int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, .send_flags = IB_SEND_SIGNALED, }; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = &wr; - } else { - head = &wr; - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr); } EXPORT_SYMBOL_GPL(rtrs_iu_post_send); @@ -169,17 +174,7 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, if (WARN_ON(sge[i].length == 0)) return -EINVAL; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = &wr.wr; - } else { - head = &wr.wr; - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr.wr); } EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm); @@ -196,17 +191,7 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, .ex.imm_data = cpu_to_be32(imm_data), }; - if (head) { - struct ib_send_wr *tail = head; - - while (tail->next) - tail = tail->next; - tail->next = &wr; - } else { - head = &wr; - } - - return ib_post_send(con->qp, head, NULL); + return rtrs_post_send(con->qp, head, &wr); } EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d8fcd21ab472..5492b66a8153 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -169,9 +169,9 @@ static int srp_tmo_get(char *buffer, const struct kernel_param *kp) int tmo = *(int *)kp->arg; if (tmo >= 0) - return sprintf(buffer, "%d\n", tmo); + return sysfs_emit(buffer, "%d\n", tmo); else - return sprintf(buffer, "off\n"); + return sysfs_emit(buffer,
"off\n"); } static int srp_tmo_set(const char *val, const struct kernel_param *kp) @@ -2896,7 +2896,7 @@ static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); } static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, @@ -2904,7 +2904,7 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); + return sysfs_emit(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); } static ssize_t show_service_id(struct device *dev, @@ -2914,8 +2914,8 @@ static ssize_t show_service_id(struct device *dev, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "0x%016llx\n", - be64_to_cpu(target->ib_cm.service_id)); + return sysfs_emit(buf, "0x%016llx\n", + be64_to_cpu(target->ib_cm.service_id)); } static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, @@ -2925,7 +2925,8 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); + + return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); } static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, @@ -2933,7 +2934,7 @@ static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%pI6\n", target->sgid.raw); + return sysfs_emit(buf, "%pI6\n", target->sgid.raw); } static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, @@ -2944,7 +2945,8 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); + + return sysfs_emit(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); } static ssize_t show_orig_dgid(struct device *dev, @@ -2954,7 +2956,8 @@ static ssize_t show_orig_dgid(struct device *dev, if (target->using_rdma_cm) return -ENOENT; - return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); + + return sysfs_emit(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); } static ssize_t show_req_lim(struct device *dev, @@ -2968,7 +2971,8 @@ static ssize_t show_req_lim(struct device *dev, ch = &target->ch[i]; req_lim = min(req_lim, ch->req_lim); } - return sprintf(buf, "%d\n", req_lim); + + return sysfs_emit(buf, "%d\n", req_lim); } static ssize_t show_zero_req_lim(struct device *dev, @@ -2976,7 +2980,7 @@ static ssize_t show_zero_req_lim(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->zero_req_lim); + return sysfs_emit(buf, "%d\n", target->zero_req_lim); } static ssize_t show_local_ib_port(struct device *dev, @@ -2984,7 +2988,7 @@ static ssize_t show_local_ib_port(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->srp_host->port); + return sysfs_emit(buf, "%d\n", target->srp_host->port); } static ssize_t show_local_ib_device(struct device *dev, @@ -2992,8 +2996,8 @@ static ssize_t show_local_ib_device(struct device *dev, { struct srp_target_port *target = 
host_to_target(class_to_shost(dev)); - return sprintf(buf, "%s\n", - dev_name(&target->srp_host->srp_dev->dev->dev)); + return sysfs_emit(buf, "%s\n", + dev_name(&target->srp_host->srp_dev->dev->dev)); } static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, @@ -3001,7 +3005,7 @@ static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->ch_count); + return sysfs_emit(buf, "%d\n", target->ch_count); } static ssize_t show_comp_vector(struct device *dev, @@ -3009,7 +3013,7 @@ static ssize_t show_comp_vector(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->comp_vector); + return sysfs_emit(buf, "%d\n", target->comp_vector); } static ssize_t show_tl_retry_count(struct device *dev, @@ -3017,7 +3021,7 @@ static ssize_t show_tl_retry_count(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%d\n", target->tl_retry_count); + return sysfs_emit(buf, "%d\n", target->tl_retry_count); } static ssize_t show_cmd_sg_entries(struct device *dev, @@ -3025,7 +3029,7 @@ static ssize_t show_cmd_sg_entries(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%u\n", target->cmd_sg_cnt); + return sysfs_emit(buf, "%u\n", target->cmd_sg_cnt); } static ssize_t show_allow_ext_sg(struct device *dev, @@ -3033,7 +3037,7 @@ static ssize_t show_allow_ext_sg(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); + return sysfs_emit(buf, "%s\n", target->allow_ext_sg ? 
"true" : "false"); } static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); @@ -3893,7 +3897,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, { struct srp_host *host = container_of(dev, struct srp_host, dev); - return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); + return sysfs_emit(buf, "%s\n", dev_name(&host->srp_dev->dev->dev)); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); @@ -3903,7 +3907,7 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr, { struct srp_host *host = container_of(dev, struct srp_host, dev); - return sprintf(buf, "%d\n", host->port); + return sysfs_emit(buf, "%d\n", host->port); } static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index a17c56cd8312..6be60aa5ffe2 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3448,7 +3448,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rdma_size); } static ssize_t srpt_tpg_attrib_srp_max_rdma_size_store(struct config_item *item, @@ -3485,7 +3485,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rsp_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_max_rsp_size); } static ssize_t srpt_tpg_attrib_srp_max_rsp_size_store(struct config_item *item, @@ -3522,7 +3522,7 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size); + return sysfs_emit(page, "%u\n", sport->port_attrib.srp_sq_size); } static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item, @@ -3559,7 +3559,7 @@ static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item, struct se_portal_group *se_tpg = attrib_to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return sprintf(page, "%d\n", sport->port_attrib.use_srq); + return sysfs_emit(page, "%d\n", sport->port_attrib.use_srq); } static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item, @@ -3649,7 +3649,7 @@ out: static ssize_t srpt_rdma_cm_port_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", rdma_cm_port); + return sysfs_emit(page, "%d\n", rdma_cm_port); } static ssize_t srpt_rdma_cm_port_store(struct config_item *item, @@ -3705,7 +3705,7 @@ static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page) struct se_portal_group *se_tpg = to_tpg(item); struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); - return snprintf(page, PAGE_SIZE, "%d\n", sport->enabled); + return sysfs_emit(page, "%d\n", sport->enabled); } static ssize_t srpt_tpg_enable_store(struct config_item *item, @@ -3812,7 +3812,7 @@ static void srpt_drop_tport(struct se_wwn *wwn) static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf) { - return scnprintf(buf, PAGE_SIZE, "\n"); + return sysfs_emit(buf, "\n"); } CONFIGFS_ATTR_RO(srpt_wwn_, version); diff --git 
a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index bdeb010efee6..76e66f630c17 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -347,7 +347,7 @@ struct srpt_nexus { }; /** - * struct srpt_port_attib - attributes for SRPT port + * struct srpt_port_attrib - attributes for SRPT port * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. * @srp_max_rsp_size: Maximum size of SRP response messages in bytes. * @srp_sq_size: Shared receive queue (SRQ) size. diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index df9f6f4549f1..cf6c49d09c82 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -853,7 +853,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, return error; ctrl->device = ctrl->queues[0].device; - ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device); + ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev); /* T10-PI support */ if (ctrl->device->dev->attrs.device_cap_flags & diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ae6620489457..5c1e7cb7fe0d 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -414,7 +414,8 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) goto out_free_rsp; - r->req.p2p_client = &ndev->device->dev; + if (!ib_uses_virt_dma(ndev->device)) + r->req.p2p_client = &ndev->device->dev; r->send_sge.length = sizeof(*r->req.cqe); r->send_sge.lkey = ndev->pd->local_dma_lkey; diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index bace04145c5f..196382630363 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -556,15 +556,6 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, return -1; for (i = 0; i < num_clients; i++) { -#ifdef CONFIG_DMA_VIRT_OPS - if (clients[i]->dma_ops == &dma_virt_ops) { - if (verbose) - dev_warn(clients[i], - "cannot be used for peer-to-peer DMA because the driver makes use of dma_virt_ops\n"); - return -1; - } -#endif - pci_client = find_parent_pci_dev(clients[i]); if (!pci_client) { if (verbose) @@ -834,24 +825,10 @@ static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap, struct device *dev, struct scatterlist *sg, int nents) { struct scatterlist *s; - phys_addr_t paddr; int i; - /* - * p2pdma mappings are not compatible with devices that use - * dma_virt_ops. 
If the upper layers do the right thing - * this should never happen because it will be prevented - * by the check in pci_p2pdma_distance_many() - */ -#ifdef CONFIG_DMA_VIRT_OPS - if (WARN_ON_ONCE(dev->dma_ops == &dma_virt_ops)) - return 0; -#endif - for_each_sg(sg, s, nents, i) { - paddr = sg_phys(s); - - s->dma_address = paddr - p2p_pgmap->bus_offset; + s->dma_address = sg_phys(s) - p2p_pgmap->bus_offset; sg_dma_len(s) = s->length; } diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 956151052d45..2aaed35b556d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -565,6 +565,4 @@ static inline int dma_mmap_wc(struct device *dev, int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, dma_addr_t dma_start, u64 size); -extern const struct dma_map_ops dma_virt_ops; - #endif /* _LINUX_DMA_MAPPING_H */ diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 70597508c765..7752211c9638 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -34,6 +34,13 @@ static inline int ib_umem_offset(struct ib_umem *umem) return umem->address & ~PAGE_MASK; } +static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem, + unsigned long pgsz) +{ + return (sg_dma_address(umem->sg_head.sgl) + ib_umem_offset(umem)) & + (pgsz - 1); +} + static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem, unsigned long pgsz) { @@ -79,6 +86,35 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt); +/** + * ib_umem_find_best_pgoff - Find best HW page size + * + * @umem: umem struct + * @pgsz_bitmap bitmap of HW supported page sizes + * @pgoff_bitmask: Mask of bits that can be represented with an offset + * + * This is very similar to ib_umem_find_best_pgsz() except instead of accepting + * an IOVA it accepts a bitmask specifying what address bits can be represented + * with a page offset. + * + * For instance if the HW has multiple page sizes, requires 64 byte alignemnt, + * and can support aligned offsets up to 4032 then pgoff_bitmask would be + * "111111000000". + * + * If the pgoff_bitmask requires either alignment in the low bit or an + * unavailable page size for the high bits, this function returns 0. 
+ */ +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + struct scatterlist *sg = umem->sg_head.sgl; + dma_addr_t dma_addr; + + dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK); + return ib_umem_find_best_pgsz(umem, pgsz_bitmap, + dma_addr & pgoff_bitmask); +} #else /* CONFIG_INFINIBAND_USER_MEM */ @@ -101,6 +137,12 @@ static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, { return 0; } +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + return 0; +} #endif /* CONFIG_INFINIBAND_USER_MEM */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3883efd588aa..9fed65bf9279 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1235,6 +1235,8 @@ enum ib_qp_attr_mask { IB_QP_RESERVED3 = (1<<23), IB_QP_RESERVED4 = (1<<24), IB_QP_RATE_LIMIT = (1<<25), + + IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0), }; enum ib_qp_state { @@ -1470,6 +1472,8 @@ enum rdma_remove_reason { RDMA_REMOVE_DRIVER_REMOVE, /* uobj is being cleaned-up before being committed */ RDMA_REMOVE_ABORT, + /* The driver failed to destroy the uobject and is being disconnected */ + RDMA_REMOVE_DRIVER_FAILURE, }; struct ib_rdmacg_object { @@ -1482,8 +1486,6 @@ struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; - bool cleanup_retryable; - struct ib_rdmacg_object cg_obj; /* * Implementation details of the RDMA core, don't use in drivers: @@ -2402,6 +2404,8 @@ struct ib_device_ops { int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, struct ib_udata *udata); + int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah, u32 flags); @@ -2430,9 +2434,10 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); - int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_pd *pd, struct ib_udata *udata); + struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); @@ -2666,7 +2671,6 @@ struct ib_device { const struct attribute_group *groups[3]; u64 uverbs_cmd_mask; - u64 uverbs_ex_cmd_mask; char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; @@ -2902,46 +2906,6 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, } /** - * ib_is_destroy_retryable - Check whether the uobject destruction - * is retryable. - * @ret: The initial destruction return code - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * This function is a helper function that IB layer and low-level drivers - * can use to consider whether the destruction of the given uobject is - * retry-able. - * It checks the original return code, if it wasn't success the destruction - * is retryable according to the ucontext state (i.e. cleanup_retryable) and - * the remove reason. (i.e. why). 
- * Must be called with the object locked for destroy. - */ -static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - return ret && (why == RDMA_REMOVE_DESTROY || - uobj->context->cleanup_retryable); -} - -/** - * ib_destroy_usecnt - Called during destruction to check the usecnt - * @usecnt: The usecnt atomic - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * Non-zero usecnts will block destruction unless destruction was triggered by - * a ucontext cleanup. - */ -static inline int ib_destroy_usecnt(atomic_t *usecnt, - enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) - return -EBUSY; - return 0; -} - -/** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for * the given QP state transition. @@ -3431,6 +3395,17 @@ enum ib_pd_flags { struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller); +/** + * ib_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * @flags: protection domain flags + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + * + * Every PD has a local_dma_lkey which can be used as the lkey value for local + * memory operations. + */ #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) @@ -3656,8 +3631,14 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, bad_recv_wr ? : &dummy); } -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); +struct ib_qp *ib_create_named_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller); +static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr) +{ + return ib_create_named_qp(pd, init_attr, KBUILD_MODNAME); +} /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. @@ -3944,6 +3925,16 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) -ENOSYS; } +/* + * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to + * NULL. This causes the ib_dma* helpers to just stash the kernel virtual + * address into the dma address. 
+ */ +static inline bool ib_uses_virt_dma(struct ib_device *dev) +{ + return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; +} + /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created @@ -3951,6 +3942,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { + if (ib_uses_virt_dma(dev)) + return 0; return dma_mapping_error(dev->dma_device, dma_addr); } @@ -3965,6 +3958,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)cpu_addr; return dma_map_single(dev->dma_device, cpu_addr, size, direction); } @@ -3979,7 +3974,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3996,6 +3992,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)(page_address(page) + offset); return dma_map_page(dev->dma_device, page, offset, size, direction); } @@ -4010,7 +4008,30 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_page(dev->dma_device, addr, size, direction); +} + +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents); +static inline int ib_dma_map_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (ib_uses_virt_dma(dev)) + return ib_dma_virt_map_sg(dev, sg, nents); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); +} + +static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (!ib_uses_virt_dma(dev)) + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } /** @@ -4024,7 +4045,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(dev->dma_device, sg, nents, direction); + return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4038,24 +4059,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(dev->dma_device, sg, nents, direction); -} - -static inline int ib_dma_map_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, - dma_attrs); -} - -static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); + ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4066,6 +4070,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, */ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { 
+ if (ib_uses_virt_dma(dev)) + return UINT_MAX; return dma_get_max_seg_size(dev->dma_device); } @@ -4081,7 +4087,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -4096,36 +4103,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(dev->dma_device, addr, size, dir); -} - -/** - * ib_dma_alloc_coherent - Allocate memory and map it for DMA - * @dev: The device for which the DMA address is requested - * @size: The size of the region to allocate in bytes - * @dma_handle: A pointer for returning the DMA address of the region - * @flag: memory allocator flags - */ -static inline void *ib_dma_alloc_coherent(struct ib_device *dev, - size_t size, - dma_addr_t *dma_handle, - gfp_t flag) -{ - return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); -} - -/** - * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() - * @dev: The device for which the DMA addresses were allocated - * @size: The size of the region - * @cpu_addr: the address returned by ib_dma_alloc_coherent() - * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() - */ -static inline void ib_dma_free_coherent(struct ib_device *dev, - size_t size, void *cpu_addr, - dma_addr_t dma_handle) -{ - dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /* ib_reg_user_mr - register a memory region for virtual addresses from kernel @@ -4217,7 +4196,8 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, struct inode *inode, struct ib_udata *udata); int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); -static inline int ib_check_mr_access(int flags) +static inline int ib_check_mr_access(struct ib_device *ib_dev, + unsigned int flags) { /* * Local write permission is required if remote write or @@ -4230,6 +4210,9 @@ static inline int ib_check_mr_access(int flags) if (flags & ~IB_ACCESS_SUPPORTED) return -EINVAL; + if (flags & IB_ACCESS_ON_DEMAND && + !(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) + return -EINVAL; return 0; } @@ -4617,6 +4600,19 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) } /** + * ibdev_to_node - return the NUMA node for a given ib_device + * @dev: device to get the NUMA node for. + */ +static inline int ibdev_to_node(struct ib_device *ibdev) +{ + struct device *parent = ibdev->dev.parent; + + if (!parent) + return NUMA_NO_NODE; + return dev_to_node(parent); +} + +/** * rdma_device_to_drv_device - Helper macro to reach back to driver's * ib_device holder structure from device pointer. * diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index d3a1cc5be7bc..05e18839eaff 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -68,6 +68,14 @@ struct rdma_restrack_entry { * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI */ bool valid; + /** + * @no_track: don't add this entry to restrack DB + * + * This field is used to mark an entry that doesn't need to be added to + * internal restrack DB and presented later to the users at the nldev + * query stage. 
+ */ + u8 no_track : 1; /* * @kref: Protect destroy of the resource */ @@ -145,4 +153,20 @@ int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id); + +/** + * rdma_restrack_no_track() - don't add resource to the DB + * @res: resource entry + * + * Every user of thie API should be cross examined. + * Probaby you don't need to use this function. + */ +static inline void rdma_restrack_no_track(struct rdma_restrack_entry *res) +{ + res->no_track = true; +} +static inline bool rdma_restrack_is_tracked(struct rdma_restrack_entry *res) +{ + return !res->no_track; +} #endif /* _RDMA_RESTRACK_H_ */ diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index b00270c72740..39ef204753ec 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -647,12 +647,15 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b * 'ucontext'. * */ -#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ - (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ - driver_udata) \ - ->context, \ - drv_dev_struct, member) : \ - (drv_dev_struct *)NULL) +static inline struct uverbs_attr_bundle * +rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata) +{ + return container_of(udata, struct uverbs_attr_bundle, driver_udata); +} + +#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ + (udata ? container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \ + drv_dev_struct, member) : (drv_dev_struct *)NULL) #define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) @@ -862,6 +865,16 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, { return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); } + +static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle, + size_t n, size_t size) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return ERR_PTR(-EOVERFLOW); + return uverbs_zalloc(bundle, bytes); +} int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val); diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 06db27e35f40..ccd11631c167 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -71,6 +71,8 @@ struct uverbs_obj_type_class { enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); + void (*swap_uobjects)(struct ib_uobject *obj_old, + struct ib_uobject *obj_new); }; struct uverbs_obj_type { @@ -116,6 +118,9 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, bool hw_obj_valid); void rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); +void rdma_assign_uobject(struct ib_uobject *to_uobj, + struct ib_uobject *new_uobj, + struct uverbs_attr_bundle *attrs); /* * uverbs_uobject_get is called in order to increase the reference count on @@ -138,8 +143,8 @@ struct uverbs_obj_fd_type { * because the driver is removed or the FD is closed. 
*/ struct uverbs_obj_type type; - int (*destroy_object)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + void (*destroy_object)(struct ib_uobject *uobj, + enum rdma_remove_reason why); const struct file_operations *fops; const char *name; int flags; diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 9ec85f76e9ac..90b739d05adf 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -43,6 +43,10 @@ struct hns_roce_ib_create_cq { __u32 reserved; }; +enum hns_roce_cq_cap_flags { + HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0, +}; + struct hns_roce_ib_create_cq_resp { __aligned_u64 cqn; /* Only 32 bits used, 64 for compat */ __aligned_u64 cap_flags; @@ -69,6 +73,12 @@ struct hns_roce_ib_create_qp { __aligned_u64 sdb_addr; }; +enum hns_roce_qp_cap_flags { + HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, + HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, + HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, +}; + struct hns_roce_ib_create_qp_resp { __aligned_u64 cap_flags; }; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 456438c18c2c..7ee73a0652f1 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -596,20 +596,6 @@ enum { IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, }; -enum { - /* - * This value is equal to IB_QP_DEST_QPN. - */ - IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20, -}; - -enum { - /* - * This value is equal to IB_QP_RATE_LIMIT. - */ - IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25, -}; - struct ib_uverbs_ex_create_qp { __aligned_u64 user_handle; __u32 pd_handle; diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index e591d8c1f3cf..068433e2229d 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -181,4 +181,25 @@ struct rxe_modify_srq_cmd { __aligned_u64 mmap_info_addr; }; +/* This data structure is stored at the base of work and + * completion queues shared between user space and kernel space. + * It contains the producer and consumer indices. Is also + * contains a copy of the queue size parameters for user space + * to use but the kernel must use the parameters in the + * rxe_queue struct. 
For performance reasons arrange to have + * producer and consumer indices in separate cache lines + * the kernel should always mask the indices to avoid accessing + * memory outside of the data area + */ +struct rxe_queue_buf { + __u32 log2_elem_size; + __u32 index_mask; + __u32 pad_1[30]; + __u32 producer_index; + __u32 pad_2[31]; + __u32 consumer_index; + __u32 pad_3[31]; + __u8 data[]; +}; + #endif /* RDMA_USER_RXE_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index c99de4a21458..fd2db2665fc6 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -75,11 +75,6 @@ config ARCH_HAS_DMA_PREP_COHERENT config ARCH_HAS_FORCE_DMA_UNENCRYPTED bool -config DMA_VIRT_OPS - bool - depends on HAS_DMA - select DMA_OPS - config SWIOTLB bool select NEED_DMA_MAP_STATE diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index dc755ab68aab..cd1d86358a7a 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_DMA_OPS) += ops_helpers.o obj-$(CONFIG_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o -obj-$(CONFIG_DMA_VIRT_OPS) += virt.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c deleted file mode 100644 index 59d32317dd57..000000000000 --- a/kernel/dma/virt.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations that map to virtual addresses without flushing memory. - */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-map-ops.h> -#include <linux/scatterlist.h> - -static void *dma_virt_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp | __GFP_ZERO, get_order(size)); - if (ret) - *dma_handle = (uintptr_t)ret; - return ret; -} - -static void dma_virt_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return (uintptr_t)(page_address(page) + offset); -} - -static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - sg_dma_address(sg) = (uintptr_t)sg_virt(sg); - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_virt_ops = { - .alloc = dma_virt_alloc, - .free = dma_virt_free, - .map_page = dma_virt_map_page, - .map_sg = dma_virt_map_sg, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, -}; -EXPORT_SYMBOL(dma_virt_ops); diff --git a/net/rds/ib.c b/net/rds/ib.c index deecbdcdae84..24c9a9005a6f 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -30,7 +30,6 @@ * SOFTWARE. 
* */ -#include <linux/dmapool.h> #include <linux/kernel.h> #include <linux/in.h> #include <linux/if.h> @@ -108,7 +107,6 @@ static void rds_ib_dev_free(struct work_struct *work) rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); - dma_pool_destroy(rds_ibdev->rid_hdrs_pool); list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { list_del(&i_ipaddr->list); @@ -191,14 +189,6 @@ static int rds_ib_add_one(struct ib_device *device) rds_ibdev->pd = NULL; goto put_dev; } - rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name, - device->dma_device, - sizeof(struct rds_header), - L1_CACHE_BYTES, 0); - if (!rds_ibdev->rid_hdrs_pool) { - ret = -ENOMEM; - goto put_dev; - } rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); diff --git a/net/rds/ib.h b/net/rds/ib.h index 8dfff43cf07f..2ba71102b1f1 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -246,7 +246,6 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; - struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */ u8 odp_capable:1; unsigned int max_mrs; @@ -264,13 +263,6 @@ struct rds_ib_device { int *vector_load; }; -static inline int ibdev_to_node(struct ib_device *ibdev) -{ - struct device *parent; - - parent = ibdev->dev.parent; - return parent ? dev_to_node(parent) : NUMA_NO_NODE; -} #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev) /* bits for i_ack_flags */ @@ -387,11 +379,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6); void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event); -struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, - struct dma_pool *pool, - dma_addr_t **dma_addrs, u32 num_hdrs); -void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, - dma_addr_t *dma_addrs, u32 num_hdrs); #define rds_ib_conn_error(conn, fmt...) \ __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index b36b60668b1d..f5cbe963cd8f 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -30,7 +30,6 @@ * SOFTWARE. * */ -#include <linux/dmapool.h> #include <linux/kernel.h> #include <linux/in.h> #include <linux/slab.h> @@ -441,42 +440,87 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index) rds_ibdev->vector_load[index]--; } +static void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr, + dma_addr_t dma_addr, enum dma_data_direction dir) +{ + ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir); + kfree(hdr); +} + +static struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev, + dma_addr_t *dma_addr, enum dma_data_direction dir) +{ + struct rds_header *hdr; + + hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev)); + if (!hdr) + return NULL; + + *dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr), + DMA_BIDIRECTIONAL); + if (ib_dma_mapping_error(dev, *dma_addr)) { + kfree(hdr); + return NULL; + } + + return hdr; +} + +/* Free the DMA memory used to store struct rds_header. + * + * @dev: the RDS IB device + * @hdrs: pointer to the array storing DMA memory pointers + * @dma_addrs: pointer to the array storing DMA addresses + * @num_hdars: number of headers to free. 
+ */ +static void rds_dma_hdrs_free(struct rds_ib_device *dev, + struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs, + enum dma_data_direction dir) +{ + u32 i; + + for (i = 0; i < num_hdrs; i++) + rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir); + kvfree(hdrs); + kvfree(dma_addrs); +} + + /* Allocate DMA coherent memory to be used to store struct rds_header for * sending/receiving packets. The pointers to the DMA memory and the * associated DMA addresses are stored in two arrays. * - * @ibdev: the IB device - * @pool: the DMA memory pool + * @dev: the RDS IB device * @dma_addrs: pointer to the array for storing DMA addresses * @num_hdrs: number of headers to allocate * * It returns the pointer to the array storing the DMA memory pointers. On * error, NULL pointer is returned. */ -struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, - struct dma_pool *pool, - dma_addr_t **dma_addrs, u32 num_hdrs) +static struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev, + dma_addr_t **dma_addrs, u32 num_hdrs, + enum dma_data_direction dir) { struct rds_header **hdrs; dma_addr_t *hdr_daddrs; u32 i; hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, - ibdev_to_node(ibdev)); + ibdev_to_node(dev->dev)); if (!hdrs) return NULL; hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, - ibdev_to_node(ibdev)); + ibdev_to_node(dev->dev)); if (!hdr_daddrs) { kvfree(hdrs); return NULL; } for (i = 0; i < num_hdrs; i++) { - hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]); + hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir); if (!hdrs[i]) { - rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i); + rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir); return NULL; } } @@ -485,24 +529,6 @@ struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, return hdrs; } -/* Free the DMA memory used to store struct rds_header. - * - * @pool: the DMA memory pool - * @hdrs: pointer to the array storing DMA memory pointers - * @dma_addrs: pointer to the array storing DMA addresses - * @num_hdars: number of headers to free. - */ -void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, - dma_addr_t *dma_addrs, u32 num_hdrs) -{ - u32 i; - - for (i = 0; i < num_hdrs; i++) - dma_pool_free(pool, hdrs[i], dma_addrs[i]); - kvfree(hdrs); - kvfree(dma_addrs); -} - /* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. 
@@ -516,7 +542,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn) struct rds_ib_device *rds_ibdev; unsigned long max_wrs; int ret, fr_queue_space; - struct dma_pool *pool; /* * It's normal to see a null device if an incoming connection races @@ -612,25 +637,26 @@ static int rds_ib_setup_qp(struct rds_connection *conn) goto recv_cq_out; } - pool = rds_ibdev->rid_hdrs_pool; - ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr, + DMA_TO_DEVICE); if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("DMA send hdrs alloc failed\n"); goto qp_out; } - ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr, + DMA_FROM_DEVICE); if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("DMA recv hdrs alloc failed\n"); goto send_hdrs_dma_out; } - ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL, - &ic->i_ack_dma); + ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma, + DMA_TO_DEVICE); if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("DMA ack header alloc failed\n"); @@ -666,18 +692,19 @@ sends_out: vfree(ic->i_sends); ack_dma_out: - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma, + DMA_TO_DEVICE); ic->i_ack = NULL; recv_hdrs_dma_out: - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr, DMA_FROM_DEVICE); ic->i_recv_hdrs = NULL; ic->i_recv_hdrs_dma = NULL; send_hdrs_dma_out: - rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr, DMA_TO_DEVICE); ic->i_send_hdrs = NULL; ic->i_send_hdrs_dma = NULL; @@ -1110,29 +1137,30 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) } if (ic->rds_ibdev) { - struct dma_pool *pool; - - pool = ic->rds_ibdev->rid_hdrs_pool; - /* then free the resources that ib callbacks use */ if (ic->i_send_hdrs) { - rds_dma_hdrs_free(pool, ic->i_send_hdrs, + rds_dma_hdrs_free(ic->rds_ibdev, + ic->i_send_hdrs, ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + ic->i_send_ring.w_nr, + DMA_TO_DEVICE); ic->i_send_hdrs = NULL; ic->i_send_hdrs_dma = NULL; } if (ic->i_recv_hdrs) { - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, + rds_dma_hdrs_free(ic->rds_ibdev, + ic->i_recv_hdrs, ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + ic->i_recv_ring.w_nr, + DMA_FROM_DEVICE); ic->i_recv_hdrs = NULL; ic->i_recv_hdrs_dma = NULL; } if (ic->i_ack) { - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack, + ic->i_ack_dma, DMA_TO_DEVICE); ic->i_ack = NULL; } } else { diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 3cffcec5fb37..6fdedd9dbbc2 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -662,10 +662,16 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi seq = rds_ib_get_ack(ic); rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); + + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma, + sizeof(*hdr), DMA_TO_DEVICE); rds_message_populate_header(hdr, 0, 0, 0); hdr->h_ack = cpu_to_be64(seq); hdr->h_credit = adv_credits; rds_message_make_checksum(hdr); + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma, 
+ sizeof(*hdr), DMA_TO_DEVICE); + ic->i_ack_queued = jiffies; ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL); @@ -845,6 +851,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, struct rds_ib_connection *ic = conn->c_transport_data; struct rds_ib_incoming *ibinc = ic->i_ibinc; struct rds_header *ihdr, *hdr; + dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; /* XXX shut down the connection if port 0,0 are seen? */ @@ -863,6 +870,8 @@ static void rds_ib_process_recv(struct rds_connection *conn, ihdr = ic->i_recv_hdrs[recv - ic->i_recvs]; + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr, + sizeof(*ihdr), DMA_FROM_DEVICE); /* Validate the checksum. */ if (!rds_message_verify_checksum(ihdr)) { rds_ib_conn_error(conn, "incoming message " @@ -870,7 +879,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, "forcing a reconnect\n", &conn->c_faddr); rds_stats_inc(s_recv_drop_bad_checksum); - return; + goto done; } /* Process the ACK sequence which comes with every packet */ @@ -899,7 +908,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, */ rds_ib_frag_free(ic, recv->r_frag); recv->r_frag = NULL; - return; + goto done; } /* @@ -933,7 +942,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, hdr->h_dport != ihdr->h_dport) { rds_ib_conn_error(conn, "fragment header mismatch; forcing reconnect\n"); - return; + goto done; } } @@ -965,6 +974,9 @@ static void rds_ib_process_recv(struct rds_connection *conn, rds_inc_put(&ibinc->ii_inc); } +done: + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr, + sizeof(*ihdr), DMA_FROM_DEVICE); } void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index dfe778220657..92b4a8689aae 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -638,6 +638,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, send->s_sge[0].length = sizeof(struct rds_header); send->s_sge[0].lkey = ic->i_pd->local_dma_lkey; + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, + ic->i_send_hdrs_dma[pos], + sizeof(struct rds_header), + DMA_TO_DEVICE); memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); @@ -688,6 +692,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, adv_credits = 0; rds_ib_stats_inc(s_ib_tx_credit_updates); } + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, + ic->i_send_hdrs_dma[pos], + sizeof(struct rds_header), + DMA_TO_DEVICE); if (prev) prev->s_wr.next = &send->s_wr; diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index 8a577c95496e..71c960dcd8a4 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -9,6 +9,7 @@ struct test { int alloc_ret; unsigned num_pages; unsigned *pfn; + unsigned *pfn_app; unsigned size; unsigned int max_seg; unsigned int expected_segments; @@ -52,31 +53,39 @@ int main(void) { const unsigned int sgmax = UINT_MAX; struct test *test, tests[] = { - { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, - { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 }, - { 0, 1, pfn(0), 1, sgmax, 1 }, - { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 }, - { 0, 2, pfn(1, 0), 2 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(0, 1, 2), 3 * PAGE_SIZE, sgmax, 1 }, - { 0, 3, pfn(0, 2, 1), 3 * PAGE_SIZE, sgmax, 3 }, - { 0, 3, pfn(0, 1, 3), 3 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(1, 2, 4), 3 * PAGE_SIZE, sgmax, 2 }, - { 0, 3, pfn(1, 3, 4), 
3 * PAGE_SIZE, sgmax, 2 }, - { 0, 4, pfn(0, 1, 3, 4), 4 * PAGE_SIZE, sgmax, 2 }, - { 0, 5, pfn(0, 1, 3, 4, 5), 5 * PAGE_SIZE, sgmax, 2 }, - { 0, 5, pfn(0, 1, 3, 4, 6), 5 * PAGE_SIZE, sgmax, 3 }, - { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, sgmax, 1 }, - { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 6, pfn(0, 1, 2, 3, 4, 5), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 6, pfn(0, 2, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 4 }, - { 0, 6, pfn(0, 1, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 }, - { 0, 0, NULL, 0, 0, 0 }, + { -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 }, + { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 }, + { 0, 1, pfn(0), NULL, 1, sgmax, 1 }, + { 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 }, + { 0, 2, pfn(1, 0), NULL, 2 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), pfn(3, 4, 5), 3 * PAGE_SIZE, sgmax, 1 }, + { 0, 3, pfn(0, 1, 2), pfn(4, 5, 6), 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(0, 2, 1), NULL, 3 * PAGE_SIZE, sgmax, 3 }, + { 0, 3, pfn(0, 1, 3), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(1, 2, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 3, pfn(1, 3, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 }, + { 0, 4, pfn(0, 1, 3, 4), NULL, 4 * PAGE_SIZE, sgmax, 2 }, + { 0, 5, pfn(0, 1, 3, 4, 5), NULL, 5 * PAGE_SIZE, sgmax, 2 }, + { 0, 5, pfn(0, 1, 3, 4, 6), NULL, 5 * PAGE_SIZE, sgmax, 3 }, + { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, sgmax, 1 }, + { 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, 2 * PAGE_SIZE, + 3 }, + { 0, 6, pfn(0, 1, 2, 3, 4, 5), NULL, 6 * PAGE_SIZE, + 2 * PAGE_SIZE, 3 }, + { 0, 6, pfn(0, 2, 3, 4, 5, 6), NULL, 6 * PAGE_SIZE, + 2 * PAGE_SIZE, 4 }, + { 0, 6, pfn(0, 1, 3, 4, 5, 6), pfn(7, 8, 9, 10, 11, 12), + 6 * PAGE_SIZE, 12 * PAGE_SIZE, 2 }, + { 0, 0, NULL, NULL, 0, 0, 0 }, }; unsigned int i; for (i = 0, test = tests; test->expected_segments; test++, i++) { + int left_pages = test->pfn_app ? test->num_pages : 0; struct page *pages[MAX_PAGES]; struct sg_table st; struct scatterlist *sg; @@ -84,14 +93,23 @@ int main(void) set_pages(pages, test->pfn, test->num_pages); sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, - test->size, test->max_seg, NULL, 0, GFP_KERNEL); + test->size, test->max_seg, NULL, left_pages, GFP_KERNEL); assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); if (test->alloc_ret) continue; + if (test->pfn_app) { + set_pages(pages, test->pfn_app, test->num_pages); + sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, + test->size, test->max_seg, sg, 0, GFP_KERNEL); + + assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); + } + VALIDATE(st.nents == test->expected_segments, &st, test); - VALIDATE(st.orig_nents == test->expected_segments, &st, test); + if (!test->pfn_app) + VALIDATE(st.orig_nents == test->expected_segments, &st, test); sg_free_table(&st); } |
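
The sprintf()/snprintf()/scnprintf() conversions in the srp and srpt hunks above all have the same shape: sysfs_emit() knows the show() buffer is a single, page-aligned page and refuses to write past it, so the PAGE_SIZE arithmetic in the old calls goes away. A minimal sketch of the resulting pattern, with a made-up attribute and driver struct rather than anything from either driver:

#include <linux/device.h>
#include <linux/sysfs.h>

struct demo_target {
	struct device dev;
	int tl_retry_count;	/* value exposed through sysfs */
};

static ssize_t show_tl_retry_count(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct demo_target *target = container_of(dev, struct demo_target, dev);

	/* sysfs_emit() bounds the output to the one page sysfs provides. */
	return sysfs_emit(buf, "%d\n", target->tl_retry_count);
}

static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);

The configfs conversions in ib_srpt.c are the same change; configfs show() buffers are likewise one page, which is what sysfs_emit() assumes.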
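
Several hunks above (the ib_dma_* wrappers in ib_verbs.h, the removal of kernel/dma/virt.c and CONFIG_DMA_VIRT_OPS, and the net/rds move away from dma_pool) implement one idea: a software RDMA device leaves ib_device->dma_device NULL, ib_uses_virt_dma() keys off that, and the wrappers then hand back kernel virtual addresses instead of calling the DMA API. A ULP only has to keep every mapping behind the wrappers. A hedged sketch of that pattern for a fixed-size header, modelled on the rds_dma_hdr_alloc()/rds_dma_hdr_free() helpers above; the struct and function names here are illustrative:

#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct demo_hdr {
	__be64 seq;
	__be32 len;
	__be32 flags;
};

/*
 * Allocate one header near the device's NUMA node and map it. For a
 * software device ib_dma_map_single() simply returns the kernel virtual
 * address and ib_dma_mapping_error() always reports success.
 */
static struct demo_hdr *demo_hdr_alloc(struct ib_device *dev,
				       dma_addr_t *dma_addr)
{
	struct demo_hdr *hdr;

	hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev));
	if (!hdr)
		return NULL;

	*dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr),
				      DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(dev, *dma_addr)) {
		kfree(hdr);
		return NULL;
	}
	return hdr;
}

static void demo_hdr_free(struct ib_device *dev, struct demo_hdr *hdr,
			  dma_addr_t dma_addr)
{
	/* A no-op for software devices, a real unmap otherwise. */
	ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), DMA_BIDIRECTIONAL);
	kfree(hdr);
}

Because these are streaming mappings rather than coherent pool memory, CPU access has to be bracketed with ib_dma_sync_single_for_cpu()/ib_dma_sync_single_for_device(), which is exactly what the ib_recv.c and ib_send.c hunks add around header reads and writes.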
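
ib_umem_find_best_pgoff() in the ib_umem.h hunk serves hardware that cannot take an arbitrary IOVA but can carry a limited page offset next to its page list. A sketch of how a driver might pair it with ib_umem_dma_offset(); the page-size set and the 64-byte-granular offset field of up to 4032 bytes are hypothetical device constraints, chosen only to make the "111111000000" bitmask from the comment above concrete:

#include <linux/bits.h>
#include <linux/sizes.h>
#include <rdma/ib_umem.h>

/* Hypothetical device: 4K/64K/2M pages plus a page-offset field that
 * holds multiples of 64 bytes up to 4032, i.e. bits 6..11. */
#define DEMO_PAGE_SIZES		(SZ_4K | SZ_64K | SZ_2M)
#define DEMO_PGOFF_BITMASK	GENMASK(11, 6)

static int demo_mr_pick_page_size(struct ib_umem *umem,
				  unsigned long *out_pgsz,
				  unsigned long *out_pgoff)
{
	unsigned long pgsz;

	pgsz = ib_umem_find_best_pgoff(umem, DEMO_PAGE_SIZES,
				       DEMO_PGOFF_BITMASK);
	if (!pgsz)
		return -EINVAL;	/* no page size satisfies the offset rule */

	*out_pgsz = pgsz;
	/* Offset of the user buffer inside the first hardware page. */
	*out_pgoff = ib_umem_dma_offset(umem, pgsz);
	return 0;
}

If no supported page size can represent the umem start within the offset field, the helper returns 0 and the registration has to be refused.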
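
IB_QP_ATTR_STANDARD_BITS (GENMASK(20, 0)) in the ib_verbs.h hunk names the attribute-mask bits defined by the spec, everything up through IB_QP_DEST_QPN; the reserved bits and IB_QP_RATE_LIMIT sit above it. One plausible use is for a driver's modify_qp() to reject masks it does not implement. The check below is an assumption about intended use, not code taken from this diff:

#include <rdma/ib_verbs.h>

static int demo_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			  int attr_mask, struct ib_udata *udata)
{
	/* Bits outside the IBA-defined set (for example IB_QP_RATE_LIMIT)
	 * are not handled by this hypothetical driver. */
	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
		return -EOPNOTSUPP;

	/* ... translate the standard attributes into device commands ... */
	return 0;
}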
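
uverbs_kcalloc() in the uverbs_ioctl.h hunk is uverbs_zalloc() plus a check_mul_overflow() guard, so an ioctl handler can size an array from a user-controlled count without open-coding the overflow test; like the other bundle allocators, the memory is released with the bundle. A usage sketch with an invented element type and helper name:

#include <linux/err.h>
#include <rdma/uverbs_ioctl.h>

struct demo_sge {
	u64 addr;
	u32 length;
	u32 lkey;
};

static int demo_alloc_sg_table(struct uverbs_attr_bundle *attrs, u32 num_sge,
			       struct demo_sge **out)
{
	struct demo_sge *sgl;

	sgl = uverbs_kcalloc(attrs, num_sge, sizeof(*sgl));
	if (IS_ERR(sgl))
		return PTR_ERR(sgl);	/* -EOVERFLOW if num_sge * size wraps */

	/* ... fill the table from the relevant attribute ... */
	*out = sgl;
	return 0;
}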
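
struct rxe_queue_buf in the rdma_user_rxe.h hunk is the header of the queue mapping rxe shares with user space: free-running producer/consumer indices padded into separate cache lines, an index_mask for wrapping, and log2_elem_size for addressing data[]. A consumer-side sketch of draining such a mapping; the acquire/release pairing is this sketch's assumption about how the two sides order their accesses, and per the comment above the kernel must still trust only its own copy of the size parameters:

#include <linux/atomic.h>
#include <uapi/rdma/rdma_user_rxe.h>

/* Return the next element to consume, or NULL if the queue is empty. */
static void *demo_queue_peek(struct rxe_queue_buf *q)
{
	u32 prod = smp_load_acquire(&q->producer_index);
	u32 cons = q->consumer_index;

	if (prod == cons)
		return NULL;

	/* The indices are free running; always mask before indexing data[]. */
	return q->data + ((cons & q->index_mask) << q->log2_elem_size);
}

static void demo_queue_advance_consumer(struct rxe_queue_buf *q)
{
	/* Publish the updated index to the producer side. */
	smp_store_release(&q->consumer_index, q->consumer_index + 1);
}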