diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-06 16:43:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-06 16:43:36 -0700 |
commit | d7806bbd22cabc3e3b0a985cfcffa29cf156bb30 (patch) | |
tree | ef24f40c658c2f015b7f96f429e47dd16ab6e5b4 /drivers | |
parent | d6efb3ac3e6c19ab722b28bdb9252bae0b9676b6 (diff) | |
parent | 23fcc7dee2c6aba1060558683988263851e74bab (diff) | |
download | linux-d7806bbd22cabc3e3b0a985cfcffa29cf156bb30.tar.bz2 |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"A quiet cycle after the larger 5.8 effort. Substantially cleanup and
driver work with a few smaller features this time.
- Driver updates for hfi1, rxe, mlx5, hns, qedr, usnic, bnxt_re
- Removal of dead or redundant code across the drivers
- RAW resource tracker dumps to include a device specific data blob
for device objects to aide device debugging
- Further advance the IOCTL interface, remove the ability to turn it
off. Add QUERY_CONTEXT, QUERY_MR, and QUERY_PD commands
- Remove stubs related to devices with no pkey table
- A shared CQ scheme to allow multiple ULPs to share the CQ rings of
a device to give higher performance
- Several more static checker, syzkaller and rare crashers fixed"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (121 commits)
RDMA/mlx5: Fix flow destination setting for RDMA TX flow table
RDMA/rxe: Remove pkey table
RDMA/umem: Add a schedule point in ib_umem_get()
RDMA/hns: Fix the unneeded process when getting a general type of CQE error
RDMA/hns: Fix error during modify qp RTS2RTS
RDMA/hns: Delete unnecessary memset when allocating VF resource
RDMA/hns: Remove redundant parameters in set_rc_wqe()
RDMA/hns: Remove support for HIP08_A
RDMA/hns: Refactor hns_roce_v2_set_hem()
RDMA/hns: Remove redundant hardware opcode definitions
RDMA/netlink: Remove CAP_NET_RAW check when dump a raw QP
RDMA/include: Replace license text with SPDX tags
RDMA/rtrs: remove WQ_MEM_RECLAIM for rtrs_wq
RDMA/rtrs-clt: add an additional random 8 seconds before reconnecting
RDMA/cma: Execute rdma_cm destruction from a handler properly
RDMA/cma: Remove unneeded locking for req paths
RDMA/cma: Using the standard locking pattern when delivering the removal event
RDMA/cma: Simplify DEVICE_REMOVAL for internal_id
RDMA/efa: Add EFA 0xefa1 PCI ID
RDMA/efa: User/kernel compatibility handshake mechanism
...
Diffstat (limited to 'drivers')
125 files changed, 6097 insertions, 5994 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index a83f9eb86bfe..91b023341b77 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -37,14 +37,6 @@ config INFINIBAND_USER_ACCESS libibverbs, libibcm and a hardware driver library from rdma-core <https://github.com/linux-rdma/rdma-core>. -config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI - bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)" - depends on INFINIBAND_USER_ACCESS - help - IOCTL based uAPI support for Infiniband is enabled by default for - new verbs only. This allows userspace to invoke the IOCTL based uAPI - for current legacy verbs too. - config INFINIBAND_USER_MEM bool depends on INFINIBAND_USER_ACCESS != n diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index a670209bbce6..ffad73bb40ff 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1054,7 +1054,7 @@ int ib_get_cached_pkey(struct ib_device *device, cache = device->port_data[port_num].cache.pkey; - if (index < 0 || index >= cache->table_len) + if (!cache || index < 0 || index >= cache->table_len) ret = -EINVAL; else *pkey = cache->table[index]; @@ -1099,6 +1099,10 @@ int ib_find_cached_pkey(struct ib_device *device, read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; + if (!cache) { + ret = -EINVAL; + goto err; + } *index = -1; @@ -1117,6 +1121,7 @@ int ib_find_cached_pkey(struct ib_device *device, ret = 0; } +err: read_unlock_irqrestore(&device->cache_lock, flags); return ret; @@ -1139,6 +1144,10 @@ int ib_find_exact_cached_pkey(struct ib_device *device, read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; + if (!cache) { + ret = -EINVAL; + goto err; + } *index = -1; @@ -1149,6 +1158,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device, break; } +err: read_unlock_irqrestore(&device->cache_lock, flags); return ret; @@ -1425,23 +1435,26 @@ ib_cache_update(struct ib_device *device, u8 port, bool enforce_security) goto err; } - pkey_cache = kmalloc(struct_size(pkey_cache, table, - tprops->pkey_tbl_len), - GFP_KERNEL); - if (!pkey_cache) { - ret = -ENOMEM; - goto err; - } + if (tprops->pkey_tbl_len) { + pkey_cache = kmalloc(struct_size(pkey_cache, table, + tprops->pkey_tbl_len), + GFP_KERNEL); + if (!pkey_cache) { + ret = -ENOMEM; + goto err; + } - pkey_cache->table_len = tprops->pkey_tbl_len; + pkey_cache->table_len = tprops->pkey_tbl_len; - for (i = 0; i < pkey_cache->table_len; ++i) { - ret = ib_query_pkey(device, port, i, pkey_cache->table + i); - if (ret) { - dev_warn(&device->dev, - "ib_query_pkey failed (%d) for index %d\n", - ret, i); - goto err; + for (i = 0; i < pkey_cache->table_len; ++i) { + ret = ib_query_pkey(device, port, i, + pkey_cache->table + i); + if (ret) { + dev_warn(&device->dev, + "ib_query_pkey failed (%d) for index %d\n", + ret, i); + goto err; + } } } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c30cf5307ce3..26de0dab60bb 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -428,19 +428,6 @@ static int cma_comp_exch(struct rdma_id_private *id_priv, return ret; } -static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, - enum rdma_cm_state exch) -{ - unsigned long flags; - enum rdma_cm_state old; - - spin_lock_irqsave(&id_priv->lock, flags); - old = id_priv->state; - id_priv->state = exch; - spin_unlock_irqrestore(&id_priv->lock, flags); - return old; -} - static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) { return hdr->ip_version >> 4; @@ -1829,23 +1816,11 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) } } -void rdma_destroy_id(struct rdma_cm_id *id) +static void _destroy_id(struct rdma_id_private *id_priv, + enum rdma_cm_state state) { - struct rdma_id_private *id_priv; - enum rdma_cm_state state; - - id_priv = container_of(id, struct rdma_id_private, id); - trace_cm_id_destroy(id_priv); - state = cma_exch(id_priv, RDMA_CM_DESTROYING); cma_cancel_operation(id_priv, state); - /* - * Wait for any active callback to finish. New callbacks will find - * the id_priv state set to destroying and abort. - */ - mutex_lock(&id_priv->handler_mutex); - mutex_unlock(&id_priv->handler_mutex); - rdma_restrack_del(&id_priv->res); if (id_priv->cma_dev) { if (rdma_cap_ib_cm(id_priv->id.device, 1)) { @@ -1874,6 +1849,42 @@ void rdma_destroy_id(struct rdma_cm_id *id) put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); } + +/* + * destroy an ID from within the handler_mutex. This ensures that no other + * handlers can start running concurrently. + */ +static void destroy_id_handler_unlock(struct rdma_id_private *id_priv) + __releases(&idprv->handler_mutex) +{ + enum rdma_cm_state state; + unsigned long flags; + + trace_cm_id_destroy(id_priv); + + /* + * Setting the state to destroyed under the handler mutex provides a + * fence against calling handler callbacks. If this is invoked due to + * the failure of a handler callback then it guarentees that no future + * handlers will be called. + */ + lockdep_assert_held(&id_priv->handler_mutex); + spin_lock_irqsave(&id_priv->lock, flags); + state = id_priv->state; + id_priv->state = RDMA_CM_DESTROYING; + spin_unlock_irqrestore(&id_priv->lock, flags); + mutex_unlock(&id_priv->handler_mutex); + _destroy_id(id_priv, state); +} + +void rdma_destroy_id(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + mutex_lock(&id_priv->handler_mutex); + destroy_id_handler_unlock(id_priv); +} EXPORT_SYMBOL(rdma_destroy_id); static int cma_rep_recv(struct rdma_id_private *id_priv) @@ -1925,6 +1936,8 @@ static int cma_cm_event_handler(struct rdma_id_private *id_priv, { int ret; + lockdep_assert_held(&id_priv->handler_mutex); + trace_cm_event_handler(id_priv, event); ret = id_priv->id.event_handler(&id_priv->id, event); trace_cm_event_done(id_priv, event, ret); @@ -1936,7 +1949,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event = {}; - int ret = 0; + int ret; mutex_lock(&id_priv->handler_mutex); if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && @@ -2005,14 +2018,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); + destroy_id_handler_unlock(id_priv); return ret; } out: mutex_unlock(&id_priv->handler_mutex); - return ret; + return 0; } static struct rdma_id_private * @@ -2174,7 +2185,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, mutex_lock(&listen_id->handler_mutex); if (listen_id->state != RDMA_CM_LISTEN) { ret = -ECONNABORTED; - goto err1; + goto err_unlock; } offset = cma_user_data_offset(listen_id); @@ -2191,55 +2202,38 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, } if (!conn_id) { ret = -ENOMEM; - goto err1; + goto err_unlock; } mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); ret = cma_ib_acquire_dev(conn_id, listen_id, &req); - if (ret) - goto err2; + if (ret) { + destroy_id_handler_unlock(conn_id); + goto err_unlock; + } conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; - /* - * Protect against the user destroying conn_id from another thread - * until we're done accessing it. - */ - cma_id_get(conn_id); ret = cma_cm_event_handler(conn_id, &event); - if (ret) - goto err3; - /* - * Acquire mutex to prevent user executing rdma_destroy_id() - * while we're accessing the cm_id. - */ - mutex_lock(&lock); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + conn_id->cm_id.ib = NULL; + mutex_unlock(&listen_id->handler_mutex); + destroy_id_handler_unlock(conn_id); + goto net_dev_put; + } + if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) { trace_cm_send_mra(cm_id->context); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); } - mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); - mutex_unlock(&listen_id->handler_mutex); - cma_id_put(conn_id); - if (net_dev) - dev_put(net_dev); - return 0; -err3: - cma_id_put(conn_id); - /* Destroy the CM ID by returning a non-zero value. */ - conn_id->cm_id.ib = NULL; -err2: - cma_exch(conn_id, RDMA_CM_DESTROYING); - mutex_unlock(&conn_id->handler_mutex); -err1: +err_unlock: mutex_unlock(&listen_id->handler_mutex); - if (conn_id) - rdma_destroy_id(&conn_id->id); net_dev_put: if (net_dev) @@ -2339,9 +2333,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.iw = NULL; - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); + destroy_id_handler_unlock(id_priv); return ret; } @@ -2388,16 +2380,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); if (ret) { - mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(new_cm_id); - goto out; + mutex_unlock(&listen_id->handler_mutex); + destroy_id_handler_unlock(conn_id); + return ret; } ret = cma_iw_acquire_dev(conn_id, listen_id); if (ret) { - mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(new_cm_id); - goto out; + mutex_unlock(&listen_id->handler_mutex); + destroy_id_handler_unlock(conn_id); + return ret; } conn_id->cm_id.iw = cm_id; @@ -2407,25 +2399,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); - /* - * Protect against the user destroying conn_id from another thread - * until we're done accessing it. - */ - cma_id_get(conn_id); ret = cma_cm_event_handler(conn_id, &event); if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; - cma_exch(conn_id, RDMA_CM_DESTROYING); - mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); - cma_id_put(conn_id); - rdma_destroy_id(&conn_id->id); + destroy_id_handler_unlock(conn_id); return ret; } mutex_unlock(&conn_id->handler_mutex); - cma_id_put(conn_id); out: mutex_unlock(&listen_id->handler_mutex); @@ -2482,6 +2465,10 @@ static int cma_listen_handler(struct rdma_cm_id *id, { struct rdma_id_private *id_priv = id->context; + /* Listening IDs are always destroyed on removal */ + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + return -1; + id->context = id_priv->id.context; id->event_handler = id_priv->id.event_handler; trace_cm_event_handler(id_priv, event); @@ -2657,21 +2644,21 @@ static void cma_work_handler(struct work_struct *_work) { struct cma_work *work = container_of(_work, struct cma_work, work); struct rdma_id_private *id_priv = work->id; - int destroy = 0; mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) - goto out; + goto out_unlock; if (cma_cm_event_handler(id_priv, &work->event)) { - cma_exch(id_priv, RDMA_CM_DESTROYING); - destroy = 1; + cma_id_put(id_priv); + destroy_id_handler_unlock(id_priv); + goto out_free; } -out: + +out_unlock: mutex_unlock(&id_priv->handler_mutex); cma_id_put(id_priv); - if (destroy) - rdma_destroy_id(&id_priv->id); +out_free: kfree(work); } @@ -2679,23 +2666,22 @@ static void cma_ndev_work_handler(struct work_struct *_work) { struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); struct rdma_id_private *id_priv = work->id; - int destroy = 0; mutex_lock(&id_priv->handler_mutex); if (id_priv->state == RDMA_CM_DESTROYING || id_priv->state == RDMA_CM_DEVICE_REMOVAL) - goto out; + goto out_unlock; if (cma_cm_event_handler(id_priv, &work->event)) { - cma_exch(id_priv, RDMA_CM_DESTROYING); - destroy = 1; + cma_id_put(id_priv); + destroy_id_handler_unlock(id_priv); + goto out_free; } -out: +out_unlock: mutex_unlock(&id_priv->handler_mutex); cma_id_put(id_priv); - if (destroy) - rdma_destroy_id(&id_priv->id); +out_free: kfree(work); } @@ -3171,9 +3157,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, event.event = RDMA_CM_EVENT_ADDR_RESOLVED; if (cma_cm_event_handler(id_priv, &event)) { - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); + destroy_id_handler_unlock(id_priv); return; } out: @@ -3790,7 +3774,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct rdma_cm_event event = {}; const struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; - int ret = 0; + int ret; mutex_lock(&id_priv->handler_mutex); if (id_priv->state != RDMA_CM_CONNECT) @@ -3840,14 +3824,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); + destroy_id_handler_unlock(id_priv); return ret; } out: mutex_unlock(&id_priv->handler_mutex); - return ret; + return 0; } static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, @@ -4372,9 +4354,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { - cma_exch(id_priv, RDMA_CM_DESTROYING); - mutex_unlock(&id_priv->handler_mutex); - rdma_destroy_id(&id_priv->id); + destroy_id_handler_unlock(id_priv); return 0; } @@ -4789,50 +4769,59 @@ free_cma_dev: return ret; } -static int cma_remove_id_dev(struct rdma_id_private *id_priv) +static void cma_send_device_removal_put(struct rdma_id_private *id_priv) { - struct rdma_cm_event event = {}; + struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL }; enum rdma_cm_state state; - int ret = 0; - - /* Record that we want to remove the device */ - state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); - if (state == RDMA_CM_DESTROYING) - return 0; + unsigned long flags; - cma_cancel_operation(id_priv, state); mutex_lock(&id_priv->handler_mutex); + /* Record that we want to remove the device */ + spin_lock_irqsave(&id_priv->lock, flags); + state = id_priv->state; + if (state == RDMA_CM_DESTROYING || state == RDMA_CM_DEVICE_REMOVAL) { + spin_unlock_irqrestore(&id_priv->lock, flags); + mutex_unlock(&id_priv->handler_mutex); + cma_id_put(id_priv); + return; + } + id_priv->state = RDMA_CM_DEVICE_REMOVAL; + spin_unlock_irqrestore(&id_priv->lock, flags); - /* Check for destruction from another callback. */ - if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) - goto out; - - event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; - ret = cma_cm_event_handler(id_priv, &event); -out: + if (cma_cm_event_handler(id_priv, &event)) { + /* + * At this point the ULP promises it won't call + * rdma_destroy_id() concurrently + */ + cma_id_put(id_priv); + mutex_unlock(&id_priv->handler_mutex); + trace_cm_id_destroy(id_priv); + _destroy_id(id_priv, state); + return; + } mutex_unlock(&id_priv->handler_mutex); - return ret; + + /* + * If this races with destroy then the thread that first assigns state + * to a destroying does the cancel. + */ + cma_cancel_operation(id_priv, state); + cma_id_put(id_priv); } static void cma_process_remove(struct cma_device *cma_dev) { - struct rdma_id_private *id_priv; - int ret; - mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { - id_priv = list_entry(cma_dev->id_list.next, - struct rdma_id_private, list); + struct rdma_id_private *id_priv = list_first_entry( + &cma_dev->id_list, struct rdma_id_private, list); list_del(&id_priv->listen_list); list_del_init(&id_priv->list); cma_id_get(id_priv); mutex_unlock(&lock); - ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv); - cma_id_put(id_priv); - if (ret) - rdma_destroy_id(&id_priv->id); + cma_send_device_removal_put(id_priv); mutex_lock(&lock); } diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 738d1faf4bba..636166880442 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -8,7 +8,7 @@ #include "core_priv.h" #include "restrack.h" -#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE) +#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID) static int __counter_set_mode(struct rdma_counter_mode *curr, enum rdma_nl_counter_mode new_mode, @@ -149,23 +149,13 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, struct auto_mode_param *param = &counter->mode.param; bool match = true; - /* - * Ensure that counter belongs to the right PID. This operation can - * race with user space which kills the process and leaves QP and - * counters orphans. - * - * It is not a big deal because exitted task will leave both QP and - * counter in the same bucket of zombie process. Just ensure that - * process is still alive before procedding. - * - */ - if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) || - !task_pid_nr(qp->res.task)) - return false; - if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) match &= (param->qp_type == qp->qp_type); + if (auto_mask & RDMA_COUNTER_MASK_PID) + match &= (task_pid_nr(counter->res.task) == + task_pid_nr(qp->res.task)); + return match; } @@ -288,7 +278,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; - if (!qp->res.valid) + if (!qp->res.valid || rdma_is_kernel_res(&qp->res)) return 0; if (!rdma_is_port_valid(dev, port)) @@ -483,7 +473,7 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, goto err; } - if (counter->res.task != qp->res.task) { + if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) { ret = -EINVAL; goto err_task; } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 2927a9d16eaa..ef0cd2998671 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -272,7 +272,6 @@ static void ib_device_check_mandatory(struct ib_device *device) } mandatory_table[] = { IB_MANDATORY_FUNC(query_device), IB_MANDATORY_FUNC(query_port), - IB_MANDATORY_FUNC(query_pkey), IB_MANDATORY_FUNC(alloc_pd), IB_MANDATORY_FUNC(dealloc_pd), IB_MANDATORY_FUNC(create_qp), @@ -1343,6 +1342,10 @@ out: return ret; } +static void prevent_dealloc_device(struct ib_device *ib_dev) +{ +} + /** * ib_register_device - Register an IB device with IB core * @device: Device to register @@ -1413,11 +1416,11 @@ int ib_register_device(struct ib_device *device, const char *name) * possibility for a parallel unregistration along with this * error flow. Since we have a refcount here we know any * parallel flow is stopped in disable_device and will see the - * NULL pointers, causing the responsibility to + * special dealloc_driver pointer, causing the responsibility to * ib_dealloc_device() to revert back to this thread. */ dealloc_fn = device->ops.dealloc_driver; - device->ops.dealloc_driver = NULL; + device->ops.dealloc_driver = prevent_dealloc_device; ib_device_put(device); __ib_unregister_device(device); device->ops.dealloc_driver = dealloc_fn; @@ -1466,7 +1469,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev) * Drivers using the new flow may not call ib_dealloc_device except * in error unwind prior to registration success. */ - if (ib_dev->ops.dealloc_driver) { + if (ib_dev->ops.dealloc_driver && + ib_dev->ops.dealloc_driver != prevent_dealloc_device) { WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1); ib_dealloc_device(ib_dev); } @@ -2361,6 +2365,9 @@ int ib_query_pkey(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; + if (!device->ops.query_pkey) + return -EOPNOTSUPP; + return device->ops.query_pkey(device, port_num, index, pkey); } EXPORT_SYMBOL(ib_query_pkey); @@ -2621,8 +2628,14 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, drain_rq); SET_DEVICE_OP(dev_ops, drain_sq); SET_DEVICE_OP(dev_ops, enable_driver); - SET_DEVICE_OP(dev_ops, fill_res_entry); - SET_DEVICE_OP(dev_ops, fill_stat_entry); + SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry); + SET_DEVICE_OP(dev_ops, fill_res_cq_entry); + SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw); + SET_DEVICE_OP(dev_ops, fill_res_mr_entry); + SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw); + SET_DEVICE_OP(dev_ops, fill_res_qp_entry); + SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw); + SET_DEVICE_OP(dev_ops, fill_stat_mr_entry); SET_DEVICE_OP(dev_ops, get_dev_fw_str); SET_DEVICE_OP(dev_ops, get_dma_mr); SET_DEVICE_OP(dev_ops, get_hw_stats); @@ -2667,6 +2680,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, query_port); SET_DEVICE_OP(dev_ops, query_qp); SET_DEVICE_OP(dev_ops, query_srq); + SET_DEVICE_OP(dev_ops, query_ucontext); SET_DEVICE_OP(dev_ops, rdma_netdev_get_params); SET_DEVICE_OP(dev_ops, read_counters); SET_DEVICE_OP(dev_ops, reg_dm_mr); @@ -2679,10 +2693,12 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, set_vf_link_state); SET_OBJ_SIZE(dev_ops, ib_ah); + SET_OBJ_SIZE(dev_ops, ib_counters); SET_OBJ_SIZE(dev_ops, ib_cq); SET_OBJ_SIZE(dev_ops, ib_pd); SET_OBJ_SIZE(dev_ops, ib_srq); SET_OBJ_SIZE(dev_ops, ib_ucontext); + SET_OBJ_SIZE(dev_ops, ib_xrcd); } EXPORT_SYMBOL(ib_set_device_ops); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a09f8e3c7f3f..9355e521d9f4 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -402,7 +402,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); INIT_LIST_HEAD(&mad_agent_priv->local_list); INIT_WORK(&mad_agent_priv->local_work, local_completions); - atomic_set(&mad_agent_priv->refcount, 1); + refcount_set(&mad_agent_priv->refcount, 1); init_completion(&mad_agent_priv->comp); ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type); @@ -484,7 +484,7 @@ EXPORT_SYMBOL(ib_register_mad_agent); static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { - if (atomic_dec_and_test(&mad_agent_priv->refcount)) + if (refcount_dec_and_test(&mad_agent_priv->refcount)) complete(&mad_agent_priv->comp); } @@ -718,7 +718,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, * Reference MAD agent until receive * side of local completion handled */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); } else kfree(mad_priv); break; @@ -758,7 +758,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, local->return_wc_byte_len = mad_size; } /* Reference MAD agent until send side of local completion handled */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ spin_lock_irqsave(&mad_agent_priv->lock, flags); list_add_tail(&local->completion_list, &mad_agent_priv->local_list); @@ -916,7 +916,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, } mad_send_wr->send_buf.mad_agent = mad_agent; - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); return &mad_send_wr->send_buf; } EXPORT_SYMBOL(ib_create_send_mad); @@ -1131,7 +1131,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, mad_send_wr->status = IB_WC_SUCCESS; /* Reference MAD agent until send completes */ - atomic_inc(&mad_agent_priv->refcount); + refcount_inc(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); list_add_tail(&mad_send_wr->agent_list, &mad_agent_priv->send_list); @@ -1148,7 +1148,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, spin_lock_irqsave(&mad_agent_priv->lock, flags); list_del(&mad_send_wr->agent_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); goto error; } } @@ -1554,7 +1554,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; rcu_read_lock(); mad_agent = xa_load(&ib_mad_clients, hi_tid); - if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount)) + if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount)) mad_agent = NULL; rcu_read_unlock(); } else { @@ -1606,7 +1606,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, } } if (mad_agent) - atomic_inc(&mad_agent->refcount); + refcount_inc(&mad_agent->refcount); out: spin_unlock_irqrestore(&port_priv->reg_lock, flags); } @@ -1831,7 +1831,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, mad_agent_priv->agent.recv_handler( &mad_agent_priv->agent, NULL, mad_recv_wc); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); } else { /* not user rmpp, revert to normal behavior and * drop the mad */ @@ -1848,7 +1848,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, &mad_agent_priv->agent, &mad_send_wr->send_buf, mad_recv_wc); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; @@ -2438,7 +2438,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) list_del(&mad_send_wr->agent_list); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); } } @@ -2572,7 +2572,7 @@ static void local_completions(struct work_struct *work) &local->mad_send_wr->send_buf, &local->mad_priv->header.recv_wc); spin_lock_irqsave(&recv_mad_agent->lock, flags); - atomic_dec(&recv_mad_agent->refcount); + deref_mad_agent(recv_mad_agent); spin_unlock_irqrestore(&recv_mad_agent->lock, flags); } @@ -2585,7 +2585,7 @@ local_send_completion: &mad_send_wc); spin_lock_irqsave(&mad_agent_priv->lock, flags); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); if (free_mad) kfree(local->mad_priv); kfree(local); @@ -2671,7 +2671,7 @@ static void timeout_sends(struct work_struct *work) mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - atomic_dec(&mad_agent_priv->refcount); + deref_mad_agent(mad_agent_priv); spin_lock_irqsave(&mad_agent_priv->lock, flags); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 403d8673a2f9..4aa16b35dad0 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -103,7 +103,7 @@ struct ib_mad_agent_private { struct work_struct local_work; struct list_head rmpp_list; - atomic_t refcount; + refcount_t refcount; union { struct completion comp; struct rcu_head rcu; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 5ec57abc0849..e0573e4d0404 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -40,8 +40,7 @@ enum rmpp_state { RMPP_STATE_ACTIVE, RMPP_STATE_TIMEOUT, - RMPP_STATE_COMPLETE, - RMPP_STATE_CANCELING + RMPP_STATE_COMPLETE }; struct mad_rmpp_recv { @@ -52,7 +51,7 @@ struct mad_rmpp_recv { struct completion comp; enum rmpp_state state; spinlock_t lock; - atomic_t refcount; + refcount_t refcount; struct ib_ah *ah; struct ib_mad_recv_wc *rmpp_wc; @@ -73,7 +72,7 @@ struct mad_rmpp_recv { static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) { - if (atomic_dec_and_test(&rmpp_recv->refcount)) + if (refcount_dec_and_test(&rmpp_recv->refcount)) complete(&rmpp_recv->comp); } @@ -92,22 +91,18 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) spin_lock_irqsave(&agent->lock, flags); list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { - if (rmpp_recv->state != RMPP_STATE_COMPLETE) - ib_free_recv_mad(rmpp_recv->rmpp_wc); - rmpp_recv->state = RMPP_STATE_CANCELING; - } - spin_unlock_irqrestore(&agent->lock, flags); - - list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { cancel_delayed_work(&rmpp_recv->timeout_work); cancel_delayed_work(&rmpp_recv->cleanup_work); } + spin_unlock_irqrestore(&agent->lock, flags); flush_workqueue(agent->qp_info->port_priv->wq); list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, &agent->rmpp_list, list) { list_del(&rmpp_recv->list); + if (rmpp_recv->state != RMPP_STATE_COMPLETE) + ib_free_recv_mad(rmpp_recv->rmpp_wc); destroy_rmpp_recv(rmpp_recv); } } @@ -272,10 +267,6 @@ static void recv_cleanup_handler(struct work_struct *work) unsigned long flags; spin_lock_irqsave(&rmpp_recv->agent->lock, flags); - if (rmpp_recv->state == RMPP_STATE_CANCELING) { - spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); - return; - } list_del(&rmpp_recv->list); spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); destroy_rmpp_recv(rmpp_recv); @@ -305,7 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent, INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler); spin_lock_init(&rmpp_recv->lock); rmpp_recv->state = RMPP_STATE_ACTIVE; - atomic_set(&rmpp_recv->refcount, 1); + refcount_set(&rmpp_recv->refcount, 1); rmpp_recv->rmpp_wc = mad_recv_wc; rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf; @@ -357,7 +348,7 @@ acquire_rmpp_recv(struct ib_mad_agent_private *agent, spin_lock_irqsave(&agent->lock, flags); rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); if (rmpp_recv) - atomic_inc(&rmpp_recv->refcount); + refcount_inc(&rmpp_recv->refcount); spin_unlock_irqrestore(&agent->lock, flags); return rmpp_recv; } @@ -553,7 +544,7 @@ start_rmpp(struct ib_mad_agent_private *agent, destroy_rmpp_recv(rmpp_recv); return continue_rmpp(agent, mad_recv_wc); } - atomic_inc(&rmpp_recv->refcount); + refcount_inc(&rmpp_recv->refcount); if (get_last_flag(&mad_recv_wc->recv_buf)) { rmpp_recv->state = RMPP_STATE_COMPLETE; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index e16105be2eb2..12d29d54a081 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -114,6 +114,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY }, [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 }, @@ -446,27 +447,11 @@ static int fill_res_name_pid(struct sk_buff *msg, return err ? -EMSGSIZE : 0; } -static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg, - struct rdma_restrack_entry *res) +static int fill_res_qp_entry_query(struct sk_buff *msg, + struct rdma_restrack_entry *res, + struct ib_device *dev, + struct ib_qp *qp) { - if (!dev->ops.fill_res_entry) - return false; - return dev->ops.fill_res_entry(msg, res); -} - -static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg, - struct rdma_restrack_entry *res) -{ - if (!dev->ops.fill_stat_entry) - return false; - return dev->ops.fill_stat_entry(msg, res); -} - -static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, - struct rdma_restrack_entry *res, uint32_t port) -{ - struct ib_qp *qp = container_of(res, struct ib_qp, res); - struct ib_device *dev = qp->device; struct ib_qp_init_attr qp_init_attr; struct ib_qp_attr qp_attr; int ret; @@ -475,16 +460,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, if (ret) return ret; - if (port && port != qp_attr.port_num) - return -EAGAIN; - - /* In create_qp() port is not set yet */ - if (qp_attr.port_num && - nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num)) - goto err; - - if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num)) - goto err; if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN, qp_attr.dest_qp_num)) @@ -508,19 +483,53 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state)) goto err; + if (dev->ops.fill_res_qp_entry) + return dev->ops.fill_res_qp_entry(msg, qp); + return 0; + +err: return -EMSGSIZE; +} + +static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_qp *qp = container_of(res, struct ib_qp, res); + struct ib_device *dev = qp->device; + int ret; + + if (port && port != qp->port) + return -EAGAIN; + + /* In create_qp() port is not set yet */ + if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) + return -EINVAL; + + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); + if (ret) + return -EMSGSIZE; + if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id)) - goto err; + return -EMSGSIZE; - if (fill_res_name_pid(msg, res)) - goto err; + ret = fill_res_name_pid(msg, res); + if (ret) + return -EMSGSIZE; - if (fill_res_entry(dev, msg, res)) - goto err; + return fill_res_qp_entry_query(msg, res, dev, qp); +} - return 0; +static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_qp *qp = container_of(res, struct ib_qp, res); + struct ib_device *dev = qp->device; -err: return -EMSGSIZE; + if (port && port != qp->port) + return -EAGAIN; + if (!dev->ops.fill_res_qp_entry_raw) + return -EINVAL; + return dev->ops.fill_res_qp_entry_raw(msg, qp); } static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, @@ -568,9 +577,8 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, if (fill_res_name_pid(msg, res)) goto err; - if (fill_res_entry(dev, msg, res)) - goto err; - + if (dev->ops.fill_res_cm_id_entry) + return dev->ops.fill_res_cm_id_entry(msg, cm_id); return 0; err: return -EMSGSIZE; @@ -583,35 +591,42 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, struct ib_device *dev = cq->device; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe)) - goto err; + return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD)) - goto err; + return -EMSGSIZE; /* Poll context is only valid for kernel CQs */ if (rdma_is_kernel_res(res) && nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx)) - goto err; + return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL))) - goto err; + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id)) - goto err; + return -EMSGSIZE; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, cq->uobject->uevent.uobject.context->res.id)) - goto err; + return -EMSGSIZE; if (fill_res_name_pid(msg, res)) - goto err; + return -EMSGSIZE; - if (fill_res_entry(dev, msg, res)) - goto err; + return (dev->ops.fill_res_cq_entry) ? + dev->ops.fill_res_cq_entry(msg, cq) : 0; +} - return 0; +static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_cq *cq = container_of(res, struct ib_cq, res); + struct ib_device *dev = cq->device; -err: return -EMSGSIZE; + if (!dev->ops.fill_res_cq_entry_raw) + return -EINVAL; + return dev->ops.fill_res_cq_entry_raw(msg, cq); } static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, @@ -622,38 +637,45 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey)) - goto err; + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) - goto err; + return -EMSGSIZE; } if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, RDMA_NLDEV_ATTR_PAD)) - goto err; + return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) - goto err; + return -EMSGSIZE; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id)) - goto err; + return -EMSGSIZE; if (fill_res_name_pid(msg, res)) - goto err; + return -EMSGSIZE; - if (fill_res_entry(dev, msg, res)) - goto err; + return (dev->ops.fill_res_mr_entry) ? + dev->ops.fill_res_mr_entry(msg, mr) : + 0; +} - return 0; +static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_restrack_entry *res, uint32_t port) +{ + struct ib_mr *mr = container_of(res, struct ib_mr, res); + struct ib_device *dev = mr->pd->device; -err: return -EMSGSIZE; + if (!dev->ops.fill_res_mr_entry_raw) + return -EINVAL; + return dev->ops.fill_res_mr_entry_raw(msg, mr); } static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_pd *pd = container_of(res, struct ib_pd, res); - struct ib_device *dev = pd->device; if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, @@ -676,13 +698,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, pd->uobject->context->res.id)) goto err; - if (fill_res_name_pid(msg, res)) - goto err; - - if (fill_res_entry(dev, msg, res)) - goto err; - - return 0; + return fill_res_name_pid(msg, res); err: return -EMSGSIZE; } @@ -695,11 +711,16 @@ static int fill_stat_counter_mode(struct sk_buff *msg, if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode)) return -EMSGSIZE; - if (m->mode == RDMA_COUNTER_MODE_AUTO) + if (m->mode == RDMA_COUNTER_MODE_AUTO) { if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) && nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type)) return -EMSGSIZE; + if ((m->mask & RDMA_COUNTER_MASK_PID) && + fill_res_name_pid(msg, &counter->res)) + return -EMSGSIZE; + } + return 0; } @@ -738,9 +759,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg, xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { qp = container_of(res, struct ib_qp, res); - if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) - continue; - if (!qp->counter || (qp->counter->id != counter->id)) continue; @@ -793,9 +811,8 @@ static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) goto err; - if (fill_stat_entry(dev, msg, res)) - goto err; - + if (dev->ops.fill_stat_mr_entry) + return dev->ops.fill_stat_mr_entry(msg, mr); return 0; err: @@ -840,7 +857,6 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) || - fill_res_name_pid(msg, &counter->res) || fill_stat_counter_mode(msg, counter) || fill_stat_counter_qps(msg, counter) || fill_stat_counter_hwcounters(msg, counter)) @@ -1177,7 +1193,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb, struct nldev_fill_res_entry { enum rdma_nldev_attr nldev_attr; - enum rdma_nldev_command nldev_cmd; u8 flags; u32 entry; u32 id; @@ -1189,40 +1204,34 @@ enum nldev_res_flags { static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_QP] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, .id = RDMA_NLDEV_ATTR_RES_LQPN, }, [RDMA_RESTRACK_CM_ID] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CM_IDN, }, [RDMA_RESTRACK_CQ] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CQN, }, [RDMA_RESTRACK_MR] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY, .id = RDMA_NLDEV_ATTR_RES_MRN, }, [RDMA_RESTRACK_PD] = { - .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET, .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY, .id = RDMA_NLDEV_ATTR_RES_PDN, }, [RDMA_RESTRACK_COUNTER] = { - .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET, .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID, @@ -1281,7 +1290,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd), + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NL_GET_OP(nlh->nlmsg_type)), 0, 0); if (fill_nldev_handle(msg, device)) { @@ -1359,7 +1369,8 @@ static int res_get_common_dumpit(struct sk_buff *skb, } nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd), + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NL_GET_OP(cb->nlh->nlmsg_type)), 0, NLM_F_MULTI); if (fill_nldev_handle(skb, device)) { @@ -1441,26 +1452,29 @@ err_index: return ret; } -#define RES_GET_FUNCS(name, type) \ - static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ +#define RES_GET_FUNCS(name, type) \ + static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ struct netlink_callback *cb) \ - { \ - return res_get_common_dumpit(skb, cb, type, \ - fill_res_##name##_entry); \ - } \ - static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ - struct nlmsghdr *nlh, \ + { \ + return res_get_common_dumpit(skb, cb, type, \ + fill_res_##name##_entry); \ + } \ + static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ + struct nlmsghdr *nlh, \ struct netlink_ext_ack *extack) \ - { \ - return res_get_common_doit(skb, nlh, extack, type, \ - fill_res_##name##_entry); \ + { \ + return res_get_common_doit(skb, nlh, extack, type, \ + fill_res_##name##_entry); \ } RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); +RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP); RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID); RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ); +RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ); RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); +RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR); RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER); static LIST_HEAD(link_ops); @@ -2145,6 +2159,21 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .doit = nldev_stat_del_doit, .flags = RDMA_NL_ADMIN_PERM, }, + [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = { + .doit = nldev_res_get_qp_raw_doit, + .dump = nldev_res_get_qp_raw_dumpit, + .flags = RDMA_NL_ADMIN_PERM, + }, + [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = { + .doit = nldev_res_get_cq_raw_doit, + .dump = nldev_res_get_cq_raw_dumpit, + .flags = RDMA_NL_ADMIN_PERM, + }, + [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = { + .doit = nldev_res_get_mr_raw_doit, + .dump = nldev_res_get_mr_raw_dumpit, + .flags = RDMA_NL_ADMIN_PERM, + }, }; void __init nldev_init(void) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index defe9cd4c5ee..c11e50510e49 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -58,7 +58,7 @@ struct ib_port { struct ib_device *ibdev; struct gid_attr_group *gid_attr_group; struct attribute_group gid_group; - struct attribute_group pkey_group; + struct attribute_group *pkey_group; struct attribute_group *pma_table; struct attribute_group *hw_stats_ag; struct rdma_hw_stats *hw_stats; @@ -681,11 +681,16 @@ static void ib_port_release(struct kobject *kobj) kfree(p->gid_group.attrs); } - if (p->pkey_group.attrs) { - for (i = 0; (a = p->pkey_group.attrs[i]); ++i) - kfree(a); + if (p->pkey_group) { + if (p->pkey_group->attrs) { + for (i = 0; (a = p->pkey_group->attrs[i]); ++i) + kfree(a); + + kfree(p->pkey_group->attrs); + } - kfree(p->pkey_group.attrs); + kfree(p->pkey_group); + p->pkey_group = NULL; } kfree(p); @@ -1118,17 +1123,26 @@ static int add_port(struct ib_core_device *coredev, int port_num) if (ret) goto err_free_gid_type; - p->pkey_group.name = "pkeys"; - p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, - attr.pkey_tbl_len); - if (!p->pkey_group.attrs) { - ret = -ENOMEM; - goto err_remove_gid_type; + if (attr.pkey_tbl_len) { + p->pkey_group = kzalloc(sizeof(*p->pkey_group), GFP_KERNEL); + if (!p->pkey_group) { + ret = -ENOMEM; + goto err_remove_gid_type; + } + + p->pkey_group->name = "pkeys"; + p->pkey_group->attrs = alloc_group_attrs(show_port_pkey, + attr.pkey_tbl_len); + if (!p->pkey_group->attrs) { + ret = -ENOMEM; + goto err_free_pkey_group; + } + + ret = sysfs_create_group(&p->kobj, p->pkey_group); + if (ret) + goto err_free_pkey; } - ret = sysfs_create_group(&p->kobj, &p->pkey_group); - if (ret) - goto err_free_pkey; if (device->ops.init_port && is_full_dev) { ret = device->ops.init_port(device, port_num, &p->kobj); @@ -1150,14 +1164,20 @@ static int add_port(struct ib_core_device *coredev, int port_num) return 0; err_remove_pkey: - sysfs_remove_group(&p->kobj, &p->pkey_group); + if (p->pkey_group) + sysfs_remove_group(&p->kobj, p->pkey_group); err_free_pkey: - for (i = 0; i < attr.pkey_tbl_len; ++i) - kfree(p->pkey_group.attrs[i]); + if (p->pkey_group) { + for (i = 0; i < attr.pkey_tbl_len; ++i) + kfree(p->pkey_group->attrs[i]); + + kfree(p->pkey_group->attrs); + p->pkey_group->attrs = NULL; + } - kfree(p->pkey_group.attrs); - p->pkey_group.attrs = NULL; +err_free_pkey_group: + kfree(p->pkey_group); err_remove_gid_type: sysfs_remove_group(&p->gid_attr_group->kobj, @@ -1317,7 +1337,8 @@ void ib_free_port_attrs(struct ib_core_device *coredev) if (port->pma_table) sysfs_remove_group(p, port->pma_table); - sysfs_remove_group(p, &port->pkey_group); + if (port->pkey_group) + sysfs_remove_group(p, port->pkey_group); sysfs_remove_group(p, &port->gid_group); sysfs_remove_group(&port->gid_attr_group->kobj, &port->gid_attr_group->ndev); diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c index 6c3514beac4d..31e7860d35bf 100644 --- a/drivers/infiniband/core/trace.c +++ b/drivers/infiniband/core/trace.c @@ -9,6 +9,4 @@ #define CREATE_TRACE_POINTS -#include <rdma/ib_verbs.h> - #include <trace/events/rdma_core.h> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 82455a1392f1..831bff8d52e5 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -261,6 +261,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, sg = umem->sg_head.sgl; while (npages) { + cond_resched(); ret = pin_user_pages_fast(cur_base, min_t(unsigned long, npages, PAGE_SIZE / diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index ccd28405451c..5e32f61a2fe4 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -152,6 +152,7 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_implicit); * ib_alloc_implicit_odp_umem() * @addr: The starting userspace VA * @size: The length of the userspace VA + * @ops: MMU interval ops, currently only @invalidate */ struct ib_umem_odp * ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr, @@ -213,6 +214,7 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_child); * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned + * @ops: MMU interval ops, currently only @invalidate * * The driver should use when the access flags indicate ODP memory. It avoids * pinning, instead, stores the mm for future page fault handling in diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 76e7ec0f0775..2fbc583d5bdd 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -415,8 +415,8 @@ static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs) static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_alloc_pd_resp resp = {}; struct ib_uverbs_alloc_pd cmd; - struct ib_uverbs_alloc_pd_resp resp; struct ib_uobject *uobj; struct ib_pd *pd; int ret; @@ -438,29 +438,20 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) pd->device = ib_dev; pd->uobject = uobj; - pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); pd->res.type = RDMA_RESTRACK_PD; ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata); if (ret) goto err_alloc; - - uobj->object = pd; - memset(&resp, 0, sizeof resp); - resp.pd_handle = uobj->id; rdma_restrack_uadd(&pd->res); - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; + uobj->object = pd; + uobj_finalize_uobj_create(uobj, attrs); - rdma_alloc_commit_uobject(uobj, attrs); - return 0; + resp.pd_handle = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); -err_copy: - ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs)); - pd = NULL; err_alloc: kfree(pd); err: @@ -568,15 +559,15 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev, static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_device *ibudev = attrs->ufile->device; + struct ib_uverbs_open_xrcd_resp resp = {}; struct ib_uverbs_open_xrcd cmd; - struct ib_uverbs_open_xrcd_resp resp; struct ib_uxrcd_object *obj; struct ib_xrcd *xrcd = NULL; - struct fd f = {NULL, 0}; struct inode *inode = NULL; - int ret = 0; int new_xrcd = 0; struct ib_device *ib_dev; + struct fd f = {}; + int ret; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) @@ -614,24 +605,16 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) } if (!xrcd) { - xrcd = ib_dev->ops.alloc_xrcd(ib_dev, &attrs->driver_udata); + xrcd = ib_alloc_xrcd_user(ib_dev, inode, &attrs->driver_udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; } - - xrcd->inode = inode; - xrcd->device = ib_dev; - atomic_set(&xrcd->usecnt, 0); - mutex_init(&xrcd->tgt_qp_mutex); - INIT_LIST_HEAD(&xrcd->tgt_qp_list); new_xrcd = 1; } atomic_set(&obj->refcnt, 0); obj->uobject.object = xrcd; - memset(&resp, 0, sizeof resp); - resp.xrcd_handle = obj->uobject.id; if (inode) { if (new_xrcd) { @@ -643,27 +626,17 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) atomic_inc(&xrcd->usecnt); } - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; - if (f.file) fdput(f); mutex_unlock(&ibudev->xrcd_tree_mutex); + uobj_finalize_uobj_create(&obj->uobject, attrs); - rdma_alloc_commit_uobject(&obj->uobject, attrs); - return 0; - -err_copy: - if (inode) { - if (new_xrcd) - xrcd_table_delete(ibudev, inode); - atomic_dec(&xrcd->usecnt); - } + resp.xrcd_handle = obj->uobject.id; + return uverbs_response(attrs, &resp, sizeof(resp)); err_dealloc_xrcd: - ib_dealloc_xrcd(xrcd, uverbs_get_cleared_udata(attrs)); + ib_dealloc_xrcd_user(xrcd, uverbs_get_cleared_udata(attrs)); err: uobj_alloc_abort(&obj->uobject, attrs); @@ -701,7 +674,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, if (inode && !atomic_dec_and_test(&xrcd->usecnt)) return 0; - ret = ib_dealloc_xrcd(xrcd, &attrs->driver_udata); + ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) { atomic_inc(&xrcd->usecnt); @@ -716,8 +689,8 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_reg_mr_resp resp = {}; struct ib_uverbs_reg_mr cmd; - struct ib_uverbs_reg_mr_resp resp; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mr *mr; @@ -770,30 +743,20 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->res.type = RDMA_RESTRACK_MR; + mr->iova = cmd.hca_va; rdma_restrack_uadd(&mr->res); uobj->object = mr; - - memset(&resp, 0, sizeof resp); - resp.lkey = mr->lkey; - resp.rkey = mr->rkey; - resp.mr_handle = uobj->id; - - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; - uobj_put_obj_read(pd); + uobj_finalize_uobj_create(uobj, attrs); - rdma_alloc_commit_uobject(uobj, attrs); - return 0; - -err_copy: - ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; + resp.mr_handle = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); err_put: uobj_put_obj_read(pd); - err_free: uobj_alloc_abort(uobj, attrs); return ret; @@ -861,6 +824,9 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) atomic_dec(&old_pd->usecnt); } + if (cmd.flags & IB_MR_REREG_TRANS) + mr->iova = cmd.hca_va; + memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; resp.rkey = mr->rkey; @@ -930,21 +896,13 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) atomic_inc(&pd->usecnt); uobj->object = mw; + uobj_put_obj_read(pd); + uobj_finalize_uobj_create(uobj, attrs); - memset(&resp, 0, sizeof(resp)); - resp.rkey = mw->rkey; + resp.rkey = mw->rkey; resp.mw_handle = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; - - uobj_put_obj_read(pd); - rdma_alloc_commit_uobject(uobj, attrs); - return 0; - -err_copy: - uverbs_dealloc_mw(mw); err_put: uobj_put_obj_read(pd); err_free: @@ -981,40 +939,33 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs) if (IS_ERR(uobj)) return PTR_ERR(uobj); - resp.fd = uobj->id; - ev_file = container_of(uobj, struct ib_uverbs_completion_event_file, uobj); ib_uverbs_init_event_queue(&ev_file->ev_queue); + uobj_finalize_uobj_create(uobj, attrs); - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) { - uobj_alloc_abort(uobj, attrs); - return ret; - } - - rdma_alloc_commit_uobject(uobj, attrs); - return 0; + resp.fd = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); } -static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, - struct ib_uverbs_ex_create_cq *cmd) +static int create_cq(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_ex_create_cq *cmd) { struct ib_ucq_object *obj; struct ib_uverbs_completion_event_file *ev_file = NULL; struct ib_cq *cq; int ret; - struct ib_uverbs_ex_create_cq_resp resp; + struct ib_uverbs_ex_create_cq_resp resp = {}; struct ib_cq_init_attr attr = {}; struct ib_device *ib_dev; if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors) - return ERR_PTR(-EINVAL); + return -EINVAL; obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs, &ib_dev); if (IS_ERR(obj)) - return obj; + return PTR_ERR(obj); if (cmd->comp_channel >= 0) { ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs); @@ -1043,53 +994,38 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; atomic_set(&cq->usecnt, 0); + cq->res.type = RDMA_RESTRACK_CQ; ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); if (ret) goto err_free; + rdma_restrack_uadd(&cq->res); obj->uevent.uobject.object = cq; obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file); if (obj->uevent.event_file) uverbs_uobject_get(&obj->uevent.event_file->uobj); + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); - memset(&resp, 0, sizeof resp); resp.base.cq_handle = obj->uevent.uobject.id; - resp.base.cqe = cq->cqe; + resp.base.cqe = cq->cqe; resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + return uverbs_response(attrs, &resp, sizeof(resp)); - cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_uadd(&cq->res); - - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_cb; - - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); - return obj; - -err_cb: - if (obj->uevent.event_file) - uverbs_uobject_put(&obj->uevent.event_file->uobj); - ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); - cq = NULL; err_free: kfree(cq); err_file: if (ev_file) ib_uverbs_release_ucq(ev_file, obj); - err: uobj_alloc_abort(&obj->uevent.uobject, attrs); - - return ERR_PTR(ret); + return ret; } static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_cq cmd; struct ib_uverbs_ex_create_cq cmd_ex; - struct ib_ucq_object *obj; int ret; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); @@ -1102,14 +1038,12 @@ static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs) cmd_ex.comp_vector = cmd.comp_vector; cmd_ex.comp_channel = cmd.comp_channel; - obj = create_cq(attrs, &cmd_ex); - return PTR_ERR_OR_ZERO(obj); + return create_cq(attrs, &cmd_ex); } static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_ex_create_cq cmd; - struct ib_ucq_object *obj; int ret; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); @@ -1122,8 +1056,7 @@ static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs) if (cmd.reserved) return -EINVAL; - obj = create_cq(attrs, &cmd); - return PTR_ERR_OR_ZERO(obj); + return create_cq(attrs, &cmd); } static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) @@ -1131,7 +1064,7 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) struct ib_uverbs_resize_cq cmd; struct ib_uverbs_resize_cq_resp resp = {}; struct ib_cq *cq; - int ret = -EINVAL; + int ret; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); if (ret) @@ -1298,7 +1231,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, struct ib_srq *srq = NULL; struct ib_qp *qp; struct ib_qp_init_attr attr = {}; - struct ib_uverbs_ex_create_qp_resp resp; + struct ib_uverbs_ex_create_qp_resp resp = {}; int ret; struct ib_rwq_ind_table *ind_tbl = NULL; bool has_sq = true; @@ -1468,20 +1401,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (obj->uevent.event_file) uverbs_uobject_get(&obj->uevent.event_file->uobj); - memset(&resp, 0, sizeof resp); - resp.base.qpn = qp->qp_num; - resp.base.qp_handle = obj->uevent.uobject.id; - resp.base.max_recv_sge = attr.cap.max_recv_sge; - resp.base.max_send_sge = attr.cap.max_send_sge; - resp.base.max_recv_wr = attr.cap.max_recv_wr; - resp.base.max_send_wr = attr.cap.max_send_wr; - resp.base.max_inline_data = attr.cap.max_inline_data; - resp.response_length = uverbs_response_length(attrs, sizeof(resp)); - - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_uevent; - if (xrcd) { obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); @@ -1502,12 +1421,18 @@ static int create_qp(struct uverbs_attr_bundle *attrs, UVERBS_LOOKUP_READ); if (ind_tbl) uobj_put_obj_read(ind_tbl); + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); + + resp.base.qpn = qp->qp_num; + resp.base.qp_handle = obj->uevent.uobject.id; + resp.base.max_recv_sge = attr.cap.max_recv_sge; + resp.base.max_send_sge = attr.cap.max_send_sge; + resp.base.max_recv_wr = attr.cap.max_recv_wr; + resp.base.max_send_wr = attr.cap.max_send_wr; + resp.base.max_inline_data = attr.cap.max_inline_data; + resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + return uverbs_response(attrs, &resp, sizeof(resp)); - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); - return 0; -err_uevent: - if (obj->uevent.event_file) - uverbs_uobject_put(&obj->uevent.event_file->uobj); err_cb: ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); @@ -1580,14 +1505,14 @@ static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs) static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_create_qp_resp resp = {}; struct ib_uverbs_open_qp cmd; - struct ib_uverbs_create_qp_resp resp; struct ib_uqp_object *obj; struct ib_xrcd *xrcd; - struct ib_uobject *xrcd_uobj; struct ib_qp *qp; struct ib_qp_open_attr attr = {}; int ret; + struct ib_uobject *xrcd_uobj; struct ib_device *ib_dev; ret = uverbs_request(attrs, &cmd, sizeof(cmd)); @@ -1627,24 +1552,16 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) obj->uevent.uobject.object = qp; obj->uevent.uobject.user_handle = cmd.user_handle; - memset(&resp, 0, sizeof resp); - resp.qpn = qp->qp_num; - resp.qp_handle = obj->uevent.uobject.id; - - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_destroy; - obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); qp->uobject = obj; uobj_put_read(xrcd_uobj); + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); - return 0; + resp.qpn = qp->qp_num; + resp.qp_handle = obj->uevent.uobject.id; + return uverbs_response(attrs, &resp, sizeof(resp)); -err_destroy: - ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); err_xrcd: uobj_put_read(xrcd_uobj); err_put: @@ -1980,7 +1897,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs) * Last bit is reserved for extending the attr_mask by * using another field. */ - BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31)); + BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31)); if (cmd.base.attr_mask & ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) @@ -2480,24 +2397,14 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) ah->uobject = uobj; uobj->user_handle = cmd.user_handle; uobj->object = ah; - - resp.ah_handle = uobj->id; - - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; - uobj_put_obj_read(pd); - rdma_alloc_commit_uobject(uobj, attrs); - return 0; + uobj_finalize_uobj_create(uobj, attrs); -err_copy: - rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE, - uverbs_get_cleared_udata(attrs)); + resp.ah_handle = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); err_put: uobj_put_obj_read(pd); - err: uobj_alloc_abort(uobj, attrs); return ret; @@ -2989,26 +2896,18 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) if (obj->uevent.event_file) uverbs_uobject_get(&obj->uevent.event_file->uobj); - memset(&resp, 0, sizeof(resp)); + uobj_put_obj_read(pd); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); + resp.wq_handle = obj->uevent.uobject.id; resp.max_sge = wq_init_attr.max_sge; resp.max_wr = wq_init_attr.max_wr; resp.wqn = wq->wq_num; resp.response_length = uverbs_response_length(attrs, sizeof(resp)); - err = uverbs_response(attrs, &resp, sizeof(resp)); - if (err) - goto err_copy; - - uobj_put_obj_read(pd); - rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, - UVERBS_LOOKUP_READ); - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); - return 0; + return uverbs_response(attrs, &resp, sizeof(resp)); -err_copy: - if (obj->uevent.event_file) - uverbs_uobject_put(&obj->uevent.event_file->uobj); - ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs)); err_put_cq: rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, UVERBS_LOOKUP_READ); @@ -3093,7 +2992,7 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) struct ib_wq **wqs = NULL; u32 *wqs_handles = NULL; struct ib_wq *wq = NULL; - int i, j, num_read_wqs; + int i, num_read_wqs; u32 num_wq_handles; struct uverbs_req_iter iter; struct ib_device *ib_dev; @@ -3139,6 +3038,7 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) } wqs[num_read_wqs] = wq; + atomic_inc(&wqs[num_read_wqs]->usecnt); } uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev); @@ -3166,33 +3066,24 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) atomic_set(&rwq_ind_tbl->usecnt, 0); for (i = 0; i < num_wq_handles; i++) - atomic_inc(&wqs[i]->usecnt); + rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); + kfree(wqs_handles); + uobj_finalize_uobj_create(uobj, attrs); resp.ind_tbl_handle = uobj->id; resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; resp.response_length = uverbs_response_length(attrs, sizeof(resp)); + return uverbs_response(attrs, &resp, sizeof(resp)); - err = uverbs_response(attrs, &resp, sizeof(resp)); - if (err) - goto err_copy; - - kfree(wqs_handles); - - for (j = 0; j < num_read_wqs; j++) - rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, - UVERBS_LOOKUP_READ); - - rdma_alloc_commit_uobject(uobj, attrs); - return 0; - -err_copy: - ib_destroy_rwq_ind_table(rwq_ind_tbl); err_uobj: uobj_alloc_abort(uobj, attrs); put_wqs: - for (j = 0; j < num_read_wqs; j++) - rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, + for (i = 0; i < num_read_wqs; i++) { + rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject, UVERBS_LOOKUP_READ); + atomic_dec(&wqs[i]->usecnt); + } err_free: kfree(wqs_handles); kfree(wqs); @@ -3218,7 +3109,7 @@ static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs) static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_create_flow cmd; - struct ib_uverbs_create_flow_resp resp; + struct ib_uverbs_create_flow_resp resp = {}; struct ib_uobject *uobj; struct ib_flow *flow_id; struct ib_uverbs_flow_attr *kern_flow_attr; @@ -3351,23 +3242,17 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res); - memset(&resp, 0, sizeof(resp)); - resp.flow_handle = uobj->id; - - err = uverbs_response(attrs, &resp, sizeof(resp)); - if (err) - goto err_copy; - rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, UVERBS_LOOKUP_READ); kfree(flow_attr); + if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - rdma_alloc_commit_uobject(uobj, attrs); - return 0; -err_copy: - if (!qp->device->ops.destroy_flow(flow_id)) - atomic_dec(&qp->usecnt); + uobj_finalize_uobj_create(uobj, attrs); + + resp.flow_handle = uobj->id; + return uverbs_response(attrs, &resp, sizeof(resp)); + err_free: ib_uverbs_flow_resources_free(uflow_res); err_free_flow_attr: @@ -3402,13 +3287,13 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) { - struct ib_uverbs_create_srq_resp resp; + struct ib_uverbs_create_srq_resp resp = {}; struct ib_usrq_object *obj; struct ib_pd *pd; struct ib_srq *srq; - struct ib_uobject *xrcd_uobj; struct ib_srq_init_attr attr; int ret; + struct ib_uobject *xrcd_uobj; struct ib_device *ib_dev; obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs, @@ -3473,17 +3358,9 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, if (obj->uevent.event_file) uverbs_uobject_get(&obj->uevent.event_file->uobj); - memset(&resp, 0, sizeof resp); - resp.srq_handle = obj->uevent.uobject.id; - resp.max_wr = attr.attr.max_wr; - resp.max_sge = attr.attr.max_sge; if (cmd->srq_type == IB_SRQT_XRC) resp.srqn = srq->ext.xrc.srq_num; - ret = uverbs_response(attrs, &resp, sizeof(resp)); - if (ret) - goto err_copy; - if (cmd->srq_type == IB_SRQT_XRC) uobj_put_read(xrcd_uobj); @@ -3492,13 +3369,13 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, UVERBS_LOOKUP_READ); uobj_put_obj_read(pd); - rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); - return 0; + uobj_finalize_uobj_create(&obj->uevent.uobject, attrs); + + resp.srq_handle = obj->uevent.uobject.id; + resp.max_wr = attr.attr.max_wr; + resp.max_sge = attr.attr.max_sge; + return uverbs_response(attrs, &resp, sizeof(resp)); -err_copy: - if (obj->uevent.event_file) - uverbs_uobject_put(&obj->uevent.event_file->uobj); - ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); err_put_pd: uobj_put_obj_read(pd); err_put_cq: diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 2d882c02387c..ef04a261097f 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -790,6 +790,7 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, } return uverbs_copy_to(bundle, idx, from, size); } +EXPORT_SYMBOL(uverbs_copy_to_struct_or_zero); /* Once called an abort will call through to the type's destroy_hw() */ void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 69e4755cc04b..37794d88b1f3 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -601,6 +601,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, memset(bundle.attr_present, 0, sizeof(bundle.attr_present)); bundle.ufile = file; bundle.context = NULL; /* only valid if bundle has uobject */ + bundle.uobject = NULL; if (!method_elm->is_ex) { size_t in_len = hdr.in_words * 4 - sizeof(hdr); size_t out_len = hdr.out_words * 4; @@ -664,6 +665,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, } ret = method_elm->handler(&bundle); + if (bundle.uobject) + uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true, + !ret, &bundle); out_unlock: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return (ret) ? : count; diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 9f013304e677..c7e7438752bc 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -46,7 +46,9 @@ static int uverbs_free_counters(struct ib_uobject *uobject, if (ret) return ret; - return counters->device->ops.destroy_counters(counters); + counters->device->ops.destroy_counters(counters); + kfree(counters); + return 0; } static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( @@ -66,20 +68,19 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( if (!ib_dev->ops.create_counters) return -EOPNOTSUPP; - counters = ib_dev->ops.create_counters(ib_dev, attrs); - if (IS_ERR(counters)) { - ret = PTR_ERR(counters); - goto err_create_counters; - } + counters = rdma_zalloc_drv_obj(ib_dev, ib_counters); + if (!counters) + return -ENOMEM; counters->device = ib_dev; counters->uobject = uobj; uobj->object = counters; atomic_set(&counters->usecnt, 0); - return 0; + ret = ib_dev->ops.create_counters(counters, attrs); + if (ret) + kfree(counters); -err_create_counters: return ret; } diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 5dce2c7cc323..b1c7dacc02de 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -207,11 +207,8 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_CQ, UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq), - -#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI) &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) -#endif ); const struct uapi_definition uverbs_def_obj_cq[] = { diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index ae4a59d6f9b1..75df2094a010 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -38,7 +38,12 @@ static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)( attrs->ucore.outlen < method_elm->resp_size) return -ENOSPC; - return method_elm->handler(attrs); + attrs->uobject = NULL; + rc = method_elm->handler(attrs); + if (attrs->uobject) + uverbs_finalize_object(attrs->uobject, UVERBS_ACCESS_NEW, true, + !rc, attrs); + return rc; } DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE, @@ -229,6 +234,37 @@ static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)( return 0; } +static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)( + struct uverbs_attr_bundle *attrs) +{ + u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; + u32 num_comp; + int ret; + + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; + + if (!ib_dev->ops.query_ucontext) + return -EOPNOTSUPP; + + num_comp = attrs->ufile->device->num_comp_vectors; + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS, + &num_comp, sizeof(num_comp)); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT, + &core_support, sizeof(core_support)); + if (IS_UVERBS_COPY_ERR(ret)) + return ret; + + return ucontext->device->ops.query_ucontext(ucontext, attrs); +} + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_GET_CONTEXT, UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, @@ -238,6 +274,13 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_UHW()); DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_QUERY_CONTEXT, + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS, + UVERBS_ATTR_TYPE(u32), UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT, + UVERBS_ATTR_TYPE(u64), UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_INFO_HANDLES, /* Also includes any device specific object ids */ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_INFO_OBJECT_ID, @@ -260,7 +303,8 @@ DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE, &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT), &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE), &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES), - &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT)); + &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT), + &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT)); const struct uapi_definition uverbs_def_obj_device[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE), diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index a2722ef8496e..9b22bb553e8b 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -69,7 +69,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)( num_sge = uverbs_attr_ptr_get_array_size( attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge)); - if (num_sge < 0) + if (num_sge <= 0) return num_sge; sg_list = uverbs_attr_get_alloced_ptr(attrs, @@ -148,6 +148,36 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( return ret; } +static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_MR)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_mr *mr = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_QUERY_MR_HANDLE); + int ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LKEY, &mr->lkey, + sizeof(mr->lkey)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_RKEY, + &mr->rkey, sizeof(mr->rkey)); + + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LENGTH, + &mr->length, sizeof(mr->length)); + + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_IOVA, + &mr->iova, sizeof(mr->iova)); + + return IS_UVERBS_COPY_ERR(ret) ? ret : 0; +} + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_ADVISE_MR, UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE, @@ -166,6 +196,25 @@ DECLARE_UVERBS_NAMED_METHOD( UA_ALLOC_AND_COPY)); DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_QUERY_MR, + UVERBS_ATTR_IDR(UVERBS_ATTR_QUERY_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_RKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LKEY, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LENGTH, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_IOVA, + UVERBS_ATTR_TYPE(u64), + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_DM_MR_REG, UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR, @@ -206,7 +255,8 @@ DECLARE_UVERBS_NAMED_OBJECT( UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr), &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG), &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR)); + &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR), + &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR)); const struct uapi_definition uverbs_def_obj_mr[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 53d6505c0c7b..3096e73797b7 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -988,8 +988,8 @@ EXPORT_SYMBOL(rdma_destroy_ah_user); * @srq_init_attr: A list of initial attributes required to create the * SRQ. If SRQ creation succeeds, then the attributes are updated to * the actual capabilities of the created SRQ. - * @uobject - uobject pointer if this is not a kernel SRQ - * @udata - udata pointer if this is not a kernel SRQ + * @uobject: uobject pointer if this is not a kernel SRQ + * @udata: udata pointer if this is not a kernel SRQ * * srq_attr->max_wr and srq_attr->max_sge are read the determine the * requested size of the SRQ, and set to the actual values allocated @@ -1090,13 +1090,6 @@ static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags); } -static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) -{ - mutex_lock(&xrcd->tgt_qp_mutex); - list_add(&qp->xrcd_list, &xrcd->tgt_qp_list); - mutex_unlock(&xrcd->tgt_qp_mutex); -} - static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, void (*event_handler)(struct ib_event *, void *), void *qp_context) @@ -1139,16 +1132,15 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) return ERR_PTR(-EINVAL); - qp = ERR_PTR(-EINVAL); - mutex_lock(&xrcd->tgt_qp_mutex); - list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) { - if (real_qp->qp_num == qp_open_attr->qp_num) { - qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, - qp_open_attr->qp_context); - break; - } + down_read(&xrcd->tgt_qps_rwsem); + real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num); + if (!real_qp) { + up_read(&xrcd->tgt_qps_rwsem); + return ERR_PTR(-EINVAL); } - mutex_unlock(&xrcd->tgt_qp_mutex); + qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, + qp_open_attr->qp_context); + up_read(&xrcd->tgt_qps_rwsem); return qp; } EXPORT_SYMBOL(ib_open_qp); @@ -1157,6 +1149,7 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr) { struct ib_qp *real_qp = qp; + int err; qp->event_handler = __ib_shared_qp_event_handler; qp->qp_context = qp; @@ -1172,7 +1165,12 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, if (IS_ERR(qp)) return qp; - __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); + err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num, + real_qp, GFP_KERNEL)); + if (err) { + ib_close_qp(qp); + return ERR_PTR(err); + } return qp; } @@ -1712,7 +1710,7 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, if (!(rdma_protocol_ib(qp->device, attr->alt_ah_attr.port_num) && rdma_protocol_ib(qp->device, port))) { - ret = EINVAL; + ret = -EINVAL; goto out; } } @@ -1887,21 +1885,18 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp) real_qp = qp->real_qp; xrcd = real_qp->xrcd; - - mutex_lock(&xrcd->tgt_qp_mutex); + down_write(&xrcd->tgt_qps_rwsem); ib_close_qp(qp); if (atomic_read(&real_qp->usecnt) == 0) - list_del(&real_qp->xrcd_list); + xa_erase(&xrcd->tgt_qps, real_qp->qp_num); else real_qp = NULL; - mutex_unlock(&xrcd->tgt_qp_mutex); + up_write(&xrcd->tgt_qps_rwsem); if (real_qp) { ret = ib_destroy_qp(real_qp); if (!ret) atomic_dec(&xrcd->usecnt); - else - __ib_insert_xrcd_qp(xrcd, real_qp); } return 0; @@ -2077,6 +2072,9 @@ int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, if (!pd->device->ops.advise_mr) return -EOPNOTSUPP; + if (!num_sge) + return 0; + return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge, NULL); } @@ -2104,11 +2102,10 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) EXPORT_SYMBOL(ib_dereg_mr_user); /** - * ib_alloc_mr_user() - Allocates a memory region + * ib_alloc_mr() - Allocates a memory region * @pd: protection domain associated with the region * @mr_type: memory region type * @max_num_sg: maximum sg entries available for registration. - * @udata: user data or null for kernel objects * * Notes: * Memory registeration page/sg lists must not exceed max_num_sg. @@ -2116,8 +2113,8 @@ EXPORT_SYMBOL(ib_dereg_mr_user); * max_num_sg * used_page_size. * */ -struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) +struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg) { struct ib_mr *mr; @@ -2132,25 +2129,26 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, goto out; } - mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); - if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; - mr->dm = NULL; - mr->uobject = NULL; - atomic_inc(&pd->usecnt); - mr->need_inval = false; - mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_kadd(&mr->res); - mr->type = mr_type; - mr->sig_attrs = NULL; - } + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg); + if (IS_ERR(mr)) + goto out; + + mr->device = pd->device; + mr->pd = pd; + mr->dm = NULL; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + mr->need_inval = false; + mr->res.type = RDMA_RESTRACK_MR; + rdma_restrack_kadd(&mr->res); + mr->type = mr_type; + mr->sig_attrs = NULL; out: trace_mr_alloc(pd, mr_type, max_num_sg, mr); return mr; } -EXPORT_SYMBOL(ib_alloc_mr_user); +EXPORT_SYMBOL(ib_alloc_mr); /** * ib_alloc_mr_integrity() - Allocates an integrity memory region @@ -2288,45 +2286,57 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) } EXPORT_SYMBOL(ib_detach_mcast); -struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) +/** + * ib_alloc_xrcd_user - Allocates an XRC domain. + * @device: The device on which to allocate the XRC domain. + * @inode: inode to connect XRCD + * @udata: Valid user data or NULL for kernel object + */ +struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, + struct inode *inode, struct ib_udata *udata) { struct ib_xrcd *xrcd; + int ret; if (!device->ops.alloc_xrcd) return ERR_PTR(-EOPNOTSUPP); - xrcd = device->ops.alloc_xrcd(device, NULL); - if (!IS_ERR(xrcd)) { - xrcd->device = device; - xrcd->inode = NULL; - atomic_set(&xrcd->usecnt, 0); - mutex_init(&xrcd->tgt_qp_mutex); - INIT_LIST_HEAD(&xrcd->tgt_qp_list); - } + xrcd = rdma_zalloc_drv_obj(device, ib_xrcd); + if (!xrcd) + return ERR_PTR(-ENOMEM); + xrcd->device = device; + xrcd->inode = inode; + atomic_set(&xrcd->usecnt, 0); + init_rwsem(&xrcd->tgt_qps_rwsem); + xa_init(&xrcd->tgt_qps); + + ret = device->ops.alloc_xrcd(xrcd, udata); + if (ret) + goto err; return xrcd; +err: + kfree(xrcd); + return ERR_PTR(ret); } -EXPORT_SYMBOL(__ib_alloc_xrcd); +EXPORT_SYMBOL(ib_alloc_xrcd_user); -int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +/** + * ib_dealloc_xrcd_user - Deallocates an XRC domain. + * @xrcd: The XRC domain to deallocate. + * @udata: Valid user data or NULL for kernel object + */ +int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata) { - struct ib_qp *qp; - int ret; - if (atomic_read(&xrcd->usecnt)) return -EBUSY; - while (!list_empty(&xrcd->tgt_qp_list)) { - qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list); - ret = ib_destroy_qp(qp); - if (ret) - return ret; - } - mutex_destroy(&xrcd->tgt_qp_mutex); - - return xrcd->device->ops.dealloc_xrcd(xrcd, udata); + WARN_ON(!xa_empty(&xrcd->tgt_qps)); + xrcd->device->ops.dealloc_xrcd(xrcd, udata); + kfree(xrcd); + return 0; } -EXPORT_SYMBOL(ib_dealloc_xrcd); +EXPORT_SYMBOL(ib_dealloc_xrcd_user); /** * ib_create_wq - Creates a WQ associated with the specified protection @@ -2410,45 +2420,6 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, EXPORT_SYMBOL(ib_modify_wq); /* - * ib_create_rwq_ind_table - Creates a RQ Indirection Table. - * @device: The device on which to create the rwq indirection table. - * @ib_rwq_ind_table_init_attr: A list of initial attributes required to - * create the Indirection Table. - * - * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less - * than the created ib_rwq_ind_table object and the caller is responsible - * for its memory allocation/free. - */ -struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr) -{ - struct ib_rwq_ind_table *rwq_ind_table; - int i; - u32 table_size; - - if (!device->ops.create_rwq_ind_table) - return ERR_PTR(-EOPNOTSUPP); - - table_size = (1 << init_attr->log_ind_tbl_size); - rwq_ind_table = device->ops.create_rwq_ind_table(device, - init_attr, NULL); - if (IS_ERR(rwq_ind_table)) - return rwq_ind_table; - - rwq_ind_table->ind_tbl = init_attr->ind_tbl; - rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size; - rwq_ind_table->device = device; - rwq_ind_table->uobject = NULL; - atomic_set(&rwq_ind_table->usecnt, 0); - - for (i = 0; i < table_size; i++) - atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt); - - return rwq_ind_table; -} -EXPORT_SYMBOL(ib_create_rwq_ind_table); - -/* * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table. * @wq_ind_table: The Indirection Table to destroy. */ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 8b6ad5cddfce..3f18efc0c297 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -842,16 +842,79 @@ static u8 __from_ib_qp_type(enum ib_qp_type type) } } +static u16 bnxt_re_setup_rwqe_size(struct bnxt_qplib_qp *qplqp, + int rsge, int max) +{ + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + rsge = max; + return bnxt_re_get_rwqe_size(rsge); +} + +static u16 bnxt_re_get_wqe_size(int ilsize, int nsge) +{ + u16 wqe_size, calc_ils; + + wqe_size = bnxt_re_get_swqe_size(nsge); + if (ilsize) { + calc_ils = sizeof(struct sq_send_hdr) + ilsize; + wqe_size = max_t(u16, calc_ils, wqe_size); + wqe_size = ALIGN(wqe_size, sizeof(struct sq_send_hdr)); + } + return wqe_size; +} + +static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp, + struct ib_qp_init_attr *init_attr) +{ + struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_qplib_qp *qplqp; + struct bnxt_re_dev *rdev; + struct bnxt_qplib_q *sq; + int align, ilsize; + + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + sq = &qplqp->sq; + dev_attr = &rdev->dev_attr; + + align = sizeof(struct sq_send_hdr); + ilsize = ALIGN(init_attr->cap.max_inline_data, align); + + sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge); + if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges)) + return -EINVAL; + /* For gen p4 and gen p5 backward compatibility mode + * wqe size is fixed to 128 bytes + */ + if (sq->wqe_size < bnxt_re_get_swqe_size(dev_attr->max_qp_sges) && + qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + sq->wqe_size = bnxt_re_get_swqe_size(dev_attr->max_qp_sges); + + if (init_attr->cap.max_inline_data) { + qplqp->max_inline_data = sq->wqe_size - + sizeof(struct sq_send_hdr); + init_attr->cap.max_inline_data = qplqp->max_inline_data; + if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + sq->max_sge = qplqp->max_inline_data / + sizeof(struct sq_sge); + } + + return 0; +} + static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, struct bnxt_re_qp *qp, struct ib_udata *udata) { + struct bnxt_qplib_qp *qplib_qp; + struct bnxt_re_ucontext *cntx; struct bnxt_re_qp_req ureq; - struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp; - struct ib_umem *umem; int bytes = 0, psn_sz; - struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context( - udata, struct bnxt_re_ucontext, ib_uctx); + struct ib_umem *umem; + int psn_nume; + qplib_qp = &qp->qplib_qp; + cntx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, + ib_uctx); if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return -EFAULT; @@ -859,10 +922,15 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, /* Consider mapping PSN search memory only for RC QPs. */ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? - sizeof(struct sq_psn_search_ext) : - sizeof(struct sq_psn_search); - bytes += (qplib_qp->sq.max_wqe * psn_sz); + sizeof(struct sq_psn_search_ext) : + sizeof(struct sq_psn_search); + psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + qplib_qp->sq.max_wqe : + ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) / + sizeof(struct bnxt_qplib_sge)); + bytes += (psn_nume * psn_sz); } + bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(&rdev->ibdev, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE); @@ -975,7 +1043,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.sig_type = true; /* Shadow QP SQ depth should be same as QP1 RQ depth */ - qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size(); + qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6); qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.sq.max_sge = 2; /* Q full delta can be 1 since it is internal QP */ @@ -986,7 +1054,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.scq = qp1_qp->scq; qp->qplib_qp.rcq = qp1_qp->rcq; - qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(); + qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6); qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge; /* Q full delta can be 1 since it is internal QP */ @@ -1041,19 +1109,21 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, qplqp->srq = &srq->qplib_srq; rq->max_wqe = 0; } else { - rq->wqe_size = bnxt_re_get_rwqe_size(); + rq->max_sge = init_attr->cap.max_recv_sge; + if (rq->max_sge > dev_attr->max_qp_sges) + rq->max_sge = dev_attr->max_qp_sges; + init_attr->cap.max_recv_sge = rq->max_sge; + rq->wqe_size = bnxt_re_setup_rwqe_size(qplqp, rq->max_sge, + dev_attr->max_qp_sges); /* Allocate 1 more than what's provided so posting max doesn't * mean empty. */ entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1); rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); - rq->q_full_delta = rq->max_wqe - init_attr->cap.max_recv_wr; - rq->max_sge = init_attr->cap.max_recv_sge; - if (rq->max_sge > dev_attr->max_qp_sges) - rq->max_sge = dev_attr->max_qp_sges; + rq->q_full_delta = 0; + rq->sg_info.pgsize = PAGE_SIZE; + rq->sg_info.pgshft = PAGE_SHIFT; } - rq->sg_info.pgsize = PAGE_SIZE; - rq->sg_info.pgshft = PAGE_SHIFT; return 0; } @@ -1068,41 +1138,48 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - qplqp->rq.max_sge = dev_attr->max_qp_sges; - if (qplqp->rq.max_sge > dev_attr->max_qp_sges) + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { qplqp->rq.max_sge = dev_attr->max_qp_sges; - qplqp->rq.max_sge = 6; + if (qplqp->rq.max_sge > dev_attr->max_qp_sges) + qplqp->rq.max_sge = dev_attr->max_qp_sges; + qplqp->rq.max_sge = 6; + } } -static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; struct bnxt_qplib_q *sq; int entries; + int diff; + int rc; rdev = qp->rdev; qplqp = &qp->qplib_qp; sq = &qplqp->sq; dev_attr = &rdev->dev_attr; - sq->wqe_size = bnxt_re_get_swqe_size(); sq->max_sge = init_attr->cap.max_send_sge; - if (sq->max_sge > dev_attr->max_qp_sges) + if (sq->max_sge > dev_attr->max_qp_sges) { sq->max_sge = dev_attr->max_qp_sges; - /* - * Change the SQ depth if user has requested minimum using - * configfs. Only supported for kernel consumers - */ + init_attr->cap.max_send_sge = sq->max_sge; + } + + rc = bnxt_re_setup_swqe_size(qp, init_attr); + if (rc) + return rc; + entries = init_attr->cap.max_send_wr; /* Allocate 128 + 1 more than what's provided */ - entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1); - sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + - BNXT_QPLIB_RESERVED_QP_WRS + 1); - sq->q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; + diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ? + 0 : BNXT_QPLIB_RESERVED_QP_WRS; + entries = roundup_pow_of_two(entries + diff + 1); + sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1); + sq->q_full_delta = diff + 1; /* * Reserving one slot for Phantom WQE. Application can * post one extra entry in this case. But allowing this to avoid @@ -1111,6 +1188,8 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, qplqp->sq.q_full_delta -= 1; qplqp->sq.sg_info.pgsize = PAGE_SIZE; qplqp->sq.sg_info.pgshft = PAGE_SHIFT; + + return 0; } static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, @@ -1125,13 +1204,16 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); - qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); - qplqp->sq.q_full_delta = qplqp->sq.max_wqe - - init_attr->cap.max_send_wr; - qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ - if (qplqp->sq.max_sge > dev_attr->max_qp_sges) - qplqp->sq.max_sge = dev_attr->max_qp_sges; + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); + qplqp->sq.max_wqe = min_t(u32, entries, + dev_attr->max_qp_wqes + 1); + qplqp->sq.q_full_delta = qplqp->sq.max_wqe - + init_attr->cap.max_send_wr; + qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ + if (qplqp->sq.max_sge > dev_attr->max_qp_sges) + qplqp->sq.max_sge = dev_attr->max_qp_sges; + } } static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev, @@ -1183,6 +1265,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, goto out; } qplqp->type = (u8)qptype; + qplqp->wqe_mode = rdev->chip_ctx->modes.wqe_mode; if (init_attr->qp_type == IB_QPT_RC) { qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom; @@ -1226,7 +1309,9 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, bnxt_re_adjust_gsi_rq_attr(qp); /* Setup SQ */ - bnxt_re_init_sq_attr(qp, init_attr, udata); + rc = bnxt_re_init_sq_attr(qp, init_attr, udata); + if (rc) + goto out; if (init_attr->qp_type == IB_QPT_GSI) bnxt_re_adjust_gsi_sq_attr(qp, init_attr); @@ -1574,8 +1659,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, entries = dev_attr->max_srq_wqes + 1; srq->qplib_srq.max_wqe = entries; - srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(); srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; + srq->qplib_srq.wqe_size = + bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge); srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; @@ -3569,7 +3655,7 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, } struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index e5fbbeba6d28..1daeb30e06fd 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -136,14 +136,14 @@ struct bnxt_re_ucontext { spinlock_t sh_lock; /* protect shpg */ }; -static inline u16 bnxt_re_get_swqe_size(void) +static inline u16 bnxt_re_get_swqe_size(int nsge) { - return sizeof(struct sq_send); + return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); } -static inline u16 bnxt_re_get_rwqe_size(void) +static inline u16 bnxt_re_get_rwqe_size(int nsge) { - return sizeof(struct rq_wqe); + return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge)); } int bnxt_re_query_device(struct ib_device *ibdev, @@ -201,7 +201,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags); int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int bnxt_re_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index b12fbc857f94..dad0df8a2467 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -82,6 +82,15 @@ static void bnxt_re_remove_device(struct bnxt_re_dev *rdev); static void bnxt_re_dealloc_driver(struct ib_device *ib_dev); static void bnxt_re_stop_irq(void *handle); +static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) +{ + struct bnxt_qplib_chip_ctx *cctx; + + cctx = rdev->chip_ctx; + cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + mode : BNXT_QPLIB_WQE_MODE_STATIC; +} + static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *chip_ctx; @@ -97,7 +106,7 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) kfree(chip_ctx); } -static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) +static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) { struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; @@ -117,6 +126,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) rdev->qplib_res.cctx = rdev->chip_ctx; rdev->rcfw.res = &rdev->qplib_res; + bnxt_re_set_drv_mode(rdev, wqe_mode); return 0; } @@ -1386,7 +1396,7 @@ static void bnxt_re_worker(struct work_struct *work) schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); } -static int bnxt_re_dev_init(struct bnxt_re_dev *rdev) +static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) { struct bnxt_qplib_creq_ctx *creq; struct bnxt_re_ring_attr rattr; @@ -1406,7 +1416,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev) } set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); - rc = bnxt_re_setup_chip_ctx(rdev); + rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode); if (rc) { ibdev_err(&rdev->ibdev, "Failed to get chip context\n"); return -EINVAL; @@ -1585,7 +1595,7 @@ static void bnxt_re_remove_device(struct bnxt_re_dev *rdev) } static int bnxt_re_add_device(struct bnxt_re_dev **rdev, - struct net_device *netdev) + struct net_device *netdev, u8 wqe_mode) { int rc; @@ -1599,7 +1609,7 @@ static int bnxt_re_add_device(struct bnxt_re_dev **rdev, } pci_dev_get((*rdev)->en_dev->pdev); - rc = bnxt_re_dev_init(*rdev); + rc = bnxt_re_dev_init(*rdev, wqe_mode); if (rc) { pci_dev_put((*rdev)->en_dev->pdev); bnxt_re_dev_unreg(*rdev); @@ -1711,7 +1721,8 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, case NETDEV_REGISTER: if (rdev) break; - rc = bnxt_re_add_device(&rdev, real_dev); + rc = bnxt_re_add_device(&rdev, real_dev, + BNXT_QPLIB_WQE_MODE_STATIC); if (!rc) sch_work = true; release = false; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index c5e29577cd43..117b42349a28 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -178,11 +178,11 @@ static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res, if (qp->rq_hdr_buf) dma_free_coherent(&res->pdev->dev, - rq->hwq.max_elements * qp->rq_hdr_buf_size, + rq->max_wqe * qp->rq_hdr_buf_size, qp->rq_hdr_buf, qp->rq_hdr_buf_map); if (qp->sq_hdr_buf) dma_free_coherent(&res->pdev->dev, - sq->hwq.max_elements * qp->sq_hdr_buf_size, + sq->max_wqe * qp->sq_hdr_buf_size, qp->sq_hdr_buf, qp->sq_hdr_buf_map); qp->rq_hdr_buf = NULL; qp->sq_hdr_buf = NULL; @@ -199,10 +199,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res, struct bnxt_qplib_q *sq = &qp->sq; int rc = 0; - if (qp->sq_hdr_buf_size && sq->hwq.max_elements) { + if (qp->sq_hdr_buf_size && sq->max_wqe) { qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev, - sq->hwq.max_elements * - qp->sq_hdr_buf_size, + sq->max_wqe * qp->sq_hdr_buf_size, &qp->sq_hdr_buf_map, GFP_KERNEL); if (!qp->sq_hdr_buf) { rc = -ENOMEM; @@ -212,9 +211,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res, } } - if (qp->rq_hdr_buf_size && rq->hwq.max_elements) { + if (qp->rq_hdr_buf_size && rq->max_wqe) { qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev, - rq->hwq.max_elements * + rq->max_wqe * qp->rq_hdr_buf_size, &qp->rq_hdr_buf_map, GFP_KERNEL); @@ -661,6 +660,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->dbinfo.hwq = &srq->hwq; srq->dbinfo.xid = srq->id; srq->dbinfo.db = srq->dpi->dbr; + srq->dbinfo.max_slot = 1; srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem; if (srq->threshold) bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); @@ -784,6 +784,28 @@ done: } /* QP */ + +static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que) +{ + int rc = 0; + int indx; + + que->swq = kcalloc(que->max_wqe, sizeof(*que->swq), GFP_KERNEL); + if (!que->swq) { + rc = -ENOMEM; + goto out; + } + + que->swq_start = 0; + que->swq_last = que->max_wqe - 1; + for (indx = 0; indx < que->max_wqe; indx++) + que->swq[indx].next_idx = indx + 1; + que->swq[que->swq_last].next_idx = 0; /* Make it circular */ + que->swq_last = 0; +out: + return rc; +} + int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_hwq_attr hwq_attr = {}; @@ -808,71 +830,63 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* SQ */ hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; - hwq_attr.depth = sq->max_wqe; - hwq_attr.stride = sq->wqe_size; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(sq); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) goto exit; - sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL); - if (!sq->swq) { - rc = -ENOMEM; + rc = bnxt_qplib_alloc_init_swq(sq); + if (rc) goto fail_sq; - } + + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT); pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK); req.sq_pg_size_sq_lvl = pg_sz_lvl; + req.sq_fwo_sq_sge = + cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) << + CMDQ_CREATE_QP1_SQ_SGE_SFT); + req.scq_cid = cpu_to_le32(qp->scq->id); - if (qp->scq) - req.scq_cid = cpu_to_le32(qp->scq->id); /* RQ */ if (rq->max_wqe) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = rq->wqe_size; - hwq_attr.depth = qp->rq.max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(rq); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) - goto fail_sq; - - rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq), - GFP_KERNEL); - if (!rq->swq) { - rc = -ENOMEM; + goto sq_swq; + rc = bnxt_qplib_alloc_init_swq(rq); + if (rc) goto fail_rq; - } + req.rq_size = cpu_to_le32(rq->max_wqe); pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT); pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK); req.rq_pg_size_rq_lvl = pg_sz_lvl; - if (qp->rcq) - req.rcq_cid = cpu_to_le32(qp->rcq->id); + req.rq_fwo_rq_sge = + cpu_to_le16((rq->max_sge & + CMDQ_CREATE_QP1_RQ_SGE_MASK) << + CMDQ_CREATE_QP1_RQ_SGE_SFT); } + req.rcq_cid = cpu_to_le32(qp->rcq->id); /* Header buffer - allow hdr_buf pass in */ rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); if (rc) { rc = -ENOMEM; - goto fail; + goto rq_rwq; } qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE; req.qp_flags = cpu_to_le32(qp_flags); - req.sq_size = cpu_to_le32(sq->hwq.max_elements); - req.rq_size = cpu_to_le32(rq->hwq.max_elements); - - req.sq_fwo_sq_sge = - cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) << - CMDQ_CREATE_QP1_SQ_SGE_SFT); - req.rq_fwo_rq_sge = - cpu_to_le16((rq->max_sge & CMDQ_CREATE_QP1_RQ_SGE_MASK) << - CMDQ_CREATE_QP1_RQ_SGE_SFT); - req.pd_id = cpu_to_le32(qp->pd->id); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, @@ -886,10 +900,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sq->dbinfo.hwq = &sq->hwq; sq->dbinfo.xid = qp->id; sq->dbinfo.db = qp->dpi->dbr; + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); if (rq->max_wqe) { rq->dbinfo.hwq = &rq->hwq; rq->dbinfo.xid = qp->id; rq->dbinfo.db = qp->dpi->dbr; + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } rcfw->qp_tbl[qp->id].qp_id = qp->id; rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; @@ -898,12 +914,14 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) fail: bnxt_qplib_free_qp_hdr_buf(res, qp); +rq_rwq: + kfree(rq->swq); fail_rq: bnxt_qplib_free_hwq(res, &rq->hwq); - kfree(rq->swq); +sq_swq: + kfree(sq->swq); fail_sq: bnxt_qplib_free_hwq(res, &sq->hwq); - kfree(sq->swq); exit: return rc; } @@ -912,26 +930,18 @@ static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size) { struct bnxt_qplib_hwq *hwq; struct bnxt_qplib_q *sq; - u64 fpsne, psne, psn_pg; - u16 indx_pad = 0, indx; - u16 pg_num, pg_indx; - u64 *page; + u64 fpsne, psn_pg; + u16 indx_pad = 0; sq = &qp->sq; hwq = &sq->hwq; - - fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->max_elements, &psn_pg); + fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->depth, &psn_pg); if (!IS_ALIGNED(fpsne, PAGE_SIZE)) indx_pad = ALIGN(fpsne, PAGE_SIZE) / size; - page = (u64 *)psn_pg; - for (indx = 0; indx < hwq->max_elements; indx++) { - pg_num = (indx + indx_pad) / (PAGE_SIZE / size); - pg_indx = (indx + indx_pad) % (PAGE_SIZE / size); - psne = page[pg_num] + pg_indx * size; - sq->swq[indx].psn_ext = (struct sq_psn_search_ext *)psne; - sq->swq[indx].psn_search = (struct sq_psn_search *)psne; - } + hwq->pad_pgofft = indx_pad; + hwq->pad_pg = (u64 *)psn_pg; + hwq->pad_stride = size; } int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) @@ -944,12 +954,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) struct creq_create_qp_resp resp; int rc, req_size, psn_sz = 0; struct bnxt_qplib_hwq *xrrq; - u16 cmd_flags = 0, max_ssge; struct bnxt_qplib_pbl *pbl; struct cmdq_create_qp req; + u16 cmd_flags = 0; u32 qp_flags = 0; u8 pg_sz_lvl; - u16 max_rsge; + u16 nsge; RCFW_CMD_PREP(req, CREATE_QP, cmd_flags); @@ -967,97 +977,78 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; - hwq_attr.stride = sq->wqe_size; - hwq_attr.depth = sq->max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(sq); hwq_attr.aux_stride = psn_sz; - hwq_attr.aux_depth = hwq_attr.depth; + hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode); hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) goto exit; - sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL); - if (!sq->swq) { - rc = -ENOMEM; + rc = bnxt_qplib_alloc_init_swq(sq); + if (rc) goto fail_sq; - } if (psn_sz) bnxt_qplib_init_psn_ptr(qp, psn_sz); + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << CMDQ_CREATE_QP_SQ_PG_SIZE_SFT); pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK); req.sq_pg_size_sq_lvl = pg_sz_lvl; - - if (qp->scq) - req.scq_cid = cpu_to_le32(qp->scq->id); + req.sq_fwo_sq_sge = + cpu_to_le16(((sq->max_sge & CMDQ_CREATE_QP_SQ_SGE_MASK) << + CMDQ_CREATE_QP_SQ_SGE_SFT) | 0); + req.scq_cid = cpu_to_le32(qp->scq->id); /* RQ */ - if (rq->max_wqe) { + if (!qp->srq) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = rq->wqe_size; - hwq_attr.depth = rq->max_wqe; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(rq); hwq_attr.aux_stride = 0; hwq_attr.aux_depth = 0; hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) - goto fail_sq; - - rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq), - GFP_KERNEL); - if (!rq->swq) { - rc = -ENOMEM; + goto sq_swq; + rc = bnxt_qplib_alloc_init_swq(rq); + if (rc) goto fail_rq; - } + + req.rq_size = cpu_to_le32(rq->max_wqe); pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << CMDQ_CREATE_QP_RQ_PG_SIZE_SFT); pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK); req.rq_pg_size_rq_lvl = pg_sz_lvl; + nsge = (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + 6 : rq->max_sge; + req.rq_fwo_rq_sge = + cpu_to_le16(((nsge & + CMDQ_CREATE_QP_RQ_SGE_MASK) << + CMDQ_CREATE_QP_RQ_SGE_SFT) | 0); } else { /* SRQ */ - if (qp->srq) { - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED; - req.srq_cid = cpu_to_le32(qp->srq->id); - } + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED; + req.srq_cid = cpu_to_le32(qp->srq->id); } - - if (qp->rcq) - req.rcq_cid = cpu_to_le32(qp->rcq->id); + req.rcq_cid = cpu_to_le32(qp->rcq->id); qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE; qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; if (qp->sig_type) qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; + if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED; req.qp_flags = cpu_to_le32(qp_flags); - req.sq_size = cpu_to_le32(sq->hwq.max_elements); - req.rq_size = cpu_to_le32(rq->hwq.max_elements); - qp->sq_hdr_buf = NULL; - qp->rq_hdr_buf = NULL; - - rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); - if (rc) - goto fail_rq; - - /* CTRL-22434: Irrespective of the requested SGE count on the SQ - * always create the QP with max send sges possible if the requested - * inline size is greater than 0. - */ - max_ssge = qp->max_inline_data ? 6 : sq->max_sge; - req.sq_fwo_sq_sge = cpu_to_le16( - ((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK) - << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0); - max_rsge = bnxt_qplib_is_chip_gen_p5(res->cctx) ? 6 : rq->max_sge; - req.rq_fwo_rq_sge = cpu_to_le16( - ((max_rsge & CMDQ_CREATE_QP_RQ_SGE_MASK) - << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0); /* ORRQ and IRRQ */ if (psn_sz) { xrrq = &qp->orrq; @@ -1078,7 +1069,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.type = HWQ_TYPE_CTX; rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr); if (rc) - goto fail_buf_free; + goto rq_swq; pbl = &xrrq->pbl[PBL_LVL_0]; req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]); @@ -1113,30 +1104,29 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sq->dbinfo.hwq = &sq->hwq; sq->dbinfo.xid = qp->id; sq->dbinfo.db = qp->dpi->dbr; + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); if (rq->max_wqe) { rq->dbinfo.hwq = &rq->hwq; rq->dbinfo.xid = qp->id; rq->dbinfo.db = qp->dpi->dbr; + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } rcfw->qp_tbl[qp->id].qp_id = qp->id; rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; return 0; - fail: - if (qp->irrq.max_elements) - bnxt_qplib_free_hwq(res, &qp->irrq); + bnxt_qplib_free_hwq(res, &qp->irrq); fail_orrq: - if (qp->orrq.max_elements) - bnxt_qplib_free_hwq(res, &qp->orrq); -fail_buf_free: - bnxt_qplib_free_qp_hdr_buf(res, qp); + bnxt_qplib_free_hwq(res, &qp->orrq); +rq_swq: + kfree(rq->swq); fail_rq: bnxt_qplib_free_hwq(res, &rq->hwq); - kfree(rq->swq); +sq_swq: + kfree(sq->swq); fail_sq: bnxt_qplib_free_hwq(res, &sq->hwq); - kfree(sq->swq); exit: return rc; } @@ -1512,7 +1502,7 @@ void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp, memset(sge, 0, sizeof(*sge)); if (qp->sq_hdr_buf) { - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); + sw_prod = sq->swq_start; sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map + sw_prod * qp->sq_hdr_buf_size); sge->lkey = 0xFFFFFFFF; @@ -1526,7 +1516,7 @@ u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *rq = &qp->rq; - return HWQ_CMP(rq->hwq.prod, &rq->hwq); + return rq->swq_start; } dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index) @@ -1543,7 +1533,7 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, memset(sge, 0, sizeof(*sge)); if (qp->rq_hdr_buf) { - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); + sw_prod = rq->swq_start; sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map + sw_prod * qp->rq_hdr_buf_size); sge->lkey = 0xFFFFFFFF; @@ -1562,6 +1552,8 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, u32 flg_npsn; u32 op_spsn; + if (!swq->psn_search) + return; psns = swq->psn_search; psns_ext = swq->psn_ext; @@ -1575,12 +1567,122 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); + psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx); } else { psns->opcode_start_psn = cpu_to_le32(op_spsn); psns->flags_next_psn = cpu_to_le32(flg_npsn); } } +static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + u16 *idx) +{ + struct bnxt_qplib_hwq *hwq; + int len, t_len, offt; + bool pull_dst = true; + void *il_dst = NULL; + void *il_src = NULL; + int t_cplen, cplen; + int indx; + + hwq = &qp->sq.hwq; + t_len = 0; + for (indx = 0; indx < wqe->num_sge; indx++) { + len = wqe->sg_list[indx].size; + il_src = (void *)wqe->sg_list[indx].addr; + t_len += len; + if (t_len > qp->max_inline_data) + goto bad; + while (len) { + if (pull_dst) { + pull_dst = false; + il_dst = bnxt_qplib_get_prod_qe(hwq, *idx); + (*idx)++; + t_cplen = 0; + offt = 0; + } + cplen = min_t(int, len, sizeof(struct sq_sge)); + cplen = min_t(int, cplen, + (sizeof(struct sq_sge) - offt)); + memcpy(il_dst, il_src, cplen); + t_cplen += cplen; + il_src += cplen; + il_dst += cplen; + offt += cplen; + len -= cplen; + if (t_cplen == sizeof(struct sq_sge)) + pull_dst = true; + } + } + + return t_len; +bad: + return -ENOMEM; +} + +static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq, + struct bnxt_qplib_sge *ssge, + u16 nsge, u16 *idx) +{ + struct sq_sge *dsge; + int indx, len = 0; + + for (indx = 0; indx < nsge; indx++, (*idx)++) { + dsge = bnxt_qplib_get_prod_qe(hwq, *idx); + dsge->va_or_pa = cpu_to_le64(ssge[indx].addr); + dsge->l_key = cpu_to_le32(ssge[indx].lkey); + dsge->size = cpu_to_le32(ssge[indx].size); + len += ssge[indx].size; + } + + return len; +} + +static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + u16 *wqe_sz, u16 *qdf, u8 mode) +{ + u32 ilsize, bytes; + u16 nsge; + u16 slot; + + nsge = wqe->num_sge; + /* Adding sq_send_hdr is a misnomer, for rq also hdr size is same. */ + bytes = sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { + ilsize = bnxt_qplib_calc_ilsize(wqe, qp->max_inline_data); + bytes = ALIGN(ilsize, sizeof(struct sq_sge)); + bytes += sizeof(struct sq_send_hdr); + } + + *qdf = __xlate_qfd(qp->sq.q_full_delta, bytes); + slot = bytes >> 4; + *wqe_sz = slot; + if (mode == BNXT_QPLIB_WQE_MODE_STATIC) + slot = 8; + return slot; +} + +static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, + struct bnxt_qplib_swq *swq) +{ + struct bnxt_qplib_hwq *hwq; + u32 pg_num, pg_indx; + void *buff; + u32 tail; + + hwq = &sq->hwq; + if (!hwq->pad_pg) + return; + tail = swq->slot_idx / sq->dbinfo.max_slot; + pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride); + pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride); + buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride); + swq->psn_ext = buff; + swq->psn_search = buff; +} + void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *sq = &qp->sq; @@ -1594,88 +1696,84 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, struct bnxt_qplib_nq_work *nq_work = NULL; int i, rc = 0, data_len = 0, pkt_num = 0; struct bnxt_qplib_q *sq = &qp->sq; - struct sq_send *hw_sq_send_hdr; + struct bnxt_qplib_hwq *hwq; struct bnxt_qplib_swq *swq; bool sch_handler = false; - struct sq_sge *hw_sge; - u8 wqe_size16; + u16 wqe_sz, qdf = 0; + void *base_hdr; + void *ext_hdr; __le32 temp32; - u32 sw_prod; + u32 wqe_idx; + u32 slots; + u16 idx; - if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) { - if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { - sch_handler = true; - dev_dbg(&sq->hwq.pdev->dev, - "%s Error QP. Scheduling for poll_cq\n", - __func__); - goto queue_err; - } + hwq = &sq->hwq; + if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS && + qp->state != CMDQ_MODIFY_QP_NEW_STATE_ERR) { + dev_err(&hwq->pdev->dev, + "QPLIB: FP: QP (0x%x) is in the 0x%x state", + qp->id, qp->state); + rc = -EINVAL; + goto done; } - if (bnxt_qplib_queue_full(sq)) { - dev_err(&sq->hwq.pdev->dev, + slots = bnxt_qplib_required_slots(qp, wqe, &wqe_sz, &qdf, qp->wqe_mode); + if (bnxt_qplib_queue_full(sq, slots + qdf)) { + dev_err(&hwq->pdev->dev, "prod = %#x cons = %#x qdepth = %#x delta = %#x\n", - sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements, - sq->q_full_delta); + hwq->prod, hwq->cons, hwq->depth, sq->q_full_delta); rc = -ENOMEM; goto done; } - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); - swq = &sq->swq[sw_prod]; + + swq = bnxt_qplib_get_swqe(sq, &wqe_idx); + bnxt_qplib_pull_psn_buff(sq, swq); + + idx = 0; + swq->slot_idx = hwq->prod; + swq->slots = slots; swq->wr_id = wqe->wr_id; swq->type = wqe->type; swq->flags = wqe->flags; + swq->start_psn = sq->psn & BTH_PSN_MASK; if (qp->sig_type) swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; - swq->start_psn = sq->psn & BTH_PSN_MASK; - - hw_sq_send_hdr = bnxt_qplib_get_qe(&sq->hwq, sw_prod, NULL); - memset(hw_sq_send_hdr, 0, sq->wqe_size); - if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { - /* Copy the inline data */ - if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { - dev_warn(&sq->hwq.pdev->dev, - "Inline data length > 96 detected\n"); - data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; - } else { - data_len = wqe->inline_len; - } - memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len); - wqe_size16 = (data_len + 15) >> 4; - } else { - for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data; - i < wqe->num_sge; i++, hw_sge++) { - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); - data_len += wqe->sg_list[i].size; - } - /* Each SGE entry = 1 WQE size16 */ - wqe_size16 = wqe->num_sge; - /* HW requires wqe size has room for atleast one SGE even if - * none was supplied by ULP - */ - if (!wqe->num_sge) - wqe_size16++; + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + sch_handler = true; + dev_dbg(&hwq->pdev->dev, + "%s Error QP. Scheduling for poll_cq\n", __func__); + goto queue_err; } + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + memset(base_hdr, 0, sizeof(struct sq_sge)); + memset(ext_hdr, 0, sizeof(struct sq_sge)); + + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) + /* Copy the inline data */ + data_len = bnxt_qplib_put_inline(qp, wqe, &idx); + else + data_len = bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, + &idx); + if (data_len < 0) + goto queue_err; /* Specifics */ switch (wqe->type) { case BNXT_QPLIB_SWQE_TYPE_SEND: if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) { + struct sq_send_raweth_qp1_hdr *sqe = base_hdr; + struct sq_raw_ext_hdr *ext_sqe = ext_hdr; /* Assemble info for Raw Ethertype QPs */ - struct sq_send_raweth_qp1 *sqe = - (struct sq_send_raweth_qp1 *)hw_sq_send_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); + sqe->wqe_size = wqe_sz; sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action); sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags); sqe->length = cpu_to_le32(data_len); - sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & + ext_sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) << SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT); @@ -1685,27 +1783,24 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM: case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV: { - struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr; + struct sq_ud_ext_hdr *ext_sqe = ext_hdr; + struct sq_send_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); - sqe->inv_key_or_imm_data = cpu_to_le32( - wqe->send.inv_key); + sqe->wqe_size = wqe_sz; + sqe->inv_key_or_imm_data = cpu_to_le32(wqe->send.inv_key); if (qp->type == CMDQ_CREATE_QP_TYPE_UD || qp->type == CMDQ_CREATE_QP_TYPE_GSI) { sqe->q_key = cpu_to_le32(wqe->send.q_key); - sqe->dst_qp = cpu_to_le32( - wqe->send.dst_qp & SQ_SEND_DST_QP_MASK); sqe->length = cpu_to_le32(data_len); - sqe->avid = cpu_to_le32(wqe->send.avid & - SQ_SEND_AVID_MASK); sq->psn = (sq->psn + 1) & BTH_PSN_MASK; + ext_sqe->dst_qp = cpu_to_le32(wqe->send.dst_qp & + SQ_SEND_DST_QP_MASK); + ext_sqe->avid = cpu_to_le32(wqe->send.avid & + SQ_SEND_AVID_MASK); } else { sqe->length = cpu_to_le32(data_len); - sqe->dst_qp = 0; - sqe->avid = 0; if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1718,16 +1813,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM: case BNXT_QPLIB_SWQE_TYPE_RDMA_READ: { - struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr; + struct sq_rdma_ext_hdr *ext_sqe = ext_hdr; + struct sq_rdma_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); + sqe->wqe_size = wqe_sz; sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key); sqe->length = cpu_to_le32((u32)data_len); - sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); - sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); + ext_sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); + ext_sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1738,14 +1833,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP: case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD: { - struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr; + struct sq_atomic_ext_hdr *ext_sqe = ext_hdr; + struct sq_atomic_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; sqe->remote_key = cpu_to_le32(wqe->atomic.r_key); sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va); - sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); - sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); + ext_sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); + ext_sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1755,8 +1851,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV: { - struct sq_localinvalidate *sqe = - (struct sq_localinvalidate *)hw_sq_send_hdr; + struct sq_localinvalidate *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1766,7 +1861,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR: { - struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr; + struct sq_fr_pmr_ext_hdr *ext_sqe = ext_hdr; + struct sq_fr_pmr_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1790,14 +1886,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, wqe->frmr.pbl_ptr[i] = cpu_to_le64( wqe->frmr.page_list[i] | PTU_PTE_VALID); - sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); - sqe->va = cpu_to_le64(wqe->frmr.va); + ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); + ext_sqe->va = cpu_to_le64(wqe->frmr.va); break; } case BNXT_QPLIB_SWQE_TYPE_BIND_MW: { - struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr; + struct sq_bind_ext_hdr *ext_sqe = ext_hdr; + struct sq_bind_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1806,9 +1903,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, (wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0); sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key); sqe->l_key = cpu_to_le32(wqe->bind.r_key); - sqe->va = cpu_to_le64(wqe->bind.va); - temp32 = cpu_to_le32(wqe->bind.length); - memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length)); + ext_sqe->va = cpu_to_le64(wqe->bind.va); + ext_sqe->length_lo = cpu_to_le32(wqe->bind.length); break; } default: @@ -1817,23 +1913,11 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, goto done; } swq->next_psn = sq->psn & BTH_PSN_MASK; - if (qp->type == CMDQ_CREATE_QP_TYPE_RC) - bnxt_qplib_fill_psn_search(qp, wqe, swq); + bnxt_qplib_fill_psn_search(qp, wqe, swq); queue_err: - if (sch_handler) { - /* Store the ULP info in the software structures */ - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); - swq = &sq->swq[sw_prod]; - swq->wr_id = wqe->wr_id; - swq->type = wqe->type; - swq->flags = wqe->flags; - if (qp->sig_type) - swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; - swq->start_psn = sq->psn & BTH_PSN_MASK; - } - sq->hwq.prod++; + bnxt_qplib_swq_mod_start(sq, wqe_idx); + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); qp->wqe_cnt++; - done: if (sch_handler) { nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); @@ -1843,7 +1927,7 @@ done: INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); queue_work(qp->scq->nq->cqn_wq, &nq_work->work); } else { - dev_err(&sq->hwq.pdev->dev, + dev_err(&hwq->pdev->dev, "FP: Failed to allocate SQ nq_work!\n"); rc = -ENOMEM; } @@ -1863,58 +1947,65 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, { struct bnxt_qplib_nq_work *nq_work = NULL; struct bnxt_qplib_q *rq = &qp->rq; + struct rq_wqe_hdr *base_hdr; + struct rq_ext_hdr *ext_hdr; + struct bnxt_qplib_hwq *hwq; + struct bnxt_qplib_swq *swq; bool sch_handler = false; - struct sq_sge *hw_sge; - struct rq_wqe *rqe; - int i, rc = 0; - u32 sw_prod; + u16 wqe_sz, idx; + u32 wqe_idx; + int rc = 0; - if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { - sch_handler = true; - dev_dbg(&rq->hwq.pdev->dev, - "%s: Error QP. Scheduling for poll_cq\n", __func__); - goto queue_err; + hwq = &rq->hwq; + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { + dev_err(&hwq->pdev->dev, + "QPLIB: FP: QP (0x%x) is in the 0x%x state", + qp->id, qp->state); + rc = -EINVAL; + goto done; } - if (bnxt_qplib_queue_full(rq)) { - dev_err(&rq->hwq.pdev->dev, + + if (bnxt_qplib_queue_full(rq, rq->dbinfo.max_slot)) { + dev_err(&hwq->pdev->dev, "FP: QP (0x%x) RQ is full!\n", qp->id); rc = -EINVAL; goto done; } - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - rq->swq[sw_prod].wr_id = wqe->wr_id; - rqe = bnxt_qplib_get_qe(&rq->hwq, sw_prod, NULL); - memset(rqe, 0, rq->wqe_size); + swq = bnxt_qplib_get_swqe(rq, &wqe_idx); + swq->wr_id = wqe->wr_id; + swq->slots = rq->dbinfo.max_slot; - /* Calculate wqe_size16 and data_len */ - for (i = 0, hw_sge = (struct sq_sge *)rqe->data; - i < wqe->num_sge; i++, hw_sge++) { - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + sch_handler = true; + dev_dbg(&hwq->pdev->dev, + "%s: Error QP. Scheduling for poll_cq\n", __func__); + goto queue_err; } - rqe->wqe_type = wqe->type; - rqe->flags = wqe->flags; - rqe->wqe_size = wqe->num_sge + - ((offsetof(typeof(*rqe), data) + 15) >> 4); - /* HW requires wqe size has room for atleast one SGE even if none - * was supplied by ULP - */ - if (!wqe->num_sge) - rqe->wqe_size++; - - /* Supply the rqe->wr_id index to the wr_id_tbl for now */ - rqe->wr_id[0] = cpu_to_le32(sw_prod); + idx = 0; + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + memset(base_hdr, 0, sizeof(struct sq_sge)); + memset(ext_hdr, 0, sizeof(struct sq_sge)); + wqe_sz = (sizeof(struct rq_wqe_hdr) + + wqe->num_sge * sizeof(struct sq_sge)) >> 4; + bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, &idx); + if (!wqe->num_sge) { + struct sq_sge *sge; + + sge = bnxt_qplib_get_prod_qe(hwq, idx++); + sge->size = 0; + wqe_sz++; + } + base_hdr->wqe_type = wqe->type; + base_hdr->flags = wqe->flags; + base_hdr->wqe_size = wqe_sz; + base_hdr->wr_id[0] = cpu_to_le32(wqe_idx); queue_err: - if (sch_handler) { - /* Store the ULP info in the software structures */ - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - rq->swq[sw_prod].wr_id = wqe->wr_id; - } - - rq->hwq.prod++; + bnxt_qplib_swq_mod_start(rq, wqe_idx); + bnxt_qplib_hwq_incr_prod(hwq, swq->slots); +done: if (sch_handler) { nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); if (nq_work) { @@ -1923,12 +2014,12 @@ queue_err: INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); } else { - dev_err(&rq->hwq.pdev->dev, + dev_err(&hwq->pdev->dev, "FP: Failed to allocate RQ nq_work!\n"); rc = -ENOMEM; } } -done: + return rc; } @@ -2026,20 +2117,19 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, struct bnxt_qplib_cqe **pcqe, int *budget) { - u32 sw_prod, sw_cons; struct bnxt_qplib_cqe *cqe; + u32 start, last; int rc = 0; /* Now complete all outstanding SQEs with FLUSHED_ERR */ - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); + start = sq->swq_start; cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_cons == sw_prod) { + last = sq->swq_last; + if (start == last) break; - } /* Skip the FENCE WQE completions */ - if (sq->swq[sw_cons].wr_id == BNXT_QPLIB_FENCE_WRID) { + if (sq->swq[last].wr_id == BNXT_QPLIB_FENCE_WRID) { bnxt_qplib_cancel_phantom_processing(qp); goto skip_compl; } @@ -2047,16 +2137,17 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR; cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; - cqe->wr_id = sq->swq[sw_cons].wr_id; + cqe->wr_id = sq->swq[last].wr_id; cqe->src_qp = qp->id; - cqe->type = sq->swq[sw_cons].type; + cqe->type = sq->swq[last].type; cqe++; (*budget)--; skip_compl: - sq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots); + sq->swq_last = sq->swq[last].next_idx; } *pcqe = cqe; - if (!(*budget) && HWQ_CMP(sq->hwq.cons, &sq->hwq) != sw_prod) + if (!(*budget) && sq->swq_last != start) /* Out of budget */ rc = -EAGAIN; @@ -2067,9 +2158,9 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, struct bnxt_qplib_cqe **pcqe, int *budget) { struct bnxt_qplib_cqe *cqe; - u32 sw_prod, sw_cons; - int rc = 0; + u32 start, last; int opcode = 0; + int rc = 0; switch (qp->type) { case CMDQ_CREATE_QP1_TYPE_GSI: @@ -2085,24 +2176,25 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, } /* Flush the rest of the RQ */ - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); + start = rq->swq_start; cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(rq->hwq.cons, &rq->hwq); - if (sw_cons == sw_prod) + last = rq->swq_last; + if (last == start) break; memset(cqe, 0, sizeof(*cqe)); cqe->status = CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR; cqe->opcode = opcode; cqe->qp_handle = (unsigned long)qp; - cqe->wr_id = rq->swq[sw_cons].wr_id; + cqe->wr_id = rq->swq[last].wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots); + rq->swq_last = rq->swq[last].next_idx; } *pcqe = cqe; - if (!*budget && HWQ_CMP(rq->hwq.cons, &rq->hwq) != sw_prod) + if (!*budget && rq->swq_last != start) /* Out of budget */ rc = -EAGAIN; @@ -2125,7 +2217,7 @@ void bnxt_qplib_mark_qp_error(void *qp_handle) * CQE is track from sw_cq_cons to max_element but valid only if VALID=1 */ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, - u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons) + u32 cq_cons, u32 swq_last, u32 cqe_sq_cons) { u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx; struct bnxt_qplib_q *sq = &qp->sq; @@ -2138,7 +2230,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, /* Normal mode */ /* Check for the psn_search marking before completing */ - swq = &sq->swq[sw_sq_cons]; + swq = &sq->swq[swq_last]; if (swq->psn_search && le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) { /* Unmark */ @@ -2147,7 +2239,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, & ~0x80000000); dev_dbg(&cq->hwq.pdev->dev, "FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n", - cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); + cq_cons, qp->id, swq_last, cqe_sq_cons); sq->condition = true; sq->send_phantom = true; @@ -2184,9 +2276,10 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, le64_to_cpu (peek_req_hwcqe->qp_handle)); peek_sq = &peek_qp->sq; - peek_sq_cons_idx = HWQ_CMP(le16_to_cpu( - peek_req_hwcqe->sq_cons_idx) - 1 - , &sq->hwq); + peek_sq_cons_idx = + ((le16_to_cpu( + peek_req_hwcqe->sq_cons_idx) + - 1) % sq->max_wqe); /* If the hwcqe's sq's wr_id matches */ if (peek_sq == sq && sq->swq[peek_sq_cons_idx].wr_id == @@ -2214,7 +2307,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, } dev_err(&cq->hwq.pdev->dev, "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n", - cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); + cq_cons, qp->id, swq_last, cqe_sq_cons); rc = -EINVAL; } out: @@ -2226,11 +2319,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe **pcqe, int *budget, u32 cq_cons, struct bnxt_qplib_qp **lib_qp) { - u32 sw_sq_cons, cqe_sq_cons; struct bnxt_qplib_swq *swq; struct bnxt_qplib_cqe *cqe; struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *sq; + u32 cqe_sq_cons; int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) @@ -2242,14 +2335,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, } sq = &qp->sq; - cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq); - if (cqe_sq_cons > sq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, - "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n", - cqe_sq_cons, sq->hwq.max_elements); - return -EINVAL; - } - + cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_wqe; if (qp->sq.flushed) { dev_dbg(&cq->hwq.pdev->dev, "%s: QP in Flush QP = %p\n", __func__, qp); @@ -2261,12 +2347,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, */ cqe = *pcqe; while (*budget) { - sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_sq_cons == cqe_sq_cons) + if (sq->swq_last == cqe_sq_cons) /* Done */ break; - swq = &sq->swq[sw_sq_cons]; + swq = &sq->swq[sq->swq_last]; memset(cqe, 0, sizeof(*cqe)); cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; @@ -2280,12 +2365,12 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, * of the request being signaled or not, it must complete with * the hwcqe error status */ - if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons && + if (swq->next_idx == cqe_sq_cons && hwcqe->status != CQ_REQ_STATUS_OK) { cqe->status = hwcqe->status; dev_err(&cq->hwq.pdev->dev, "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n", - sw_sq_cons, cqe->wr_id, cqe->status); + sq->swq_last, cqe->wr_id, cqe->status); cqe++; (*budget)--; bnxt_qplib_mark_qp_error(qp); @@ -2293,7 +2378,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, bnxt_qplib_add_flush_qp(qp); } else { /* Before we complete, do WA 9060 */ - if (do_wa9060(qp, cq, cq_cons, sw_sq_cons, + if (do_wa9060(qp, cq, cq_cons, sq->swq_last, cqe_sq_cons)) { *lib_qp = qp; goto out; @@ -2305,13 +2390,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, } } skip: - sq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots); + sq->swq_last = swq->next_idx; if (sq->single) break; } out: *pcqe = cqe; - if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) { + if (sq->swq_last != cqe_sq_cons) { /* Out of budget */ rc = -EAGAIN; goto done; @@ -2386,17 +2472,23 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, (*budget)--; *pcqe = cqe; } else { + struct bnxt_qplib_swq *swq; + rq = &qp->rq; - if (wr_id_idx >= rq->hwq.max_elements) { + if (wr_id_idx > (rq->max_wqe - 1)) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n", - wr_id_idx, rq->hwq.max_elements); + wr_id_idx, rq->max_wqe); return -EINVAL; } - cqe->wr_id = rq->swq[wr_id_idx].wr_id; + if (wr_id_idx != rq->swq_last) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + rq->swq_last = swq->next_idx; *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { @@ -2467,18 +2559,24 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, (*budget)--; *pcqe = cqe; } else { + struct bnxt_qplib_swq *swq; + rq = &qp->rq; - if (wr_id_idx >= rq->hwq.max_elements) { + if (wr_id_idx > (rq->max_wqe - 1)) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n", - wr_id_idx, rq->hwq.max_elements); + wr_id_idx, rq->max_wqe); return -EINVAL; } - cqe->wr_id = rq->swq[wr_id_idx].wr_id; + if (rq->swq_last != wr_id_idx) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + rq->swq_last = swq->next_idx; *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { @@ -2569,17 +2667,23 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, (*budget)--; *pcqe = cqe; } else { + struct bnxt_qplib_swq *swq; + rq = &qp->rq; - if (wr_id_idx >= rq->hwq.max_elements) { + if (wr_id_idx > (rq->max_wqe - 1)) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n", - wr_id_idx, rq->hwq.max_elements); + wr_id_idx, rq->max_wqe); return -EINVAL; } - cqe->wr_id = rq->swq[wr_id_idx].wr_id; + if (rq->swq_last != wr_id_idx) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + rq->swq_last = swq->next_idx; *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { @@ -2601,7 +2705,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *sq, *rq; struct bnxt_qplib_cqe *cqe; - u32 sw_cons = 0, cqe_cons; + u32 swq_last = 0, cqe_cons; int rc = 0; /* Check the Status */ @@ -2627,13 +2731,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx); if (cqe_cons == 0xFFFF) goto do_rq; - - if (cqe_cons > sq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, - "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n", - cqe_cons, sq->hwq.max_elements); - goto do_rq; - } + cqe_cons %= sq->max_wqe; if (qp->sq.flushed) { dev_dbg(&cq->hwq.pdev->dev, @@ -2647,24 +2745,25 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, */ cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_cons == cqe_cons) + swq_last = sq->swq_last; + if (swq_last == cqe_cons) break; - if (sq->swq[sw_cons].flags & SQ_SEND_FLAGS_SIGNAL_COMP) { + if (sq->swq[swq_last].flags & SQ_SEND_FLAGS_SIGNAL_COMP) { memset(cqe, 0, sizeof(*cqe)); cqe->status = CQ_REQ_STATUS_OK; cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; cqe->src_qp = qp->id; - cqe->wr_id = sq->swq[sw_cons].wr_id; - cqe->type = sq->swq[sw_cons].type; + cqe->wr_id = sq->swq[swq_last].wr_id; + cqe->type = sq->swq[swq_last].type; cqe++; (*budget)--; } - sq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots); + sq->swq_last = sq->swq[swq_last].next_idx; } *pcqe = cqe; - if (!(*budget) && sw_cons != cqe_cons) { + if (!(*budget) && swq_last != cqe_cons) { /* Out of budget */ rc = -EAGAIN; goto sq_done; @@ -2676,10 +2775,10 @@ do_rq: cqe_cons = le16_to_cpu(hwcqe->rq_cons_idx); if (cqe_cons == 0xFFFF) { goto done; - } else if (cqe_cons > rq->hwq.max_elements) { + } else if (cqe_cons > rq->max_wqe - 1) { dev_err(&cq->hwq.pdev->dev, "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n", - cqe_cons, rq->hwq.max_elements); + cqe_cons, rq->max_wqe); goto done; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 568ca390322c..f50784405e27 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -39,6 +39,51 @@ #ifndef __BNXT_QPLIB_FP_H__ #define __BNXT_QPLIB_FP_H__ +/* Few helper structures temporarily defined here + * should get rid of these when roce_hsi.h is updated + * in original code base + */ +struct sq_ud_ext_hdr { + __le32 dst_qp; + __le32 avid; + __le64 rsvd; +}; + +struct sq_raw_ext_hdr { + __le32 cfa_meta; + __le32 rsvd0; + __le64 rsvd1; +}; + +struct sq_rdma_ext_hdr { + __le64 remote_va; + __le32 remote_key; + __le32 rsvd; +}; + +struct sq_atomic_ext_hdr { + __le64 swap_data; + __le64 cmp_data; +}; + +struct sq_fr_pmr_ext_hdr { + __le64 pblptr; + __le64 va; +}; + +struct sq_bind_ext_hdr { + __le64 va; + __le32 length_lo; + __le32 length_hi; +}; + +struct rq_ext_hdr { + __le64 rsvd1; + __le64 rsvd2; +}; + +/* Helper structures end */ + struct bnxt_qplib_srq { struct bnxt_qplib_pd *pd; struct bnxt_qplib_dpi *dpi; @@ -74,6 +119,8 @@ struct bnxt_qplib_swq { u8 flags; u32 start_psn; u32 next_psn; + u32 slot_idx; + u8 slots; struct sq_psn_search *psn_search; struct sq_psn_search_ext *psn_ext; }; @@ -213,6 +260,8 @@ struct bnxt_qplib_q { u32 phantom_cqe_cnt; u32 next_cq_cons; bool flushed; + u32 swq_start; + u32 swq_last; }; struct bnxt_qplib_qp { @@ -224,9 +273,10 @@ struct bnxt_qplib_qp { u32 id; u8 type; u8 sig_type; - u32 modify_flags; + u8 wqe_mode; u8 state; u8 cur_qp_state; + u64 modify_flags; u32 max_inline_data; u32 mtu; u8 path_mtu; @@ -300,11 +350,18 @@ struct bnxt_qplib_qp { (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \ !((raw_cons) & (cp_bit))) -static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q) +static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que, + u8 slots) { - return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta), - &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons, - &qplib_q->hwq); + struct bnxt_qplib_hwq *hwq; + int avail; + + hwq = &que->hwq; + /* False full is possible, retrying post-send makes sense */ + avail = hwq->cons - hwq->prod; + if (hwq->cons <= hwq->prod) + avail += hwq->depth; + return avail <= slots; } struct bnxt_qplib_cqe { @@ -489,4 +546,64 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num_cqes); void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp); + +static inline void *bnxt_qplib_get_swqe(struct bnxt_qplib_q *que, u32 *swq_idx) +{ + u32 idx; + + idx = que->swq_start; + if (swq_idx) + *swq_idx = idx; + return &que->swq[idx]; +} + +static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx) +{ + que->swq_start = que->swq[idx].next_idx; +} + +static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que) +{ + return (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge); +} + +static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode) +{ + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + que->max_wqe : bnxt_qplib_get_depth(que); +} + +static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode) +{ + return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + sizeof(struct sq_send) / sizeof(struct sq_sge) : 1; +} + +static inline u32 bnxt_qplib_set_rq_max_slot(u32 wqe_size) +{ + return (wqe_size / sizeof(struct sq_sge)); +} + +static inline u16 __xlate_qfd(u16 delta, u16 wqe_bytes) +{ + /* For Cu/Wh delta = 128, stride = 16, wqe_bytes = 128 + * For Gen-p5 B/C mode delta = 0, stride = 16, wqe_bytes = 128. + * For Gen-p5 delta = 0, stride = 16, 32 <= wqe_bytes <= 512. + * when 8916 is disabled. + */ + return (delta * wqe_bytes) / sizeof(struct sq_sge); +} + +static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max) +{ + u16 size = 0; + int indx; + + for (indx = 0; indx < wqe->num_sge; indx++) + size += wqe->sg_list[indx].size; + if (size > max) + size = max; + + return size; +} #endif /* __BNXT_QPLIB_FP_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index c29cbd3a2d7b..9da470d1e4a3 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -41,6 +41,28 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; +#define CHIP_NUM_57508 0x1750 +#define CHIP_NUM_57504 0x1751 +#define CHIP_NUM_57502 0x1752 + +enum bnxt_qplib_wqe_mode { + BNXT_QPLIB_WQE_MODE_STATIC = 0x00, + BNXT_QPLIB_WQE_MODE_VARIABLE = 0x01, + BNXT_QPLIB_WQE_MODE_INVALID = 0x02 +}; + +struct bnxt_qplib_drv_modes { + u8 wqe_mode; + /* Other modes to follow here */ +}; + +struct bnxt_qplib_chip_ctx { + u16 chip_num; + u8 chip_rev; + u8 chip_metal; + struct bnxt_qplib_drv_modes modes; +}; + #define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *)) #define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1) #define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG) @@ -141,6 +163,9 @@ struct bnxt_qplib_hwq { u32 cons; /* raw */ u8 cp_bit; u8 is_user; + u64 *pad_pg; + u32 pad_stride; + u32 pad_pgofft; }; struct bnxt_qplib_db_info { @@ -148,6 +173,7 @@ struct bnxt_qplib_db_info { void __iomem *priv_db; struct bnxt_qplib_hwq *hwq; u32 xid; + u32 max_slot; }; /* Tables */ @@ -230,16 +256,6 @@ struct bnxt_qplib_ctx { u64 hwrm_intf_ver; }; -struct bnxt_qplib_chip_ctx { - u16 chip_num; - u8 chip_rev; - u8 chip_metal; -}; - -#define CHIP_NUM_57508 0x1750 -#define CHIP_NUM_57504 0x1751 -#define CHIP_NUM_57502 0x1752 - struct bnxt_qplib_res { struct pci_dev *pdev; struct bnxt_qplib_chip_ctx *cctx; @@ -317,6 +333,14 @@ static inline void *bnxt_qplib_get_qe(struct bnxt_qplib_hwq *hwq, return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx); } +static inline void *bnxt_qplib_get_prod_qe(struct bnxt_qplib_hwq *hwq, u32 idx) +{ + idx += hwq->prod; + if (idx >= hwq->depth) + idx -= hwq->depth; + return bnxt_qplib_get_qe(hwq, idx, NULL); +} + #define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) @@ -351,6 +375,17 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx, bool virt_fn, bool is_p5); +static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt) +{ + hwq->prod = (hwq->prod + cnt) % hwq->depth; +} + +static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq, + u32 cnt) +{ + hwq->cons = (hwq->cons + cnt) % hwq->depth; +} + static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info, bool arm) { @@ -383,8 +418,7 @@ static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info, key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; key <<= 32; - key |= (info->hwq->prod & (info->hwq->max_elements - 1)) & - DBC_DBC_INDEX_MASK; + key |= ((info->hwq->prod / info->max_slot)) & DBC_DBC_INDEX_MASK; writeq(key, info->db); } diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 6f00f07420b7..3e40e0d76efd 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -1126,6 +1126,7 @@ struct cmdq_create_qp { #define CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION 0x2UL #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL + #define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL u8 type; #define CMDQ_CREATE_QP_TYPE_RC 0x2UL #define CMDQ_CREATE_QP_TYPE_UD 0x4UL diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index e8e11bd95e42..2b2b009b371a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -980,7 +980,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); void c4iw_qp_add_ref(struct ib_qp *qp); void c4iw_qp_rem_ref(struct ib_qp *qp); struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); @@ -1053,8 +1053,9 @@ int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); -typedef int c4iw_restrack_func(struct sk_buff *msg, - struct rdma_restrack_entry *res); -extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX]; +int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr); +int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq); +int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp); +int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, struct rdma_cm_id *cm_id); #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 962dc97a8ff2..73936c3341b7 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -399,7 +399,6 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; mhp->ibmr.length = mhp->attr.len; - mhp->ibmr.iova = mhp->attr.va_fbo; mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12); pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); @@ -691,7 +690,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw) } struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct c4iw_dev *rhp; struct c4iw_pd *php; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index ba83d942997c..6c579d2d3997 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -236,14 +236,6 @@ static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) return 0; } -static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) -{ - pr_debug("ibdev %p\n", ibdev); - *pkey = 0; - return 0; -} - static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { @@ -317,7 +309,6 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; props->max_msg_sz = -1; return ret; @@ -439,7 +430,6 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; return 0; @@ -458,13 +448,6 @@ static void get_dev_fw_str(struct ib_device *dev, char *str) FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); } -static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) -{ - return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) && - c4iw_restrack_funcs[res->type]) ? - c4iw_restrack_funcs[res->type](msg, res) : 0; -} - static const struct ib_device_ops c4iw_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_CXGB4, @@ -485,7 +468,9 @@ static const struct ib_device_ops c4iw_dev_ops = { .destroy_cq = c4iw_destroy_cq, .destroy_qp = c4iw_destroy_qp, .destroy_srq = c4iw_destroy_srq, - .fill_res_entry = fill_res_entry, + .fill_res_cq_entry = c4iw_fill_res_cq_entry, + .fill_res_cm_id_entry = c4iw_fill_res_cm_id_entry, + .fill_res_mr_entry = c4iw_fill_res_mr_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = c4iw_get_dma_mr, .get_hw_stats = c4iw_get_mib, @@ -508,7 +493,6 @@ static const struct ib_device_ops c4iw_dev_ops = { .post_srq_recv = c4iw_post_srq_recv, .query_device = c4iw_query_device, .query_gid = c4iw_query_gid, - .query_pkey = c4iw_query_pkey, .query_port = c4iw_query_port, .query_qp = c4iw_ib_query_qp, .reg_user_mr = c4iw_reg_user_mr, diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c index f82d46ed969d..b32e6516d65f 100644 --- a/drivers/infiniband/hw/cxgb4/restrack.c +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -134,10 +134,8 @@ err: return -EMSGSIZE; } -static int fill_res_qp_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp) { - struct ib_qp *ibqp = container_of(res, struct ib_qp, res); struct t4_swsqe *fsp = NULL, *lsp = NULL; struct c4iw_qp *qhp = to_c4iw_qp(ibqp); u16 first_sq_idx = 0, last_sq_idx = 0; @@ -195,10 +193,9 @@ union union_ep { struct c4iw_ep ep; }; -static int fill_res_ep_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, + struct rdma_cm_id *cm_id) { - struct rdma_cm_id *cm_id = rdma_res_to_id(res); struct nlattr *table_attr; struct c4iw_ep_common *epcp; struct c4iw_listen_ep *listen_ep = NULL; @@ -372,10 +369,8 @@ err: return -EMSGSIZE; } -static int fill_res_cq_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq) { - struct ib_cq *ibcq = container_of(res, struct ib_cq, res); struct c4iw_cq *chp = to_c4iw_cq(ibcq); struct nlattr *table_attr; struct t4_cqe hwcqes[2]; @@ -433,10 +428,8 @@ err: return -EMSGSIZE; } -static int fill_res_mr_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) { - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); struct c4iw_mr *mhp = to_c4iw_mr(ibmr); struct c4iw_dev *dev = mhp->rhp; u32 stag = mhp->attr.stag; @@ -492,10 +485,3 @@ err_cancel_table: err: return -EMSGSIZE; } - -c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = { - [RDMA_RESTRACK_QP] = fill_res_qp_entry, - [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry, - [RDMA_RESTRACK_CQ] = fill_res_cq_entry, - [RDMA_RESTRACK_MR] = fill_res_mr_entry, -}; diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index bef2bd291054..5484b08bbc5d 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -606,8 +606,8 @@ struct efa_admin_feature_queue_attr_desc { /* Number of sub-CQs to be created for each CQ */ u16 sub_cqs_per_cq; - /* MBZ */ - u16 reserved; + /* Minimum number of WQEs per SQ */ + u16 min_sq_depth; /* Maximum number of SGEs (buffers) allowed for a single send WQE */ u16 max_wr_send_sges; @@ -632,6 +632,17 @@ struct efa_admin_feature_queue_attr_desc { /* Maximum number of SGEs for a single RDMA read WQE */ u16 max_wr_rdma_sges; + + /* + * Maximum number of bytes that can be written to SQ between two + * consecutive doorbells (in units of 64B). Driver must ensure that only + * complete WQEs are written to queue before issuing a doorbell. + * Examples: max_tx_batch=16 and WQE size = 64B, means up to 16 WQEs can + * be written to SQ between two consecutive doorbells. max_tx_batch=11 + * and WQE size = 128B, means up to 5 WQEs can be written to SQ between + * two consecutive doorbells. Zero means unlimited. + */ + u16 max_tx_batch; }; struct efa_admin_feature_aenq_desc { diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index fabd8df2e78f..6ac23627f65a 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -480,6 +480,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->max_llq_size = resp.u.queue_attr.max_llq_size; result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges; + result->max_tx_batch = resp.u.queue_attr.max_tx_batch; + result->min_sq_depth = resp.u.queue_attr.min_sq_depth; err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); if (err) { diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 41ce4a476ee6..190bac23f585 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -127,6 +127,8 @@ struct efa_com_get_device_attr_result { u16 max_sq_sge; u16 max_rq_sge; u16 max_wr_rdma_sge; + u16 max_tx_batch; + u16 min_sq_depth; u8 db_bar; }; diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 82145574c928..92d701146320 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -12,10 +12,12 @@ #include "efa.h" -#define PCI_DEV_ID_EFA_VF 0xefa0 +#define PCI_DEV_ID_EFA0_VF 0xefa0 +#define PCI_DEV_ID_EFA1_VF 0xefa1 static const struct pci_device_id efa_pci_tbl[] = { - { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) }, { } }; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 7dd082441333..9e201f169289 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1502,11 +1502,39 @@ static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn) return efa_com_dealloc_uar(&dev->edev, ¶ms); } +#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \ + (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \ + NULL : #_attr) + +static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext, + const struct efa_ibv_alloc_ucontext_cmd *cmd) +{ + struct efa_dev *dev = to_edev(ibucontext->device); + char *attr_str; + + if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch, + EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str)) + goto err; + + if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth, + EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR, + attr_str)) + goto err; + + return 0; + +err: + ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n", + attr_str); + return -EOPNOTSUPP; +} + int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) { struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); struct efa_ibv_alloc_ucontext_resp resp = {}; + struct efa_ibv_alloc_ucontext_cmd cmd = {}; struct efa_com_alloc_uar_result result; int err; @@ -1515,6 +1543,18 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) * we will ack input fields in our response. */ + err = ib_copy_from_udata(&cmd, udata, + min(sizeof(cmd), udata->inlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Cannot copy udata for alloc_ucontext\n"); + goto err_out; + } + + err = efa_user_comp_handshake(ibucontext, &cmd); + if (err) + goto err_out; + err = efa_com_alloc_uar(&dev->edev, &result); if (err) goto err_out; @@ -1526,6 +1566,8 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq; resp.inline_buf_size = dev->dev_attr.inline_buf_size; resp.max_llq_size = dev->dev_attr.max_llq_size; + resp.max_tx_batch = dev->dev_attr.max_tx_batch; + resp.min_sq_wr = dev->dev_attr.min_sq_depth; if (udata && udata->outlen) { err = ib_copy_to_udata(udata, &resp, diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 15f9c635f292..7eaf99538216 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7317,11 +7317,11 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width) case 1: return OPA_LINK_WIDTH_1X; case 2: return OPA_LINK_WIDTH_2X; case 3: return OPA_LINK_WIDTH_3X; + case 4: return OPA_LINK_WIDTH_4X; default: dd_dev_info(dd, "%s: invalid width %d, using 4\n", __func__, width); - /* fall through */ - case 4: return OPA_LINK_WIDTH_4X; + return OPA_LINK_WIDTH_4X; } } @@ -7376,12 +7376,13 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width, case 0: dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G; break; + case 1: + dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G; + break; default: dd_dev_err(dd, "%s: unexpected max rate %d, using 25Gb\n", __func__, (int)max_rate); - /* fall through */ - case 1: dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G; break; } @@ -12878,11 +12879,6 @@ bail: static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate) { switch (chip_lstate) { - default: - dd_dev_err(dd, - "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n", - chip_lstate); - /* fall through */ case LSTATE_DOWN: return IB_PORT_DOWN; case LSTATE_INIT: @@ -12891,6 +12887,11 @@ static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate) return IB_PORT_ARMED; case LSTATE_ACTIVE: return IB_PORT_ACTIVE; + default: + dd_dev_err(dd, + "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n", + chip_lstate); + return IB_PORT_DOWN; } } @@ -12898,10 +12899,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate) { /* look at the HFI meta-states only */ switch (chip_pstate & 0xf0) { - default: - dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n", - chip_pstate); - /* fall through */ case PLS_DISABLED: return IB_PORTPHYSSTATE_DISABLED; case PLS_OFFLINE: @@ -12914,6 +12911,10 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate) return IB_PORTPHYSSTATE_LINKUP; case PLS_PHYTEST: return IB_PORTPHYSSTATE_PHY_TEST; + default: + dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n", + chip_pstate); + return IB_PORTPHYSSTATE_DISABLED; } } diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 2b57ba70ddd6..0e83d4b61e46 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1868,11 +1868,8 @@ int parse_platform_config(struct hfi1_devdata *dd) 2; break; case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: pcfgcache->config_tables[table_type].num_table = table_length_dwords; @@ -1890,15 +1887,10 @@ int parse_platform_config(struct hfi1_devdata *dd) /* metadata table */ switch (table_type) { case PLATFORM_CONFIG_SYSTEM_TABLE: - /* fall through */ case PLATFORM_CONFIG_PORT_TABLE: - /* fall through */ case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: break; default: @@ -2027,15 +2019,10 @@ static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table, switch (table) { case PLATFORM_CONFIG_SYSTEM_TABLE: - /* fall through */ case PLATFORM_CONFIG_PORT_TABLE: - /* fall through */ case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: if (field && field < platform_config_table_limits[table]) src_ptr = @@ -2138,11 +2125,8 @@ int get_platform_config_field(struct hfi1_devdata *dd, pcfgcache->config_tables[table_type].table; break; case PLATFORM_CONFIG_RX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_TX_PRESET_TABLE: - /* fall through */ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: - /* fall through */ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: src_ptr = pcfgcache->config_tables[table_type].table; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 7073f237a949..3222e3acb79c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -721,7 +721,7 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, /* Bad mkey not a violation below level 2 */ if (ibp->rvp.mkeyprot < 2) break; - /* fall through */ + fallthrough; case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_TRAP_REPRESS: if (ibp->rvp.mkey_violations != 0xFFFF) @@ -1272,7 +1272,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, case IB_PORT_NOP: if (phys_state == IB_PORTPHYSSTATE_NOP) break; - /* FALLTHROUGH */ + fallthrough; case IB_PORT_DOWN: if (phys_state == IB_PORTPHYSSTATE_NOP) { link_state = HLS_DN_DOWNDEF; @@ -2300,7 +2300,6 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, * can be changed from the default values */ case OPA_VLARB_PREEMPT_ELEMENTS: - /* FALLTHROUGH */ case OPA_VLARB_PREEMPT_MATRIX: smp->status |= IB_SMP_UNSUP_METH_ATTR; break; @@ -4170,7 +4169,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; if (ibp->rvp.port_cap_flags & IB_PORT_SM) return IB_MAD_RESULT_SUCCESS; - /* FALLTHROUGH */ + fallthrough; default: smp->status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_mad_hdr *)smp); @@ -4240,7 +4239,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; if (ibp->rvp.port_cap_flags & IB_PORT_SM) return IB_MAD_RESULT_SUCCESS; - /* FALLTHROUGH */ + fallthrough; default: smp->status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_mad_hdr *)smp); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 1a6268d61977..18d32f053d26 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -306,7 +306,7 @@ int pcie_speeds(struct hfi1_devdata *dd) ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap); if (ret) { dd_dev_err(dd, "Unable to read from PCI config\n"); - return ret; + return pcibios_err_to_errno(ret); } if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) { @@ -334,10 +334,14 @@ int pcie_speeds(struct hfi1_devdata *dd) return 0; } -/* restore command and BARs after a reset has wiped them out */ +/** + * Restore command and BARs after a reset has wiped them out + * + * Returns 0 on success, otherwise a negative error value + */ int restore_pci_variables(struct hfi1_devdata *dd) { - int ret = 0; + int ret; ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command); if (ret) @@ -386,13 +390,17 @@ int restore_pci_variables(struct hfi1_devdata *dd) error: dd_dev_err(dd, "Unable to write to PCI config\n"); - return ret; + return pcibios_err_to_errno(ret); } -/* Save BARs and command to rewrite after device reset */ +/** + * Save BARs and command to rewrite after device reset + * + * Returns 0 on success, otherwise a negative error value + */ int save_pci_variables(struct hfi1_devdata *dd) { - int ret = 0; + int ret; ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); @@ -441,7 +449,7 @@ int save_pci_variables(struct hfi1_devdata *dd) error: dd_dev_err(dd, "Unable to read from PCI config\n"); - return ret; + return pcibios_err_to_errno(ret); } /* diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 79126b2b14ab..ff864f6f0266 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -86,7 +86,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op) switch (op) { case PSC_GLOBAL_ENABLE: reg |= SEND_CTRL_SEND_ENABLE_SMASK; - /* Fall through */ + fallthrough; case PSC_DATA_VL_ENABLE: mask = 0; for (i = 0; i < ARRAY_SIZE(dd->vld); i++) diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 03024cec78dd..b12e4665c9ab 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -191,22 +191,22 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) switch (n) { case 7: *dest++ = *src++; - /* fall through */ + fallthrough; case 6: *dest++ = *src++; - /* fall through */ + fallthrough; case 5: *dest++ = *src++; - /* fall through */ + fallthrough; case 4: *dest++ = *src++; - /* fall through */ + fallthrough; case 3: *dest++ = *src++; - /* fall through */ + fallthrough; case 2: *dest++ = *src++; - /* fall through */ + fallthrough; case 1: *dest++ = *src++; /* fall through */ diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 36593f2efe26..4642d6ceb890 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -668,8 +668,8 @@ static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd) /* active optical cables only */ switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) { - case 0x0 ... 0x9: /* fallthrough */ - case 0xC: /* fallthrough */ + case 0x0 ... 0x9: fallthrough; + case 0xC: fallthrough; case 0xE: /* active AOC */ power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); @@ -899,8 +899,8 @@ static int tune_qsfp(struct hfi1_pportdata *ppd, *ptr_tuning_method = OPA_PASSIVE_TUNING; break; - case 0x0 ... 0x9: /* fallthrough */ - case 0xC: /* fallthrough */ + case 0x0 ... 0x9: fallthrough; + case 0xC: fallthrough; case 0xE: ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset, ptr_total_atten); @@ -909,7 +909,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd, *ptr_tuning_method = OPA_ACTIVE_TUNING; break; - case 0xD: /* fallthrough */ + case 0xD: fallthrough; case 0xF: default: dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n", diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index be62284e42d9..356518e17fa6 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -312,7 +312,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) switch (qp->ibqp.qp_type) { case IB_QPT_RC: hfi1_setup_tid_rdma_wqe(qp, wqe); - /* fall through */ + fallthrough; case IB_QPT_UC: if (wqe->length > 0x80000000U) return -EINVAL; diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index b670321365d3..b0d053d12129 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -113,20 +113,6 @@ static inline void clear_ahg(struct rvt_qp *qp) } /** - * hfi1_create_qp - create a queue pair for a device - * @ibpd: the protection domain who's device we create the queue pair for - * @init_attr: the attributes of the queue pair - * @udata: user data for libibverbs.so - * - * Returns the queue pair on success, otherwise returns an errno. - * - * Called by the ib_create_qp() core verbs function. - */ -struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); - -/** * hfi1_qp_wakeup - wake up on the indicated event * @qp: the QP * @flag: flag the qp on which the qp is stalled diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index b5966991d647..8386c84c2d92 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -231,7 +231,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c, break; case 2: offset_bytes[1] = (offset >> 8) & 0xff; - /* fall through */ + fallthrough; case 1: num_msgs = 2; offset_bytes[0] = offset & 0xff; @@ -279,7 +279,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus, break; case 2: offset_bytes[1] = (offset >> 8) & 0xff; - /* fall through */ + fallthrough; case 1: num_msgs = 2; offset_bytes[0] = offset & 0xff; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index f1734e5e9ac4..1bb5f57152d3 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -141,7 +141,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; release_rdma_sge_mr(e); - /* FALLTHROUGH */ + fallthrough; case OP(ATOMIC_ACKNOWLEDGE): /* * We can increment the tail pointer now that the last @@ -160,7 +160,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, qp->s_acked_ack_queue = next; qp->s_tail_ack_queue = next; trace_hfi1_rsp_make_rc_ack(qp, e->psn); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_ONLY): case OP(ACKNOWLEDGE): /* Check for no next entry in the queue. */ @@ -267,7 +267,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, case OP(RDMA_READ_RESPONSE_FIRST): qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_READ_RESPONSE_MIDDLE): ps->s_txreq->ss = &qp->s_ack_rdma_sge; ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr; @@ -881,8 +881,7 @@ no_flow_control: goto bail; } qp->s_num_rd_atomic++; - - /* FALLTHROUGH */ + fallthrough; case IB_WR_OPFN: if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; @@ -946,10 +945,10 @@ no_flow_control: * See restart_rc(). */ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_FIRST): qp->s_state = OP(SEND_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): bth2 = mask_psn(qp->s_psn++); ss = &qp->s_sge; @@ -991,10 +990,10 @@ no_flow_control: * See restart_rc(). */ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_FIRST): qp->s_state = OP(RDMA_WRITE_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_MIDDLE): bth2 = mask_psn(qp->s_psn++); ss = &qp->s_sge; @@ -2901,7 +2900,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (!ret) goto rnr_nak; qp->r_rcv_len = 0; - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): case OP(RDMA_WRITE_MIDDLE): send_middle: @@ -2941,7 +2940,7 @@ send_middle: goto no_immediate_data; if (opcode == OP(SEND_ONLY_WITH_INVALIDATE)) goto send_last_inv; - /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */ + fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; @@ -2957,7 +2956,7 @@ send_last_inv: goto send_last; case OP(RDMA_WRITE_LAST): copy_last = rvt_is_user_qp(qp); - /* fall through */ + fallthrough; case OP(SEND_LAST): no_immediate_data: wc.wc_flags = 0; @@ -3010,7 +3009,7 @@ send_last: case OP(RDMA_WRITE_ONLY): copy_last = rvt_is_user_qp(qp); - /* fall through */ + fallthrough; case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index c93ea021cf49..04575c9afd61 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -2584,7 +2584,7 @@ static void __sdma_process_event(struct sdma_engine *sde, * 7220, e.g. */ ss->go_s99_running = 1; - /* fall through -- and start dma engine */ + fallthrough; /* and start dma engine */ case sdma_event_e10_go_hw_start: /* This reference means the state machine is started */ sdma_get(&sde->state); @@ -2726,7 +2726,6 @@ static void __sdma_process_event(struct sdma_engine *sde, case sdma_event_e70_go_idle: break; case sdma_event_e85_link_down: - /* fall through */ case sdma_event_e80_hw_freeze: sdma_set_state(sde, sdma_state_s80_hw_freeze); atomic_dec(&sde->dd->sdma_unfreeze_count); @@ -3007,7 +3006,7 @@ static void __sdma_process_event(struct sdma_engine *sde, case sdma_event_e60_hw_halted: need_progress = 1; sdma_err_progress_check_schedule(sde); - /* fall through */ + fallthrough; case sdma_event_e90_sw_halted: /* * SW initiated halt does not perform engines @@ -3021,7 +3020,7 @@ static void __sdma_process_event(struct sdma_engine *sde, break; case sdma_event_e85_link_down: ss->go_s99_running = 0; - /* fall through */ + fallthrough; case sdma_event_e80_hw_freeze: sdma_set_state(sde, sdma_state_s80_hw_freeze); atomic_dec(&sde->dd->sdma_unfreeze_count); @@ -3252,7 +3251,7 @@ void _sdma_txreq_ahgadd( tx->num_desc++; tx->descs[2].qw[0] = 0; tx->descs[2].qw[1] = 0; - /* FALLTHROUGH */ + fallthrough; case SDMA_AHG_APPLY_UPDATE2: tx->num_desc++; tx->descs[1].qw[0] = 0; diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index facff133139a..9af82ff933d7 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -3227,7 +3227,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) case IB_WR_RDMA_READ: if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE) break; - /* fall through */ + fallthrough; case IB_WR_TID_RDMA_READ: switch (prev->wr.opcode) { case IB_WR_RDMA_READ: @@ -5067,7 +5067,7 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (priv->s_state == TID_OP(WRITE_REQ)) hfi1_tid_rdma_restart_req(qp, wqe, &bth2); priv->s_state = TID_OP(WRITE_DATA); - /* fall through */ + fallthrough; case TID_OP(WRITE_DATA): /* diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 0c77f18120ed..1fb918399da0 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -216,7 +216,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) case OP(SEND_FIRST): qp->s_state = OP(SEND_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): len = qp->s_len; if (len > pmtu) { @@ -241,7 +241,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) case OP(RDMA_WRITE_FIRST): qp->s_state = OP(RDMA_WRITE_MIDDLE); - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_MIDDLE): len = qp->s_len; if (len > pmtu) { @@ -414,7 +414,7 @@ send_first: goto no_immediate_data; else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) goto send_last_imm; - /* FALLTHROUGH */ + fallthrough; case OP(SEND_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ /* @@ -515,7 +515,7 @@ rdma_first: wc.ex.imm_data = ohdr->u.rc.imm_data; goto rdma_last_imm; } - /* FALLTHROUGH */ + fallthrough; case OP(RDMA_WRITE_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ if (unlikely(tlen != (hdrsize + pmtu + 4))) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 479fa557993e..da9888deff8c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -37,9 +37,8 @@ #define DRV_NAME "hns_roce" -/* hip08 is a pci device, it includes two version according pci version id */ -#define PCI_REVISION_ID_HIP08_A 0x20 -#define PCI_REVISION_ID_HIP08_B 0x21 +/* hip08 is a pci device */ +#define PCI_REVISION_ID_HIP08 0x21 #define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') @@ -348,20 +347,22 @@ struct hns_roce_buf_attr { bool mtt_only; /* only alloc buffer-required MTT memory */ }; +struct hns_roce_hem_cfg { + dma_addr_t root_ba; /* root BA table's address */ + bool is_direct; /* addressing without BA table */ + unsigned int ba_pg_shift; /* BA table page shift */ + unsigned int buf_pg_shift; /* buffer page shift */ + unsigned int buf_pg_count; /* buffer page count */ + struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; + int region_count; +}; + /* memory translate region */ struct hns_roce_mtr { struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */ struct ib_umem *umem; /* user space buffer */ struct hns_roce_buf *kmem; /* kernel space buffer */ - struct { - dma_addr_t root_ba; /* root BA table's address */ - bool is_direct; /* addressing without BA table */ - unsigned int ba_pg_shift; /* BA table page shift */ - unsigned int buf_pg_shift; /* buffer page shift */ - int buf_pg_count; /* buffer page count */ - struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; - unsigned int region_count; - } hem_cfg; /* config for hardware addressing */ + struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */ }; struct hns_roce_mw { @@ -1192,7 +1193,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); @@ -1267,6 +1268,6 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); -int hns_roce_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res); +int hns_roce_fill_res_cq_entry(struct sk_buff *msg, + struct ib_cq *ib_cq); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index cf39f560b800..07b4c85d341d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2483,7 +2483,6 @@ static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba) { struct ib_device *ibdev = &hr_dev->ib_dev; - int rq_pa_start; int count; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba); @@ -2491,9 +2490,9 @@ static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ibdev_err(ibdev, "Failed to find SQ ba\n"); return -ENOBUFS; } - rq_pa_start = hr_qp->rq.offset >> hr_qp->mtr.hem_cfg.buf_pg_shift; - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, rq_pa_start, rq_ba, 1, - NULL); + + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, rq_ba, + 1, NULL); if (!count) { ibdev_err(ibdev, "Failed to find RQ ba\n"); return -ENOBUFS; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0618ced45bf8..d2968594664b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -91,10 +91,11 @@ static u32 to_hr_opcode(u32 ib_opcode) } static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, - void *wqe, const struct ib_reg_wr *wr) + const struct ib_reg_wr *wr) { + struct hns_roce_wqe_frmr_seg *fseg = + (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); struct hns_roce_mr *mr = to_hr_mr(wr->mr); - struct hns_roce_wqe_frmr_seg *fseg = wqe; u64 pbl_ba; /* use ib_access_flags */ @@ -128,14 +129,16 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); } -static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe, +static void set_atomic_seg(const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, unsigned int valid_num_sge) { - struct hns_roce_wqe_atomic_seg *aseg; + struct hns_roce_v2_wqe_data_seg *dseg = + (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); + struct hns_roce_wqe_atomic_seg *aseg = + (void *)dseg + sizeof(struct hns_roce_v2_wqe_data_seg); - set_data_seg_v2(wqe, wr->sg_list); - aseg = wqe + sizeof(struct hns_roce_v2_wqe_data_seg); + set_data_seg_v2(dseg, wr->sg_list); if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap); @@ -143,7 +146,7 @@ static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe, } else { aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->compare_add); - aseg->cmp_data = 0; + aseg->cmp_data = 0; } roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, @@ -176,13 +179,15 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, - void *wqe, unsigned int *sge_ind, + unsigned int *sge_ind, unsigned int valid_num_sge) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_v2_wqe_data_seg *dseg = wqe; + struct hns_roce_v2_wqe_data_seg *dseg = + (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_qp *qp = to_hr_qp(ibqp); + void *wqe = dseg; int j = 0; int i; @@ -438,7 +443,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit); - wqe += sizeof(struct hns_roce_v2_rc_send_wqe); switch (wr->opcode) { case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: @@ -451,7 +455,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); break; case IB_WR_REG_MR: - set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr)); + set_frmr_seg(rc_sq_wqe, reg_wr(wr)); break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: @@ -468,10 +472,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - set_atomic_seg(wr, wqe, rc_sq_wqe, valid_num_sge); + set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); else if (wr->opcode != IB_WR_REG_MR) ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, - wqe, &curr_idx, valid_num_sge); + &curr_idx, valid_num_sge); *sge_idx = curr_idx; @@ -1510,8 +1514,6 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) req_a = (struct hns_roce_vf_res_a *)desc[0].data; req_b = (struct hns_roce_vf_res_b *)desc[1].data; - memset(req_a, 0, sizeof(*req_a)); - memset(req_b, 0, sizeof(*req_b)); for (i = 0; i < 2; i++) { hns_roce_cmq_setup_basic_desc(&desc[i], HNS_ROCE_OPC_ALLOC_VF_RES, false); @@ -1744,27 +1746,25 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { - caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | - HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | - HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; + caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | + HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | + HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; - caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; - caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; - caps->qpc_timer_ba_pg_sz = 0; - caps->qpc_timer_buf_pg_sz = 0; - caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; - caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; - caps->cqc_timer_ba_pg_sz = 0; - caps->cqc_timer_buf_pg_sz = 0; - caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; + caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ; + caps->qpc_timer_ba_pg_sz = 0; + caps->qpc_timer_buf_pg_sz = 0; + caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; + caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; + caps->cqc_timer_ba_pg_sz = 0; + caps->cqc_timer_buf_pg_sz = 0; + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; - caps->sccc_ba_pg_sz = 0; - caps->sccc_buf_pg_sz = 0; - caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; - } + caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; + caps->sccc_ba_pg_sz = 0; + caps->sccc_buf_pg_sz = 0; + caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; } static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, @@ -1995,20 +1995,18 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->srqc_bt_num, &caps->srqc_buf_pg_sz, &caps->srqc_ba_pg_sz, HEM_TYPE_SRQC); - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { - caps->sccc_hop_num = ctx_hop_num; - caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->sccc_hop_num = ctx_hop_num; + caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; + caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, - caps->sccc_hop_num, caps->sccc_bt_num, - &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, - HEM_TYPE_SCCC); - calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz, - caps->cqc_timer_hop_num, caps->cqc_timer_bt_num, - &caps->cqc_timer_buf_pg_sz, - &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER); - } + calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, + caps->sccc_hop_num, caps->sccc_bt_num, + &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, + HEM_TYPE_SCCC); + calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz, + caps->cqc_timer_hop_num, caps->cqc_timer_bt_num, + &caps->cqc_timer_buf_pg_sz, + &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER); calc_pg_sz(caps->num_cqe_segs, caps->mtt_entry_sz, caps->cqe_hop_num, 1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE); @@ -2055,22 +2053,19 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { - ret = hns_roce_query_pf_timer_resource(hr_dev); - if (ret) { - dev_err(hr_dev->dev, - "Query pf timer resource fail, ret = %d.\n", - ret); - return ret; - } + ret = hns_roce_query_pf_timer_resource(hr_dev); + if (ret) { + dev_err(hr_dev->dev, + "failed to query pf timer resource, ret = %d.\n", ret); + return ret; + } - ret = hns_roce_set_vf_switch_param(hr_dev, 0); - if (ret) { - dev_err(hr_dev->dev, - "Set function switch param fail, ret = %d.\n", - ret); - return ret; - } + ret = hns_roce_set_vf_switch_param(hr_dev, 0); + if (ret) { + dev_err(hr_dev->dev, + "failed to set function switch param, ret = %d.\n", + ret); + return ret; } hr_dev->vendor_part_id = hr_dev->pci_dev->device; @@ -2336,8 +2331,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) - hns_roce_function_clear(hr_dev); + hns_roce_function_clear(hr_dev); hns_roce_free_link_table(hr_dev, &priv->tpq); hns_roce_free_link_table(hr_dev, &priv->tsq); @@ -3053,6 +3047,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, IB_WC_RETRY_EXC_ERR }, { HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR }, { HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR }, + { HNS_ROCE_CQE_V2_GENERAL_ERR, IB_WC_GENERAL_ERR} }; u32 cqe_status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, @@ -3075,6 +3070,14 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, sizeof(*cqe), false); /* + * For hns ROCEE, GENERAL_ERR is an error type that is not defined in + * the standard protocol, the driver must ignore it and needn't to set + * the QP to an error state. + */ + if (cqe_status == HNS_ROCE_CQE_V2_GENERAL_ERR) + return; + + /* * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets * into errored mode. Hence, as a workaround to this hardware * limitation, driver needs to assist in flushing. But the flushing @@ -3170,51 +3173,51 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, /* SQ corresponding to CQE */ switch (roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M, V2_CQE_BYTE_4_OPCODE_S) & 0x1f) { - case HNS_ROCE_SQ_OPCODE_SEND: + case HNS_ROCE_V2_WQE_OP_SEND: wc->opcode = IB_WC_SEND; break; - case HNS_ROCE_SQ_OPCODE_SEND_WITH_INV: + case HNS_ROCE_V2_WQE_OP_SEND_WITH_INV: wc->opcode = IB_WC_SEND; break; - case HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM: + case HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM: wc->opcode = IB_WC_SEND; wc->wc_flags |= IB_WC_WITH_IMM; break; - case HNS_ROCE_SQ_OPCODE_RDMA_READ: + case HNS_ROCE_V2_WQE_OP_RDMA_READ: wc->opcode = IB_WC_RDMA_READ; wc->byte_len = le32_to_cpu(cqe->byte_cnt); break; - case HNS_ROCE_SQ_OPCODE_RDMA_WRITE: + case HNS_ROCE_V2_WQE_OP_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; - case HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM: + case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM: wc->opcode = IB_WC_RDMA_WRITE; wc->wc_flags |= IB_WC_WITH_IMM; break; - case HNS_ROCE_SQ_OPCODE_LOCAL_INV: + case HNS_ROCE_V2_WQE_OP_LOCAL_INV: wc->opcode = IB_WC_LOCAL_INV; wc->wc_flags |= IB_WC_WITH_INVALIDATE; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP: + case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP: wc->opcode = IB_WC_COMP_SWAP; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD: + case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD: wc->opcode = IB_WC_FETCH_ADD; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP: + case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP: wc->opcode = IB_WC_MASKED_COMP_SWAP; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD: + case HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD: wc->opcode = IB_WC_MASKED_FETCH_ADD; wc->byte_len = 8; break; - case HNS_ROCE_SQ_OPCODE_FAST_REG_WR: + case HNS_ROCE_V2_WQE_OP_FAST_REG_PMR: wc->opcode = IB_WC_REG_MR; break; - case HNS_ROCE_SQ_OPCODE_BIND_MW: + case HNS_ROCE_V2_WQE_OP_BIND_MW: wc->opcode = IB_WC_REG_MR; break; default: @@ -3374,11 +3377,33 @@ static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type, return op + step_idx; } +static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba, + u32 hem_type, int step_idx) +{ + struct hns_roce_cmd_mailbox *mailbox; + int ret; + int op; + + op = get_op_for_set_hem(hr_dev, hem_type, step_idx); + if (op < 0) + return 0; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj, + 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS); + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, int step_idx) { - struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_hem_iter iter; struct hns_roce_hem_mhop mhop; struct hns_roce_hem *hem; @@ -3390,7 +3415,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, u64 bt_ba = 0; u32 chunk_ba_num; u32 hop_num; - int op; if (!hns_roce_check_whether_mhop(hr_dev, table->type)) return 0; @@ -3412,14 +3436,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, hem_idx = i; } - op = get_op_for_set_hem(hr_dev, table->type, step_idx); - if (op == -EINVAL) - return 0; - - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - if (table->type == HEM_TYPE_SCCC) obj = mhop.l0_idx; @@ -3428,11 +3444,8 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, for (hns_roce_hem_first(hem, &iter); !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { bt_ba = hns_roce_hem_addr(&iter); - - /* configure the ba, tag, and op */ - ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, - obj, 0, op, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, + step_idx); } } else { if (step_idx == 0) @@ -3440,12 +3453,9 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, else if (step_idx == 1 && hop_num == 2) bt_ba = table->bt_l1_dma_addr[l1_idx]; - /* configure the ba, tag, and op */ - ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj, - 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, step_idx); } - hns_roce_free_cmd_mailbox(hr_dev, mailbox); return ret; } @@ -3745,51 +3755,23 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, } } -static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, int mtt_cnt, - u32 page_size) -{ - struct ib_device *ibdev = &hr_dev->ib_dev; - - if (hr_qp->rq.wqe_cnt < 1) - return true; - - if (mtt_cnt < 1) { - ibdev_err(ibdev, "failed to find RQWQE buf ba of QP(0x%lx)\n", - hr_qp->qpn); - return false; - } - - if (mtt_cnt < MTT_MIN_COUNT && - (hr_qp->rq.offset + page_size) < hr_qp->buff_size) { - ibdev_err(ibdev, - "failed to find next RQWQE buf ba of QP(0x%lx)\n", - hr_qp->qpn); - return false; - } - - return true; -} - static int config_qp_rq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - struct ib_qp *ibqp = &hr_qp->ibqp; u64 mtts[MTT_MIN_COUNT] = { 0 }; u64 wqe_sge_ba; - u32 page_size; int count; /* Search qp buf's mtts */ - page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->rq.offset / page_size, mtts, + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts, MTT_MIN_COUNT, &wqe_sge_ba); - if (!ibqp->srq) - if (!check_wqe_rq_mtt_count(hr_dev, hr_qp, count, page_size)) - return -EINVAL; + if (hr_qp->rq.wqe_cnt && count < 1) { + ibdev_err(&hr_dev->ib_dev, + "failed to find RQ WQE, QPN = 0x%lx.\n", hr_qp->qpn); + return -EINVAL; + } context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3); qpc_mask->wqe_sge_ba = 0; @@ -3891,7 +3873,6 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, struct ib_device *ibdev = &hr_dev->ib_dev; u64 sge_cur_blk = 0; u64 sq_cur_blk = 0; - u32 page_size; int count; /* search qp buf's mtts */ @@ -3902,9 +3883,8 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev, return -EINVAL; } if (hr_qp->sge.sge_cnt > 0) { - page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->sge.offset / page_size, + hr_qp->sge.offset, &sge_cur_blk, 1, NULL); if (count < 1) { ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n", @@ -4265,12 +4245,13 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B && is_udp) + if (is_udp) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); else roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, grh->traffic_class); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, 0); roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, @@ -4301,7 +4282,9 @@ static bool check_qp_state(enum ib_qp_state cur_state, [IB_QPS_RTR] = { [IB_QPS_RESET] = true, [IB_QPS_RTS] = true, [IB_QPS_ERR] = true }, - [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }, + [IB_QPS_RTS] = { [IB_QPS_RESET] = true, + [IB_QPS_RTS] = true, + [IB_QPS_ERR] = true }, [IB_QPS_SQD] = {}, [IB_QPS_SQE] = {}, [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index e176b0aaa4ac..1fb1c583d0f8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -179,27 +179,11 @@ enum { HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9, HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa, HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb, - HNS_ROCE_V2_WQE_OP_BIND_MW_TYPE = 0xc, + HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc, HNS_ROCE_V2_WQE_OP_MASK = 0x1f, }; enum { - HNS_ROCE_SQ_OPCODE_SEND = 0x0, - HNS_ROCE_SQ_OPCODE_SEND_WITH_INV = 0x1, - HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM = 0x2, - HNS_ROCE_SQ_OPCODE_RDMA_WRITE = 0x3, - HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM = 0x4, - HNS_ROCE_SQ_OPCODE_RDMA_READ = 0x5, - HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP = 0x6, - HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD = 0x7, - HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP = 0x8, - HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD = 0x9, - HNS_ROCE_SQ_OPCODE_FAST_REG_WR = 0xa, - HNS_ROCE_SQ_OPCODE_LOCAL_INV = 0xb, - HNS_ROCE_SQ_OPCODE_BIND_MW = 0xc, -}; - -enum { /* rq operations */ HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM = 0x0, HNS_ROCE_V2_OPCODE_SEND = 0x1, @@ -230,6 +214,7 @@ enum { HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR = 0x15, HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR = 0x16, HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR = 0x22, + HNS_ROCE_CQE_V2_GENERAL_ERR = 0x23, HNS_ROCE_V2_CQE_STATUS_MASK = 0xff, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 50763cf4fa3d..5907cfd878a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -428,7 +428,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .destroy_ah = hns_roce_destroy_ah, .destroy_cq = hns_roce_destroy_cq, .disassociate_ucontext = hns_roce_disassociate_ucontext, - .fill_res_entry = hns_roce_fill_res_entry, + .fill_res_cq_entry = hns_roce_fill_res_cq_entry, .get_dma_mr = hns_roce_get_dma_mr, .get_link_layer = hns_roce_get_link_layer, .get_port_immutable = hns_roce_port_immutable, diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 6b226a5eb7db..e5df3884b41d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -415,7 +415,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; @@ -871,6 +871,15 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int err; int i; + /* + * Only use the first page address as root ba when hopnum is 0, this + * is because the addresses of all pages are consecutive in this case. + */ + if (mtr->hem_cfg.is_direct) { + mtr->hem_cfg.root_ba = pages[0]; + return 0; + } + for (i = 0; i < mtr->hem_cfg.region_count; i++) { r = &mtr->hem_cfg.region[i]; if (r->offset + r->count > page_cnt) { @@ -896,6 +905,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int start_index; int mtt_count; int total = 0; __le64 *mtts; @@ -907,26 +918,32 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, goto done; /* no mtt memory in direct mode, so just return the buffer address */ - if (mtr->hem_cfg.is_direct) { - npage = offset; - for (total = 0; total < mtt_max; total++, npage++) { - addr = mtr->hem_cfg.root_ba + - (npage << mtr->hem_cfg.buf_pg_shift); - + if (cfg->is_direct) { + start_index = offset >> HNS_HW_PAGE_SHIFT; + for (mtt_count = 0; mtt_count < cfg->region_count && + total < mtt_max; mtt_count++) { + npage = cfg->region[mtt_count].offset; + if (npage < start_index) + continue; + + addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT); if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) mtt_buf[total] = to_hr_hw_page_addr(addr); else mtt_buf[total] = addr; + + total++; } goto done; } + start_index = offset >> cfg->buf_pg_shift; left = mtt_max; while (left > 0) { mtt_count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, - offset + total, + start_index + total, &mtt_count, NULL); if (!mtts || !mtt_count) goto done; @@ -939,104 +956,136 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, done: if (base_addr) - *base_addr = mtr->hem_cfg.root_ba; + *base_addr = cfg->root_ba; return total; } -/* convert buffer size to page index and page count */ -static unsigned int mtr_init_region(struct hns_roce_buf_attr *attr, - int page_cnt, - struct hns_roce_buf_region *regions, - int region_cnt, unsigned int page_shift) +static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, + struct hns_roce_buf_attr *attr, + struct hns_roce_hem_cfg *cfg, + unsigned int *buf_page_shift) { - unsigned int page_size = 1 << page_shift; - int max_region = attr->region_count; struct hns_roce_buf_region *r; - unsigned int i = 0; - int page_idx = 0; - - for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) { - r = ®ions[i]; - r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ? - 0 : attr->region[i].hopnum; - r->offset = page_idx; - r->count = DIV_ROUND_UP(attr->region[i].size, page_size); - page_idx += r->count; + unsigned int page_shift = 0; + int page_cnt = 0; + size_t buf_size; + int region_cnt; + + if (cfg->is_direct) { + buf_size = cfg->buf_pg_count << cfg->buf_pg_shift; + page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE); + /* + * When HEM buffer use level-0 addressing, the page size equals + * the buffer size, and the the page size = 4K * 2^N. + */ + cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt); + if (attr->region_count > 1) { + cfg->buf_pg_count = page_cnt; + page_shift = HNS_HW_PAGE_SHIFT; + } else { + cfg->buf_pg_count = 1; + page_shift = cfg->buf_pg_shift; + if (buf_size != 1 << page_shift) { + ibdev_err(&hr_dev->ib_dev, + "failed to check direct size %zu shift %d.\n", + buf_size, page_shift); + return -EINVAL; + } + } + } else { + page_shift = cfg->buf_pg_shift; + } + + /* convert buffer size to page index and page count */ + for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count && + region_cnt < attr->region_count && + region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) { + r = &cfg->region[region_cnt]; + r->offset = page_cnt; + buf_size = hr_hw_page_align(attr->region[region_cnt].size); + r->count = DIV_ROUND_UP(buf_size, 1 << page_shift); + page_cnt += r->count; + r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum, + r->count); + } + + if (region_cnt < 1) { + ibdev_err(&hr_dev->ib_dev, + "failed to check mtr region count, pages = %d.\n", + cfg->buf_pg_count); + return -ENOBUFS; } - return i; + cfg->region_count = region_cnt; + *buf_page_shift = page_shift; + + return page_cnt; } /** * hns_roce_mtr_create - Create hns memory translate region. * * @mtr: memory translate region - * @init_attr: init attribute for creating mtr - * @page_shift: page shift for multi-hop base address table + * @buf_attr: buffer attribute for creating mtr + * @ba_page_shift: page shift for multi-hop base address table * @udata: user space context, if it's NULL, means kernel space * @user_addr: userspace virtual address to start at - * @buf_alloced: mtr has private buffer, true means need to alloc */ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct hns_roce_buf_attr *buf_attr, - unsigned int page_shift, struct ib_udata *udata, + unsigned int ba_page_shift, struct ib_udata *udata, unsigned long user_addr) { + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned int buf_page_shift = 0; dma_addr_t *pages = NULL; - int region_cnt = 0; int all_pg_cnt; int get_pg_cnt; - bool has_mtt; - int err = 0; + int ret = 0; + + /* if disable mtt, all pages must in a continuous address range */ + cfg->is_direct = !mtr_has_mtt(buf_attr); - has_mtt = mtr_has_mtt(buf_attr); /* if buffer only need mtt, just init the hem cfg */ if (buf_attr->mtt_only) { - mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift; - mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >> - buf_attr->page_shift; + cfg->buf_pg_shift = buf_attr->page_shift; + cfg->buf_pg_count = mtr_bufs_size(buf_attr) >> + buf_attr->page_shift; mtr->umem = NULL; mtr->kmem = NULL; } else { - err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata, - user_addr); - if (err) { - ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n", - err); - return err; + ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct, + udata, user_addr); + if (ret) { + ibdev_err(ibdev, + "failed to alloc mtr bufs, ret = %d.\n", ret); + return ret; } } - /* alloc mtt memory */ - all_pg_cnt = mtr->hem_cfg.buf_pg_count; - hns_roce_hem_list_init(&mtr->hem_list); - mtr->hem_cfg.is_direct = !has_mtt; - mtr->hem_cfg.ba_pg_shift = page_shift; - mtr->hem_cfg.region_count = 0; - region_cnt = mtr_init_region(buf_attr, all_pg_cnt, - mtr->hem_cfg.region, - ARRAY_SIZE(mtr->hem_cfg.region), - mtr->hem_cfg.buf_pg_shift); - if (region_cnt < 1) { - err = -ENOBUFS; - ibdev_err(ibdev, "failed to init mtr region %d\n", region_cnt); + all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift); + if (all_pg_cnt < 1) { + ret = -ENOBUFS; + ibdev_err(ibdev, "failed to init mtr buf cfg.\n"); goto err_alloc_bufs; } - mtr->hem_cfg.region_count = region_cnt; - - if (has_mtt) { - err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, - mtr->hem_cfg.region, region_cnt, - page_shift); - if (err) { - ibdev_err(ibdev, "Failed to request mtr hem, err %d\n", - err); + hns_roce_hem_list_init(&mtr->hem_list); + if (!cfg->is_direct) { + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + cfg->region, cfg->region_count, + ba_page_shift); + if (ret) { + ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n", + ret); goto err_alloc_bufs; } - mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; + cfg->root_ba = mtr->hem_list.root_ba; + cfg->ba_pg_shift = ba_page_shift; + } else { + cfg->ba_pg_shift = cfg->buf_pg_shift; } /* no buffer to map */ @@ -1046,31 +1095,26 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, /* alloc a tmp array to store buffer's dma address */ pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); if (!pages) { - err = -ENOMEM; - ibdev_err(ibdev, "Failed to alloc mtr page list %d\n", + ret = -ENOMEM; + ibdev_err(ibdev, "failed to alloc mtr page list %d.\n", all_pg_cnt); goto err_alloc_hem_list; } get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, - mtr->hem_cfg.buf_pg_shift); + buf_page_shift); if (get_pg_cnt != all_pg_cnt) { - ibdev_err(ibdev, "Failed to get mtr page %d != %d\n", + ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", get_pg_cnt, all_pg_cnt); - err = -ENOBUFS; + ret = -ENOBUFS; goto err_alloc_page_list; } - if (!has_mtt) { - mtr->hem_cfg.root_ba = pages[0]; - } else { - /* write buffer's dma address to BA table */ - err = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); - if (err) { - ibdev_err(ibdev, "Failed to map mtr pages, err %d\n", - err); - goto err_alloc_page_list; - } + /* write buffer's dma address to BA table */ + ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); + if (ret) { + ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); + goto err_alloc_page_list; } /* drop tmp array */ @@ -1082,7 +1126,7 @@ err_alloc_hem_list: hns_roce_hem_list_release(hr_dev, &mtr->hem_list); err_alloc_bufs: mtr_free_bufs(hr_dev, mtr); - return err; + return ret; } void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index a0a47bd66975..e94ca130ff5e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -411,7 +411,6 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) { - struct ib_device *ibdev = &hr_dev->ib_dev; u32 cnt; cnt = max(1U, cap->max_send_sge); @@ -431,15 +430,6 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { cnt = roundup_pow_of_two(sq_wqe_cnt * (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); - - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { - if (cnt > hr_dev->caps.max_extend_sg) { - ibdev_err(ibdev, - "failed to check exSGE num, exSGE num = %d.\n", - cnt); - return -EINVAL; - } - } } else { cnt = 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 06871731ac43..259444c0a630 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -76,10 +76,9 @@ err: return -EMSGSIZE; } -static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +int hns_roce_fill_res_cq_entry(struct sk_buff *msg, + struct ib_cq *ib_cq) { - struct ib_cq *ib_cq = container_of(res, struct ib_cq, res); struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); struct hns_roce_v2_cq_context *context; @@ -119,12 +118,3 @@ err: kfree(context); return ret; } - -int hns_roce_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) -{ - if (res->type == RDMA_RESTRACK_CQ) - return hns_roce_fill_res_cq_entry(msg, res); - - return 0; -} diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 19af29a48c55..6957e4f3404b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -101,7 +101,6 @@ static int i40iw_query_port(struct ib_device *ibdev, props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; props->active_width = IB_WIDTH_4X; props->active_speed = 1; props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; @@ -1543,10 +1542,9 @@ static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr * @pd: ibpd pointer * @mr_type: memory for stag registrion * @max_num_sg: man number of pages - * @udata: user data or NULL for kernel objects */ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct i40iw_pd *iwpd = to_iwpd(pd); struct i40iw_device *iwdev = to_iwdev(pd->device); @@ -2460,7 +2458,6 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; return 0; @@ -2616,22 +2613,6 @@ static int i40iw_query_gid(struct ib_device *ibdev, return 0; } -/** - * i40iw_query_pkey - Query partition key - * @ibdev: device pointer from stack - * @port: port number - * @index: index of pkey - * @pkey: pointer to store the pkey - */ -static int i40iw_query_pkey(struct ib_device *ibdev, - u8 port, - u16 index, - u16 *pkey) -{ - *pkey = 0; - return 0; -} - static const struct ib_device_ops i40iw_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_I40IW, @@ -2671,7 +2652,6 @@ static const struct ib_device_ops i40iw_dev_ops = { .post_send = i40iw_post_send, .query_device = i40iw_query_device, .query_gid = i40iw_query_gid, - .query_pkey = i40iw_query_pkey, .query_port = i40iw_query_port, .query_qp = i40iw_query_qp, .reg_user_mr = i40iw_reg_user_mr, diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 816d28854a8e..5e7910a517da 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1219,56 +1219,47 @@ static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); } -static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_udata *udata) +static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) { - struct mlx4_ib_xrcd *xrcd; + struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device); + struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd); struct ib_cq_init_attr cq_attr = {}; int err; - if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) - return ERR_PTR(-ENOSYS); - - xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); - if (!xrcd) - return ERR_PTR(-ENOMEM); + if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return -EOPNOTSUPP; - err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); + err = mlx4_xrcd_alloc(dev->dev, &xrcd->xrcdn); if (err) - goto err1; + return err; - xrcd->pd = ib_alloc_pd(ibdev, 0); + xrcd->pd = ib_alloc_pd(ibxrcd->device, 0); if (IS_ERR(xrcd->pd)) { err = PTR_ERR(xrcd->pd); goto err2; } cq_attr.cqe = 1; - xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr); + xrcd->cq = ib_create_cq(ibxrcd->device, NULL, NULL, xrcd, &cq_attr); if (IS_ERR(xrcd->cq)) { err = PTR_ERR(xrcd->cq); goto err3; } - return &xrcd->ibxrcd; + return 0; err3: ib_dealloc_pd(xrcd->pd); err2: - mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); -err1: - kfree(xrcd); - return ERR_PTR(err); + mlx4_xrcd_free(dev->dev, xrcd->xrcdn); + return err; } -static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); - kfree(xrcd); - - return 0; } static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) @@ -2607,6 +2598,8 @@ static const struct ib_device_ops mlx4_ib_dev_mw_ops = { static const struct ib_device_ops mlx4_ib_dev_xrc_ops = { .alloc_xrcd = mlx4_ib_alloc_xrcd, .dealloc_xrcd = mlx4_ib_dealloc_xrcd, + + INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx4_ib_xrcd, ibxrcd), }; static const struct ib_device_ops mlx4_ib_dev_fs_ops = { diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 6f4ea1067095..38e87a700a2a 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -729,7 +729,7 @@ struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 7e0b205c05eb..1d5ef0de12c9 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->ibmr.length = length; - mr->ibmr.iova = virt_addr; mr->ibmr.page_size = 1U << shift; return &mr->ibmr; @@ -655,7 +654,7 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) } struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mr *mr; diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 8cca61c671f8..b4c009bb0db6 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := ah.o \ cmd.o \ cong.o \ + counters.o \ cq.o \ doorbell.o \ gsi.o \ @@ -22,5 +23,6 @@ mlx5_ib-y := ah.o \ mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \ - flow.o \ - qos.o + fs.o \ + qos.o \ + std_types.o diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index cc24c711e92a..ebb2f108b64f 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -148,18 +148,6 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) spin_unlock(&dm->lock); } -int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) -{ - u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - - MLX5_SET(ppcnt_reg, in, local_port, 1); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); - return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT, - 0, 0); -} - void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid) { u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index f4d8558db434..1d192a8ca87d 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -41,7 +41,6 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey); int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out); -int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c new file mode 100644 index 000000000000..145f3cb40ccb --- /dev/null +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -0,0 +1,709 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + */ + +#include "mlx5_ib.h" +#include <linux/mlx5/eswitch.h> +#include "counters.h" +#include "ib_rep.h" +#include "qp.h" + +struct mlx5_ib_counter { + const char *name; + size_t offset; +}; + +#define INIT_Q_COUNTER(_name) \ + { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} + +static const struct mlx5_ib_counter basic_q_cnts[] = { + INIT_Q_COUNTER(rx_write_requests), + INIT_Q_COUNTER(rx_read_requests), + INIT_Q_COUNTER(rx_atomic_requests), + INIT_Q_COUNTER(out_of_buffer), +}; + +static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { + INIT_Q_COUNTER(out_of_sequence), +}; + +static const struct mlx5_ib_counter retrans_q_cnts[] = { + INIT_Q_COUNTER(duplicate_request), + INIT_Q_COUNTER(rnr_nak_retry_err), + INIT_Q_COUNTER(packet_seq_err), + INIT_Q_COUNTER(implied_nak_seq_err), + INIT_Q_COUNTER(local_ack_timeout_err), +}; + +#define INIT_CONG_COUNTER(_name) \ + { .name = #_name, .offset = \ + MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} + +static const struct mlx5_ib_counter cong_cnts[] = { + INIT_CONG_COUNTER(rp_cnp_ignored), + INIT_CONG_COUNTER(rp_cnp_handled), + INIT_CONG_COUNTER(np_ecn_marked_roce_packets), + INIT_CONG_COUNTER(np_cnp_sent), +}; + +static const struct mlx5_ib_counter extended_err_cnts[] = { + INIT_Q_COUNTER(resp_local_length_error), + INIT_Q_COUNTER(resp_cqe_error), + INIT_Q_COUNTER(req_cqe_error), + INIT_Q_COUNTER(req_remote_invalid_request), + INIT_Q_COUNTER(req_remote_access_errors), + INIT_Q_COUNTER(resp_remote_access_errors), + INIT_Q_COUNTER(resp_cqe_flush_error), + INIT_Q_COUNTER(req_cqe_flush_error), +}; + +static const struct mlx5_ib_counter roce_accl_cnts[] = { + INIT_Q_COUNTER(roce_adp_retrans), + INIT_Q_COUNTER(roce_adp_retrans_to), + INIT_Q_COUNTER(roce_slow_restart), + INIT_Q_COUNTER(roce_slow_restart_cnps), + INIT_Q_COUNTER(roce_slow_restart_trans), +}; + +#define INIT_EXT_PPCNT_COUNTER(_name) \ + { .name = #_name, .offset = \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} + +static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { + INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), +}; + +static int mlx5_ib_read_counters(struct ib_counters *counters, + struct ib_counters_read_attr *read_attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + struct mlx5_read_counters_attr mread_attr = {}; + struct mlx5_ib_flow_counters_desc *desc; + int ret, i; + + mutex_lock(&mcounters->mcntrs_mutex); + if (mcounters->cntrs_max_index > read_attr->ncounters) { + ret = -EINVAL; + goto err_bound; + } + + mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), + GFP_KERNEL); + if (!mread_attr.out) { + ret = -ENOMEM; + goto err_bound; + } + + mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; + mread_attr.flags = read_attr->flags; + ret = mcounters->read_counters(counters->device, &mread_attr); + if (ret) + goto err_read; + + /* do the pass over the counters data array to assign according to the + * descriptions and indexing pairs + */ + desc = mcounters->counters_data; + for (i = 0; i < mcounters->ncounters; i++) + read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; + +err_read: + kfree(mread_attr.out); +err_bound: + mutex_unlock(&mcounters->mcntrs_mutex); + return ret; +} + +static void mlx5_ib_destroy_counters(struct ib_counters *counters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + + mlx5_ib_counters_clear_description(counters); + if (mcounters->hw_cntrs_hndl) + mlx5_fc_destroy(to_mdev(counters->device)->mdev, + mcounters->hw_cntrs_hndl); +} + +static int mlx5_ib_create_counters(struct ib_counters *counters, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + + mutex_init(&mcounters->mcntrs_mutex); + return 0; +} + + +static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) +{ + return MLX5_ESWITCH_MANAGER(mdev) && + mlx5_ib_eswitch_mode(mdev->priv.eswitch) == + MLX5_ESWITCH_OFFLOADS; +} + +static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, + u8 port_num) +{ + return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts : + &dev->port[port_num].cnts; +} + +/** + * mlx5_ib_get_counters_id - Returns counters id to use for device+port + * @dev: Pointer to mlx5 IB device + * @port_num: Zero based port number + * + * mlx5_ib_get_counters_id() Returns counters set id to use for given + * device port combination in switchdev and non switchdev mode of the + * parent device. + */ +u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num) +{ + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); + + return cnts->set_id; +} + +static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + bool is_switchdev = is_mdev_switchdev_mode(dev->mdev); + + if ((is_switchdev && port_num) || (!is_switchdev && !port_num)) + return NULL; + + cnts = get_counters(dev, port_num - 1); + + return rdma_alloc_hw_stats_struct(cnts->names, + cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, + const struct mlx5_ib_counters *cnts, + struct rdma_hw_stats *stats, + u16 set_id) +{ + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; + __be32 val; + int ret, i; + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + MLX5_SET(query_q_counter_in, in, counter_set_id, set_id); + ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); + if (ret) + return ret; + + for (i = 0; i < cnts->num_q_counters; i++) { + val = *(__be32 *)((void *)out + cnts->offsets[i]); + stats->value[i] = (u64)be32_to_cpu(val); + } + + return 0; +} + +static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, + const struct mlx5_ib_counters *cnts, + struct rdma_hw_stats *stats) +{ + int offset = cnts->num_q_counters + cnts->num_cong_counters; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int ret, i; + void *out; + + out = kvzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT, + 0, 0); + if (ret) + goto free; + + for (i = 0; i < cnts->num_ext_ppcnt_counters; i++) + stats->value[i + offset] = + be64_to_cpup((__be64 *)(out + + cnts->offsets[i + offset])); +free: + kvfree(out); + return ret; +} + +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port_num, int index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); + struct mlx5_core_dev *mdev; + int ret, num_counters; + u8 mdev_port_num; + + if (!stats) + return -EINVAL; + + num_counters = cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + + /* q_counters are per IB device, query the master mdev */ + ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id); + if (ret) + return ret; + + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); + if (ret) + return ret; + } + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + mdev = mlx5_ib_get_native_port_mdev(dev, port_num, + &mdev_port_num); + if (!mdev) { + /* If port is not affiliated yet, its in down state + * which doesn't have any counters yet, so it would be + * zero. So no need to read from the HCA. + */ + goto done; + } + ret = mlx5_lag_query_cong_counters(dev->mdev, + stats->value + + cnts->num_q_counters, + cnts->num_cong_counters, + cnts->offsets + + cnts->num_q_counters); + + mlx5_ib_put_native_port_mdev(dev, port_num); + if (ret) + return ret; + } + +done: + return num_counters; +} + +static struct rdma_hw_stats * +mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + const struct mlx5_ib_counters *cnts = + get_counters(dev, counter->port - 1); + + return rdma_alloc_hw_stats_struct(cnts->names, + cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + const struct mlx5_ib_counters *cnts = + get_counters(dev, counter->port - 1); + + return mlx5_ib_query_q_counters(dev->mdev, cnts, + counter->stats, counter->id); +} + +static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; + + if (!counter->id) + return 0; + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id); + return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); +} + +static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, + struct ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + int err; + + if (!counter->id) { + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; + + MLX5_SET(alloc_q_counter_in, in, opcode, + MLX5_CMD_OP_ALLOC_Q_COUNTER); + MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID); + err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); + if (err) + return err; + counter->id = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); + } + + err = mlx5_ib_qp_set_counter(qp, counter); + if (err) + goto fail_set_counter; + + return 0; + +fail_set_counter: + mlx5_ib_counter_dealloc(counter); + counter->id = 0; + + return err; +} + +static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) +{ + return mlx5_ib_qp_set_counter(qp, NULL); +} + + +static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, + const char **names, + size_t *offsets) +{ + int i; + int j = 0; + + for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { + names[j] = basic_q_cnts[i].name; + offsets[j] = basic_q_cnts[i].offset; + } + + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { + for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { + names[j] = out_of_seq_q_cnts[i].name; + offsets[j] = out_of_seq_q_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { + for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { + names[j] = retrans_q_cnts[i].name; + offsets[j] = retrans_q_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { + for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { + names[j] = extended_err_cnts[i].name; + offsets[j] = extended_err_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { + for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { + names[j] = roce_accl_cnts[i].name; + offsets[j] = roce_accl_cnts[i].offset; + } + } + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { + names[j] = cong_cnts[i].name; + offsets[j] = cong_cnts[i].offset; + } + } + + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { + names[j] = ext_ppcnt_cnts[i].name; + offsets[j] = ext_ppcnt_cnts[i].offset; + } + } +} + + +static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, + struct mlx5_ib_counters *cnts) +{ + u32 num_counters; + + num_counters = ARRAY_SIZE(basic_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) + num_counters += ARRAY_SIZE(out_of_seq_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) + num_counters += ARRAY_SIZE(retrans_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) + num_counters += ARRAY_SIZE(extended_err_cnts); + + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) + num_counters += ARRAY_SIZE(roce_accl_cnts); + + cnts->num_q_counters = num_counters; + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); + num_counters += ARRAY_SIZE(cong_cnts); + } + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); + num_counters += ARRAY_SIZE(ext_ppcnt_cnts); + } + cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); + if (!cnts->names) + return -ENOMEM; + + cnts->offsets = kcalloc(num_counters, + sizeof(cnts->offsets), GFP_KERNEL); + if (!cnts->offsets) + goto err_names; + + return 0; + +err_names: + kfree(cnts->names); + cnts->names = NULL; + return -ENOMEM; +} + +static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; + int num_cnt_ports; + int i; + + num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + + for (i = 0; i < num_cnt_ports; i++) { + if (dev->port[i].cnts.set_id) { + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, + dev->port[i].cnts.set_id); + mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); + } + kfree(dev->port[i].cnts.names); + kfree(dev->port[i].cnts.offsets); + } +} + +static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; + int num_cnt_ports; + int err = 0; + int i; + bool is_shared; + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; + num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; + + for (i = 0; i < num_cnt_ports; i++) { + err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); + if (err) + goto err_alloc; + + mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, + dev->port[i].cnts.offsets); + + MLX5_SET(alloc_q_counter_in, in, uid, + is_shared ? MLX5_SHARED_RESOURCE_UID : 0); + + err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); + if (err) { + mlx5_ib_warn(dev, + "couldn't allocate queue counter for port %d, err %d\n", + i + 1, err); + goto err_alloc; + } + + dev->port[i].cnts.set_id = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); + } + return 0; + +err_alloc: + mlx5_ib_dealloc_counters(dev); + return err; +} + +static int read_flow_counters(struct ib_device *ibdev, + struct mlx5_read_counters_attr *read_attr) +{ + struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + + return mlx5_fc_query(dev->mdev, fc, + &read_attr->out[IB_COUNTER_PACKETS], + &read_attr->out[IB_COUNTER_BYTES]); +} + +/* flow counters currently expose two counters packets and bytes */ +#define FLOW_COUNTERS_NUM 2 +static int counters_set_description( + struct ib_counters *counters, enum mlx5_ib_counters_type counters_type, + struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + u32 cntrs_max_index = 0; + int i; + + if (counters_type != MLX5_IB_COUNTERS_FLOW) + return -EINVAL; + + /* init the fields for the object */ + mcounters->type = counters_type; + mcounters->read_counters = read_flow_counters; + mcounters->counters_num = FLOW_COUNTERS_NUM; + mcounters->ncounters = ncounters; + /* each counter entry have both description and index pair */ + for (i = 0; i < ncounters; i++) { + if (desc_data[i].description > IB_COUNTER_BYTES) + return -EINVAL; + + if (cntrs_max_index <= desc_data[i].index) + cntrs_max_index = desc_data[i].index + 1; + } + + mutex_lock(&mcounters->mcntrs_mutex); + mcounters->counters_data = desc_data; + mcounters->cntrs_max_index = cntrs_max_index; + mutex_unlock(&mcounters->mcntrs_mutex); + + return 0; +} + +#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) +int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters, + struct mlx5_ib_create_flow *ucmd) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); + struct mlx5_ib_flow_counters_data *cntrs_data = NULL; + struct mlx5_ib_flow_counters_desc *desc_data = NULL; + bool hw_hndl = false; + int ret = 0; + + if (ucmd && ucmd->ncounters_data != 0) { + cntrs_data = ucmd->data; + if (cntrs_data->ncounters > MAX_COUNTERS_NUM) + return -EINVAL; + + desc_data = kcalloc(cntrs_data->ncounters, + sizeof(*desc_data), + GFP_KERNEL); + if (!desc_data) + return -ENOMEM; + + if (copy_from_user(desc_data, + u64_to_user_ptr(cntrs_data->counters_data), + sizeof(*desc_data) * cntrs_data->ncounters)) { + ret = -EFAULT; + goto free; + } + } + + if (!mcounters->hw_cntrs_hndl) { + mcounters->hw_cntrs_hndl = mlx5_fc_create( + to_mdev(ibcounters->device)->mdev, false); + if (IS_ERR(mcounters->hw_cntrs_hndl)) { + ret = PTR_ERR(mcounters->hw_cntrs_hndl); + goto free; + } + hw_hndl = true; + } + + if (desc_data) { + /* counters already bound to at least one flow */ + if (mcounters->cntrs_max_index) { + ret = -EINVAL; + goto free_hndl; + } + + ret = counters_set_description(ibcounters, + MLX5_IB_COUNTERS_FLOW, + desc_data, + cntrs_data->ncounters); + if (ret) + goto free_hndl; + + } else if (!mcounters->cntrs_max_index) { + /* counters not bound yet, must have udata passed */ + ret = -EINVAL; + goto free_hndl; + } + + return 0; + +free_hndl: + if (hw_hndl) { + mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, + mcounters->hw_cntrs_hndl); + mcounters->hw_cntrs_hndl = NULL; + } +free: + kfree(desc_data); + return ret; +} + +void mlx5_ib_counters_clear_description(struct ib_counters *counters) +{ + struct mlx5_ib_mcounters *mcounters; + + if (!counters || atomic_read(&counters->usecnt) != 1) + return; + + mcounters = to_mcounters(counters); + + mutex_lock(&mcounters->mcntrs_mutex); + kfree(mcounters->counters_data); + mcounters->counters_data = NULL; + mcounters->cntrs_max_index = 0; + mutex_unlock(&mcounters->mcntrs_mutex); +} + +static const struct ib_device_ops hw_stats_ops = { + .alloc_hw_stats = mlx5_ib_alloc_hw_stats, + .get_hw_stats = mlx5_ib_get_hw_stats, + .counter_bind_qp = mlx5_ib_counter_bind_qp, + .counter_unbind_qp = mlx5_ib_counter_unbind_qp, + .counter_dealloc = mlx5_ib_counter_dealloc, + .counter_alloc_stats = mlx5_ib_counter_alloc_stats, + .counter_update_stats = mlx5_ib_counter_update_stats, +}; + +static const struct ib_device_ops counters_ops = { + .create_counters = mlx5_ib_create_counters, + .destroy_counters = mlx5_ib_destroy_counters, + .read_counters = mlx5_ib_read_counters, + + INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), +}; + +int mlx5_ib_counters_init(struct mlx5_ib_dev *dev) +{ + ib_set_device_ops(&dev->ib_dev, &counters_ops); + + if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) + return 0; + + ib_set_device_ops(&dev->ib_dev, &hw_stats_ops); + return mlx5_ib_alloc_counters(dev); +} + +void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev) +{ + if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) + return; + + mlx5_ib_dealloc_counters(dev); +} diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h new file mode 100644 index 000000000000..1aa30c2f3f4d --- /dev/null +++ b/drivers/infiniband/hw/mlx5/counters.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + */ + +#ifndef _MLX5_IB_COUNTERS_H +#define _MLX5_IB_COUNTERS_H + +#include "mlx5_ib.h" + +int mlx5_ib_counters_init(struct mlx5_ib_dev *dev); +void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev); +void mlx5_ib_counters_clear_description(struct ib_counters *counters); +int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters, + struct mlx5_ib_create_flow *ucmd); +u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num); +#endif /* _MLX5_IB_COUNTERS_H */ diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 655ea9c984e1..9e3d8b826498 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -14,6 +14,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include "mlx5_ib.h" +#include "devx.h" #include "qp.h" #include <linux/xarray.h> @@ -89,22 +90,6 @@ struct devx_async_event_file { u8 is_destroyed:1; }; -#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) -struct devx_obj { - struct mlx5_ib_dev *ib_dev; - u64 obj_id; - u32 dinlen; /* destroy inbox length */ - u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; - u32 flags; - union { - struct mlx5_ib_devx_mr devx_mr; - struct mlx5_core_dct core_dct; - struct mlx5_core_cq core_cq; - u32 flow_counter_bulk_size; - }; - struct list_head event_sub; /* holds devx_event_subscription entries */ -}; - struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; @@ -171,48 +156,6 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } -bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) -{ - struct devx_obj *devx_obj = obj; - u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); - - switch (opcode) { - case MLX5_CMD_OP_DESTROY_TIR: - *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; - *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, - obj_id); - return true; - - case MLX5_CMD_OP_DESTROY_FLOW_TABLE: - *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox, - table_id); - return true; - default: - return false; - } -} - -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id) -{ - struct devx_obj *devx_obj = obj; - u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); - - if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { - - if (offset && offset >= devx_obj->flow_counter_bulk_size) - return false; - - *counter_id = MLX5_GET(dealloc_flow_counter_in, - devx_obj->dinbox, - flow_counter_id); - *counter_id += offset; - return true; - } - - return false; -} - static bool is_legacy_unaffiliated_event_num(u16 event_num) { switch (event_num) { @@ -2419,17 +2362,24 @@ static int devx_event_notifier(struct notifier_block *nb, return NOTIFY_OK; } -void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) +int mlx5_ib_devx_init(struct mlx5_ib_dev *dev) { struct mlx5_devx_event_table *table = &dev->devx_event_table; + int uid; - xa_init(&table->event_xa); - mutex_init(&table->event_xa_lock); - MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY); - mlx5_eq_notifier_register(dev->mdev, &table->devx_nb); + uid = mlx5_ib_devx_create(dev, false); + if (uid > 0) { + dev->devx_whitelist_uid = uid; + xa_init(&table->event_xa); + mutex_init(&table->event_xa_lock); + MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY); + mlx5_eq_notifier_register(dev->mdev, &table->devx_nb); + } + + return 0; } -void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) +void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_devx_event_table *table = &dev->devx_event_table; struct devx_event_subscription *sub, *tmp; @@ -2437,17 +2387,21 @@ void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) void *entry; unsigned long id; - mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb); - mutex_lock(&dev->devx_event_table.event_xa_lock); - xa_for_each(&table->event_xa, id, entry) { - event = entry; - list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list, - xa_list) - devx_cleanup_subscription(dev, sub); - kfree(entry); + if (dev->devx_whitelist_uid) { + mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb); + mutex_lock(&dev->devx_event_table.event_xa_lock); + xa_for_each(&table->event_xa, id, entry) { + event = entry; + list_for_each_entry_safe( + sub, tmp, &event->unaffiliated_list, xa_list) + devx_cleanup_subscription(dev, sub); + kfree(entry); + } + mutex_unlock(&dev->devx_event_table.event_xa_lock); + xa_destroy(&table->event_xa); + + mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); } - mutex_unlock(&dev->devx_event_table.event_xa_lock); - xa_destroy(&table->event_xa); } static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h new file mode 100644 index 000000000000..1f69866aed16 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/devx.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. + */ + +#ifndef _MLX5_IB_DEVX_H +#define _MLX5_IB_DEVX_H + +#include "mlx5_ib.h" + +#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) +struct devx_obj { + struct mlx5_ib_dev *ib_dev; + u64 obj_id; + u32 dinlen; /* destroy inbox length */ + u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; + u32 flags; + union { + struct mlx5_ib_devx_mr devx_mr; + struct mlx5_core_dct core_dct; + struct mlx5_core_cq core_cq; + u32 flow_counter_bulk_size; + }; + struct list_head event_sub; /* holds devx_event_subscription entries */ +}; +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); +int mlx5_ib_devx_init(struct mlx5_ib_dev *dev); +void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev); +#else +static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) +{ + return -EOPNOTSUPP; +} +static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} +static inline int mlx5_ib_devx_init(struct mlx5_ib_dev *dev) +{ + return 0; +} +static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev) +{ +} +#endif +#endif /* _MLX5_IB_DEVX_H */ diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c deleted file mode 100644 index 216a1108ad34..000000000000 --- a/drivers/infiniband/hw/mlx5/flow.c +++ /dev/null @@ -1,765 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* - * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. - */ - -#include <rdma/ib_user_verbs.h> -#include <rdma/ib_verbs.h> -#include <rdma/uverbs_types.h> -#include <rdma/uverbs_ioctl.h> -#include <rdma/uverbs_std_types.h> -#include <rdma/mlx5_user_ioctl_cmds.h> -#include <rdma/mlx5_user_ioctl_verbs.h> -#include <rdma/ib_umem.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/fs.h> -#include "mlx5_ib.h" - -#define UVERBS_MODULE_NAME mlx5_ib -#include <rdma/uverbs_named_ioctl.h> - -static int -mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, - enum mlx5_flow_namespace_type *namespace) -{ - switch (table_type) { - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX: - *namespace = MLX5_FLOW_NAMESPACE_BYPASS; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: - *namespace = MLX5_FLOW_NAMESPACE_EGRESS; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: - *namespace = MLX5_FLOW_NAMESPACE_FDB; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX: - *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX; - break; - case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX: - *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX; - break; - default: - return -EINVAL; - } - - return 0; -} - -static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { - [MLX5_IB_FLOW_TYPE_NORMAL] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - .u.ptr = { - .len = sizeof(u16), /* data is priority */ - .min_len = sizeof(u16), - } - }, - [MLX5_IB_FLOW_TYPE_SNIFFER] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, - [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, - [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, -}; - -static int get_dests(struct uverbs_attr_bundle *attrs, - struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id, - int *dest_type, struct ib_qp **qp, u32 *flags) -{ - bool dest_devx, dest_qp; - void *devx_obj; - int err; - - dest_devx = uverbs_attr_is_valid(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); - dest_qp = uverbs_attr_is_valid(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - - *flags = 0; - err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS, - MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS | - MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP); - if (err) - return err; - - /* Both flags are not allowed */ - if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS && - *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) - return -EINVAL; - - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - if (dest_devx && (dest_qp || *flags)) - return -EINVAL; - else if (dest_qp && *flags) - return -EINVAL; - } - - /* Allow only DEVX object, drop as dest for FDB */ - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx || - (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP))) - return -EINVAL; - - /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ - if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && - ((!dest_devx && !dest_qp) || (dest_devx && dest_qp))) - return -EINVAL; - - *qp = NULL; - if (dest_devx) { - devx_obj = - uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); - - /* Verify that the given DEVX object is a flow - * steering destination. - */ - if (!mlx5_ib_devx_is_flow_dest(devx_obj, dest_id, dest_type)) - return -EINVAL; - /* Allow only flow table as dest when inserting to FDB or RDMA_RX */ - if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB || - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && - *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) - return -EINVAL; - } else if (dest_qp) { - struct mlx5_ib_qp *mqp; - - *qp = uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); - if (IS_ERR(*qp)) - return PTR_ERR(*qp); - - if ((*qp)->qp_type != IB_QPT_RAW_PACKET) - return -EINVAL; - - mqp = to_mqp(*qp); - if (mqp->is_rss) - *dest_id = mqp->rss_qp.tirn; - else - *dest_id = mqp->raw_packet_qp.rq.tirn; - *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { - *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; - } - - if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) - return -EINVAL; - - return 0; -} - -#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 -static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_flow_context flow_context = {.flow_tag = - MLX5_FS_DEFAULT_FLOW_TAG}; - u32 *offset_attr, offset = 0, counter_id = 0; - int dest_id, dest_type, inlen, len, ret, i; - struct mlx5_ib_flow_handler *flow_handler; - struct mlx5_ib_flow_matcher *fs_matcher; - struct ib_uobject **arr_flow_actions; - struct ib_uflow_resources *uflow_res; - struct mlx5_flow_act flow_act = {}; - struct ib_qp *qp = NULL; - void *devx_obj, *cmd_in; - struct ib_uobject *uobj; - struct mlx5_ib_dev *dev; - u32 flags; - - if (!capable(CAP_NET_RAW)) - return -EPERM; - - fs_matcher = uverbs_attr_get_obj(attrs, - MLX5_IB_ATTR_CREATE_FLOW_MATCHER); - uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); - dev = mlx5_udata_to_mdev(&attrs->driver_udata); - - if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags)) - return -EINVAL; - - if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; - - if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; - - len = uverbs_attr_get_uobjs_arr(attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); - if (len) { - devx_obj = arr_flow_actions[0]->object; - - if (uverbs_attr_is_valid(attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) { - - int num_offsets = uverbs_attr_ptr_get_array_size( - attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, - sizeof(u32)); - - if (num_offsets != 1) - return -EINVAL; - - offset_attr = uverbs_attr_get_alloced_ptr( - attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET); - offset = *offset_attr; - } - - if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset, - &counter_id)) - return -EINVAL; - - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - } - - cmd_in = uverbs_attr_get_alloced_ptr( - attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); - inlen = uverbs_attr_get_len(attrs, - MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); - - uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); - if (!uflow_res) - return -ENOMEM; - - len = uverbs_attr_get_uobjs_arr(attrs, - MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); - for (i = 0; i < len; i++) { - struct mlx5_ib_flow_action *maction = - to_mflow_act(arr_flow_actions[i]->object); - - ret = parse_flow_flow_action(maction, false, &flow_act); - if (ret) - goto err_out; - flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE, - arr_flow_actions[i]->object); - } - - ret = uverbs_copy_from(&flow_context.flow_tag, attrs, - MLX5_IB_ATTR_CREATE_FLOW_TAG); - if (!ret) { - if (flow_context.flow_tag >= BIT(24)) { - ret = -EINVAL; - goto err_out; - } - flow_context.flags |= FLOW_CONTEXT_HAS_TAG; - } - - flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, - &flow_context, - &flow_act, - counter_id, - cmd_in, inlen, - dest_id, dest_type); - if (IS_ERR(flow_handler)) { - ret = PTR_ERR(flow_handler); - goto err_out; - } - - ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res); - - return 0; -err_out: - ib_uverbs_flow_resources_free(uflow_res); - return ret; -} - -static int flow_matcher_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_flow_matcher *obj = uobject->object; - int ret; - - ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); - if (ret) - return ret; - - kfree(obj); - return 0; -} - -static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, - struct mlx5_ib_flow_matcher *obj) -{ - enum mlx5_ib_uapi_flow_table_type ft_type = - MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX; - u32 flags; - int err; - - /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older - * users should switch to it. We leave this to not break userspace - */ - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) && - uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) - return -EINVAL; - - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) { - err = uverbs_get_const(&ft_type, attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE); - if (err) - return err; - - err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type); - if (err) - return err; - - return 0; - } - - if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) { - err = uverbs_get_flags32(&flags, attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, - IB_FLOW_ATTR_FLAGS_EGRESS); - if (err) - return err; - - if (flags) { - mlx5_ib_ft_type_to_namespace( - MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, - &obj->ns_type); - return 0; - } - } - - obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; - - return 0; -} - -static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); - struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); - struct mlx5_ib_flow_matcher *obj; - int err; - - obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); - if (!obj) - return -ENOMEM; - - obj->mask_len = uverbs_attr_get_len( - attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); - err = uverbs_copy_from(&obj->matcher_mask, - attrs, - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); - if (err) - goto end; - - obj->flow_type = uverbs_attr_get_enum_id( - attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); - - if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) { - err = uverbs_copy_from(&obj->priority, - attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); - if (err) - goto end; - } - - err = uverbs_copy_from(&obj->match_criteria_enable, - attrs, - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA); - if (err) - goto end; - - err = mlx5_ib_matcher_ns(attrs, obj); - if (err) - goto end; - - uobj->object = obj; - obj->mdev = dev->mdev; - atomic_set(&obj->usecnt, 0); - return 0; - -end: - kfree(obj); - return err; -} - -void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) -{ - switch (maction->flow_action_raw.sub_type) { - case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: - mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, - maction->flow_action_raw.modify_hdr); - break; - case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: - mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, - maction->flow_action_raw.pkt_reformat); - break; - case MLX5_IB_FLOW_ACTION_DECAP: - break; - default: - break; - } -} - -static struct ib_flow_action * -mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, - enum mlx5_ib_uapi_flow_table_type ft_type, - u8 num_actions, void *in) -{ - enum mlx5_flow_namespace_type namespace; - struct mlx5_ib_flow_action *maction; - int ret; - - ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); - if (ret) - return ERR_PTR(-EINVAL); - - maction = kzalloc(sizeof(*maction), GFP_KERNEL); - if (!maction) - return ERR_PTR(-ENOMEM); - - maction->flow_action_raw.modify_hdr = - mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in); - - if (IS_ERR(maction->flow_action_raw.modify_hdr)) { - ret = PTR_ERR(maction->flow_action_raw.modify_hdr); - kfree(maction); - return ERR_PTR(ret); - } - maction->flow_action_raw.sub_type = - MLX5_IB_FLOW_ACTION_MODIFY_HEADER; - maction->flow_action_raw.dev = dev; - - return &maction->ib_action; -} - -static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) -{ - return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - max_modify_header_actions) || - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - max_modify_header_actions) || - MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, - max_modify_header_actions); -} - -static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); - struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); - enum mlx5_ib_uapi_flow_table_type ft_type; - struct ib_flow_action *action; - int num_actions; - void *in; - int ret; - - if (!mlx5_ib_modify_header_supported(mdev)) - return -EOPNOTSUPP; - - in = uverbs_attr_get_alloced_ptr(attrs, - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); - - num_actions = uverbs_attr_ptr_get_array_size( - attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, - MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)); - if (num_actions < 0) - return num_actions; - - ret = uverbs_get_const(&ft_type, attrs, - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); - if (ret) - return ret; - action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); - if (IS_ERR(action)) - return PTR_ERR(action); - - uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev, - IB_FLOW_ACTION_UNSPECIFIED); - - return 0; -} - -static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev, - u8 packet_reformat_type, - u8 ft_type) -{ - switch (packet_reformat_type) { - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) - return MLX5_CAP_FLOWTABLE(ibdev->mdev, - encap_general_header); - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) - return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev, - reformat_l2_to_l3_tunnel); - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) - return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, - reformat_l3_tunnel_to_l2); - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2: - if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) - return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap); - break; - default: - break; - } - - return false; -} - -static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt) -{ - switch (dv_prt) { - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: - *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: - *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; - break; - case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: - *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int mlx5_ib_flow_action_create_packet_reformat_ctx( - struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_action *maction, - u8 ft_type, u8 dv_prt, - void *in, size_t len) -{ - enum mlx5_flow_namespace_type namespace; - u8 prm_prt; - int ret; - - ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); - if (ret) - return ret; - - ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt); - if (ret) - return ret; - - maction->flow_action_raw.pkt_reformat = - mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, - in, namespace); - if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { - ret = PTR_ERR(maction->flow_action_raw.pkt_reformat); - return ret; - } - - maction->flow_action_raw.sub_type = - MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; - maction->flow_action_raw.dev = dev; - - return 0; -} - -static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); - struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); - enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; - enum mlx5_ib_uapi_flow_table_type ft_type; - struct mlx5_ib_flow_action *maction; - int ret; - - ret = uverbs_get_const(&ft_type, attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE); - if (ret) - return ret; - - ret = uverbs_get_const(&dv_prt, attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE); - if (ret) - return ret; - - if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type)) - return -EOPNOTSUPP; - - maction = kzalloc(sizeof(*maction), GFP_KERNEL); - if (!maction) - return -ENOMEM; - - if (dv_prt == - MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) { - maction->flow_action_raw.sub_type = - MLX5_IB_FLOW_ACTION_DECAP; - maction->flow_action_raw.dev = mdev; - } else { - void *in; - int len; - - in = uverbs_attr_get_alloced_ptr(attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); - if (IS_ERR(in)) { - ret = PTR_ERR(in); - goto free_maction; - } - - len = uverbs_attr_get_len(attrs, - MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); - - ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev, - maction, ft_type, dv_prt, in, len); - if (ret) - goto free_maction; - } - - uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev, - IB_FLOW_ACTION_UNSPECIFIED); - return 0; - -free_maction: - kfree(maction); - return ret; -} - -DECLARE_UVERBS_NAMED_METHOD( - MLX5_IB_METHOD_CREATE_FLOW, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, - UVERBS_OBJECT_FLOW, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN( - MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, - UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), - UA_MANDATORY, - UA_ALLOC_AND_COPY), - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER, - MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_ACCESS_READ, - UA_MANDATORY), - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, - UVERBS_OBJECT_QP, - UVERBS_ACCESS_READ), - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, - MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_READ), - UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_READ, 1, - MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, - UVERBS_ATTR_TYPE(u32), - UA_OPTIONAL), - UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, - MLX5_IB_OBJECT_DEVX_OBJ, - UVERBS_ACCESS_READ, 1, 1, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, - UVERBS_ATTR_MIN_SIZE(sizeof(u32)), - UA_OPTIONAL, - UA_ALLOC_AND_COPY), - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS, - enum mlx5_ib_create_flow_flags, - UA_OPTIONAL)); - -DECLARE_UVERBS_NAMED_METHOD_DESTROY( - MLX5_IB_METHOD_DESTROY_FLOW, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, - UVERBS_OBJECT_FLOW, - UVERBS_ACCESS_DESTROY, - UA_MANDATORY)); - -ADD_UVERBS_METHODS(mlx5_ib_fs, - UVERBS_OBJECT_FLOW, - &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), - &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); - -DECLARE_UVERBS_NAMED_METHOD( - MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, - UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES( - set_add_copy_action_in_auto)), - UA_MANDATORY, - UA_ALLOC_AND_COPY), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, - enum mlx5_ib_uapi_flow_table_type, - UA_MANDATORY)); - -DECLARE_UVERBS_NAMED_METHOD( - MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, - UVERBS_ATTR_MIN_SIZE(1), - UA_ALLOC_AND_COPY, - UA_OPTIONAL), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, - enum mlx5_ib_uapi_flow_action_packet_reformat_type, - UA_MANDATORY), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, - enum mlx5_ib_uapi_flow_table_type, - UA_MANDATORY)); - -ADD_UVERBS_METHODS( - mlx5_ib_flow_actions, - UVERBS_OBJECT_FLOW_ACTION, - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER), - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)); - -DECLARE_UVERBS_NAMED_METHOD( - MLX5_IB_METHOD_FLOW_MATCHER_CREATE, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, - MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN( - MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, - UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), - UA_MANDATORY), - UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, - mlx5_ib_flow_type, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, - UVERBS_ATTR_TYPE(u8), - UA_MANDATORY), - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, - enum ib_flow_flags, - UA_OPTIONAL), - UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, - enum mlx5_ib_uapi_flow_table_type, - UA_OPTIONAL)); - -DECLARE_UVERBS_NAMED_METHOD_DESTROY( - MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, - UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, - MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_ACCESS_DESTROY, - UA_MANDATORY)); - -DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, - UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup), - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), - &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); - -const struct uapi_definition mlx5_ib_flow_defs[] = { - UAPI_DEF_CHAIN_OBJ_TREE_NAMED( - MLX5_IB_OBJECT_FLOW_MATCHER), - UAPI_DEF_CHAIN_OBJ_TREE( - UVERBS_OBJECT_FLOW, - &mlx5_ib_fs), - UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, - &mlx5_ib_flow_actions), - {}, -}; diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c new file mode 100644 index 000000000000..e9cfb9a2ef41 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -0,0 +1,2516 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> +#include <rdma/uverbs_types.h> +#include <rdma/uverbs_ioctl.h> +#include <rdma/uverbs_std_types.h> +#include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> +#include <rdma/ib_umem.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/fs_helpers.h> +#include <linux/mlx5/accel.h> +#include <linux/mlx5/eswitch.h> +#include "mlx5_ib.h" +#include "counters.h" +#include "devx.h" +#include "fs.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + +enum { + MATCH_CRITERIA_ENABLE_OUTER_BIT, + MATCH_CRITERIA_ENABLE_MISC_BIT, + MATCH_CRITERIA_ENABLE_INNER_BIT, + MATCH_CRITERIA_ENABLE_MISC2_BIT +}; + +#define HEADER_IS_ZERO(match_criteria, headers) \ + !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ + 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ + +static u8 get_match_criteria_enable(u32 *match_criteria) +{ + u8 match_criteria_enable; + + match_criteria_enable = + (!HEADER_IS_ZERO(match_criteria, outer_headers)) << + MATCH_CRITERIA_ENABLE_OUTER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << + MATCH_CRITERIA_ENABLE_MISC_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, inner_headers)) << + MATCH_CRITERIA_ENABLE_INNER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << + MATCH_CRITERIA_ENABLE_MISC2_BIT; + + return match_criteria_enable; +} + +static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) +{ + u8 entry_mask; + u8 entry_val; + int err = 0; + + if (!mask) + goto out; + + entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, + ip_protocol); + entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, + ip_protocol); + if (!entry_mask) { + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); + goto out; + } + /* Don't override existing ip protocol */ + if (mask != entry_mask || val != entry_val) + err = -EINVAL; +out: + return err; +} + +static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, + bool inner) +{ + if (inner) { + MLX5_SET(fte_match_set_misc, + misc_c, inner_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, inner_ipv6_flow_label, val); + } else { + MLX5_SET(fte_match_set_misc, + misc_c, outer_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, outer_ipv6_flow_label, val); + } +} + +static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) +{ + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); +} + +static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) +{ + if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) + return -EOPNOTSUPP; + + return 0; +} + +#define LAST_ETH_FIELD vlan_tag +#define LAST_IB_FIELD sl +#define LAST_IPV4_FIELD tos +#define LAST_IPV6_FIELD traffic_class +#define LAST_TCP_UDP_FIELD src_port +#define LAST_TUNNEL_FIELD tunnel_id +#define LAST_FLOW_TAG_FIELD tag_id +#define LAST_DROP_FIELD size +#define LAST_COUNTERS_FIELD counters + +/* Field is the last supported field */ +#define FIELDS_NOT_SUPPORTED(filter, field)\ + memchr_inv((void *)&filter.field +\ + sizeof(filter.field), 0,\ + sizeof(filter) -\ + offsetof(typeof(filter), field) -\ + sizeof(filter.field)) + +int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, + bool is_egress, + struct mlx5_flow_act *action) +{ + + switch (maction->ib_action.type) { + case IB_FLOW_ACTION_ESP: + if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) + return -EINVAL; + /* Currently only AES_GCM keymat is supported by the driver */ + action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; + action->action |= is_egress ? + MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : + MLX5_FLOW_CONTEXT_ACTION_DECRYPT; + return 0; + case IB_FLOW_ACTION_UNSPECIFIED: + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { + if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + return -EINVAL; + action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + action->modify_hdr = + maction->flow_action_raw.modify_hdr; + return 0; + } + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_DECAP) { + if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + return -EINVAL; + action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + return 0; + } + if (maction->flow_action_raw.sub_type == + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { + if (action->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) + return -EINVAL; + action->action |= + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + action->pkt_reformat = + maction->flow_action_raw.pkt_reformat; + return 0; + } + fallthrough; + default: + return -EOPNOTSUPP; + } +} + +static int parse_flow_attr(struct mlx5_core_dev *mdev, + struct mlx5_flow_spec *spec, + const union ib_flow_spec *ib_spec, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_act *action, u32 prev_type) +{ + struct mlx5_flow_context *flow_context = &spec->flow_context; + u32 *match_c = spec->match_criteria; + u32 *match_v = spec->match_value; + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters); + void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters_2); + void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters_2); + void *headers_c; + void *headers_v; + int match_ipv; + int ret; + + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + inner_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + inner_headers); + match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version); + } else { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version); + } + + switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { + case IB_FLOW_SPEC_ETH: + if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) + return -EOPNOTSUPP; + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dmac_47_16), + ib_spec->eth.mask.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), + ib_spec->eth.val.dst_mac); + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + smac_47_16), + ib_spec->eth.mask.src_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + smac_47_16), + ib_spec->eth.val.src_mac); + + if (ib_spec->eth.mask.vlan_tag) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + cvlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + cvlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + first_vid, ntohs(ib_spec->eth.val.vlan_tag)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + first_cfi, + ntohs(ib_spec->eth.mask.vlan_tag) >> 12); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + first_cfi, + ntohs(ib_spec->eth.val.vlan_tag) >> 12); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + first_prio, + ntohs(ib_spec->eth.mask.vlan_tag) >> 13); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + first_prio, + ntohs(ib_spec->eth.val.vlan_tag) >> 13); + } + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ethertype, ntohs(ib_spec->eth.mask.ether_type)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ethertype, ntohs(ib_spec->eth.val.ether_type)); + break; + case IB_FLOW_SPEC_IPV4: + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) + return -EOPNOTSUPP; + + if (match_ipv) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ip_version, 0xf); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ip_version, MLX5_FS_IPV4_VERSION); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ethertype, ETH_P_IP); + } + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.src_ip, + sizeof(ib_spec->ipv4.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.src_ip, + sizeof(ib_spec->ipv4.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.mask.dst_ip, + sizeof(ib_spec->ipv4.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ib_spec->ipv4.val.dst_ip, + sizeof(ib_spec->ipv4.val.dst_ip)); + + set_tos(headers_c, headers_v, + ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); + + if (set_proto(headers_c, headers_v, + ib_spec->ipv4.mask.proto, + ib_spec->ipv4.val.proto)) + return -EINVAL; + break; + case IB_FLOW_SPEC_IPV6: + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) + return -EOPNOTSUPP; + + if (match_ipv) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ip_version, 0xf); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ip_version, MLX5_FS_IPV6_VERSION); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ethertype, ETH_P_IPV6); + } + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.mask.src_ip, + sizeof(ib_spec->ipv6.mask.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.val.src_ip, + sizeof(ib_spec->ipv6.val.src_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.mask.dst_ip, + sizeof(ib_spec->ipv6.mask.dst_ip)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &ib_spec->ipv6.val.dst_ip, + sizeof(ib_spec->ipv6.val.dst_ip)); + + set_tos(headers_c, headers_v, + ib_spec->ipv6.mask.traffic_class, + ib_spec->ipv6.val.traffic_class); + + if (set_proto(headers_c, headers_v, + ib_spec->ipv6.mask.next_hdr, + ib_spec->ipv6.val.next_hdr)) + return -EINVAL; + + set_flow_label(misc_params_c, misc_params_v, + ntohl(ib_spec->ipv6.mask.flow_label), + ntohl(ib_spec->ipv6.val.flow_label), + ib_spec->type & IB_FLOW_SPEC_INNER); + break; + case IB_FLOW_SPEC_ESP: + if (ib_spec->esp.mask.seq) + return -EOPNOTSUPP; + + MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, + ntohl(ib_spec->esp.mask.spi)); + MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, + ntohl(ib_spec->esp.val.spi)); + break; + case IB_FLOW_SPEC_TCP: + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -EOPNOTSUPP; + + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + case IB_FLOW_SPEC_UDP: + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -EOPNOTSUPP; + + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, + ntohs(ib_spec->tcp_udp.mask.src_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, + ntohs(ib_spec->tcp_udp.val.src_port)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, + ntohs(ib_spec->tcp_udp.mask.dst_port)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + ntohs(ib_spec->tcp_udp.val.dst_port)); + break; + case IB_FLOW_SPEC_GRE: + if (ib_spec->gre.mask.c_ks_res0_ver) + return -EOPNOTSUPP; + + if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE)) + return -EINVAL; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + IPPROTO_GRE); + + MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, + ntohs(ib_spec->gre.mask.protocol)); + MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, + ntohs(ib_spec->gre.val.protocol)); + + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, + gre_key.nvgre.hi), + &ib_spec->gre.mask.key, + sizeof(ib_spec->gre.mask.key)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, + gre_key.nvgre.hi), + &ib_spec->gre.val.key, + sizeof(ib_spec->gre.val.key)); + break; + case IB_FLOW_SPEC_MPLS: + switch (prev_type) { + case IB_FLOW_SPEC_UDP: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_udp), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + case IB_FLOW_SPEC_GRE: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_gre), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + default: + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_first_mpls), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + inner_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + inner_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } else { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } + } + break; + case IB_FLOW_SPEC_VXLAN_TUNNEL: + if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, + LAST_TUNNEL_FIELD)) + return -EOPNOTSUPP; + + MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, + ntohl(ib_spec->tunnel.mask.tunnel_id)); + MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, + ntohl(ib_spec->tunnel.val.tunnel_id)); + break; + case IB_FLOW_SPEC_ACTION_TAG: + if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag, + LAST_FLOW_TAG_FIELD)) + return -EOPNOTSUPP; + if (ib_spec->flow_tag.tag_id >= BIT(24)) + return -EINVAL; + + flow_context->flow_tag = ib_spec->flow_tag.tag_id; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + break; + case IB_FLOW_SPEC_ACTION_DROP: + if (FIELDS_NOT_SUPPORTED(ib_spec->drop, + LAST_DROP_FIELD)) + return -EOPNOTSUPP; + action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + break; + case IB_FLOW_SPEC_ACTION_HANDLE: + ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act), + flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action); + if (ret) + return ret; + break; + case IB_FLOW_SPEC_ACTION_COUNT: + if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count, + LAST_COUNTERS_FIELD)) + return -EOPNOTSUPP; + + /* for now support only one counters spec per flow */ + if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + return -EINVAL; + + action->counters = ib_spec->flow_count.counters; + action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* If a flow could catch both multicast and unicast packets, + * it won't fall into the multicast flow steering table and this rule + * could steal other multicast packets. + */ +static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) +{ + union ib_flow_spec *flow_spec; + + if (ib_attr->type != IB_FLOW_ATTR_NORMAL || + ib_attr->num_of_specs < 1) + return false; + + flow_spec = (union ib_flow_spec *)(ib_attr + 1); + if (flow_spec->type == IB_FLOW_SPEC_IPV4) { + struct ib_flow_spec_ipv4 *ipv4_spec; + + ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; + if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) + return true; + + return false; + } + + if (flow_spec->type == IB_FLOW_SPEC_ETH) { + struct ib_flow_spec_eth *eth_spec; + + eth_spec = (struct ib_flow_spec_eth *)flow_spec; + return is_multicast_ether_addr(eth_spec->mask.dst_mac) && + is_multicast_ether_addr(eth_spec->val.dst_mac); + } + + return false; +} + +enum valid_spec { + VALID_SPEC_INVALID, + VALID_SPEC_VALID, + VALID_SPEC_NA, +}; + +static enum valid_spec +is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, + const struct mlx5_flow_spec *spec, + const struct mlx5_flow_act *flow_act, + bool egress) +{ + const u32 *match_c = spec->match_criteria; + bool is_crypto = + (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); + bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); + bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; + + /* + * Currently only crypto is supported in egress, when regular egress + * rules would be supported, always return VALID_SPEC_NA. + */ + if (!is_crypto) + return VALID_SPEC_NA; + + return is_crypto && is_ipsec && + (!egress || (!is_drop && + !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? + VALID_SPEC_VALID : VALID_SPEC_INVALID; +} + +static bool is_valid_spec(struct mlx5_core_dev *mdev, + const struct mlx5_flow_spec *spec, + const struct mlx5_flow_act *flow_act, + bool egress) +{ + /* We curretly only support ipsec egress flow */ + return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; +} + +static bool is_valid_ethertype(struct mlx5_core_dev *mdev, + const struct ib_flow_attr *flow_attr, + bool check_inner) +{ + union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); + int match_ipv = check_inner ? + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version) : + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version); + int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0; + bool ipv4_spec_valid, ipv6_spec_valid; + unsigned int ip_spec_type = 0; + bool has_ethertype = false; + unsigned int spec_index; + bool mask_valid = true; + u16 eth_type = 0; + bool type_valid; + + /* Validate that ethertype is correct */ + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) && + ib_spec->eth.mask.ether_type) { + mask_valid = (ib_spec->eth.mask.ether_type == + htons(0xffff)); + has_ethertype = true; + eth_type = ntohs(ib_spec->eth.val.ether_type); + } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) || + (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) { + ip_spec_type = ib_spec->type; + } + ib_spec = (void *)ib_spec + ib_spec->size; + } + + type_valid = (!has_ethertype) || (!ip_spec_type); + if (!type_valid && mask_valid) { + ipv4_spec_valid = (eth_type == ETH_P_IP) && + (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit)); + ipv6_spec_valid = (eth_type == ETH_P_IPV6) && + (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit)); + + type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) || + (((eth_type == ETH_P_MPLS_UC) || + (eth_type == ETH_P_MPLS_MC)) && match_ipv); + } + + return type_valid; +} + +static bool is_valid_attr(struct mlx5_core_dev *mdev, + const struct ib_flow_attr *flow_attr) +{ + return is_valid_ethertype(mdev, flow_attr, false) && + is_valid_ethertype(mdev, flow_attr, true); +} + +static void put_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *prio, bool ft_added) +{ + prio->refcount -= !!ft_added; + if (!prio->refcount) { + mlx5_destroy_flow_table(prio->flow_table); + prio->flow_table = NULL; + } +} + +static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) +{ + struct mlx5_ib_flow_handler *handler = container_of(flow_id, + struct mlx5_ib_flow_handler, + ibflow); + struct mlx5_ib_flow_handler *iter, *tmp; + struct mlx5_ib_dev *dev = handler->dev; + + mutex_lock(&dev->flow_db->lock); + + list_for_each_entry_safe(iter, tmp, &handler->list, list) { + mlx5_del_flow_rules(iter->rule); + put_flow_table(dev, iter->prio, true); + list_del(&iter->list); + kfree(iter); + } + + mlx5_del_flow_rules(handler->rule); + put_flow_table(dev, handler->prio, true); + mlx5_ib_counters_clear_description(handler->ibcounters); + mutex_unlock(&dev->flow_db->lock); + if (handler->flow_matcher) + atomic_dec(&handler->flow_matcher->usecnt); + kfree(handler); + + return 0; +} + +static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) +{ + priority *= 2; + if (!dont_trap) + priority++; + return priority; +} + +enum flow_table_type { + MLX5_IB_FT_RX, + MLX5_IB_FT_TX +}; + +#define MLX5_FS_MAX_TYPES 6 +#define MLX5_FS_MAX_ENTRIES BIT(16) + +static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, + struct mlx5_ib_flow_prio *prio, + int priority, + int num_entries, int num_groups, + u32 flags) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *ft; + + ft_attr.prio = priority; + ft_attr.max_fte = num_entries; + ft_attr.flags = flags; + ft_attr.autogroup.max_num_groups = num_groups; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) + return ERR_CAST(ft); + + prio->flow_table = ft; + prio->refcount = 0; + return prio; +} + +static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, + struct ib_flow_attr *flow_attr, + enum flow_table_type ft_type) +{ + bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio; + struct mlx5_flow_table *ft; + int max_table_size; + int num_entries; + int num_groups; + bool esw_encap; + u32 flags = 0; + int priority; + + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + log_max_ft_size)); + esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != + DEVLINK_ESWITCH_ENCAP_MODE_NONE; + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + enum mlx5_flow_namespace_type fn_type; + + if (flow_is_multicast_only(flow_attr) && + !dont_trap) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(flow_attr->priority, + dont_trap); + if (ft_type == MLX5_IB_FT_RX) { + fn_type = MLX5_FLOW_NAMESPACE_BYPASS; + prio = &dev->flow_db->prios[priority]; + if (!dev->is_rep && !esw_encap && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (!dev->is_rep && !esw_encap && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + reformat_l3_tunnel_to_l2)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, + log_max_ft_size)); + fn_type = MLX5_FLOW_NAMESPACE_EGRESS; + prio = &dev->flow_db->egress_prios[priority]; + if (!dev->is_rep && !esw_encap && + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); + num_entries = MLX5_FS_MAX_ENTRIES; + num_groups = MLX5_FS_MAX_TYPES; + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + ns = mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_LEFTOVERS); + build_leftovers_ft_param(&priority, + &num_entries, + &num_groups); + prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + allow_sniffer_and_nic_rx_shared_tir)) + return ERR_PTR(-EOPNOTSUPP); + + ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? + MLX5_FLOW_NAMESPACE_SNIFFER_RX : + MLX5_FLOW_NAMESPACE_SNIFFER_TX); + + prio = &dev->flow_db->sniffer[ft_type]; + priority = 0; + num_entries = 1; + num_groups = 1; + } + + if (!ns) + return ERR_PTR(-EOPNOTSUPP); + + max_table_size = min_t(int, num_entries, max_table_size); + + ft = prio->flow_table; + if (!ft) + return _get_prio(ns, prio, priority, max_table_size, num_groups, + flags); + + return prio; +} + +static void set_underlay_qp(struct mlx5_ib_dev *dev, + struct mlx5_flow_spec *spec, + u32 underlay_qpn) +{ + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (underlay_qpn && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + ft_field_support.bth_dst_qp)) { + MLX5_SET(fte_match_set_misc, + misc_params_v, bth_dst_qp, underlay_qpn); + MLX5_SET(fte_match_set_misc, + misc_params_c, bth_dst_qp, 0xffffff); + } +} + +static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, + struct mlx5_flow_spec *spec, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, + rep->vport)); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + } +} + +static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst, + u32 underlay_qpn, + struct mlx5_ib_create_flow *ucmd) +{ + struct mlx5_flow_table *ft = ft_prio->flow_table; + struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + struct mlx5_flow_destination dest_arr[2] = {}; + struct mlx5_flow_destination *rule_dst = dest_arr; + const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); + unsigned int spec_index; + u32 prev_type = 0; + int err = 0; + int dest_num = 0; + bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; + + if (!is_valid_attr(dev->mdev, flow_attr)) + return ERR_PTR(-EINVAL); + + if (dev->is_rep && is_egress) + return ERR_PTR(-EINVAL); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !spec) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { + err = parse_flow_attr(dev->mdev, spec, + ib_flow, flow_attr, &flow_act, + prev_type); + if (err < 0) + goto free; + + prev_type = ((union ib_flow_spec *)ib_flow)->type; + ib_flow += ((union ib_flow_spec *)ib_flow)->size; + } + + if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) { + memcpy(&dest_arr[0], dst, sizeof(*dst)); + dest_num++; + } + + if (!flow_is_multicast_only(flow_attr)) + set_underlay_qp(dev, spec, underlay_qpn); + + if (dev->is_rep) { + struct mlx5_eswitch_rep *rep; + + rep = dev->port[flow_attr->port - 1].rep; + if (!rep) { + err = -EINVAL; + goto free; + } + + mlx5_ib_set_rule_source_port(dev, spec, rep); + } + + spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); + + if (is_egress && + !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { + err = -EINVAL; + goto free; + } + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + struct mlx5_ib_mcounters *mcounters; + + err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd); + if (err) + goto free; + + mcounters = to_mcounters(flow_act.counters); + handler->ibcounters = flow_act.counters; + dest_arr[dest_num].type = + MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest_arr[dest_num].counter_id = + mlx5_fc_id(mcounters->hw_cntrs_hndl); + dest_num++; + } + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { + if (!dest_num) + rule_dst = NULL; + } else { + if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) + flow_act.action |= + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + if (is_egress) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; + else if (dest_num) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) && + (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { + mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", + spec->flow_context.flow_tag, flow_attr->type); + err = -EINVAL; + goto free; + } + handler->rule = mlx5_add_flow_rules(ft, spec, + &flow_act, + rule_dst, dest_num); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + ft_prio->refcount++; + handler->prio = ft_prio; + handler->dev = dev; + + ft_prio->flow_table = ft; +free: + if (err && handler) { + mlx5_ib_counters_clear_description(handler->ibcounters); + kfree(handler); + } + kvfree(spec); + return err ? ERR_PTR(err) : handler; +} + +static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); +} + +enum { + LEFTOVERS_MC, + LEFTOVERS_UC, +}; + +static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_ucast = NULL; + struct mlx5_ib_flow_handler *handler = NULL; + + static struct { + struct ib_flow_attr flow_attr; + struct ib_flow_spec_eth eth_flow; + } leftovers_specs[] = { + [LEFTOVERS_MC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {0x1} } + } + }, + [LEFTOVERS_UC] = { + .flow_attr = { + .num_of_specs = 1, + .size = sizeof(leftovers_specs[0]) + }, + .eth_flow = { + .type = IB_FLOW_SPEC_ETH, + .size = sizeof(struct ib_flow_spec_eth), + .mask = {.dst_mac = {0x1} }, + .val = {.dst_mac = {} } + } + } + }; + + handler = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_MC].flow_attr, + dst); + if (!IS_ERR(handler) && + flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { + handler_ucast = create_flow_rule(dev, ft_prio, + &leftovers_specs[LEFTOVERS_UC].flow_attr, + dst); + if (IS_ERR(handler_ucast)) { + mlx5_del_flow_rules(handler->rule); + ft_prio->refcount--; + kfree(handler); + handler = handler_ucast; + } else { + list_add(&handler_ucast->list, &handler->list); + } + } + + return handler; +} + +static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_rx, + struct mlx5_ib_flow_prio *ft_tx, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_rx; + struct mlx5_ib_flow_handler *handler_tx; + int err; + static const struct ib_flow_attr flow_attr = { + .num_of_specs = 0, + .size = sizeof(flow_attr) + }; + + handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); + if (IS_ERR(handler_rx)) { + err = PTR_ERR(handler_rx); + goto err; + } + + handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); + if (IS_ERR(handler_tx)) { + err = PTR_ERR(handler_tx); + goto err_tx; + } + + list_add(&handler_tx->list, &handler_rx->list); + + return handler_rx; + +err_tx: + mlx5_del_flow_rules(handler_rx->rule); + ft_rx->refcount--; + kfree(handler_rx); +err: + return ERR_PTR(err); +} + + +static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_flow_handler *handler = NULL; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_ib_flow_prio *ft_prio_tx = NULL; + struct mlx5_ib_flow_prio *ft_prio; + bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; + struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr; + size_t min_ucmd_sz, required_ucmd_sz; + int err; + int underlay_qpn; + + if (udata && udata->inlen) { + min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + + sizeof(ucmd_hdr.reserved); + if (udata->inlen < min_ucmd_sz) + return ERR_PTR(-EOPNOTSUPP); + + err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz); + if (err) + return ERR_PTR(err); + + /* currently supports only one counters data */ + if (ucmd_hdr.ncounters_data > 1) + return ERR_PTR(-EINVAL); + + required_ucmd_sz = min_ucmd_sz + + sizeof(struct mlx5_ib_flow_counters_data) * + ucmd_hdr.ncounters_data; + if (udata->inlen > required_ucmd_sz && + !ib_is_udata_cleared(udata, required_ucmd_sz, + udata->inlen - required_ucmd_sz)) + return ERR_PTR(-EOPNOTSUPP); + + ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL); + if (!ucmd) + return ERR_PTR(-ENOMEM); + + err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); + if (err) + goto free_ucmd; + } + + if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) { + err = -ENOMEM; + goto free_ucmd; + } + + if (domain != IB_FLOW_DOMAIN_USER || + flow_attr->port > dev->num_ports || + (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | + IB_FLOW_ATTR_FLAGS_EGRESS))) { + err = -EINVAL; + goto free_ucmd; + } + + if (is_egress && + (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { + err = -EINVAL; + goto free_ucmd; + } + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) { + err = -ENOMEM; + goto free_ucmd; + } + + mutex_lock(&dev->flow_db->lock); + + ft_prio = get_flow_table(dev, flow_attr, + is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); + if (IS_ERR(ft_prio_tx)) { + err = PTR_ERR(ft_prio_tx); + ft_prio_tx = NULL; + goto destroy_ft; + } + } + + if (is_egress) { + dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; + } else { + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + if (mqp->is_rss) + dst->tir_num = mqp->rss_qp.tirn; + else + dst->tir_num = mqp->raw_packet_qp.rq.tirn; + } + + if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? + mqp->underlay_qpn : + 0; + handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, + underlay_qpn, ucmd); + } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + handler = create_leftovers_rule(dev, ft_prio, flow_attr, + dst); + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); + } else { + err = -EINVAL; + goto destroy_ft; + } + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + handler = NULL; + goto destroy_ft; + } + + mutex_unlock(&dev->flow_db->lock); + kfree(dst); + kfree(ucmd); + + return &handler->ibflow; + +destroy_ft: + put_flow_table(dev, ft_prio, false); + if (ft_prio_tx) + put_flow_table(dev, ft_prio_tx, false); +unlock: + mutex_unlock(&dev->flow_db->lock); + kfree(dst); +free_ucmd: + kfree(ucmd); + return ERR_PTR(err); +} + +static struct mlx5_ib_flow_prio * +_get_flow_table(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_matcher *fs_matcher, + bool mcast) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio = NULL; + int max_table_size = 0; + bool esw_encap; + u32 flags = 0; + int priority; + + if (mcast) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(fs_matcher->priority, false); + + esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != + DEVLINK_ESWITCH_ENCAP_MODE_NONE; + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + log_max_ft_size)); + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + reformat_l3_tunnel_to_l2) && + !esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { + max_table_size = BIT( + MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && + esw_encap) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + priority = FDB_BYPASS_PATH; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + log_max_ft_size)); + priority = fs_matcher->priority; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + log_max_ft_size)); + priority = fs_matcher->priority; + } + + max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); + + ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); + if (!ns) + return ERR_PTR(-EOPNOTSUPP); + + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) + prio = &dev->flow_db->prios[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + prio = &dev->flow_db->egress_prios[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) + prio = &dev->flow_db->fdb; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) + prio = &dev->flow_db->rdma_rx[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) + prio = &dev->flow_db->rdma_tx[priority]; + + if (!prio) + return ERR_PTR(-EINVAL); + + if (prio->flow_table) + return prio; + + return _get_prio(ns, prio, priority, max_table_size, + MLX5_FS_MAX_TYPES, flags); +} + +static struct mlx5_ib_flow_handler * +_create_raw_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct mlx5_flow_destination *dst, + struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, + struct mlx5_flow_act *flow_act, + void *cmd_in, int inlen, + int dst_num) +{ + struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft = ft_prio->flow_table; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !spec) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + memcpy(spec->match_value, cmd_in, inlen); + memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, + fs_matcher->mask_len); + spec->match_criteria_enable = fs_matcher->match_criteria_enable; + spec->flow_context = *flow_context; + + handler->rule = mlx5_add_flow_rules(ft, spec, + flow_act, dst, dst_num); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + ft_prio->refcount++; + handler->prio = ft_prio; + handler->dev = dev; + ft_prio->flow_table = ft; + +free: + if (err) + kfree(handler); + kvfree(spec); + return err ? ERR_PTR(err) : handler; +} + +static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, + void *match_v) +{ + void *match_c; + void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; + void *dmac, *dmac_mask; + void *ipv4, *ipv4_mask; + + if (!(fs_matcher->match_criteria_enable & + (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) + return false; + + match_c = fs_matcher->matcher_mask.match_params; + match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + + dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dmac_47_16); + dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dmac_47_16); + + if (is_multicast_ether_addr(dmac) && + is_multicast_ether_addr(dmac_mask)) + return true; + + ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + if (ipv4_is_multicast(*(__be32 *)(ipv4)) && + ipv4_is_multicast(*(__be32 *)(ipv4_mask))) + return true; + + return false; +} + +static struct mlx5_ib_flow_handler *raw_fs_rule_add( + struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, + u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type) +{ + struct mlx5_flow_destination *dst; + struct mlx5_ib_flow_prio *ft_prio; + struct mlx5_ib_flow_handler *handler; + int dst_num = 0; + bool mcast; + int err; + + if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) + return ERR_PTR(-EOPNOTSUPP); + + if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) + return ERR_PTR(-ENOMEM); + + dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + mcast = raw_fs_is_multicast(fs_matcher, cmd_in); + mutex_lock(&dev->flow_db->lock); + + ft_prio = _get_flow_table(dev, fs_matcher, mcast); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + + if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { + dst[dst_num].type = dest_type; + dst[dst_num++].tir_num = dest_id; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; + dst[dst_num++].ft_num = dest_id; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) { + dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; + } + + + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst[dst_num].counter_id = counter_id; + dst_num++; + } + + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, + flow_context, flow_act, + cmd_in, inlen, dst_num); + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + goto destroy_ft; + } + + mutex_unlock(&dev->flow_db->lock); + atomic_inc(&fs_matcher->usecnt); + handler->flow_matcher = fs_matcher; + + kfree(dst); + + return handler; + +destroy_ft: + put_flow_table(dev, ft_prio, false); +unlock: + mutex_unlock(&dev->flow_db->lock); + kfree(dst); + + return ERR_PTR(err); +} + +static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) +{ + u32 flags = 0; + + if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) + flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; + + return flags; +} + +#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \ + MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA +static struct ib_flow_action * +mlx5_ib_create_flow_action_esp(struct ib_device *device, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dev *mdev = to_mdev(device); + struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; + struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; + struct mlx5_ib_flow_action *action; + u64 action_flags; + u64 flags; + int err = 0; + + err = uverbs_get_flags64( + &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); + if (err) + return ERR_PTR(err); + + flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); + + /* We current only support a subset of the standard features. Only a + * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn + * (with overlap). Full offload mode isn't supported. + */ + if (!attr->keymat || attr->replay || attr->encap || + attr->spi || attr->seq || attr->tfc_pad || + attr->hard_limit_pkts || + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) + return ERR_PTR(-EOPNOTSUPP); + + if (attr->keymat->protocol != + IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) + return ERR_PTR(-EOPNOTSUPP); + + aes_gcm = &attr->keymat->keymat.aes_gcm; + + if (aes_gcm->icv_len != 16 || + aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) + return ERR_PTR(-EOPNOTSUPP); + + action = kmalloc(sizeof(*action), GFP_KERNEL); + if (!action) + return ERR_PTR(-ENOMEM); + + action->esp_aes_gcm.ib_flags = attr->flags; + memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, + sizeof(accel_attrs.keymat.aes_gcm.aes_key)); + accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; + memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, + sizeof(accel_attrs.keymat.aes_gcm.salt)); + memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, + sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); + accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; + accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; + accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; + + accel_attrs.esn = attr->esn; + if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) + accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; + + action->esp_aes_gcm.ctx = + mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); + if (IS_ERR(action->esp_aes_gcm.ctx)) { + err = PTR_ERR(action->esp_aes_gcm.ctx); + goto err_parse; + } + + action->esp_aes_gcm.ib_flags = attr->flags; + + return &action->ib_action; + +err_parse: + kfree(action); + return ERR_PTR(err); +} + +static int +mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(action); + struct mlx5_accel_esp_xfrm_attrs accel_attrs; + int err = 0; + + if (attr->keymat || attr->replay || attr->encap || + attr->spi || attr->seq || attr->tfc_pad || + attr->hard_limit_pkts || + (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) + return -EOPNOTSUPP; + + /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can + * be modified. + */ + if (!(maction->esp_aes_gcm.ib_flags & + IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && + attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | + IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) + return -EINVAL; + + memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, + sizeof(accel_attrs)); + + accel_attrs.esn = attr->esn; + if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) + accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + else + accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; + + err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, + &accel_attrs); + if (err) + return err; + + maction->esp_aes_gcm.ib_flags &= + ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; + maction->esp_aes_gcm.ib_flags |= + attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; + + return 0; +} + +static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) +{ + switch (maction->flow_action_raw.sub_type) { + case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: + mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, + maction->flow_action_raw.modify_hdr); + break; + case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: + mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, + maction->flow_action_raw.pkt_reformat); + break; + case MLX5_IB_FLOW_ACTION_DECAP: + break; + default: + break; + } +} + +static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) +{ + struct mlx5_ib_flow_action *maction = to_mflow_act(action); + + switch (action->type) { + case IB_FLOW_ACTION_ESP: + /* + * We only support aes_gcm by now, so we implicitly know this is + * the underline crypto. + */ + mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); + break; + case IB_FLOW_ACTION_UNSPECIFIED: + destroy_flow_action_raw(maction); + break; + default: + WARN_ON(true); + break; + } + + kfree(maction); + return 0; +} + +static int +mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, + enum mlx5_flow_namespace_type *namespace) +{ + switch (table_type) { + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX: + *namespace = MLX5_FLOW_NAMESPACE_BYPASS; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: + *namespace = MLX5_FLOW_NAMESPACE_EGRESS; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: + *namespace = MLX5_FLOW_NAMESPACE_FDB; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX; + break; + default: + return -EINVAL; + } + + return 0; +} + +static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { + [MLX5_IB_FLOW_TYPE_NORMAL] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + .u.ptr = { + .len = sizeof(u16), /* data is priority */ + .min_len = sizeof(u16), + } + }, + [MLX5_IB_FLOW_TYPE_SNIFFER] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, +}; + +static bool is_flow_dest(void *obj, int *dest_id, int *dest_type) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + switch (opcode) { + case MLX5_CMD_OP_DESTROY_TIR: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, + obj_id); + return true; + + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox, + table_id); + return true; + default: + return false; + } +} + +static int get_dests(struct uverbs_attr_bundle *attrs, + struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id, + int *dest_type, struct ib_qp **qp, u32 *flags) +{ + bool dest_devx, dest_qp; + void *devx_obj; + int err; + + dest_devx = uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + dest_qp = uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + + *flags = 0; + err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS, + MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS | + MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP); + if (err) + return err; + + /* Both flags are not allowed */ + if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS && + *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) + return -EINVAL; + + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { + if (dest_devx && (dest_qp || *flags)) + return -EINVAL; + else if (dest_qp && *flags) + return -EINVAL; + } + + /* Allow only DEVX object, drop as dest for FDB */ + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx || + (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP))) + return -EINVAL; + + /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ + if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && + ((!dest_devx && !dest_qp) || (dest_devx && dest_qp))) + return -EINVAL; + + *qp = NULL; + if (dest_devx) { + devx_obj = + uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + + /* Verify that the given DEVX object is a flow + * steering destination. + */ + if (!is_flow_dest(devx_obj, dest_id, dest_type)) + return -EINVAL; + /* Allow only flow table as dest when inserting to FDB or RDMA_RX */ + if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && + *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + return -EINVAL; + } else if (dest_qp) { + struct mlx5_ib_qp *mqp; + + *qp = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + if (IS_ERR(*qp)) + return PTR_ERR(*qp); + + if ((*qp)->qp_type != IB_QPT_RAW_PACKET) + return -EINVAL; + + mqp = to_mqp(*qp); + if (mqp->is_rss) + *dest_id = mqp->rss_qp.tirn; + else + *dest_id = mqp->raw_packet_qp.rq.tirn; + *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { + *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; + } + + if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && + (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)) + return -EINVAL; + + return 0; +} + +static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + + if (offset && offset >= devx_obj->flow_counter_bulk_size) + return false; + + *counter_id = MLX5_GET(dealloc_flow_counter_in, + devx_obj->dinbox, + flow_counter_id); + *counter_id += offset; + return true; + } + + return false; +} + +#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 +static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_flow_context flow_context = {.flow_tag = + MLX5_FS_DEFAULT_FLOW_TAG}; + u32 *offset_attr, offset = 0, counter_id = 0; + int dest_id, dest_type = -1, inlen, len, ret, i; + struct mlx5_ib_flow_handler *flow_handler; + struct mlx5_ib_flow_matcher *fs_matcher; + struct ib_uobject **arr_flow_actions; + struct ib_uflow_resources *uflow_res; + struct mlx5_flow_act flow_act = {}; + struct ib_qp *qp = NULL; + void *devx_obj, *cmd_in; + struct ib_uobject *uobj; + struct mlx5_ib_dev *dev; + u32 flags; + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + fs_matcher = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCHER); + uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); + dev = mlx5_udata_to_mdev(&attrs->driver_udata); + + if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags)) + return -EINVAL; + + if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; + + if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); + if (len) { + devx_obj = arr_flow_actions[0]->object; + + if (uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) { + + int num_offsets = uverbs_attr_ptr_get_array_size( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + sizeof(u32)); + + if (num_offsets != 1) + return -EINVAL; + + offset_attr = uverbs_attr_get_alloced_ptr( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET); + offset = *offset_attr; + } + + if (!is_flow_counter(devx_obj, offset, &counter_id)) + return -EINVAL; + + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } + + cmd_in = uverbs_attr_get_alloced_ptr( + attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + inlen = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + + uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); + if (!uflow_res) + return -ENOMEM; + + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); + for (i = 0; i < len; i++) { + struct mlx5_ib_flow_action *maction = + to_mflow_act(arr_flow_actions[i]->object); + + ret = parse_flow_flow_action(maction, false, &flow_act); + if (ret) + goto err_out; + flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE, + arr_flow_actions[i]->object); + } + + ret = uverbs_copy_from(&flow_context.flow_tag, attrs, + MLX5_IB_ATTR_CREATE_FLOW_TAG); + if (!ret) { + if (flow_context.flow_tag >= BIT(24)) { + ret = -EINVAL; + goto err_out; + } + flow_context.flags |= FLOW_CONTEXT_HAS_TAG; + } + + flow_handler = + raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act, + counter_id, cmd_in, inlen, dest_id, dest_type); + if (IS_ERR(flow_handler)) { + ret = PTR_ERR(flow_handler); + goto err_out; + } + + ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res); + + return 0; +err_out: + ib_uverbs_flow_resources_free(uflow_res); + return ret; +} + +static int flow_matcher_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_flow_matcher *obj = uobject->object; + int ret; + + ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); + if (ret) + return ret; + + kfree(obj); + return 0; +} + +static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, + struct mlx5_ib_flow_matcher *obj) +{ + enum mlx5_ib_uapi_flow_table_type ft_type = + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX; + u32 flags; + int err; + + /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older + * users should switch to it. We leave this to not break userspace + */ + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) && + uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) + return -EINVAL; + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) { + err = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE); + if (err) + return err; + + err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type); + if (err) + return err; + + return 0; + } + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) { + err = uverbs_get_flags32(&flags, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + IB_FLOW_ATTR_FLAGS_EGRESS); + if (err) + return err; + + if (flags) { + mlx5_ib_ft_type_to_namespace( + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, + &obj->ns_type); + return 0; + } + } + + obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); + struct mlx5_ib_flow_matcher *obj; + int err; + + obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + obj->mask_len = uverbs_attr_get_len( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + err = uverbs_copy_from(&obj->matcher_mask, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + if (err) + goto end; + + obj->flow_type = uverbs_attr_get_enum_id( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + + if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) { + err = uverbs_copy_from(&obj->priority, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + if (err) + goto end; + } + + err = uverbs_copy_from(&obj->match_criteria_enable, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA); + if (err) + goto end; + + err = mlx5_ib_matcher_ns(attrs, obj); + if (err) + goto end; + + uobj->object = obj; + obj->mdev = dev->mdev; + atomic_set(&obj->usecnt, 0); + return 0; + +end: + kfree(obj); + return err; +} + +static struct ib_flow_action * +mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, + enum mlx5_ib_uapi_flow_table_type ft_type, + u8 num_actions, void *in) +{ + enum mlx5_flow_namespace_type namespace; + struct mlx5_ib_flow_action *maction; + int ret; + + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); + if (ret) + return ERR_PTR(-EINVAL); + + maction = kzalloc(sizeof(*maction), GFP_KERNEL); + if (!maction) + return ERR_PTR(-ENOMEM); + + maction->flow_action_raw.modify_hdr = + mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in); + + if (IS_ERR(maction->flow_action_raw.modify_hdr)) { + ret = PTR_ERR(maction->flow_action_raw.modify_hdr); + kfree(maction); + return ERR_PTR(ret); + } + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_MODIFY_HEADER; + maction->flow_action_raw.dev = dev; + + return &maction->ib_action; +} + +static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) +{ + return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + max_modify_header_actions) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, + max_modify_header_actions) || + MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + max_modify_header_actions); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); + enum mlx5_ib_uapi_flow_table_type ft_type; + struct ib_flow_action *action; + int num_actions; + void *in; + int ret; + + if (!mlx5_ib_modify_header_supported(mdev)) + return -EOPNOTSUPP; + + in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); + + num_actions = uverbs_attr_ptr_get_array_size( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)); + if (num_actions < 0) + return num_actions; + + ret = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); + if (ret) + return ret; + action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); + if (IS_ERR(action)) + return PTR_ERR(action); + + uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev, + IB_FLOW_ACTION_UNSPECIFIED); + + return 0; +} + +static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev, + u8 packet_reformat_type, + u8 ft_type) +{ + switch (packet_reformat_type) { + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) + return MLX5_CAP_FLOWTABLE(ibdev->mdev, + encap_general_header); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX) + return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev, + reformat_l2_to_l3_tunnel); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, + reformat_l3_tunnel_to_l2); + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2: + if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX) + return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap); + break; + default: + break; + } + + return false; +} + +static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt) +{ + switch (dv_prt) { + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + break; + case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int mlx5_ib_flow_action_create_packet_reformat_ctx( + struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_action *maction, + u8 ft_type, u8 dv_prt, + void *in, size_t len) +{ + enum mlx5_flow_namespace_type namespace; + u8 prm_prt; + int ret; + + ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace); + if (ret) + return ret; + + ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt); + if (ret) + return ret; + + maction->flow_action_raw.pkt_reformat = + mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, + in, namespace); + if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { + ret = PTR_ERR(maction->flow_action_raw.pkt_reformat); + return ret; + } + + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; + maction->flow_action_raw.dev = dev; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); + enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; + enum mlx5_ib_uapi_flow_table_type ft_type; + struct mlx5_ib_flow_action *maction; + int ret; + + ret = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE); + if (ret) + return ret; + + ret = uverbs_get_const(&dv_prt, attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE); + if (ret) + return ret; + + if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type)) + return -EOPNOTSUPP; + + maction = kzalloc(sizeof(*maction), GFP_KERNEL); + if (!maction) + return -ENOMEM; + + if (dv_prt == + MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) { + maction->flow_action_raw.sub_type = + MLX5_IB_FLOW_ACTION_DECAP; + maction->flow_action_raw.dev = mdev; + } else { + void *in; + int len; + + in = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); + if (IS_ERR(in)) { + ret = PTR_ERR(in); + goto free_maction; + } + + len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF); + + ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev, + maction, ft_type, dv_prt, in, len); + if (ret) + goto free_maction; + } + + uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev, + IB_FLOW_ACTION_UNSPECIFIED); + return 0; + +free_maction: + kfree(maction); + return ret; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_CREATE_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, + UVERBS_OBJECT_QP, + UVERBS_ACCESS_READ), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_READ, 1, + MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, 1, 1, + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), + UA_OPTIONAL, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS, + enum mlx5_ib_create_flow_flags, + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DESTROY_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +ADD_UVERBS_METHODS(mlx5_ib_fs, + UVERBS_OBJECT_FLOW, + &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), + &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES( + set_add_copy_action_in_auto)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF, + UVERBS_ATTR_MIN_SIZE(1), + UA_ALLOC_AND_COPY, + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE, + enum mlx5_ib_uapi_flow_action_packet_reformat_type, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_MANDATORY)); + +ADD_UVERBS_METHODS( + mlx5_ib_flow_actions, + UVERBS_OBJECT_FLOW_ACTION, + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_MATCHER_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY), + UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, + mlx5_ib_flow_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, + UVERBS_ATTR_TYPE(u8), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + enum ib_flow_flags, + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_OPTIONAL)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); + +const struct uapi_definition mlx5_ib_flow_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_FLOW_MATCHER), + UAPI_DEF_CHAIN_OBJ_TREE( + UVERBS_OBJECT_FLOW, + &mlx5_ib_fs), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, + &mlx5_ib_flow_actions), + {}, +}; + +static const struct ib_device_ops flow_ops = { + .create_flow = mlx5_ib_create_flow, + .destroy_flow = mlx5_ib_destroy_flow, + .destroy_flow_action = mlx5_ib_destroy_flow_action, +}; + +static const struct ib_device_ops flow_ipsec_ops = { + .create_flow_action_esp = mlx5_ib_create_flow_action_esp, + .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, +}; + +int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) +{ + dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); + + if (!dev->flow_db) + return -ENOMEM; + + mutex_init(&dev->flow_db->lock); + + ib_set_device_ops(&dev->ib_dev, &flow_ops); + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops); + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h new file mode 100644 index 000000000000..ad320adaf321 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/fs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + */ + +#ifndef _MLX5_IB_FS_H +#define _MLX5_IB_FS_H + +#include "mlx5_ib.h" + +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int mlx5_ib_fs_init(struct mlx5_ib_dev *dev); +#else +static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) +{ + dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); + + if (!dev->flow_db) + return -ENOMEM; + + mutex_init(&dev->flow_db->lock); + return 0; +} +#endif +static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev) +{ + kfree(dev->flow_db); +} +#endif /* _MLX5_IB_FS_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6f99ed03d88e..fbc45a5e76c5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. */ #include <linux/debugfs.h> @@ -59,10 +32,13 @@ #include "mlx5_ib.h" #include "ib_rep.h" #include "cmd.h" +#include "devx.h" +#include "fs.h" #include "srq.h" #include "qp.h" #include "wr.h" -#include <linux/mlx5/fs_helpers.h> +#include "restrack.h" +#include "counters.h" #include <linux/mlx5/accel.h> #include <rdma/uverbs_std_types.h> #include <rdma/mlx5_user_ioctl_verbs.h> @@ -311,9 +287,6 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev, *native_port_num = 1; port = &ibdev->port[ib_port_num - 1]; - if (!port) - return NULL; - spin_lock(&port->mp.mpi_lock); mpi = ibdev->port[ib_port_num - 1].mp.mpi; if (mpi && !mpi->unaffiliate) { @@ -1765,6 +1738,92 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn, mlx5_ib_disable_lb(dev, true, false); } +static int set_ucontext_resp(struct ib_ucontext *uctx, + struct mlx5_ib_alloc_ucontext_resp *resp) +{ + struct ib_device *ibdev = uctx->device; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_ucontext *context = to_mucontext(uctx); + struct mlx5_bfreg_info *bfregi = &context->bfregi; + int err; + + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + err = mlx5_cmd_dump_fill_mkey(dev->mdev, + &resp->dump_fill_mkey); + if (err) + return err; + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; + } + + resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); + if (dev->wc_support) + resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, + log_bf_reg_size); + resp->cache_line_size = cache_line_size(); + resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); + resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); + resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); + resp->cqe_version = context->cqe_version; + resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? + MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; + resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? + MLX5_CAP_GEN(dev->mdev, + num_of_uars_per_page) : 1; + + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) { + if (mlx5_get_flow_namespace(dev->mdev, + MLX5_FLOW_NAMESPACE_EGRESS)) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; + if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) + resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; + /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ + } + + resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 : + bfregi->total_num_bfregs - bfregi->num_dyn_bfregs; + resp->num_ports = dev->num_ports; + resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | + MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; + + if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { + mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline); + resp->eth_min_inline++; + } + + if (dev->mdev->clock_info) + resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); + + /* + * We don't want to expose information from the PCI bar that is located + * after 4096 bytes, so if the arch only supports larger pages, let's + * pretend we don't support reading the HCA's core clock. This is also + * forced by mmap function. + */ + if (PAGE_SIZE <= 4096) { + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; + resp->hca_core_clock_offset = + offsetof(struct mlx5_init_seg, + internal_timer_h) % PAGE_SIZE; + } + + if (MLX5_CAP_GEN(dev->mdev, ece_support)) + resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; + + resp->num_dyn_bfregs = bfregi->num_dyn_bfregs; + return 0; +} + static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -1772,14 +1831,12 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_alloc_ucontext_req_v2 req = {}; struct mlx5_ib_alloc_ucontext_resp resp = {}; - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_ucontext *context = to_mucontext(uctx); struct mlx5_bfreg_info *bfregi; int ver; int err; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); - u32 dump_fill_mkey; bool lib_uar_4k; bool lib_uar_dyn; @@ -1808,37 +1865,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, if (req.num_low_latency_bfregs > req.total_num_bfregs - 1) return -EINVAL; - resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - if (dev->wc_support) - resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); - resp.cache_line_size = cache_line_size(); - resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); - resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); - resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); - resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); - resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); - resp.cqe_version = min_t(__u8, - (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), - req.max_cqe_version); - resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? - MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; - resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? - MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1; - resp.response_length = min(offsetof(typeof(resp), response_length) + - sizeof(resp.response_length), udata->outlen); - - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) { - if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS)) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; - if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) - resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; - /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ - } - lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR; bfregi = &context->bfregi; @@ -1887,87 +1913,24 @@ uar_done: if (err) goto out_devx; - if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { - err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); - if (err) - goto out_mdev; - } - INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); - resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs; - resp.num_ports = dev->num_ports; - - if (offsetofend(typeof(resp), cqe_version) <= udata->outlen) - resp.response_length += sizeof(resp.cqe_version); - - if (offsetofend(typeof(resp), cmds_supp_uhw) <= udata->outlen) { - resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | - MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; - resp.response_length += sizeof(resp.cmds_supp_uhw); - } - - if (offsetofend(typeof(resp), eth_min_inline) <= udata->outlen) { - if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { - mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline); - resp.eth_min_inline++; - } - resp.response_length += sizeof(resp.eth_min_inline); - } - - if (offsetofend(typeof(resp), clock_info_versions) <= udata->outlen) { - if (mdev->clock_info) - resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); - resp.response_length += sizeof(resp.clock_info_versions); - } - - /* - * We don't want to expose information from the PCI bar that is located - * after 4096 bytes, so if the arch only supports larger pages, let's - * pretend we don't support reading the HCA's core clock. This is also - * forced by mmap function. - */ - if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) { - if (PAGE_SIZE <= 4096) { - resp.comp_mask |= - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; - resp.hca_core_clock_offset = - offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE; - } - resp.response_length += sizeof(resp.hca_core_clock_offset); - } - - if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen) - resp.response_length += sizeof(resp.log_uar_size); - - if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen) - resp.response_length += sizeof(resp.num_uars_per_page); - - if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) { - resp.num_dyn_bfregs = bfregi->num_dyn_bfregs; - resp.response_length += sizeof(resp.num_dyn_bfregs); - } - - if (offsetofend(typeof(resp), dump_fill_mkey) <= udata->outlen) { - if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { - resp.dump_fill_mkey = dump_fill_mkey; - resp.comp_mask |= - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; - } - resp.response_length += sizeof(resp.dump_fill_mkey); - } + context->cqe_version = min_t(__u8, + (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), + req.max_cqe_version); - if (MLX5_CAP_GEN(dev->mdev, ece_support)) - resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; + err = set_ucontext_resp(uctx, &resp); + if (err) + goto out_mdev; + resp.response_length = min(udata->outlen, sizeof(resp)); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto out_mdev; bfregi->ver = ver; bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; - context->cqe_version = resp.cqe_version; context->lib_caps = req.lib_caps; print_lib_caps(dev, context->lib_caps); @@ -2000,6 +1963,29 @@ out_ctx: return err; } +static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_alloc_ucontext_resp uctx_resp = {}; + int ret; + + ret = set_ucontext_resp(ibcontext, &uctx_resp); + if (ret) + return ret; + + uctx_resp.response_length = + min_t(size_t, + uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX), + sizeof(uctx_resp)); + + ret = uverbs_copy_to_struct_or_zero(attrs, + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, + &uctx_resp, + sizeof(uctx_resp)); + return ret; +} + static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -2591,1820 +2577,6 @@ static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); } -enum { - MATCH_CRITERIA_ENABLE_OUTER_BIT, - MATCH_CRITERIA_ENABLE_MISC_BIT, - MATCH_CRITERIA_ENABLE_INNER_BIT, - MATCH_CRITERIA_ENABLE_MISC2_BIT -}; - -#define HEADER_IS_ZERO(match_criteria, headers) \ - !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ - 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ - -static u8 get_match_criteria_enable(u32 *match_criteria) -{ - u8 match_criteria_enable; - - match_criteria_enable = - (!HEADER_IS_ZERO(match_criteria, outer_headers)) << - MATCH_CRITERIA_ENABLE_OUTER_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << - MATCH_CRITERIA_ENABLE_MISC_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, inner_headers)) << - MATCH_CRITERIA_ENABLE_INNER_BIT; - match_criteria_enable |= - (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << - MATCH_CRITERIA_ENABLE_MISC2_BIT; - - return match_criteria_enable; -} - -static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) -{ - u8 entry_mask; - u8 entry_val; - int err = 0; - - if (!mask) - goto out; - - entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c, - ip_protocol); - entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v, - ip_protocol); - if (!entry_mask) { - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); - goto out; - } - /* Don't override existing ip protocol */ - if (mask != entry_mask || val != entry_val) - err = -EINVAL; -out: - return err; -} - -static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, - bool inner) -{ - if (inner) { - MLX5_SET(fte_match_set_misc, - misc_c, inner_ipv6_flow_label, mask); - MLX5_SET(fte_match_set_misc, - misc_v, inner_ipv6_flow_label, val); - } else { - MLX5_SET(fte_match_set_misc, - misc_c, outer_ipv6_flow_label, mask); - MLX5_SET(fte_match_set_misc, - misc_v, outer_ipv6_flow_label, val); - } -} - -static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) -{ - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); - MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); - MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); -} - -static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) -{ - if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) - return -EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) - return -EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) - return -EOPNOTSUPP; - - if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && - !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) - return -EOPNOTSUPP; - - return 0; -} - -#define LAST_ETH_FIELD vlan_tag -#define LAST_IB_FIELD sl -#define LAST_IPV4_FIELD tos -#define LAST_IPV6_FIELD traffic_class -#define LAST_TCP_UDP_FIELD src_port -#define LAST_TUNNEL_FIELD tunnel_id -#define LAST_FLOW_TAG_FIELD tag_id -#define LAST_DROP_FIELD size -#define LAST_COUNTERS_FIELD counters - -/* Field is the last supported field */ -#define FIELDS_NOT_SUPPORTED(filter, field)\ - memchr_inv((void *)&filter.field +\ - sizeof(filter.field), 0,\ - sizeof(filter) -\ - offsetof(typeof(filter), field) -\ - sizeof(filter.field)) - -int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, - bool is_egress, - struct mlx5_flow_act *action) -{ - - switch (maction->ib_action.type) { - case IB_FLOW_ACTION_ESP: - if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) - return -EINVAL; - /* Currently only AES_GCM keymat is supported by the driver */ - action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; - action->action |= is_egress ? - MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : - MLX5_FLOW_CONTEXT_ACTION_DECRYPT; - return 0; - case IB_FLOW_ACTION_UNSPECIFIED: - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { - if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - return -EINVAL; - action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - action->modify_hdr = - maction->flow_action_raw.modify_hdr; - return 0; - } - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_DECAP) { - if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) - return -EINVAL; - action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; - return 0; - } - if (maction->flow_action_raw.sub_type == - MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) { - if (action->action & - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) - return -EINVAL; - action->action |= - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - action->pkt_reformat = - maction->flow_action_raw.pkt_reformat; - return 0; - } - /* fall through */ - default: - return -EOPNOTSUPP; - } -} - -static int parse_flow_attr(struct mlx5_core_dev *mdev, - struct mlx5_flow_spec *spec, - const union ib_flow_spec *ib_spec, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_act *action, u32 prev_type) -{ - struct mlx5_flow_context *flow_context = &spec->flow_context; - u32 *match_c = spec->match_criteria; - u32 *match_v = spec->match_value; - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, - misc_parameters); - void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, - misc_parameters); - void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, - misc_parameters_2); - void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, - misc_parameters_2); - void *headers_c; - void *headers_v; - int match_ipv; - int ret; - - if (ib_spec->type & IB_FLOW_SPEC_INNER) { - headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - inner_headers); - headers_v = MLX5_ADDR_OF(fte_match_param, match_v, - inner_headers); - match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_ip_version); - } else { - headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - headers_v = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); - match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_ip_version); - } - - switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { - case IB_FLOW_SPEC_ETH: - if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) - return -EOPNOTSUPP; - - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dmac_47_16), - ib_spec->eth.mask.dst_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), - ib_spec->eth.val.dst_mac); - - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - smac_47_16), - ib_spec->eth.mask.src_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - smac_47_16), - ib_spec->eth.val.src_mac); - - if (ib_spec->eth.mask.vlan_tag) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - cvlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - cvlan_tag, 1); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_vid, ntohs(ib_spec->eth.val.vlan_tag)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_cfi, - ntohs(ib_spec->eth.mask.vlan_tag) >> 12); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_cfi, - ntohs(ib_spec->eth.val.vlan_tag) >> 12); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - first_prio, - ntohs(ib_spec->eth.mask.vlan_tag) >> 13); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - first_prio, - ntohs(ib_spec->eth.val.vlan_tag) >> 13); - } - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, ntohs(ib_spec->eth.mask.ether_type)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ntohs(ib_spec->eth.val.ether_type)); - break; - case IB_FLOW_SPEC_IPV4: - if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) - return -EOPNOTSUPP; - - if (match_ipv) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ip_version, 0xf); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_version, MLX5_FS_IPV4_VERSION); - } else { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ETH_P_IP); - } - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.mask.src_ip, - sizeof(ib_spec->ipv4.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.val.src_ip, - sizeof(ib_spec->ipv4.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.mask.dst_ip, - sizeof(ib_spec->ipv4.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &ib_spec->ipv4.val.dst_ip, - sizeof(ib_spec->ipv4.val.dst_ip)); - - set_tos(headers_c, headers_v, - ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); - - if (set_proto(headers_c, headers_v, - ib_spec->ipv4.mask.proto, - ib_spec->ipv4.val.proto)) - return -EINVAL; - break; - case IB_FLOW_SPEC_IPV6: - if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) - return -EOPNOTSUPP; - - if (match_ipv) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ip_version, 0xf); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ip_version, MLX5_FS_IPV6_VERSION); - } else { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ETH_P_IPV6); - } - - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.mask.src_ip, - sizeof(ib_spec->ipv6.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - src_ipv4_src_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.val.src_ip, - sizeof(ib_spec->ipv6.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.mask.dst_ip, - sizeof(ib_spec->ipv6.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &ib_spec->ipv6.val.dst_ip, - sizeof(ib_spec->ipv6.val.dst_ip)); - - set_tos(headers_c, headers_v, - ib_spec->ipv6.mask.traffic_class, - ib_spec->ipv6.val.traffic_class); - - if (set_proto(headers_c, headers_v, - ib_spec->ipv6.mask.next_hdr, - ib_spec->ipv6.val.next_hdr)) - return -EINVAL; - - set_flow_label(misc_params_c, misc_params_v, - ntohl(ib_spec->ipv6.mask.flow_label), - ntohl(ib_spec->ipv6.val.flow_label), - ib_spec->type & IB_FLOW_SPEC_INNER); - break; - case IB_FLOW_SPEC_ESP: - if (ib_spec->esp.mask.seq) - return -EOPNOTSUPP; - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, - ntohl(ib_spec->esp.mask.spi)); - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, - ntohl(ib_spec->esp.val.spi)); - break; - case IB_FLOW_SPEC_TCP: - if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, - LAST_TCP_UDP_FIELD)) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP)) - return -EINVAL; - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, - ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, - ntohs(ib_spec->tcp_udp.val.src_port)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, - ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, - ntohs(ib_spec->tcp_udp.val.dst_port)); - break; - case IB_FLOW_SPEC_UDP: - if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, - LAST_TCP_UDP_FIELD)) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP)) - return -EINVAL; - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, - ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, - ntohs(ib_spec->tcp_udp.val.src_port)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, - ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, - ntohs(ib_spec->tcp_udp.val.dst_port)); - break; - case IB_FLOW_SPEC_GRE: - if (ib_spec->gre.mask.c_ks_res0_ver) - return -EOPNOTSUPP; - - if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE)) - return -EINVAL; - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - 0xff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - IPPROTO_GRE); - - MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, - ntohs(ib_spec->gre.mask.protocol)); - MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, - ntohs(ib_spec->gre.val.protocol)); - - memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - gre_key.nvgre.hi), - &ib_spec->gre.mask.key, - sizeof(ib_spec->gre.mask.key)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, - gre_key.nvgre.hi), - &ib_spec->gre.val.key, - sizeof(ib_spec->gre.val.key)); - break; - case IB_FLOW_SPEC_MPLS: - switch (prev_type) { - case IB_FLOW_SPEC_UDP: - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls_over_udp), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - outer_first_mpls_over_udp), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls_over_udp), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - break; - case IB_FLOW_SPEC_GRE: - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls_over_gre), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - outer_first_mpls_over_gre), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls_over_gre), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - break; - default: - if (ib_spec->type & IB_FLOW_SPEC_INNER) { - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_first_mpls), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - inner_first_mpls), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - inner_first_mpls), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - } else { - if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_first_mpls), - &ib_spec->mpls.mask.tag)) - return -EOPNOTSUPP; - - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, - outer_first_mpls), - &ib_spec->mpls.val.tag, - sizeof(ib_spec->mpls.val.tag)); - memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, - outer_first_mpls), - &ib_spec->mpls.mask.tag, - sizeof(ib_spec->mpls.mask.tag)); - } - } - break; - case IB_FLOW_SPEC_VXLAN_TUNNEL: - if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, - LAST_TUNNEL_FIELD)) - return -EOPNOTSUPP; - - MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, - ntohl(ib_spec->tunnel.mask.tunnel_id)); - MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, - ntohl(ib_spec->tunnel.val.tunnel_id)); - break; - case IB_FLOW_SPEC_ACTION_TAG: - if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag, - LAST_FLOW_TAG_FIELD)) - return -EOPNOTSUPP; - if (ib_spec->flow_tag.tag_id >= BIT(24)) - return -EINVAL; - - flow_context->flow_tag = ib_spec->flow_tag.tag_id; - flow_context->flags |= FLOW_CONTEXT_HAS_TAG; - break; - case IB_FLOW_SPEC_ACTION_DROP: - if (FIELDS_NOT_SUPPORTED(ib_spec->drop, - LAST_DROP_FIELD)) - return -EOPNOTSUPP; - action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; - break; - case IB_FLOW_SPEC_ACTION_HANDLE: - ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act), - flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action); - if (ret) - return ret; - break; - case IB_FLOW_SPEC_ACTION_COUNT: - if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count, - LAST_COUNTERS_FIELD)) - return -EOPNOTSUPP; - - /* for now support only one counters spec per flow */ - if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - return -EINVAL; - - action->counters = ib_spec->flow_count.counters; - action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - break; - default: - return -EINVAL; - } - - return 0; -} - -/* If a flow could catch both multicast and unicast packets, - * it won't fall into the multicast flow steering table and this rule - * could steal other multicast packets. - */ -static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) -{ - union ib_flow_spec *flow_spec; - - if (ib_attr->type != IB_FLOW_ATTR_NORMAL || - ib_attr->num_of_specs < 1) - return false; - - flow_spec = (union ib_flow_spec *)(ib_attr + 1); - if (flow_spec->type == IB_FLOW_SPEC_IPV4) { - struct ib_flow_spec_ipv4 *ipv4_spec; - - ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; - if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) - return true; - - return false; - } - - if (flow_spec->type == IB_FLOW_SPEC_ETH) { - struct ib_flow_spec_eth *eth_spec; - - eth_spec = (struct ib_flow_spec_eth *)flow_spec; - return is_multicast_ether_addr(eth_spec->mask.dst_mac) && - is_multicast_ether_addr(eth_spec->val.dst_mac); - } - - return false; -} - -enum valid_spec { - VALID_SPEC_INVALID, - VALID_SPEC_VALID, - VALID_SPEC_NA, -}; - -static enum valid_spec -is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - const u32 *match_c = spec->match_criteria; - bool is_crypto = - (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); - bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); - bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; - - /* - * Currently only crypto is supported in egress, when regular egress - * rules would be supported, always return VALID_SPEC_NA. - */ - if (!is_crypto) - return VALID_SPEC_NA; - - return is_crypto && is_ipsec && - (!egress || (!is_drop && - !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? - VALID_SPEC_VALID : VALID_SPEC_INVALID; -} - -static bool is_valid_spec(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - /* We curretly only support ipsec egress flow */ - return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; -} - -static bool is_valid_ethertype(struct mlx5_core_dev *mdev, - const struct ib_flow_attr *flow_attr, - bool check_inner) -{ - union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); - int match_ipv = check_inner ? - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.inner_ip_version) : - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, - ft_field_support.outer_ip_version); - int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0; - bool ipv4_spec_valid, ipv6_spec_valid; - unsigned int ip_spec_type = 0; - bool has_ethertype = false; - unsigned int spec_index; - bool mask_valid = true; - u16 eth_type = 0; - bool type_valid; - - /* Validate that ethertype is correct */ - for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) && - ib_spec->eth.mask.ether_type) { - mask_valid = (ib_spec->eth.mask.ether_type == - htons(0xffff)); - has_ethertype = true; - eth_type = ntohs(ib_spec->eth.val.ether_type); - } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) || - (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) { - ip_spec_type = ib_spec->type; - } - ib_spec = (void *)ib_spec + ib_spec->size; - } - - type_valid = (!has_ethertype) || (!ip_spec_type); - if (!type_valid && mask_valid) { - ipv4_spec_valid = (eth_type == ETH_P_IP) && - (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit)); - ipv6_spec_valid = (eth_type == ETH_P_IPV6) && - (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit)); - - type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) || - (((eth_type == ETH_P_MPLS_UC) || - (eth_type == ETH_P_MPLS_MC)) && match_ipv); - } - - return type_valid; -} - -static bool is_valid_attr(struct mlx5_core_dev *mdev, - const struct ib_flow_attr *flow_attr) -{ - return is_valid_ethertype(mdev, flow_attr, false) && - is_valid_ethertype(mdev, flow_attr, true); -} - -static void put_flow_table(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *prio, bool ft_added) -{ - prio->refcount -= !!ft_added; - if (!prio->refcount) { - mlx5_destroy_flow_table(prio->flow_table); - prio->flow_table = NULL; - } -} - -static void counters_clear_description(struct ib_counters *counters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - - mutex_lock(&mcounters->mcntrs_mutex); - kfree(mcounters->counters_data); - mcounters->counters_data = NULL; - mcounters->cntrs_max_index = 0; - mutex_unlock(&mcounters->mcntrs_mutex); -} - -static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) -{ - struct mlx5_ib_flow_handler *handler = container_of(flow_id, - struct mlx5_ib_flow_handler, - ibflow); - struct mlx5_ib_flow_handler *iter, *tmp; - struct mlx5_ib_dev *dev = handler->dev; - - mutex_lock(&dev->flow_db->lock); - - list_for_each_entry_safe(iter, tmp, &handler->list, list) { - mlx5_del_flow_rules(iter->rule); - put_flow_table(dev, iter->prio, true); - list_del(&iter->list); - kfree(iter); - } - - mlx5_del_flow_rules(handler->rule); - put_flow_table(dev, handler->prio, true); - if (handler->ibcounters && - atomic_read(&handler->ibcounters->usecnt) == 1) - counters_clear_description(handler->ibcounters); - - mutex_unlock(&dev->flow_db->lock); - if (handler->flow_matcher) - atomic_dec(&handler->flow_matcher->usecnt); - kfree(handler); - - return 0; -} - -static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) -{ - priority *= 2; - if (!dont_trap) - priority++; - return priority; -} - -enum flow_table_type { - MLX5_IB_FT_RX, - MLX5_IB_FT_TX -}; - -#define MLX5_FS_MAX_TYPES 6 -#define MLX5_FS_MAX_ENTRIES BIT(16) - -static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, - struct mlx5_ib_flow_prio *prio, - int priority, - int num_entries, int num_groups, - u32 flags) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_table *ft; - - ft_attr.prio = priority; - ft_attr.max_fte = num_entries; - ft_attr.flags = flags; - ft_attr.autogroup.max_num_groups = num_groups; - ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(ft)) - return ERR_CAST(ft); - - prio->flow_table = ft; - prio->refcount = 0; - return prio; -} - -static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, - struct ib_flow_attr *flow_attr, - enum flow_table_type ft_type) -{ - bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; - struct mlx5_flow_namespace *ns = NULL; - struct mlx5_ib_flow_prio *prio; - struct mlx5_flow_table *ft; - int max_table_size; - int num_entries; - int num_groups; - bool esw_encap; - u32 flags = 0; - int priority; - - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); - esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != - DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - enum mlx5_flow_namespace_type fn_type; - - if (flow_is_multicast_only(flow_attr) && - !dont_trap) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(flow_attr->priority, - dont_trap); - if (ft_type == MLX5_IB_FT_RX) { - fn_type = MLX5_FLOW_NAMESPACE_BYPASS; - prio = &dev->flow_db->prios[priority]; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - log_max_ft_size)); - fn_type = MLX5_FLOW_NAMESPACE_EGRESS; - prio = &dev->flow_db->egress_prios[priority]; - if (!dev->is_rep && !esw_encap && - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } - ns = mlx5_get_flow_namespace(dev->mdev, fn_type); - num_entries = MLX5_FS_MAX_ENTRIES; - num_groups = MLX5_FS_MAX_TYPES; - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - ns = mlx5_get_flow_namespace(dev->mdev, - MLX5_FLOW_NAMESPACE_LEFTOVERS); - build_leftovers_ft_param(&priority, - &num_entries, - &num_groups); - prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - if (!MLX5_CAP_FLOWTABLE(dev->mdev, - allow_sniffer_and_nic_rx_shared_tir)) - return ERR_PTR(-ENOTSUPP); - - ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? - MLX5_FLOW_NAMESPACE_SNIFFER_RX : - MLX5_FLOW_NAMESPACE_SNIFFER_TX); - - prio = &dev->flow_db->sniffer[ft_type]; - priority = 0; - num_entries = 1; - num_groups = 1; - } - - if (!ns) - return ERR_PTR(-ENOTSUPP); - - max_table_size = min_t(int, num_entries, max_table_size); - - ft = prio->flow_table; - if (!ft) - return _get_prio(ns, prio, priority, max_table_size, num_groups, - flags); - - return prio; -} - -static void set_underlay_qp(struct mlx5_ib_dev *dev, - struct mlx5_flow_spec *spec, - u32 underlay_qpn) -{ - void *misc_params_c = MLX5_ADDR_OF(fte_match_param, - spec->match_criteria, - misc_parameters); - void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - - if (underlay_qpn && - MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - ft_field_support.bth_dst_qp)) { - MLX5_SET(fte_match_set_misc, - misc_params_v, bth_dst_qp, underlay_qpn); - MLX5_SET(fte_match_set_misc, - misc_params_c, bth_dst_qp, 0xffffff); - } -} - -static int read_flow_counters(struct ib_device *ibdev, - struct mlx5_read_counters_attr *read_attr) -{ - struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; - struct mlx5_ib_dev *dev = to_mdev(ibdev); - - return mlx5_fc_query(dev->mdev, fc, - &read_attr->out[IB_COUNTER_PACKETS], - &read_attr->out[IB_COUNTER_BYTES]); -} - -/* flow counters currently expose two counters packets and bytes */ -#define FLOW_COUNTERS_NUM 2 -static int counters_set_description(struct ib_counters *counters, - enum mlx5_ib_counters_type counters_type, - struct mlx5_ib_flow_counters_desc *desc_data, - u32 ncounters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - u32 cntrs_max_index = 0; - int i; - - if (counters_type != MLX5_IB_COUNTERS_FLOW) - return -EINVAL; - - /* init the fields for the object */ - mcounters->type = counters_type; - mcounters->read_counters = read_flow_counters; - mcounters->counters_num = FLOW_COUNTERS_NUM; - mcounters->ncounters = ncounters; - /* each counter entry have both description and index pair */ - for (i = 0; i < ncounters; i++) { - if (desc_data[i].description > IB_COUNTER_BYTES) - return -EINVAL; - - if (cntrs_max_index <= desc_data[i].index) - cntrs_max_index = desc_data[i].index + 1; - } - - mutex_lock(&mcounters->mcntrs_mutex); - mcounters->counters_data = desc_data; - mcounters->cntrs_max_index = cntrs_max_index; - mutex_unlock(&mcounters->mcntrs_mutex); - - return 0; -} - -#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) -static int flow_counters_set_data(struct ib_counters *ibcounters, - struct mlx5_ib_create_flow *ucmd) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); - struct mlx5_ib_flow_counters_data *cntrs_data = NULL; - struct mlx5_ib_flow_counters_desc *desc_data = NULL; - bool hw_hndl = false; - int ret = 0; - - if (ucmd && ucmd->ncounters_data != 0) { - cntrs_data = ucmd->data; - if (cntrs_data->ncounters > MAX_COUNTERS_NUM) - return -EINVAL; - - desc_data = kcalloc(cntrs_data->ncounters, - sizeof(*desc_data), - GFP_KERNEL); - if (!desc_data) - return -ENOMEM; - - if (copy_from_user(desc_data, - u64_to_user_ptr(cntrs_data->counters_data), - sizeof(*desc_data) * cntrs_data->ncounters)) { - ret = -EFAULT; - goto free; - } - } - - if (!mcounters->hw_cntrs_hndl) { - mcounters->hw_cntrs_hndl = mlx5_fc_create( - to_mdev(ibcounters->device)->mdev, false); - if (IS_ERR(mcounters->hw_cntrs_hndl)) { - ret = PTR_ERR(mcounters->hw_cntrs_hndl); - goto free; - } - hw_hndl = true; - } - - if (desc_data) { - /* counters already bound to at least one flow */ - if (mcounters->cntrs_max_index) { - ret = -EINVAL; - goto free_hndl; - } - - ret = counters_set_description(ibcounters, - MLX5_IB_COUNTERS_FLOW, - desc_data, - cntrs_data->ncounters); - if (ret) - goto free_hndl; - - } else if (!mcounters->cntrs_max_index) { - /* counters not bound yet, must have udata passed */ - ret = -EINVAL; - goto free_hndl; - } - - return 0; - -free_hndl: - if (hw_hndl) { - mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, - mcounters->hw_cntrs_hndl); - mcounters->hw_cntrs_hndl = NULL; - } -free: - kfree(desc_data); - return ret; -} - -static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, - struct mlx5_flow_spec *spec, - struct mlx5_eswitch_rep *rep) -{ - struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; - void *misc; - - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters_2); - - MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_for_match(esw, - rep->vport)); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters_2); - - MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_mask()); - } else { - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - - MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport); - - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - } -} - -static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst, - u32 underlay_qpn, - struct mlx5_ib_create_flow *ucmd) -{ - struct mlx5_flow_table *ft = ft_prio->flow_table; - struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_spec *spec; - struct mlx5_flow_destination dest_arr[2] = {}; - struct mlx5_flow_destination *rule_dst = dest_arr; - const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); - unsigned int spec_index; - u32 prev_type = 0; - int err = 0; - int dest_num = 0; - bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; - - if (!is_valid_attr(dev->mdev, flow_attr)) - return ERR_PTR(-EINVAL); - - if (dev->is_rep && is_egress) - return ERR_PTR(-EINVAL); - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !spec) { - err = -ENOMEM; - goto free; - } - - INIT_LIST_HEAD(&handler->list); - - for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - err = parse_flow_attr(dev->mdev, spec, - ib_flow, flow_attr, &flow_act, - prev_type); - if (err < 0) - goto free; - - prev_type = ((union ib_flow_spec *)ib_flow)->type; - ib_flow += ((union ib_flow_spec *)ib_flow)->size; - } - - if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) { - memcpy(&dest_arr[0], dst, sizeof(*dst)); - dest_num++; - } - - if (!flow_is_multicast_only(flow_attr)) - set_underlay_qp(dev, spec, underlay_qpn); - - if (dev->is_rep) { - struct mlx5_eswitch_rep *rep; - - rep = dev->port[flow_attr->port - 1].rep; - if (!rep) { - err = -EINVAL; - goto free; - } - - mlx5_ib_set_rule_source_port(dev, spec, rep); - } - - spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); - - if (is_egress && - !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { - err = -EINVAL; - goto free; - } - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - struct mlx5_ib_mcounters *mcounters; - - err = flow_counters_set_data(flow_act.counters, ucmd); - if (err) - goto free; - - mcounters = to_mcounters(flow_act.counters); - handler->ibcounters = flow_act.counters; - dest_arr[dest_num].type = - MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest_arr[dest_num].counter_id = - mlx5_fc_id(mcounters->hw_cntrs_hndl); - dest_num++; - } - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { - if (!dest_num) - rule_dst = NULL; - } else { - if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) - flow_act.action |= - MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - if (is_egress) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; - else if (dest_num) - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } - - if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) && - (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { - mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", - spec->flow_context.flow_tag, flow_attr->type); - err = -EINVAL; - goto free; - } - handler->rule = mlx5_add_flow_rules(ft, spec, - &flow_act, - rule_dst, dest_num); - - if (IS_ERR(handler->rule)) { - err = PTR_ERR(handler->rule); - goto free; - } - - ft_prio->refcount++; - handler->prio = ft_prio; - handler->dev = dev; - - ft_prio->flow_table = ft; -free: - if (err && handler) { - if (handler->ibcounters && - atomic_read(&handler->ibcounters->usecnt) == 1) - counters_clear_description(handler->ibcounters); - kfree(handler); - } - kvfree(spec); - return err ? ERR_PTR(err) : handler; -} - -static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) -{ - return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); -} - -enum { - LEFTOVERS_MC, - LEFTOVERS_UC, -}; - -static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) -{ - struct mlx5_ib_flow_handler *handler_ucast = NULL; - struct mlx5_ib_flow_handler *handler = NULL; - - static struct { - struct ib_flow_attr flow_attr; - struct ib_flow_spec_eth eth_flow; - } leftovers_specs[] = { - [LEFTOVERS_MC] = { - .flow_attr = { - .num_of_specs = 1, - .size = sizeof(leftovers_specs[0]) - }, - .eth_flow = { - .type = IB_FLOW_SPEC_ETH, - .size = sizeof(struct ib_flow_spec_eth), - .mask = {.dst_mac = {0x1} }, - .val = {.dst_mac = {0x1} } - } - }, - [LEFTOVERS_UC] = { - .flow_attr = { - .num_of_specs = 1, - .size = sizeof(leftovers_specs[0]) - }, - .eth_flow = { - .type = IB_FLOW_SPEC_ETH, - .size = sizeof(struct ib_flow_spec_eth), - .mask = {.dst_mac = {0x1} }, - .val = {.dst_mac = {} } - } - } - }; - - handler = create_flow_rule(dev, ft_prio, - &leftovers_specs[LEFTOVERS_MC].flow_attr, - dst); - if (!IS_ERR(handler) && - flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { - handler_ucast = create_flow_rule(dev, ft_prio, - &leftovers_specs[LEFTOVERS_UC].flow_attr, - dst); - if (IS_ERR(handler_ucast)) { - mlx5_del_flow_rules(handler->rule); - ft_prio->refcount--; - kfree(handler); - handler = handler_ucast; - } else { - list_add(&handler_ucast->list, &handler->list); - } - } - - return handler; -} - -static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_rx, - struct mlx5_ib_flow_prio *ft_tx, - struct mlx5_flow_destination *dst) -{ - struct mlx5_ib_flow_handler *handler_rx; - struct mlx5_ib_flow_handler *handler_tx; - int err; - static const struct ib_flow_attr flow_attr = { - .num_of_specs = 0, - .size = sizeof(flow_attr) - }; - - handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); - if (IS_ERR(handler_rx)) { - err = PTR_ERR(handler_rx); - goto err; - } - - handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); - if (IS_ERR(handler_tx)) { - err = PTR_ERR(handler_tx); - goto err_tx; - } - - list_add(&handler_tx->list, &handler_rx->list); - - return handler_rx; - -err_tx: - mlx5_del_flow_rules(handler_rx->rule); - ft_rx->refcount--; - kfree(handler_rx); -err: - return ERR_PTR(err); -} - -static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain, - struct ib_udata *udata) -{ - struct mlx5_ib_dev *dev = to_mdev(qp->device); - struct mlx5_ib_qp *mqp = to_mqp(qp); - struct mlx5_ib_flow_handler *handler = NULL; - struct mlx5_flow_destination *dst = NULL; - struct mlx5_ib_flow_prio *ft_prio_tx = NULL; - struct mlx5_ib_flow_prio *ft_prio; - bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; - struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr; - size_t min_ucmd_sz, required_ucmd_sz; - int err; - int underlay_qpn; - - if (udata && udata->inlen) { - min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + - sizeof(ucmd_hdr.reserved); - if (udata->inlen < min_ucmd_sz) - return ERR_PTR(-EOPNOTSUPP); - - err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz); - if (err) - return ERR_PTR(err); - - /* currently supports only one counters data */ - if (ucmd_hdr.ncounters_data > 1) - return ERR_PTR(-EINVAL); - - required_ucmd_sz = min_ucmd_sz + - sizeof(struct mlx5_ib_flow_counters_data) * - ucmd_hdr.ncounters_data; - if (udata->inlen > required_ucmd_sz && - !ib_is_udata_cleared(udata, required_ucmd_sz, - udata->inlen - required_ucmd_sz)) - return ERR_PTR(-EOPNOTSUPP); - - ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL); - if (!ucmd) - return ERR_PTR(-ENOMEM); - - err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); - if (err) - goto free_ucmd; - } - - if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) { - err = -ENOMEM; - goto free_ucmd; - } - - if (domain != IB_FLOW_DOMAIN_USER || - flow_attr->port > dev->num_ports || - (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | - IB_FLOW_ATTR_FLAGS_EGRESS))) { - err = -EINVAL; - goto free_ucmd; - } - - if (is_egress && - (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { - err = -EINVAL; - goto free_ucmd; - } - - dst = kzalloc(sizeof(*dst), GFP_KERNEL); - if (!dst) { - err = -ENOMEM; - goto free_ucmd; - } - - mutex_lock(&dev->flow_db->lock); - - ft_prio = get_flow_table(dev, flow_attr, - is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX); - if (IS_ERR(ft_prio)) { - err = PTR_ERR(ft_prio); - goto unlock; - } - if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); - if (IS_ERR(ft_prio_tx)) { - err = PTR_ERR(ft_prio_tx); - ft_prio_tx = NULL; - goto destroy_ft; - } - } - - if (is_egress) { - dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; - } else { - dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - if (mqp->is_rss) - dst->tir_num = mqp->rss_qp.tirn; - else - dst->tir_num = mqp->raw_packet_qp.rq.tirn; - } - - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? - mqp->underlay_qpn : - 0; - handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, - underlay_qpn, ucmd); - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - handler = create_leftovers_rule(dev, ft_prio, flow_attr, - dst); - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); - } else { - err = -EINVAL; - goto destroy_ft; - } - - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - handler = NULL; - goto destroy_ft; - } - - mutex_unlock(&dev->flow_db->lock); - kfree(dst); - kfree(ucmd); - - return &handler->ibflow; - -destroy_ft: - put_flow_table(dev, ft_prio, false); - if (ft_prio_tx) - put_flow_table(dev, ft_prio_tx, false); -unlock: - mutex_unlock(&dev->flow_db->lock); - kfree(dst); -free_ucmd: - kfree(ucmd); - return ERR_PTR(err); -} - -static struct mlx5_ib_flow_prio * -_get_flow_table(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_matcher *fs_matcher, - bool mcast) -{ - struct mlx5_flow_namespace *ns = NULL; - struct mlx5_ib_flow_prio *prio = NULL; - int max_table_size = 0; - bool esw_encap; - u32 flags = 0; - int priority; - - if (mcast) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(fs_matcher->priority, false); - - esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != - DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2) && - !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { - max_table_size = BIT( - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { - max_table_size = BIT( - MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && - esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - priority = FDB_BYPASS_PATH; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, - log_max_ft_size)); - priority = fs_matcher->priority; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, - log_max_ft_size)); - priority = fs_matcher->priority; - } - - max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); - - ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); - if (!ns) - return ERR_PTR(-ENOTSUPP); - - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) - prio = &dev->flow_db->prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) - prio = &dev->flow_db->egress_prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) - prio = &dev->flow_db->fdb; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) - prio = &dev->flow_db->rdma_rx[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) - prio = &dev->flow_db->rdma_tx[priority]; - - if (!prio) - return ERR_PTR(-EINVAL); - - if (prio->flow_table) - return prio; - - return _get_prio(ns, prio, priority, max_table_size, - MLX5_FS_MAX_TYPES, flags); -} - -static struct mlx5_ib_flow_handler * -_create_raw_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - struct mlx5_flow_destination *dst, - struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, - void *cmd_in, int inlen, - int dst_num) -{ - struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_spec *spec; - struct mlx5_flow_table *ft = ft_prio->flow_table; - int err = 0; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !spec) { - err = -ENOMEM; - goto free; - } - - INIT_LIST_HEAD(&handler->list); - - memcpy(spec->match_value, cmd_in, inlen); - memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, - fs_matcher->mask_len); - spec->match_criteria_enable = fs_matcher->match_criteria_enable; - spec->flow_context = *flow_context; - - handler->rule = mlx5_add_flow_rules(ft, spec, - flow_act, dst, dst_num); - - if (IS_ERR(handler->rule)) { - err = PTR_ERR(handler->rule); - goto free; - } - - ft_prio->refcount++; - handler->prio = ft_prio; - handler->dev = dev; - ft_prio->flow_table = ft; - -free: - if (err) - kfree(handler); - kvfree(spec); - return err ? ERR_PTR(err) : handler; -} - -static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, - void *match_v) -{ - void *match_c; - void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; - void *dmac, *dmac_mask; - void *ipv4, *ipv4_mask; - - if (!(fs_matcher->match_criteria_enable & - (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) - return false; - - match_c = fs_matcher->matcher_mask.match_params; - match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); - match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - - dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, - dmac_47_16); - dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, - dmac_47_16); - - if (is_multicast_ether_addr(dmac) && - is_multicast_ether_addr(dmac_mask)) - return true; - - ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - if (ipv4_is_multicast(*(__be32 *)(ipv4)) && - ipv4_is_multicast(*(__be32 *)(ipv4_mask))) - return true; - - return false; -} - -struct mlx5_ib_flow_handler * -mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, - u32 counter_id, - void *cmd_in, int inlen, int dest_id, - int dest_type) -{ - struct mlx5_flow_destination *dst; - struct mlx5_ib_flow_prio *ft_prio; - struct mlx5_ib_flow_handler *handler; - int dst_num = 0; - bool mcast; - int err; - - if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) - return ERR_PTR(-EOPNOTSUPP); - - if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) - return ERR_PTR(-ENOMEM); - - dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); - if (!dst) - return ERR_PTR(-ENOMEM); - - mcast = raw_fs_is_multicast(fs_matcher, cmd_in); - mutex_lock(&dev->flow_db->lock); - - ft_prio = _get_flow_table(dev, fs_matcher, mcast); - if (IS_ERR(ft_prio)) { - err = PTR_ERR(ft_prio); - goto unlock; - } - - if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { - dst[dst_num].type = dest_type; - dst[dst_num++].tir_num = dest_id; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; - dst[dst_num++].ft_num = dest_id; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) { - dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT; - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; - } - - - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dst[dst_num].counter_id = counter_id; - dst_num++; - } - - handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, - flow_context, flow_act, - cmd_in, inlen, dst_num); - - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - goto destroy_ft; - } - - mutex_unlock(&dev->flow_db->lock); - atomic_inc(&fs_matcher->usecnt); - handler->flow_matcher = fs_matcher; - - kfree(dst); - - return handler; - -destroy_ft: - put_flow_table(dev, ft_prio, false); -unlock: - mutex_unlock(&dev->flow_db->lock); - kfree(dst); - - return ERR_PTR(err); -} - -static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) -{ - u32 flags = 0; - - if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) - flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; - - return flags; -} - -#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA -static struct ib_flow_action * -mlx5_ib_create_flow_action_esp(struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_dev *mdev = to_mdev(device); - struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; - struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; - struct mlx5_ib_flow_action *action; - u64 action_flags; - u64 flags; - int err = 0; - - err = uverbs_get_flags64( - &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); - if (err) - return ERR_PTR(err); - - flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); - - /* We current only support a subset of the standard features. Only a - * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn - * (with overlap). Full offload mode isn't supported. - */ - if (!attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) - return ERR_PTR(-EOPNOTSUPP); - - if (attr->keymat->protocol != - IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) - return ERR_PTR(-EOPNOTSUPP); - - aes_gcm = &attr->keymat->keymat.aes_gcm; - - if (aes_gcm->icv_len != 16 || - aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) - return ERR_PTR(-EOPNOTSUPP); - - action = kmalloc(sizeof(*action), GFP_KERNEL); - if (!action) - return ERR_PTR(-ENOMEM); - - action->esp_aes_gcm.ib_flags = attr->flags; - memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, - sizeof(accel_attrs.keymat.aes_gcm.aes_key)); - accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; - memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, - sizeof(accel_attrs.keymat.aes_gcm.salt)); - memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, - sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); - accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; - accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; - accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) - accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; - - action->esp_aes_gcm.ctx = - mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); - if (IS_ERR(action->esp_aes_gcm.ctx)) { - err = PTR_ERR(action->esp_aes_gcm.ctx); - goto err_parse; - } - - action->esp_aes_gcm.ib_flags = attr->flags; - - return &action->ib_action; - -err_parse: - kfree(action); - return ERR_PTR(err); -} - -static int -mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_flow_action *maction = to_mflow_act(action); - struct mlx5_accel_esp_xfrm_attrs accel_attrs; - int err = 0; - - if (attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) - return -EOPNOTSUPP; - - /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can - * be modified. - */ - if (!(maction->esp_aes_gcm.ib_flags & - IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && - attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) - return -EINVAL; - - memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, - sizeof(accel_attrs)); - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - else - accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, - &accel_attrs); - if (err) - return err; - - maction->esp_aes_gcm.ib_flags &= - ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - maction->esp_aes_gcm.ib_flags |= - attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - - return 0; -} - -static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) -{ - struct mlx5_ib_flow_action *maction = to_mflow_act(action); - - switch (action->type) { - case IB_FLOW_ACTION_ESP: - /* - * We only support aes_gcm by now, so we implicitly know this is - * the underline crypto. - */ - mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); - break; - case IB_FLOW_ACTION_UNSPECIFIED: - mlx5_ib_destroy_flow_action_raw(maction); - break; - default: - WARN_ON(true); - break; - } - - kfree(maction); - return 0; -} - static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -4848,137 +3020,6 @@ static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) return __get_port_caps(dev, port); } -static void destroy_umrc_res(struct mlx5_ib_dev *dev) -{ - int err; - - err = mlx5_mr_cache_cleanup(dev); - if (err) - mlx5_ib_warn(dev, "mr cache cleanup failed\n"); - - if (dev->umrc.qp) - mlx5_ib_destroy_qp(dev->umrc.qp, NULL); - if (dev->umrc.cq) - ib_free_cq(dev->umrc.cq); - if (dev->umrc.pd) - ib_dealloc_pd(dev->umrc.pd); -} - -enum { - MAX_UMR_WR = 128, -}; - -static int create_umr_res(struct mlx5_ib_dev *dev) -{ - struct ib_qp_init_attr *init_attr = NULL; - struct ib_qp_attr *attr = NULL; - struct ib_pd *pd; - struct ib_cq *cq; - struct ib_qp *qp; - int ret; - - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); - if (!attr || !init_attr) { - ret = -ENOMEM; - goto error_0; - } - - pd = ib_alloc_pd(&dev->ib_dev, 0); - if (IS_ERR(pd)) { - mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); - ret = PTR_ERR(pd); - goto error_0; - } - - cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); - if (IS_ERR(cq)) { - mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); - ret = PTR_ERR(cq); - goto error_2; - } - - init_attr->send_cq = cq; - init_attr->recv_cq = cq; - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; - init_attr->cap.max_send_wr = MAX_UMR_WR; - init_attr->cap.max_send_sge = 1; - init_attr->qp_type = MLX5_IB_QPT_REG_UMR; - init_attr->port_num = 1; - qp = mlx5_ib_create_qp(pd, init_attr, NULL); - if (IS_ERR(qp)) { - mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); - ret = PTR_ERR(qp); - goto error_3; - } - qp->device = &dev->ib_dev; - qp->real_qp = qp; - qp->uobject = NULL; - qp->qp_type = MLX5_IB_QPT_REG_UMR; - qp->send_cq = init_attr->send_cq; - qp->recv_cq = init_attr->recv_cq; - - attr->qp_state = IB_QPS_INIT; - attr->port_num = 1; - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | - IB_QP_PORT, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTR; - attr->path_mtu = IB_MTU_256; - - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTS; - ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); - goto error_4; - } - - dev->umrc.qp = qp; - dev->umrc.cq = cq; - dev->umrc.pd = pd; - - sema_init(&dev->umrc.sem, MAX_UMR_WR); - ret = mlx5_mr_cache_init(dev); - if (ret) { - mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); - goto error_4; - } - - kfree(attr); - kfree(init_attr); - - return 0; - -error_4: - mlx5_ib_destroy_qp(qp, NULL); - dev->umrc.qp = NULL; - -error_3: - ib_free_cq(cq); - dev->umrc.cq = NULL; - -error_2: - ib_dealloc_pd(pd); - dev->umrc.pd = NULL; - -error_0: - kfree(attr); - kfree(init_attr); - return ret; -} - static u8 mlx5_get_umr_fence(u8 umr_fence_cap) { switch (umr_fence_cap) { @@ -4991,18 +3032,20 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap) } } -static int create_dev_resources(struct mlx5_ib_resources *devr) +static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) { + struct mlx5_ib_resources *devr = &dev->devr; struct ib_srq_init_attr attr; - struct mlx5_ib_dev *dev; struct ib_device *ibdev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; int port; int ret = 0; - dev = container_of(devr, struct mlx5_ib_dev, devr); ibdev = &dev->ib_dev; + if (!MLX5_CAP_GEN(dev->mdev, xrc)) + return -EOPNOTSUPP; + mutex_init(&devr->mutex); devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd); @@ -5030,34 +3073,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) if (ret) goto err_create_cq; - devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); - if (IS_ERR(devr->x0)) { - ret = PTR_ERR(devr->x0); + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0); + if (ret) goto error2; - } - devr->x0->device = &dev->ib_dev; - devr->x0->inode = NULL; - atomic_set(&devr->x0->usecnt, 0); - mutex_init(&devr->x0->tgt_qp_mutex); - INIT_LIST_HEAD(&devr->x0->tgt_qp_list); - - devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); - if (IS_ERR(devr->x1)) { - ret = PTR_ERR(devr->x1); + + ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0); + if (ret) goto error3; - } - devr->x1->device = &dev->ib_dev; - devr->x1->inode = NULL; - atomic_set(&devr->x1->usecnt, 0); - mutex_init(&devr->x1->tgt_qp_mutex); - INIT_LIST_HEAD(&devr->x1->tgt_qp_list); memset(&attr, 0, sizeof(attr)); attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_XRC; attr.ext.cq = devr->c0; - attr.ext.xrc.xrcd = devr->x0; devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq); if (!devr->s0) { @@ -5068,13 +3096,11 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->s0->device = &dev->ib_dev; devr->s0->pd = devr->p0; devr->s0->srq_type = IB_SRQT_XRC; - devr->s0->ext.xrc.xrcd = devr->x0; devr->s0->ext.cq = devr->c0; ret = mlx5_ib_create_srq(devr->s0, &attr, NULL); if (ret) goto err_create; - atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); atomic_inc(&devr->s0->ext.cq->usecnt); atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); @@ -5116,9 +3142,9 @@ error5: err_create: kfree(devr->s0); error4: - mlx5_ib_dealloc_xrcd(devr->x1, NULL); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); error3: - mlx5_ib_dealloc_xrcd(devr->x0, NULL); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); error2: mlx5_ib_destroy_cq(devr->c0, NULL); err_create_cq: @@ -5130,16 +3156,17 @@ error0: return ret; } -static void destroy_dev_resources(struct mlx5_ib_resources *devr) +static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev) { + struct mlx5_ib_resources *devr = &dev->devr; int port; mlx5_ib_destroy_srq(devr->s1, NULL); kfree(devr->s1); mlx5_ib_destroy_srq(devr->s0, NULL); kfree(devr->s0); - mlx5_ib_dealloc_xrcd(devr->x0, NULL); - mlx5_ib_dealloc_xrcd(devr->x1, NULL); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); + mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); mlx5_ib_destroy_cq(devr->c0, NULL); kfree(devr->c0); mlx5_ib_dealloc_pd(devr->p0, NULL); @@ -5332,466 +3359,6 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } -struct mlx5_ib_counter { - const char *name; - size_t offset; -}; - -#define INIT_Q_COUNTER(_name) \ - { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} - -static const struct mlx5_ib_counter basic_q_cnts[] = { - INIT_Q_COUNTER(rx_write_requests), - INIT_Q_COUNTER(rx_read_requests), - INIT_Q_COUNTER(rx_atomic_requests), - INIT_Q_COUNTER(out_of_buffer), -}; - -static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { - INIT_Q_COUNTER(out_of_sequence), -}; - -static const struct mlx5_ib_counter retrans_q_cnts[] = { - INIT_Q_COUNTER(duplicate_request), - INIT_Q_COUNTER(rnr_nak_retry_err), - INIT_Q_COUNTER(packet_seq_err), - INIT_Q_COUNTER(implied_nak_seq_err), - INIT_Q_COUNTER(local_ack_timeout_err), -}; - -#define INIT_CONG_COUNTER(_name) \ - { .name = #_name, .offset = \ - MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} - -static const struct mlx5_ib_counter cong_cnts[] = { - INIT_CONG_COUNTER(rp_cnp_ignored), - INIT_CONG_COUNTER(rp_cnp_handled), - INIT_CONG_COUNTER(np_ecn_marked_roce_packets), - INIT_CONG_COUNTER(np_cnp_sent), -}; - -static const struct mlx5_ib_counter extended_err_cnts[] = { - INIT_Q_COUNTER(resp_local_length_error), - INIT_Q_COUNTER(resp_cqe_error), - INIT_Q_COUNTER(req_cqe_error), - INIT_Q_COUNTER(req_remote_invalid_request), - INIT_Q_COUNTER(req_remote_access_errors), - INIT_Q_COUNTER(resp_remote_access_errors), - INIT_Q_COUNTER(resp_cqe_flush_error), - INIT_Q_COUNTER(req_cqe_flush_error), -}; - -static const struct mlx5_ib_counter roce_accl_cnts[] = { - INIT_Q_COUNTER(roce_adp_retrans), - INIT_Q_COUNTER(roce_adp_retrans_to), - INIT_Q_COUNTER(roce_slow_restart), - INIT_Q_COUNTER(roce_slow_restart_cnps), - INIT_Q_COUNTER(roce_slow_restart_trans), -}; - -#define INIT_EXT_PPCNT_COUNTER(_name) \ - { .name = #_name, .offset = \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} - -static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { - INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), -}; - -static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) -{ - return MLX5_ESWITCH_MANAGER(mdev) && - mlx5_ib_eswitch_mode(mdev->priv.eswitch) == - MLX5_ESWITCH_OFFLOADS; -} - -static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) -{ - u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; - int num_cnt_ports; - int i; - - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; - - MLX5_SET(dealloc_q_counter_in, in, opcode, - MLX5_CMD_OP_DEALLOC_Q_COUNTER); - - for (i = 0; i < num_cnt_ports; i++) { - if (dev->port[i].cnts.set_id) { - MLX5_SET(dealloc_q_counter_in, in, counter_set_id, - dev->port[i].cnts.set_id); - mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); - } - kfree(dev->port[i].cnts.names); - kfree(dev->port[i].cnts.offsets); - } -} - -static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, - struct mlx5_ib_counters *cnts) -{ - u32 num_counters; - - num_counters = ARRAY_SIZE(basic_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) - num_counters += ARRAY_SIZE(out_of_seq_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) - num_counters += ARRAY_SIZE(retrans_q_cnts); - - if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) - num_counters += ARRAY_SIZE(extended_err_cnts); - - if (MLX5_CAP_GEN(dev->mdev, roce_accl)) - num_counters += ARRAY_SIZE(roce_accl_cnts); - - cnts->num_q_counters = num_counters; - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); - num_counters += ARRAY_SIZE(cong_cnts); - } - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); - num_counters += ARRAY_SIZE(ext_ppcnt_cnts); - } - cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); - if (!cnts->names) - return -ENOMEM; - - cnts->offsets = kcalloc(num_counters, - sizeof(cnts->offsets), GFP_KERNEL); - if (!cnts->offsets) - goto err_names; - - return 0; - -err_names: - kfree(cnts->names); - cnts->names = NULL; - return -ENOMEM; -} - -static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, - const char **names, - size_t *offsets) -{ - int i; - int j = 0; - - for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { - names[j] = basic_q_cnts[i].name; - offsets[j] = basic_q_cnts[i].offset; - } - - if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { - for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { - names[j] = out_of_seq_q_cnts[i].name; - offsets[j] = out_of_seq_q_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { - for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { - names[j] = retrans_q_cnts[i].name; - offsets[j] = retrans_q_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { - for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { - names[j] = extended_err_cnts[i].name; - offsets[j] = extended_err_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { - for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { - names[j] = roce_accl_cnts[i].name; - offsets[j] = roce_accl_cnts[i].offset; - } - } - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { - names[j] = cong_cnts[i].name; - offsets[j] = cong_cnts[i].offset; - } - } - - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { - names[j] = ext_ppcnt_cnts[i].name; - offsets[j] = ext_ppcnt_cnts[i].offset; - } - } -} - -static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) -{ - u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; - u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; - int num_cnt_ports; - int err = 0; - int i; - bool is_shared; - - MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); - is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; - - for (i = 0; i < num_cnt_ports; i++) { - err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); - if (err) - goto err_alloc; - - mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, - dev->port[i].cnts.offsets); - - MLX5_SET(alloc_q_counter_in, in, uid, - is_shared ? MLX5_SHARED_RESOURCE_UID : 0); - - err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); - if (err) { - mlx5_ib_warn(dev, - "couldn't allocate queue counter for port %d, err %d\n", - i + 1, err); - goto err_alloc; - } - - dev->port[i].cnts.set_id = - MLX5_GET(alloc_q_counter_out, out, counter_set_id); - } - return 0; - -err_alloc: - mlx5_ib_dealloc_counters(dev); - return err; -} - -static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, - u8 port_num) -{ - return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts : - &dev->port[port_num].cnts; -} - -/** - * mlx5_ib_get_counters_id - Returns counters id to use for device+port - * @dev: Pointer to mlx5 IB device - * @port_num: Zero based port number - * - * mlx5_ib_get_counters_id() Returns counters set id to use for given - * device port combination in switchdev and non switchdev mode of the - * parent device. - */ -u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num) -{ - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); - - return cnts->set_id; -} - -static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, - u8 port_num) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct mlx5_ib_counters *cnts; - bool is_switchdev = is_mdev_switchdev_mode(dev->mdev); - - if ((is_switchdev && port_num) || (!is_switchdev && !port_num)) - return NULL; - - cnts = get_counters(dev, port_num - 1); - - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, - const struct mlx5_ib_counters *cnts, - struct rdma_hw_stats *stats, - u16 set_id) -{ - u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; - __be32 val; - int ret, i; - - MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); - MLX5_SET(query_q_counter_in, in, counter_set_id, set_id); - ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); - if (ret) - return ret; - - for (i = 0; i < cnts->num_q_counters; i++) { - val = *(__be32 *)((void *)out + cnts->offsets[i]); - stats->value[i] = (u64)be32_to_cpu(val); - } - - return 0; -} - -static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, - const struct mlx5_ib_counters *cnts, - struct rdma_hw_stats *stats) -{ - int offset = cnts->num_q_counters + cnts->num_cong_counters; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - int ret, i; - void *out; - - out = kvzalloc(sz, GFP_KERNEL); - if (!out) - return -ENOMEM; - - ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out); - if (ret) - goto free; - - for (i = 0; i < cnts->num_ext_ppcnt_counters; i++) - stats->value[i + offset] = - be64_to_cpup((__be64 *)(out + - cnts->offsets[i + offset])); -free: - kvfree(out); - return ret; -} - -static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, - struct rdma_hw_stats *stats, - u8 port_num, int index) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); - struct mlx5_core_dev *mdev; - int ret, num_counters; - u8 mdev_port_num; - - if (!stats) - return -EINVAL; - - num_counters = cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters; - - /* q_counters are per IB device, query the master mdev */ - ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id); - if (ret) - return ret; - - if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); - if (ret) - return ret; - } - - if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - mdev = mlx5_ib_get_native_port_mdev(dev, port_num, - &mdev_port_num); - if (!mdev) { - /* If port is not affiliated yet, its in down state - * which doesn't have any counters yet, so it would be - * zero. So no need to read from the HCA. - */ - goto done; - } - ret = mlx5_lag_query_cong_counters(dev->mdev, - stats->value + - cnts->num_q_counters, - cnts->num_cong_counters, - cnts->offsets + - cnts->num_q_counters); - - mlx5_ib_put_native_port_mdev(dev, port_num); - if (ret) - return ret; - } - -done: - return num_counters; -} - -static struct rdma_hw_stats * -mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - const struct mlx5_ib_counters *cnts = - get_counters(dev, counter->port - 1); - - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - const struct mlx5_ib_counters *cnts = - get_counters(dev, counter->port - 1); - - return mlx5_ib_query_q_counters(dev->mdev, cnts, - counter->stats, counter->id); -} - -static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) -{ - struct mlx5_ib_dev *dev = to_mdev(counter->device); - u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; - - if (!counter->id) - return 0; - - MLX5_SET(dealloc_q_counter_in, in, opcode, - MLX5_CMD_OP_DEALLOC_Q_COUNTER); - MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id); - return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); -} - -static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, - struct ib_qp *qp) -{ - struct mlx5_ib_dev *dev = to_mdev(qp->device); - int err; - - if (!counter->id) { - u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; - u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; - - MLX5_SET(alloc_q_counter_in, in, opcode, - MLX5_CMD_OP_ALLOC_Q_COUNTER); - MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID); - err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out); - if (err) - return err; - counter->id = - MLX5_GET(alloc_q_counter_out, out, counter_set_id); - } - - err = mlx5_ib_qp_set_counter(qp, counter); - if (err) - goto fail_set_counter; - - return 0; - -fail_set_counter: - mlx5_ib_counter_dealloc(counter); - counter->id = 0; - - return err; -} - -static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) -{ - return mlx5_ib_qp_set_counter(qp, NULL); -} - static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, enum rdma_netdev_t type, struct rdma_netdev_alloc_params *params) @@ -5802,23 +3369,6 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num, return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params); } -static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev) -{ - if (!dev->delay_drop.dir_debugfs) - return; - debugfs_remove_recursive(dev->delay_drop.dir_debugfs); - dev->delay_drop.dir_debugfs = NULL; -} - -static void cancel_delay_drop(struct mlx5_ib_dev *dev) -{ - if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) - return; - - cancel_work_sync(&dev->delay_drop.delay_drop_work); - delay_drop_debugfs_cleanup(dev); -} - static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { @@ -5858,40 +3408,6 @@ static const struct file_operations fops_delay_drop_timeout = { .read = delay_drop_timeout_read, }; -static void delay_drop_debugfs_init(struct mlx5_ib_dev *dev) -{ - struct dentry *root; - - if (!mlx5_debugfs_root) - return; - - root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); - dev->delay_drop.dir_debugfs = root; - - debugfs_create_atomic_t("num_timeout_events", 0400, root, - &dev->delay_drop.events_cnt); - debugfs_create_atomic_t("num_rqs", 0400, root, - &dev->delay_drop.rqs_cnt); - debugfs_create_file("timeout", 0600, root, &dev->delay_drop, - &fops_delay_drop_timeout); -} - -static void init_delay_drop(struct mlx5_ib_dev *dev) -{ - if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) - return; - - mutex_init(&dev->delay_drop.lock); - dev->delay_drop.dev = dev; - dev->delay_drop.activate = false; - dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; - INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); - atomic_set(&dev->delay_drop.rqs_cnt, 0); - atomic_set(&dev->delay_drop.events_cnt, 0); - - delay_drop_debugfs_init(dev); -} - static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) { @@ -6385,90 +3901,32 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, enum mlx5_ib_uapi_flow_action_flags)); +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_query_context, + UVERBS_OBJECT_DEVICE, + UVERBS_METHOD_QUERY_CONTEXT, + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, + UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp, + dump_fill_mkey), + UA_MANDATORY)); + static const struct uapi_definition mlx5_ib_defs[] = { UAPI_DEF_CHAIN(mlx5_ib_devx_defs), UAPI_DEF_CHAIN(mlx5_ib_flow_defs), UAPI_DEF_CHAIN(mlx5_ib_qos_defs), + UAPI_DEF_CHAIN(mlx5_ib_std_types_defs), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_action), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR), {} }; -static int mlx5_ib_read_counters(struct ib_counters *counters, - struct ib_counters_read_attr *read_attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - struct mlx5_read_counters_attr mread_attr = {}; - struct mlx5_ib_flow_counters_desc *desc; - int ret, i; - - mutex_lock(&mcounters->mcntrs_mutex); - if (mcounters->cntrs_max_index > read_attr->ncounters) { - ret = -EINVAL; - goto err_bound; - } - - mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), - GFP_KERNEL); - if (!mread_attr.out) { - ret = -ENOMEM; - goto err_bound; - } - - mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; - mread_attr.flags = read_attr->flags; - ret = mcounters->read_counters(counters->device, &mread_attr); - if (ret) - goto err_read; - - /* do the pass over the counters data array to assign according to the - * descriptions and indexing pairs - */ - desc = mcounters->counters_data; - for (i = 0; i < mcounters->ncounters; i++) - read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; - -err_read: - kfree(mread_attr.out); -err_bound: - mutex_unlock(&mcounters->mcntrs_mutex); - return ret; -} - -static int mlx5_ib_destroy_counters(struct ib_counters *counters) -{ - struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); - - counters_clear_description(counters); - if (mcounters->hw_cntrs_hndl) - mlx5_fc_destroy(to_mdev(counters->device)->mdev, - mcounters->hw_cntrs_hndl); - - kfree(mcounters); - - return 0; -} - -static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_mcounters *mcounters; - - mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL); - if (!mcounters) - return ERR_PTR(-ENOMEM); - - mutex_init(&mcounters->mcntrs_mutex); - - return &mcounters->ibcntrs; -} - static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); @@ -6547,21 +4005,16 @@ err_mp: return -ENOMEM; } -static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_enable_driver(struct ib_device *dev) { - dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); - - if (!dev->flow_db) - return -ENOMEM; - - mutex_init(&dev->flow_db->lock); + struct mlx5_ib_dev *mdev = to_mdev(dev); + int ret; - return 0; -} + ret = mlx5_ib_test_wc(mdev); + mlx5_ib_dbg(mdev, "Write-Combining %s", + mdev->wc_support ? "supported" : "not supported"); -static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) -{ - kfree(dev->flow_db); + return ret; } static const struct ib_device_ops mlx5_ib_dev_ops = { @@ -6577,9 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .attach_mcast = mlx5_ib_mcg_attach, .check_mr_status = mlx5_ib_check_mr_status, .create_ah = mlx5_ib_create_ah, - .create_counters = mlx5_ib_create_counters, .create_cq = mlx5_ib_create_cq, - .create_flow = mlx5_ib_create_flow, .create_qp = mlx5_ib_create_qp, .create_srq = mlx5_ib_create_srq, .dealloc_pd = mlx5_ib_dealloc_pd, @@ -6587,10 +4038,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .del_gid = mlx5_ib_del_gid, .dereg_mr = mlx5_ib_dereg_mr, .destroy_ah = mlx5_ib_destroy_ah, - .destroy_counters = mlx5_ib_destroy_counters, .destroy_cq = mlx5_ib_destroy_cq, - .destroy_flow = mlx5_ib_destroy_flow, - .destroy_flow_action = mlx5_ib_destroy_flow_action, .destroy_qp = mlx5_ib_destroy_qp, .destroy_srq = mlx5_ib_destroy_srq, .detach_mcast = mlx5_ib_mcg_detach, @@ -6598,8 +4046,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, .enable_driver = mlx5_ib_enable_driver, - .fill_res_entry = mlx5_ib_fill_res_entry, - .fill_stat_entry = mlx5_ib_fill_stat_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, @@ -6623,24 +4069,20 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .query_pkey = mlx5_ib_query_pkey, .query_qp = mlx5_ib_query_qp, .query_srq = mlx5_ib_query_srq, - .read_counters = mlx5_ib_read_counters, + .query_ucontext = mlx5_ib_query_ucontext, .reg_user_mr = mlx5_ib_reg_user_mr, .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), }; -static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { - .create_flow_action_esp = mlx5_ib_create_flow_action_esp, - .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, -}; - static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { .rdma_netdev_get_params = mlx5_ib_rn_get_params, }; @@ -6661,6 +4103,8 @@ static const struct ib_device_ops mlx5_ib_dev_mw_ops = { static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { .alloc_xrcd = mlx5_ib_alloc_xrcd, .dealloc_xrcd = mlx5_ib_dealloc_xrcd, + + INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd), }; static const struct ib_device_ops mlx5_ib_dev_dm_ops = { @@ -6769,9 +4213,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops); ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) @@ -6829,65 +4270,36 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { .modify_wq = mlx5_ib_modify_wq, }; -static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) -{ - u8 port_num; - - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); - - port_num = mlx5_core_native_port_num(dev->mdev) - 1; - - /* Register only for native ports */ - return mlx5_add_netdev_notifier(dev, port_num); -} - -static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) -{ - u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1; - - mlx5_remove_netdev_notifier(dev, port_num); -} - -static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev) -{ - struct mlx5_core_dev *mdev = dev->mdev; - enum rdma_link_layer ll; - int port_type_cap; - int err = 0; - - port_type_cap = MLX5_CAP_GEN(mdev, port_type); - ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - - if (ll == IB_LINK_LAYER_ETHERNET) - err = mlx5_ib_stage_common_roce_init(dev); - - return err; -} - -static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev) -{ - mlx5_ib_stage_common_roce_cleanup(dev); -} - -static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; + u8 port_num = 0; int err; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - err = mlx5_ib_stage_common_roce_init(dev); - if (err) + dev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); + + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + + /* Register only for native ports */ + err = mlx5_add_netdev_notifier(dev, port_num); + if (err || dev->is_rep || !mlx5_is_roce_enabled(mdev)) + /* + * We don't enable ETH interface for + * 1. IB representors + * 2. User disabled ROCE through devlink interface + */ return err; err = mlx5_enable_eth(dev); @@ -6897,71 +4309,27 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) return 0; cleanup: - mlx5_ib_stage_common_roce_cleanup(dev); - + mlx5_remove_netdev_notifier(dev, port_num); return err; } -static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; + u8 port_num; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - mlx5_disable_eth(dev); - mlx5_ib_stage_common_roce_cleanup(dev); - } -} - -static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) -{ - return create_dev_resources(&dev->devr); -} + if (!dev->is_rep) + mlx5_disable_eth(dev); -static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) -{ - destroy_dev_resources(&dev->devr); -} - -static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) -{ - return mlx5_ib_odp_init_one(dev); -} - -static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) -{ - mlx5_ib_odp_cleanup_one(dev); -} - -static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { - .alloc_hw_stats = mlx5_ib_alloc_hw_stats, - .get_hw_stats = mlx5_ib_get_hw_stats, - .counter_bind_qp = mlx5_ib_counter_bind_qp, - .counter_unbind_qp = mlx5_ib_counter_unbind_qp, - .counter_dealloc = mlx5_ib_counter_dealloc, - .counter_alloc_stats = mlx5_ib_counter_alloc_stats, - .counter_update_stats = mlx5_ib_counter_update_stats, -}; - -static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) -{ - if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); - - return mlx5_ib_alloc_counters(dev); + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + mlx5_remove_netdev_notifier(dev, port_num); } - - return 0; -} - -static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) -{ - if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) - mlx5_ib_dealloc_counters(dev); } static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev) @@ -7023,7 +4391,18 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { - destroy_umrc_res(dev); + int err; + + err = mlx5_mr_cache_cleanup(dev); + if (err) + mlx5_ib_warn(dev, "mr cache cleanup failed\n"); + + if (dev->umrc.qp) + mlx5_ib_destroy_qp(dev->umrc.qp, NULL); + if (dev->umrc.cq) + ib_free_cq(dev->umrc.cq); + if (dev->umrc.pd) + ib_dealloc_pd(dev->umrc.pd); } static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) @@ -7031,21 +4410,162 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) ib_unregister_device(&dev->ib_dev); } +enum { + MAX_UMR_WR = 128, +}; + static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) { - return create_umr_res(dev); + struct ib_qp_init_attr *init_attr = NULL; + struct ib_qp_attr *attr = NULL; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + int ret; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); + if (!attr || !init_attr) { + ret = -ENOMEM; + goto error_0; + } + + pd = ib_alloc_pd(&dev->ib_dev, 0); + if (IS_ERR(pd)) { + mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); + ret = PTR_ERR(pd); + goto error_0; + } + + cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); + if (IS_ERR(cq)) { + mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); + ret = PTR_ERR(cq); + goto error_2; + } + + init_attr->send_cq = cq; + init_attr->recv_cq = cq; + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->cap.max_send_wr = MAX_UMR_WR; + init_attr->cap.max_send_sge = 1; + init_attr->qp_type = MLX5_IB_QPT_REG_UMR; + init_attr->port_num = 1; + qp = mlx5_ib_create_qp(pd, init_attr, NULL); + if (IS_ERR(qp)) { + mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); + ret = PTR_ERR(qp); + goto error_3; + } + qp->device = &dev->ib_dev; + qp->real_qp = qp; + qp->uobject = NULL; + qp->qp_type = MLX5_IB_QPT_REG_UMR; + qp->send_cq = init_attr->send_cq; + qp->recv_cq = init_attr->recv_cq; + + attr->qp_state = IB_QPS_INIT; + attr->port_num = 1; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | + IB_QP_PORT, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTR; + attr->path_mtu = IB_MTU_256; + + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTS; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); + goto error_4; + } + + dev->umrc.qp = qp; + dev->umrc.cq = cq; + dev->umrc.pd = pd; + + sema_init(&dev->umrc.sem, MAX_UMR_WR); + ret = mlx5_mr_cache_init(dev); + if (ret) { + mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); + goto error_4; + } + + kfree(attr); + kfree(init_attr); + + return 0; + +error_4: + mlx5_ib_destroy_qp(qp, NULL); + dev->umrc.qp = NULL; + +error_3: + ib_free_cq(cq); + dev->umrc.cq = NULL; + +error_2: + ib_dealloc_pd(pd); + dev->umrc.pd = NULL; + +error_0: + kfree(attr); + kfree(init_attr); + return ret; } static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev) { - init_delay_drop(dev); + struct dentry *root; + + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return 0; + + mutex_init(&dev->delay_drop.lock); + dev->delay_drop.dev = dev; + dev->delay_drop.activate = false; + dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; + INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); + atomic_set(&dev->delay_drop.rqs_cnt, 0); + atomic_set(&dev->delay_drop.events_cnt, 0); + + if (!mlx5_debugfs_root) + return 0; + + root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); + dev->delay_drop.dir_debugfs = root; + debugfs_create_atomic_t("num_timeout_events", 0400, root, + &dev->delay_drop.events_cnt); + debugfs_create_atomic_t("num_rqs", 0400, root, + &dev->delay_drop.rqs_cnt); + debugfs_create_file("timeout", 0600, root, &dev->delay_drop, + &fops_delay_drop_timeout); return 0; } static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) { - cancel_delay_drop(dev); + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + cancel_work_sync(&dev->delay_drop.delay_drop_work); + if (!dev->delay_drop.dir_debugfs) + return; + + debugfs_remove_recursive(dev->delay_drop.dir_debugfs); + dev->delay_drop.dir_debugfs = NULL; } static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) @@ -7060,38 +4580,6 @@ static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); } -static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) -{ - int uid; - - uid = mlx5_ib_devx_create(dev, false); - if (uid > 0) { - dev->devx_whitelist_uid = uid; - mlx5_ib_devx_init_event_table(dev); - } - - return 0; -} -static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) -{ - if (dev->devx_whitelist_uid) { - mlx5_ib_devx_cleanup_event_table(dev); - mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); - } -} - -int mlx5_ib_enable_driver(struct ib_device *dev) -{ - struct mlx5_ib_dev *mdev = to_mdev(dev); - int ret; - - ret = mlx5_ib_test_wc(mdev); - mlx5_ib_dbg(mdev, "Write-Combining %s", - mdev->wc_support ? "supported" : "not supported"); - - return ret; -} - void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) @@ -7139,9 +4627,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, - mlx5_ib_stage_flow_db_init, - mlx5_ib_stage_flow_db_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_FS, + mlx5_ib_fs_init, + mlx5_ib_fs_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, mlx5_ib_stage_caps_cleanup), @@ -7149,8 +4637,8 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_non_default_cb, NULL), STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_roce_init, - mlx5_ib_stage_roce_cleanup), + mlx5_ib_roce_init, + mlx5_ib_roce_cleanup), STAGE_CREATE(MLX5_IB_STAGE_QP, mlx5_init_qp_table, mlx5_cleanup_qp_table), @@ -7158,17 +4646,17 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_init_srq_table, mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, - mlx5_ib_stage_dev_res_init, - mlx5_ib_stage_dev_res_cleanup), + mlx5_ib_dev_res_init, + mlx5_ib_dev_res_cleanup), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, mlx5_ib_stage_dev_notifier_init, mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_ODP, - mlx5_ib_stage_odp_init, - mlx5_ib_stage_odp_cleanup), + mlx5_ib_odp_init_one, + mlx5_ib_odp_cleanup_one), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, - mlx5_ib_stage_counters_init, - mlx5_ib_stage_counters_cleanup), + mlx5_ib_counters_init, + mlx5_ib_counters_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, mlx5_ib_stage_cong_debugfs_init, mlx5_ib_stage_cong_debugfs_cleanup), @@ -7182,8 +4670,8 @@ static const struct mlx5_ib_profile pf_profile = { NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, - mlx5_ib_stage_devx_init, - mlx5_ib_stage_devx_cleanup), + mlx5_ib_devx_init, + mlx5_ib_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -7193,15 +4681,18 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, mlx5_ib_stage_delay_drop_init, mlx5_ib_stage_delay_drop_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, + mlx5_ib_restrack_init, + NULL), }; const struct mlx5_ib_profile raw_eth_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, - mlx5_ib_stage_flow_db_init, - mlx5_ib_stage_flow_db_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_FS, + mlx5_ib_fs_init, + mlx5_ib_fs_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, mlx5_ib_stage_caps_cleanup), @@ -7209,8 +4700,8 @@ const struct mlx5_ib_profile raw_eth_profile = { mlx5_ib_stage_raw_eth_non_default_cb, NULL), STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_raw_eth_roce_init, - mlx5_ib_stage_raw_eth_roce_cleanup), + mlx5_ib_roce_init, + mlx5_ib_roce_cleanup), STAGE_CREATE(MLX5_IB_STAGE_QP, mlx5_init_qp_table, mlx5_cleanup_qp_table), @@ -7218,14 +4709,14 @@ const struct mlx5_ib_profile raw_eth_profile = { mlx5_init_srq_table, mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, - mlx5_ib_stage_dev_res_init, - mlx5_ib_stage_dev_res_cleanup), + mlx5_ib_dev_res_init, + mlx5_ib_dev_res_cleanup), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, mlx5_ib_stage_dev_notifier_init, mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, - mlx5_ib_stage_counters_init, - mlx5_ib_stage_counters_cleanup), + mlx5_ib_counters_init, + mlx5_ib_counters_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, mlx5_ib_stage_cong_debugfs_init, mlx5_ib_stage_cong_debugfs_cleanup), @@ -7239,14 +4730,17 @@ const struct mlx5_ib_profile raw_eth_profile = { NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, - mlx5_ib_stage_devx_init, - mlx5_ib_stage_devx_cleanup), + mlx5_ib_devx_init, + mlx5_ib_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, mlx5_ib_stage_post_ib_reg_umr_init, NULL), + STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, + mlx5_ib_restrack_init, + NULL), }; static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5dbe3eb0d9cb..5287fc868662 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. */ #ifndef MLX5_IB_H @@ -730,8 +703,8 @@ struct mlx5_ib_port_resources { struct mlx5_ib_resources { struct ib_cq *c0; - struct ib_xrcd *x0; - struct ib_xrcd *x1; + u32 xrcdn0; + u32 xrcdn1; struct ib_pd *p0; struct ib_srq *s0; struct ib_srq *s1; @@ -832,7 +805,7 @@ struct mlx5_ib_delay_drop { enum mlx5_ib_stages { MLX5_IB_STAGE_INIT, - MLX5_IB_STAGE_FLOW_DB, + MLX5_IB_STAGE_FS, MLX5_IB_STAGE_CAPS, MLX5_IB_STAGE_NON_DEFAULT_CB, MLX5_IB_STAGE_ROCE, @@ -850,7 +823,7 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_IB_REG, MLX5_IB_STAGE_POST_IB_REG_UMR, MLX5_IB_STAGE_DELAY_DROP, - MLX5_IB_STAGE_CLASS_ATTR, + MLX5_IB_STAGE_RESTRACK, MLX5_IB_STAGE_MAX, }; @@ -1078,11 +1051,6 @@ static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp) return container_of(core_qp, struct mlx5_ib_rwq, core_qp); } -static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) -{ - return container_of(mmkey, struct mlx5_ib_mr, mmkey); -} - static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx5_ib_pd, ibpd); @@ -1210,7 +1178,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, struct ib_pd *pd, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, u32 max_num_sg, u32 max_num_meta_sg); @@ -1224,9 +1192,8 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad *in, struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index); -struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_udata *udata); -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); +int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); +void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, @@ -1375,46 +1342,12 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, u8 *native_port_num); void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); -int mlx5_ib_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res); -int mlx5_ib_fill_stat_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res); extern const struct uapi_definition mlx5_ib_devx_defs[]; extern const struct uapi_definition mlx5_ib_flow_defs[]; extern const struct uapi_definition mlx5_ib_qos_defs[]; +extern const struct uapi_definition mlx5_ib_std_types_defs[]; -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); -void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); -void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev); -void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev); -struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( - struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, u32 counter_id, - void *cmd_in, int inlen, int dest_id, int dest_type); -bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); -void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); -#else -static inline int -mlx5_ib_devx_create(struct mlx5_ib_dev *dev, - bool is_user) { return -EOPNOTSUPP; } -static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} -static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {} -static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {} -static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, - int *dest_type) -{ - return false; -} -static inline void -mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) -{ - return; -}; -#endif static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -1423,15 +1356,6 @@ static inline void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } -static inline u8 convert_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | - (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | - MLX5_PERM_LOCAL_READ; -} - static inline int is_qp1(enum ib_qp_type qp_type) { return qp_type == MLX5_IB_QPT_HW_GSI; @@ -1518,9 +1442,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, bool dyn_bfreg); -int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); -u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num); - static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, bool do_modify_atomic, int access_flags) { @@ -1533,14 +1454,18 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, return false; if (access_flags & IB_ACCESS_RELAXED_ORDERING && - (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) || - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))) + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + return false; + + if (access_flags & IB_ACCESS_RELAXED_ORDERING && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) return false; return true; } -int mlx5_ib_enable_driver(struct ib_device *dev); int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 44683073be0c..3e6f2f9c6655 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1961,7 +1961,7 @@ err_free: } struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 77dca1e05bba..cfd7efab114e 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -816,6 +816,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + lockdep_assert_held(&mr->dev->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; @@ -929,11 +930,6 @@ next_mr: if (ret < 0) goto srcu_unlock; - /* - * When prefetching a page, page fault is generated - * in order to bring the page to the main memory. - * In the current flow, page faults are being counted. - */ mlx5_update_odp_stats(mr, faults, ret); npages += ret; @@ -1770,13 +1766,26 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) { struct prefetch_mr_work *work = container_of(w, struct prefetch_mr_work, work); + struct mlx5_ib_dev *dev; u32 bytes_mapped = 0; + int srcu_key; + int ret; u32 i; - for (i = 0; i < work->num_sge; ++i) - pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, - work->frags[i].length, &bytes_mapped, - work->pf_flags); + /* We rely on IB/core that work is executed if we have num_sge != 0 only. */ + WARN_ON(!work->num_sge); + dev = work->frags[0].mr->dev; + /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ + srcu_key = srcu_read_lock(&dev->odp_srcu); + for (i = 0; i < work->num_sge; ++i) { + ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, + work->frags[i].length, &bytes_mapped, + work->pf_flags); + if (ret <= 0) + continue; + mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret); + } + srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); } @@ -1832,6 +1841,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, &bytes_mapped, pf_flags); if (ret < 0) goto out; + mlx5_update_odp_stats(mr, prefetch, ret); } ret = 0; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1225b8d77510..59fce5fac7a3 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -38,6 +38,7 @@ #include <linux/mlx5/fs.h> #include "mlx5_ib.h" #include "ib_rep.h" +#include "counters.h" #include "cmd.h" #include "qp.h" #include "wr.h" @@ -2031,15 +2032,15 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, switch (init_attr->qp_type) { case IB_QPT_XRC_INI: MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); break; default: if (init_attr->srq) { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn); } else { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn); } } @@ -2178,11 +2179,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, no_sq, 1); if (attr->srq) { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(attr->srq)->msrq.srqn); } else { - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1); MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn); } @@ -3554,7 +3555,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, switch (raw_qp_param->operation) { case MLX5_CMD_OP_RST2INIT_QP: rq_state = MLX5_RQC_STATE_RDY; - sq_state = MLX5_SQC_STATE_RDY; + sq_state = MLX5_SQC_STATE_RST; break; case MLX5_CMD_OP_2ERR_QP: rq_state = MLX5_RQC_STATE_ERR; @@ -3566,13 +3567,11 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, break; case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: - if (raw_qp_param->set_mask == - MLX5_RAW_QP_RATE_LIMIT) { - modify_rq = 0; - sq_state = sq->state; - } else { - return raw_qp_param->set_mask ? -EINVAL : 0; - } + if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT) + return -EINVAL; + + modify_rq = 0; + sq_state = MLX5_SQC_STATE_RDY; break; case MLX5_CMD_OP_INIT2INIT_QP: case MLX5_CMD_OP_INIT2RTR_QP: @@ -4114,9 +4113,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_dev *dev = to_mdev(ibqp->device); enum ib_qp_state cur_state, new_state; - int err = 0; int required = IB_QP_STATE; void *dctc; + int err; if (!(attr_mask & IB_QP_STATE)) return -EINVAL; @@ -4208,11 +4207,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state); return -EINVAL; } - if (err) - qp->state = IB_QPS_ERR; - else - qp->state = new_state; - return err; + + qp->state = new_state; + return 0; } int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -4450,7 +4447,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state, [MLX5_SQ_STATE_NA] = IB_QPS_RESET, }, [MLX5_RQC_STATE_RDY] = { - [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD, + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE, [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, [MLX5_SQC_STATE_ERR] = IB_QPS_SQE, [MLX5_SQ_STATE_NA] = MLX5_QP_STATE, @@ -4462,7 +4459,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state, [MLX5_SQ_STATE_NA] = IB_QPS_ERR, }, [MLX5_RQ_STATE_NA] = { - [MLX5_SQC_STATE_RST] = IB_QPS_RESET, + [MLX5_SQC_STATE_RST] = MLX5_QP_STATE, [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE, [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE, [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD, @@ -4708,41 +4705,23 @@ out: return err; } -struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_udata *udata) +int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_xrcd *xrcd; - int err; + struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device); + struct mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd); if (!MLX5_CAP_GEN(dev->mdev, xrc)) - return ERR_PTR(-ENOSYS); - - xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); - if (!xrcd) - return ERR_PTR(-ENOMEM); - - err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); - if (err) { - kfree(xrcd); - return ERR_PTR(-ENOMEM); - } + return -EOPNOTSUPP; - return &xrcd->ibxrcd; + return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); } -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; - int err; - err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); - if (err) - mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); - - kfree(xrcd); - return 0; + mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); } static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index 82ea2b94dfa6..ba899df44c5b 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -43,4 +43,5 @@ void mlx5_core_res_put(struct mlx5_core_rsc_common *res); int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn); int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn); +int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); #endif /* _MLX5_IB_QP_H */ diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 8f6c04f12531..887270dd3ce2 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -1,17 +1,85 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2019-2020, Mellanox Technologies Ltd. All rights reserved. */ #include <uapi/rdma/rdma_netlink.h> +#include <linux/mlx5/rsc_dump.h> #include <rdma/ib_umem_odp.h> #include <rdma/restrack.h> #include "mlx5_ib.h" +#include "restrack.h" -static int fill_stat_mr_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +#define MAX_DUMP_SIZE 1024 + +static int dump_rsc(struct mlx5_core_dev *dev, enum mlx5_sgmt_type type, + int index, void *data, int *data_len) +{ + struct mlx5_core_dev *mdev = dev; + struct mlx5_rsc_dump_cmd *cmd; + struct mlx5_rsc_key key = {}; + struct page *page; + int offset = 0; + int err = 0; + int cmd_err; + int size; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + key.size = PAGE_SIZE; + key.rsc = type; + key.index1 = index; + key.num_of_obj1 = 1; + + cmd = mlx5_rsc_dump_cmd_create(mdev, &key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + + do { + cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); + if (cmd_err < 0 || size + offset > MAX_DUMP_SIZE) { + err = cmd_err; + goto destroy_cmd; + } + memcpy(data + offset, page_address(page), size); + offset += size; + } while (cmd_err > 0); + *data_len = offset; + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); +free_page: + __free_page(page); + return err; +} + +static int fill_res_raw(struct sk_buff *msg, struct mlx5_ib_dev *dev, + enum mlx5_sgmt_type type, u32 key) +{ + int len = 0; + void *data; + int err; + + data = kzalloc(MAX_DUMP_SIZE, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = dump_rsc(dev->mdev, type, key, data, &len); + if (err) + goto out; + + err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data); +out: + kfree(data); + return err; +} + +static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) { - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); struct mlx5_ib_mr *mr = to_mmr(ibmr); struct nlattr *table_attr; @@ -31,6 +99,9 @@ static int fill_stat_mr_entry(struct sk_buff *msg, msg, "page_invalidations", atomic64_read(&mr->odp_stats.invalidations))) goto err_table; + if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch", + atomic64_read(&mr->odp_stats.prefetch))) + goto err_table; nla_nest_end(msg, table_attr); return 0; @@ -41,10 +112,16 @@ err: return -EMSGSIZE; } -static int fill_res_mr_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + + return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY, + mlx5_mkey_to_idx(mr->mmkey.key)); +} + +static int fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) { - struct ib_mr *ibmr = container_of(res, struct ib_mr, res); struct mlx5_ib_mr *mr = to_mmr(ibmr); struct nlattr *table_attr; @@ -71,20 +148,32 @@ err: return -EMSGSIZE; } -int mlx5_ib_fill_res_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +static int fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq) { - if (res->type == RDMA_RESTRACK_MR) - return fill_res_mr_entry(msg, res); + struct mlx5_ib_dev *dev = to_mdev(ibcq->device); + struct mlx5_ib_cq *cq = to_mcq(ibcq); - return 0; + return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn); } -int mlx5_ib_fill_stat_entry(struct sk_buff *msg, - struct rdma_restrack_entry *res) +static int fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp) { - if (res->type == RDMA_RESTRACK_MR) - return fill_stat_mr_entry(msg, res); + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + + return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_QP, + ibqp->qp_num); +} +static const struct ib_device_ops restrack_ops = { + .fill_res_cq_entry_raw = fill_res_cq_entry_raw, + .fill_res_mr_entry = fill_res_mr_entry, + .fill_res_mr_entry_raw = fill_res_mr_entry_raw, + .fill_res_qp_entry_raw = fill_res_qp_entry_raw, + .fill_stat_mr_entry = fill_stat_mr_entry, +}; + +int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev) +{ + ib_set_device_ops(&dev->ib_dev, &restrack_ops); return 0; } diff --git a/drivers/infiniband/hw/mlx5/restrack.h b/drivers/infiniband/hw/mlx5/restrack.h new file mode 100644 index 000000000000..e8d81270f1b6 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/restrack.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2020, Mellanox Technologies Ltd. All rights reserved. + */ + +#ifndef _MLX5_IB_RESTRACK_H +#define _MLX5_IB_RESTRACK_H + +#include "mlx5_ib.h" + +int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev); + +#endif /* _MLX5_IB_RESTRACK_H */ diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 6d1ff13d2283..7e10cbcb6d5c 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -274,10 +274,10 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, if (srq->wq_sig) in.flags |= MLX5_SRQ_FLAG_WQ_SIG; - if (init_attr->srq_type == IB_SRQT_XRC) + if (init_attr->srq_type == IB_SRQT_XRC && init_attr->ext.xrc.xrcd) in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; else - in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn; + in.xrcd = dev->devr.xrcdn0; if (init_attr->srq_type == IB_SRQT_TM) { in.tm_log_list_size = diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c new file mode 100644 index 000000000000..16145fda68d0 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/std_types.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/uverbs_ioctl.h> +#include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> +#include <linux/mlx5/driver.h> +#include "mlx5_ib.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + +static int UVERBS_HANDLER(MLX5_IB_METHOD_PD_QUERY)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_pd *pd = + uverbs_attr_get_obj(attrs, MLX5_IB_ATTR_QUERY_PD_HANDLE); + struct mlx5_ib_pd *mpd = to_mpd(pd); + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_PD_RESP_PDN, + &mpd->pdn, sizeof(mpd->pdn)); +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_PD_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_PD_HANDLE, + UVERBS_OBJECT_PD, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_PD_RESP_PDN, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +ADD_UVERBS_METHODS(mlx5_ib_pd, + UVERBS_OBJECT_PD, + &UVERBS_METHOD(MLX5_IB_METHOD_PD_QUERY)); + +const struct uapi_definition mlx5_ib_std_types_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE( + UVERBS_OBJECT_PD, + &mlx5_ib_pd), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index bc35dbe4855b..43880973a512 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -263,7 +263,9 @@ static __be64 get_umr_update_translation_mask(void) return cpu_to_be64(result); } -static __be64 get_umr_update_access_mask(int atomic) +static __be64 get_umr_update_access_mask(int atomic, + int relaxed_ordering_write, + int relaxed_ordering_read) { u64 result; @@ -275,6 +277,12 @@ static __be64 get_umr_update_access_mask(int atomic) if (atomic) result |= MLX5_MKEY_MASK_A; + if (relaxed_ordering_write) + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; + + if (relaxed_ordering_read) + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; + return cpu_to_be64(result); } @@ -289,17 +297,28 @@ static __be64 get_umr_update_pd_mask(void) static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) { - if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && - MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || - (mask & MLX5_MKEY_MASK_A && - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) + if (mask & MLX5_MKEY_MASK_PAGE_SIZE && + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) return -EPERM; + + if (mask & MLX5_MKEY_MASK_A && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + return -EPERM; + return 0; } static int set_reg_umr_segment(struct mlx5_ib_dev *dev, struct mlx5_wqe_umr_ctrl_seg *umr, - const struct ib_send_wr *wr, int atomic) + const struct ib_send_wr *wr) { const struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -325,7 +344,10 @@ static int set_reg_umr_segment(struct mlx5_ib_dev *dev, if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) umr->mkey_mask |= get_umr_update_translation_mask(); if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { - umr->mkey_mask |= get_umr_update_access_mask(atomic); + umr->mkey_mask |= get_umr_update_access_mask( + !!(MLX5_CAP_GEN(dev->mdev, atomic)), + !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)), + !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))); umr->mkey_mask |= get_umr_update_pd_mask(); } if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) @@ -383,20 +405,31 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, memset(seg, 0, sizeof(*seg)); if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) - seg->status = MLX5_MKEY_STATUS_FREE; + MLX5_SET(mkc, seg, free, 1); + + MLX5_SET(mkc, seg, a, + !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, seg, rw, + !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, seg, lr, 1); + MLX5_SET(mkc, seg, relaxed_ordering_write, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); + MLX5_SET(mkc, seg, relaxed_ordering_read, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); - seg->flags = convert_access(umrwr->access_flags); if (umrwr->pd) - seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); + MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && !umrwr->length) - seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64); + MLX5_SET(mkc, seg, length64, 1); - seg->start_addr = cpu_to_be64(umrwr->virt_addr); - seg->len = cpu_to_be64(umrwr->length); - seg->log2_page_size = umrwr->page_shift; - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | - mlx5_mkey_variant(umrwr->mkey)); + MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr); + MLX5_SET64(mkc, seg, len, umrwr->length); + MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift); + MLX5_SET(mkc, seg, qpn, 0xffffff); + MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey)); } static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, @@ -1224,8 +1257,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); - err = set_reg_umr_segment(dev, *seg, wr, - !!(MLX5_CAP_GEN(dev->mdev, atomic))); + err = set_reg_umr_segment(dev, *seg, wr); if (unlikely(err)) goto out; *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index d11c74390a12..6cdbec13756a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2901,7 +2901,7 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) } struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { int status; struct ocrdma_mr *mr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 3a5010881be5..df8e3b923a44 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -101,7 +101,7 @@ struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, u64 virt, int acc, struct ib_udata *); struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index b1de8d608e4d..d85f992bac29 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -110,7 +110,6 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = 1; immutable->gid_tbl_len = 1; immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; immutable->max_mad_size = 0; @@ -179,6 +178,7 @@ static int qedr_iw_register_device(struct qedr_dev *dev) static const struct ib_device_ops qedr_roce_dev_ops = { .get_port_immutable = qedr_roce_port_immutable, + .query_pkey = qedr_query_pkey, }; static void qedr_roce_register_device(struct qedr_dev *dev) @@ -221,7 +221,6 @@ static const struct ib_device_ops qedr_dev_ops = { .post_srq_recv = qedr_post_srq_recv, .process_mad = qedr_process_mad, .query_device = qedr_query_device, - .query_pkey = qedr_query_pkey, .query_port = qedr_query_port, .query_qp = qedr_query_qp, .query_srq = qedr_query_srq, diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index fdf90ecb2699..460292179b32 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -235,6 +235,7 @@ struct qedr_ucontext { u32 dpi_size; u16 dpi; bool db_rec; + u8 edpm_mode; }; union db_prod32 { @@ -344,10 +345,10 @@ struct qedr_srq_hwq_info { u32 wqe_prod; u32 sge_prod; u32 wr_prod_cnt; - u32 wr_cons_cnt; + atomic_t wr_cons_cnt; u32 num_elems; - u32 *virt_prod_pair_addr; + struct rdma_srq_producers *virt_prod_pair_addr; dma_addr_t phy_prod_pair_addr; }; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index f5aa85a5377c..4ce4e2eef6cc 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -239,7 +239,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) attr->ip_gids = true; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { attr->gid_tbl_len = 1; - attr->pkey_tbl_len = 1; } else { attr->gid_tbl_len = QEDR_MAX_SGID; attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; @@ -275,7 +274,8 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) DP_ERR(dev, "Problem copying data from user space\n"); return -EFAULT; } - + ctx->edpm_mode = !!(ureq.context_flags & + QEDR_ALLOC_UCTX_EDPM_MODE); ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC); } @@ -316,11 +316,15 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY; else uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED | - QEDR_DPM_TYPE_ROCE_LEGACY; + QEDR_DPM_TYPE_ROCE_LEGACY | + QEDR_DPM_TYPE_ROCE_EDPM_MODE; - uresp.dpm_flags |= QEDR_DPM_SIZES_SET; - uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; - uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE; + if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) { + uresp.dpm_flags |= QEDR_DPM_SIZES_SET; + uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; + uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE; + uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE; + } uresp.wids_enabled = 1; uresp.wid_count = oparams.wid_count; @@ -1754,7 +1758,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, struct qed_rdma_create_qp_out_params out_params; struct qedr_pd *pd = get_qedr_pd(ibpd); struct qedr_create_qp_uresp uresp; - struct qedr_ucontext *ctx = NULL; + struct qedr_ucontext *ctx = pd ? pd->uctx : NULL; struct qedr_create_qp_ureq ureq; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; @@ -1792,6 +1796,9 @@ static int qedr_create_user_qp(struct qedr_dev *dev, in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; } + if (ctx) + SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode); + qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, &in_params, &out_params); @@ -3004,7 +3011,7 @@ err0: } struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct qedr_mr *mr; @@ -3687,7 +3694,7 @@ static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq) * count and consumer count and subtract it from max * work request supported so that we get elements left. */ - used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; + used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt); return hw_srq->max_wr - used; } @@ -3702,7 +3709,6 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, unsigned long flags; int status = 0; u32 num_sge; - u32 offset; spin_lock_irqsave(&srq->lock, flags); @@ -3715,7 +3721,8 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, if (!qedr_srq_elem_left(hw_srq) || wr->num_sge > srq->hw_srq.max_sges) { DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n", - hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + hw_srq->wr_prod_cnt, + atomic_read(&hw_srq->wr_cons_cnt), wr->num_sge, srq->hw_srq.max_sges); status = -ENOMEM; *bad_wr = wr; @@ -3749,22 +3756,20 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, hw_srq->sge_prod++; } - /* Flush WQE and SGE information before + /* Update WQE and SGE information before * updating producer. */ - wmb(); + dma_wmb(); /* SRQ producer is 8 bytes. Need to update SGE producer index * in first 4 bytes and need to update WQE producer in * next 4 bytes. */ - *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod; - offset = offsetof(struct rdma_srq_producers, wqe_prod); - *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = - hw_srq->wqe_prod; + srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod; + /* Make sure sge producer is updated first */ + dma_wmb(); + srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod; - /* Flush producer after updating it. */ - wmb(); wr = wr->next; } @@ -4183,7 +4188,7 @@ static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp, } else { __process_resp_one(dev, qp, cq, wc, resp, wr_id); } - srq->hw_srq.wr_cons_cnt++; + atomic_inc(&srq->hw_srq.wr_cons_cnt); return 1; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 5e02387e068d..39dd6286ba39 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -84,7 +84,7 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); int qedr_post_send(struct ib_qp *, const struct ib_send_wr *, const struct ib_send_wr **bad_wr); diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 7875883621f4..398c4c00b932 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -214,7 +214,7 @@ usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, if (!flow) return ERR_PTR(-ENOMEM); - tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa); + tlv = dma_alloc_coherent(&pdev->dev, tlv_size, &tlv_pa, GFP_ATOMIC); if (!tlv) { usnic_err("Failed to allocate memory\n"); status = -ENOMEM; @@ -258,7 +258,7 @@ usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, out_free_tlv: spin_unlock(&ufdev->lock); - pci_free_consistent(pdev, tlv_size, tlv, tlv_pa); + dma_free_coherent(&pdev->dev, tlv_size, tlv, tlv_pa); if (!status) return flow; out_free_flow: diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index b039f1f00e05..77a010e68208 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -202,7 +202,7 @@ err_umem: * @return: ib_mr pointer on success, otherwise returns an errno. */ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct pvrdma_dev *dev = to_vdev(pd->device); struct pvrdma_user_mr *mr; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 267702226f10..699b20849a7e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -406,7 +406,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata); int pvrdma_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 40480add7dd3..75a04b1497c4 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -90,8 +90,7 @@ EXPORT_SYMBOL(rvt_check_ah); /** * rvt_create_ah - create an address handle * @ibah: the IB address handle - * @ah_attr: the attributes of the AH - * @create_flags: create address handle flags (see enum rdma_create_ah_flags) + * @init_attr: the attributes of the AH * @udata: pointer to user's input output buffer information. * * This may be called from interrupt context. diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 60864e5ca7cb..2f7c25fea44a 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -576,7 +576,7 @@ out: * Return: the memory region on success, otherwise return an errno. */ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct rvt_mr *mr; diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index 780fc63af98b..b3aba359401b 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -71,7 +71,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata); int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata); + u32 max_num_sg); int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 5642eefb4ba1..907203afbd99 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -40,14 +40,6 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); MODULE_DESCRIPTION("Soft RDMA transport"); MODULE_LICENSE("Dual BSD/GPL"); -/* free resources for all ports on a device */ -static void rxe_cleanup_ports(struct rxe_dev *rxe) -{ - kfree(rxe->port.pkey_tbl); - rxe->port.pkey_tbl = NULL; - -} - /* free resources for a rxe device all objects created for this device must * have been destroyed */ @@ -66,8 +58,6 @@ void rxe_dealloc(struct ib_device *ib_dev) rxe_pool_cleanup(&rxe->mc_grp_pool); rxe_pool_cleanup(&rxe->mc_elem_pool); - rxe_cleanup_ports(rxe); - if (rxe->tfm) crypto_free_shash(rxe->tfm); } @@ -111,7 +101,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe) } /* initialize port attributes */ -static int rxe_init_port_param(struct rxe_port *port) +static void rxe_init_port_param(struct rxe_port *port) { port->attr.state = IB_PORT_DOWN; port->attr.max_mtu = IB_MTU_4096; @@ -134,35 +124,19 @@ static int rxe_init_port_param(struct rxe_port *port) port->attr.phys_state = RXE_PORT_PHYS_STATE; port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256); port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); - - return 0; } /* initialize port state, note IB convention that HCA ports are always * numbered from 1 */ -static int rxe_init_ports(struct rxe_dev *rxe) +static void rxe_init_ports(struct rxe_dev *rxe) { struct rxe_port *port = &rxe->port; rxe_init_port_param(port); - - if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len) - return -EINVAL; - - port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len, - sizeof(*port->pkey_tbl), GFP_KERNEL); - - if (!port->pkey_tbl) - return -ENOMEM; - - port->pkey_tbl[0] = 0xffff; addrconf_addr_eui48((unsigned char *)&port->port_guid, rxe->ndev->dev_addr); - spin_lock_init(&port->port_lock); - - return 0; } /* init pools of managed objects */ @@ -252,13 +226,11 @@ static int rxe_init(struct rxe_dev *rxe) /* init default device parameters */ rxe_init_device_param(rxe); - err = rxe_init_ports(rxe); - if (err) - goto err1; + rxe_init_ports(rxe); err = rxe_init_pools(rxe); if (err) - goto err2; + return err; /* init pending mmap list */ spin_lock_init(&rxe->mmap_offset_lock); @@ -268,11 +240,6 @@ static int rxe_init(struct rxe_dev *rxe) mutex_init(&rxe->usdev_lock); return 0; - -err2: - rxe_cleanup_ports(rxe); -err1: - return err; } void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 775c23becaec..39dc3bfa5d5d 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -103,8 +103,8 @@ enum copy_direction { from_mem_obj, }; -int rxe_mem_init_dma(struct rxe_pd *pd, - int access, struct rxe_mem *mem); +void rxe_mem_init_dma(struct rxe_pd *pd, + int access, struct rxe_mem *mem); int rxe_mem_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int access, struct ib_udata *udata, @@ -132,9 +132,6 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length); -int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, - u64 *page, int num_pages, u64 iova); - void rxe_mem_cleanup(struct rxe_pool_entry *arg); int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); @@ -145,7 +142,6 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb); struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt); int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc); -enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num); const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); struct device *rxe_dma_device(struct rxe_dev *rxe); int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index e83c7b518bfa..cdd811a45120 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -144,8 +144,8 @@ err1: return -ENOMEM; } -int rxe_mem_init_dma(struct rxe_pd *pd, - int access, struct rxe_mem *mem) +void rxe_mem_init_dma(struct rxe_pd *pd, + int access, struct rxe_mem *mem) { rxe_mem_init(access, mem); @@ -153,8 +153,6 @@ int rxe_mem_init_dma(struct rxe_pd *pd, mem->access = access; mem->state = RXE_MEM_STATE_VALID; mem->type = RXE_MEM_TYPE_DMA; - - return 0; } int rxe_mem_init_user(struct rxe_pd *pd, u64 start, @@ -587,47 +585,3 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, return mem; } - -int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, - u64 *page, int num_pages, u64 iova) -{ - int i; - int num_buf; - int err; - struct rxe_map **map; - struct rxe_phys_buf *buf; - int page_size; - - if (num_pages > mem->max_buf) { - err = -EINVAL; - goto err1; - } - - num_buf = 0; - page_size = 1 << mem->page_shift; - map = mem->map; - buf = map[0]->buf; - - for (i = 0; i < num_pages; i++) { - buf->addr = *page++; - buf->size = page_size; - buf++; - num_buf++; - - if (num_buf == RXE_BUF_PER_MAP) { - map++; - buf = map[0]->buf; - num_buf = 0; - } - } - - mem->iova = iova; - mem->va = iova; - mem->length = num_pages << mem->page_shift; - mem->state = RXE_MEM_STATE_VALID; - - return 0; - -err1: - return err; -} diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 312c2fc961c0..0c3808611f95 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -520,11 +520,6 @@ const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num) return rxe->ndev->name; } -enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num) -{ - return IB_LINK_LAYER_ETHERNET; -} - int rxe_net_add(const char *ibdev_name, struct net_device *ndev) { int err; diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 99e9d8ba9767..2f381aeafcb5 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -100,7 +100,7 @@ enum rxe_device_param { RXE_MAX_SRQ_SGE = 27, RXE_MIN_SRQ_SGE = 1, RXE_MAX_FMR_PAGE_LIST_LEN = 512, - RXE_MAX_PKEYS = 64, + RXE_MAX_PKEYS = 1, RXE_LOCAL_CA_ACK_DELAY = 15, RXE_MAX_UCONTEXT = 512, @@ -148,7 +148,7 @@ enum rxe_port_param { RXE_PORT_INIT_TYPE_REPLY = 0, RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X, RXE_PORT_ACTIVE_SPEED = 1, - RXE_PORT_PKEY_TBL_LEN = 64, + RXE_PORT_PKEY_TBL_LEN = 1, RXE_PORT_PHYS_STATE = IB_PORT_PHYS_STATE_POLLING, RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL, }; diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 831ad578a7b2..7e123d3c4d09 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -101,36 +101,15 @@ static void set_qkey_viol_cntr(struct rxe_port *port) static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, u32 qpn, struct rxe_qp *qp) { - int i; - int found_pkey = 0; struct rxe_port *port = &rxe->port; u16 pkey = bth_pkey(pkt); pkt->pkey_index = 0; - if (qpn == 1) { - for (i = 0; i < port->attr.pkey_tbl_len; i++) { - if (pkey_match(pkey, port->pkey_tbl[i])) { - pkt->pkey_index = i; - found_pkey = 1; - break; - } - } - - if (!found_pkey) { - pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); - set_bad_pkey_cntr(port); - goto err1; - } - } else { - if (unlikely(!pkey_match(pkey, - port->pkey_tbl[qp->attr.pkey_index] - ))) { - pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey); - set_bad_pkey_cntr(port); - goto err1; - } - pkt->pkey_index = qp->attr.pkey_index; + if (!pkey_match(pkey, IB_DEFAULT_PKEY_FULL)) { + pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); + set_bad_pkey_cntr(port); + goto err1; } if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && @@ -330,10 +309,14 @@ err1: static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) { + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); const struct ib_gid_attr *gid_attr; union ib_gid dgid; union ib_gid *pdgid; + if (pkt->mask & RXE_LOOPBACK_MASK) + return 0; + if (skb->protocol == htons(ETH_P_IP)) { ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, (struct in6_addr *)&dgid); @@ -366,7 +349,7 @@ void rxe_rcv(struct sk_buff *skb) if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) goto drop; - if (unlikely(rxe_match_dgid(rxe, skb) < 0)) { + if (rxe_match_dgid(rxe, skb) < 0) { pr_warn_ratelimited("failed matching dgid\n"); goto drop; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index e5031172c019..34df2b55e650 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -381,7 +381,6 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - struct rxe_port *port = &rxe->port; struct sk_buff *skb; struct rxe_send_wr *ibwr = &wqe->wr; struct rxe_av *av; @@ -419,9 +418,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) == (RXE_WRITE_MASK | RXE_IMMDT_MASK)); - pkey = (qp_type(qp) == IB_QPT_GSI) ? - port->pkey_tbl[ibwr->wr.ud.pkey_index] : - port->pkey_tbl[qp->attr.pkey_index]; + pkey = IB_DEFAULT_PKEY_FULL; qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn : qp->attr.dest_qp_num; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index b8a22af724e8..bb61e534e468 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -83,22 +83,11 @@ static int rxe_query_port(struct ib_device *dev, static int rxe_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey) { - struct rxe_dev *rxe = to_rdev(device); - struct rxe_port *port; - - port = &rxe->port; - - if (unlikely(index >= port->attr.pkey_tbl_len)) { - dev_warn(device->dev.parent, "invalid index = %d\n", - index); - goto err1; - } + if (index > 0) + return -EINVAL; - *pkey = port->pkey_tbl[index]; + *pkey = IB_DEFAULT_PKEY_FULL; return 0; - -err1: - return -EINVAL; } static int rxe_modify_device(struct ib_device *dev, @@ -141,9 +130,7 @@ static int rxe_modify_port(struct ib_device *dev, static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, u8 port_num) { - struct rxe_dev *rxe = to_rdev(dev); - - return rxe_link_layer(rxe, port_num); + return IB_LINK_LAYER_ETHERNET; } static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) @@ -684,6 +671,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, unsigned int mask; unsigned int length = 0; int i; + struct ib_send_wr *next; while (wr) { mask = wr_opcode_mask(wr->opcode, qp); @@ -700,6 +688,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, break; } + next = wr->next; + length = 0; for (i = 0; i < wr->num_sge; i++) length += wr->sg_list[i].length; @@ -710,7 +700,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, *bad_wr = wr; break; } - wr = wr->next; + wr = next; } rxe_run_task(&qp->req.task, 1); @@ -901,30 +891,16 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); struct rxe_mem *mr; - int err; mr = rxe_alloc(&rxe->mr_pool); - if (!mr) { - err = -ENOMEM; - goto err1; - } + if (!mr) + return ERR_PTR(-ENOMEM); rxe_add_index(mr); - rxe_add_ref(pd); - - err = rxe_mem_init_dma(pd, access, mr); - if (err) - goto err2; + rxe_mem_init_dma(pd, access, mr); return &mr->ibmr; - -err2: - rxe_drop_ref(pd); - rxe_drop_index(mr); - rxe_drop_ref(mr); -err1: - return ERR_PTR(err); } static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, @@ -975,7 +951,7 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) + u32 max_num_sg) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 92de39c4a7c1..c664c7f36ab5 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -371,7 +371,6 @@ struct rxe_mc_elem { struct rxe_port { struct ib_port_attr attr; - u16 *pkey_tbl; __be64 port_guid; __be64 subnet_prefix; spinlock_t port_lock; /* guard port */ diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index ed60c9e4643e..d862bec84376 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -289,7 +289,6 @@ static const struct ib_device_ops siw_device_ops = { .post_srq_recv = siw_post_srq_recv, .query_device = siw_query_device, .query_gid = siw_query_gid, - .query_pkey = siw_query_pkey, .query_port = siw_query_port, .query_qp = siw_query_qp, .query_srq = siw_query_srq, diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 987e2ba05dbc..adafa1b8bebe 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -176,7 +176,6 @@ int siw_query_port(struct ib_device *base_dev, u8 port, attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); attr->phys_state = sdev->state == IB_PORT_ACTIVE ? IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; - attr->pkey_tbl_len = 1; attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; attr->state = sdev->state; /* @@ -204,20 +203,12 @@ int siw_get_port_immutable(struct ib_device *base_dev, u8 port, if (rv) return rv; - port_immutable->pkey_tbl_len = attr.pkey_tbl_len; port_immutable->gid_tbl_len = attr.gid_tbl_len; port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; return 0; } -int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey) -{ - /* Report the default pkey */ - *pkey = 0xffff; - return 0; -} - int siw_query_gid(struct ib_device *base_dev, u8 port, int idx, union ib_gid *gid) { @@ -1373,7 +1364,7 @@ err_out: } struct ib_mr *siw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_sge, struct ib_udata *udata) + u32 max_sge) { struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mr *mr = NULL; diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h index 1a731989fad6..d9572275a6b6 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.h +++ b/drivers/infiniband/sw/siw/siw_verbs.h @@ -46,7 +46,6 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int siw_query_port(struct ib_device *base_dev, u8 port, struct ib_port_attr *attr); -int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey); int siw_query_gid(struct ib_device *base_dev, u8 port, int idx, union ib_gid *gid); int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata); @@ -69,7 +68,7 @@ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags); struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len, u64 rnic_va, int rights, struct ib_udata *udata); struct ib_mr *siw_alloc_mr(struct ib_pd *base_pd, enum ib_mr_type mr_type, - u32 max_sge, struct ib_udata *udata); + u32 max_sge); struct ib_mr *siw_get_dma_mr(struct ib_pd *base_pd, int rights); int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, unsigned int *sg_off); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 9a3379c49541..3440dc48d02c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -515,7 +515,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev); int ipoib_ib_dev_open_default(struct net_device *dev); int ipoib_ib_dev_open(struct net_device *dev); -int ipoib_ib_dev_stop(struct net_device *dev); +void ipoib_ib_dev_stop(struct net_device *dev); void ipoib_ib_dev_up(struct net_device *dev); void ipoib_ib_dev_down(struct net_device *dev); int ipoib_ib_dev_stop_default(struct net_device *dev); @@ -527,7 +527,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); void ipoib_mcast_restart_task(struct work_struct *work); void ipoib_mcast_start_thread(struct net_device *dev); -int ipoib_mcast_stop_thread(struct net_device *dev); +void ipoib_mcast_stop_thread(struct net_device *dev); void ipoib_mcast_dev_down(struct net_device *dev); void ipoib_mcast_dev_flush(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index da3c5315bbb5..494f413dc3c6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -670,13 +670,12 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, return rc; } -static void __ipoib_reap_ah(struct net_device *dev) +static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_ah *ah, *tah; unsigned long flags; - netif_tx_lock_bh(dev); + netif_tx_lock_bh(priv->dev); spin_lock_irqsave(&priv->lock, flags); list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) @@ -687,37 +686,37 @@ static void __ipoib_reap_ah(struct net_device *dev) } spin_unlock_irqrestore(&priv->lock, flags); - netif_tx_unlock_bh(dev); + netif_tx_unlock_bh(priv->dev); } void ipoib_reap_ah(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, ah_reap_task.work); - struct net_device *dev = priv->dev; - __ipoib_reap_ah(dev); + ipoib_reap_dead_ahs(priv); if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) queue_delayed_work(priv->wq, &priv->ah_reap_task, round_jiffies_relative(HZ)); } -static void ipoib_flush_ah(struct net_device *dev) +static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - cancel_delayed_work(&priv->ah_reap_task); - flush_workqueue(priv->wq); - ipoib_reap_ah(&priv->ah_reap_task.work); + clear_bit(IPOIB_STOP_REAPER, &priv->flags); + queue_delayed_work(priv->wq, &priv->ah_reap_task, + round_jiffies_relative(HZ)); } -static void ipoib_stop_ah(struct net_device *dev) +static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - set_bit(IPOIB_STOP_REAPER, &priv->flags); - ipoib_flush_ah(dev); + cancel_delayed_work(&priv->ah_reap_task); + /* + * After ipoib_stop_ah_reaper() we always go through + * ipoib_reap_dead_ahs() which ensures the work is really stopped and + * does a final flush out of the dead_ah's list + */ } static int recvs_pending(struct net_device *dev) @@ -846,18 +845,6 @@ timeout: return 0; } -int ipoib_ib_dev_stop(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - priv->rn_ops->ndo_stop(dev); - - clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_flush_ah(dev); - - return 0; -} - int ipoib_ib_dev_open_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -901,10 +888,7 @@ int ipoib_ib_dev_open(struct net_device *dev) return -1; } - clear_bit(IPOIB_STOP_REAPER, &priv->flags); - queue_delayed_work(priv->wq, &priv->ah_reap_task, - round_jiffies_relative(HZ)); - + ipoib_start_ah_reaper(priv); if (priv->rn_ops->ndo_open(dev)) { pr_warn("%s: Failed to open dev\n", dev->name); goto dev_stop; @@ -915,13 +899,20 @@ int ipoib_ib_dev_open(struct net_device *dev) return 0; dev_stop: - set_bit(IPOIB_STOP_REAPER, &priv->flags); - cancel_delayed_work(&priv->ah_reap_task); - set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_ib_dev_stop(dev); + ipoib_stop_ah_reaper(priv); return -1; } +void ipoib_ib_dev_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + priv->rn_ops->ndo_stop(dev); + + clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); + ipoib_stop_ah_reaper(priv); +} + void ipoib_pkey_dev_check_presence(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -1232,7 +1223,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_mcast_dev_flush(dev); if (oper_up) set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); - ipoib_flush_ah(dev); + ipoib_reap_dead_ahs(priv); } if (level >= IPOIB_FLUSH_NORMAL) @@ -1307,7 +1298,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) * the neighbor garbage collection is stopped and reaped. * That should all be done now, so make a final ah flush. */ - ipoib_stop_ah(dev); + ipoib_reap_dead_ahs(priv); clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3cfb682b91b0..752581a8627b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1892,8 +1892,15 @@ static void ipoib_child_init(struct net_device *ndev) priv->max_ib_mtu = ppriv->max_ib_mtu; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); - memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); - memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); + if (memchr_inv(priv->dev->dev_addr, 0, INFINIBAND_ALEN)) + memcpy(&priv->local_gid, priv->dev->dev_addr + 4, + sizeof(priv->local_gid)); + else { + memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, + INFINIBAND_ALEN); + memcpy(&priv->local_gid, &ppriv->local_gid, + sizeof(priv->local_gid)); + } } static int ipoib_ndo_init(struct net_device *ndev) @@ -1976,6 +1983,8 @@ static void ipoib_ndo_uninit(struct net_device *dev) /* no more works over the priv->wq */ if (priv->wq) { + /* See ipoib_mcast_carrier_on_task() */ + WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)); flush_workqueue(priv->wq); destroy_workqueue(priv->wq); priv->wq = NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 9bfa514473d5..86e4ed64e4e2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -680,15 +680,13 @@ void ipoib_mcast_start_thread(struct net_device *dev) spin_unlock_irqrestore(&priv->lock, flags); } -int ipoib_mcast_stop_thread(struct net_device *dev) +void ipoib_mcast_stop_thread(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); ipoib_dbg_mcast(priv, "stopping multicast thread\n"); cancel_delayed_work_sync(&priv->mcast_task); - - return 0; } static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 1d77c7f42e38..78ee9445f801 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -300,18 +300,6 @@ struct ib_conn; struct iscsi_iser_task; /** - * struct iser_comp - iSER completion context - * - * @cq: completion queue - * @active_qps: Number of active QPs attached - * to completion context - */ -struct iser_comp { - struct ib_cq *cq; - int active_qps; -}; - -/** * struct iser_device - iSER device handle * * @ib_device: RDMA device @@ -320,9 +308,6 @@ struct iser_comp { * @event_handler: IB events handle routine * @ig_list: entry in devices list * @refcount: Reference counter, dominated by open iser connections - * @comps_used: Number of completion contexts used, Min between online - * cpus and device max completion vectors - * @comps: Dinamically allocated array of completion handlers */ struct iser_device { struct ib_device *ib_device; @@ -330,8 +315,6 @@ struct iser_device { struct ib_event_handler event_handler; struct list_head ig_list; int refcount; - int comps_used; - struct iser_comp *comps; }; /** @@ -353,6 +336,7 @@ struct iser_reg_resources { * @list: entry in connection fastreg pool * @rsc: data buffer registration resources * @sig_protected: is region protected indicator + * @all_list: first and last list members */ struct iser_fr_desc { struct list_head list; @@ -367,6 +351,7 @@ struct iser_fr_desc { * @list: list of fastreg descriptors * @lock: protects fastreg pool * @size: size of the pool + * @all_list: first and last list members */ struct iser_fr_pool { struct list_head list; @@ -380,11 +365,12 @@ struct iser_fr_pool { * * @cma_id: rdma_cm connection maneger handle * @qp: Connection Queue-pair + * @cq: Connection completion queue + * @cq_size: The number of max outstanding completions * @post_recv_buf_count: post receive counter * @sig_count: send work request signal count * @rx_wr: receive work request for batch posts * @device: reference to iser device - * @comp: iser completion context * @fr_pool: connection fast registration poool * @pi_support: Indicate device T10-PI support * @reg_cqe: completion handler @@ -392,11 +378,12 @@ struct iser_fr_pool { struct ib_conn { struct rdma_cm_id *cma_id; struct ib_qp *qp; + struct ib_cq *cq; + u32 cq_size; int post_recv_buf_count; u8 sig_count; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; struct iser_device *device; - struct iser_comp *comp; struct iser_fr_pool fr_pool; bool pi_support; struct ib_cqe reg_cqe; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index c1f44c41f501..699e075ae1b3 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -68,59 +68,23 @@ static void iser_event_handler(struct ib_event_handler *handler, static int iser_create_device_ib_res(struct iser_device *device) { struct ib_device *ib_dev = device->ib_device; - int i, max_cqe; if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { iser_err("IB device does not support memory registrations\n"); return -1; } - device->comps_used = min_t(int, num_online_cpus(), - ib_dev->num_comp_vectors); - - device->comps = kcalloc(device->comps_used, sizeof(*device->comps), - GFP_KERNEL); - if (!device->comps) - goto comps_err; - - max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe); - - iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n", - device->comps_used, dev_name(&ib_dev->dev), - ib_dev->num_comp_vectors, max_cqe); - device->pd = ib_alloc_pd(ib_dev, iser_always_reg ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY); if (IS_ERR(device->pd)) goto pd_err; - for (i = 0; i < device->comps_used; i++) { - struct iser_comp *comp = &device->comps[i]; - - comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i, - IB_POLL_SOFTIRQ); - if (IS_ERR(comp->cq)) { - comp->cq = NULL; - goto cq_err; - } - } - INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev, iser_event_handler); ib_register_event_handler(&device->event_handler); return 0; -cq_err: - for (i = 0; i < device->comps_used; i++) { - struct iser_comp *comp = &device->comps[i]; - - if (comp->cq) - ib_free_cq(comp->cq); - } - ib_dealloc_pd(device->pd); pd_err: - kfree(device->comps); -comps_err: iser_err("failed to allocate an IB resource\n"); return -1; } @@ -131,20 +95,9 @@ comps_err: */ static void iser_free_device_ib_res(struct iser_device *device) { - int i; - - for (i = 0; i < device->comps_used; i++) { - struct iser_comp *comp = &device->comps[i]; - - ib_free_cq(comp->cq); - comp->cq = NULL; - } - ib_unregister_event_handler(&device->event_handler); ib_dealloc_pd(device->pd); - kfree(device->comps); - device->comps = NULL; device->pd = NULL; } @@ -287,70 +240,57 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) struct ib_device *ib_dev; struct ib_qp_init_attr init_attr; int ret = -ENOMEM; - int index, min_index = 0; + unsigned int max_send_wr, cq_size; BUG_ON(ib_conn->device == NULL); device = ib_conn->device; ib_dev = device->ib_device; - memset(&init_attr, 0, sizeof init_attr); + if (ib_conn->pi_support) + max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1; + else + max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; + max_send_wr = min_t(unsigned int, max_send_wr, + (unsigned int)ib_dev->attrs.max_qp_wr); - mutex_lock(&ig.connlist_mutex); - /* select the CQ with the minimal number of usages */ - for (index = 0; index < device->comps_used; index++) { - if (device->comps[index].active_qps < - device->comps[min_index].active_qps) - min_index = index; + cq_size = max_send_wr + ISER_QP_MAX_RECV_DTOS; + ib_conn->cq = ib_cq_pool_get(ib_dev, cq_size, -1, IB_POLL_SOFTIRQ); + if (IS_ERR(ib_conn->cq)) { + ret = PTR_ERR(ib_conn->cq); + goto cq_err; } - ib_conn->comp = &device->comps[min_index]; - ib_conn->comp->active_qps++; - mutex_unlock(&ig.connlist_mutex); - iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn); + ib_conn->cq_size = cq_size; + + memset(&init_attr, 0, sizeof(init_attr)); init_attr.event_handler = iser_qp_event_callback; init_attr.qp_context = (void *)ib_conn; - init_attr.send_cq = ib_conn->comp->cq; - init_attr.recv_cq = ib_conn->comp->cq; + init_attr.send_cq = ib_conn->cq; + init_attr.recv_cq = ib_conn->cq; init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; init_attr.cap.max_send_sge = 2; init_attr.cap.max_recv_sge = 1; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; - if (ib_conn->pi_support) { - init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1; + init_attr.cap.max_send_wr = max_send_wr; + if (ib_conn->pi_support) init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; - iser_conn->max_cmds = - ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS); - } else { - if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) { - init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; - iser_conn->max_cmds = - ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS); - } else { - init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr; - iser_conn->max_cmds = - ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr); - iser_dbg("device %s supports max_send_wr %d\n", - dev_name(&device->ib_device->dev), - ib_dev->attrs.max_qp_wr); - } - } + iser_conn->max_cmds = ISER_GET_MAX_XMIT_CMDS(max_send_wr - 1); ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); if (ret) goto out_err; ib_conn->qp = ib_conn->cma_id->qp; - iser_info("setting conn %p cma_id %p qp %p\n", + iser_info("setting conn %p cma_id %p qp %p max_send_wr %d\n", ib_conn, ib_conn->cma_id, - ib_conn->cma_id->qp); + ib_conn->cma_id->qp, max_send_wr); return ret; out_err: - mutex_lock(&ig.connlist_mutex); - ib_conn->comp->active_qps--; - mutex_unlock(&ig.connlist_mutex); + ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size); +cq_err: iser_err("unable to alloc mem or create resource, err %d\n", ret); return ret; @@ -462,10 +402,8 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn, iser_conn, ib_conn->cma_id, ib_conn->qp); if (ib_conn->qp != NULL) { - mutex_lock(&ig.connlist_mutex); - ib_conn->comp->active_qps--; - mutex_unlock(&ig.connlist_mutex); rdma_destroy_qp(ib_conn->cma_id); + ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size); ib_conn->qp = NULL; } diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index b7df38ee8ae0..61e2f7fc513d 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -24,13 +24,6 @@ #include "ib_isert.h" -#define ISERT_MAX_CONN 8 -#define ISER_MAX_RX_CQ_LEN (ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN) -#define ISER_MAX_TX_CQ_LEN \ - ((ISERT_QP_MAX_REQ_DTOS + ISCSI_DEF_XMIT_CMDS_MAX) * ISERT_MAX_CONN) -#define ISER_MAX_CQ_LEN (ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \ - ISERT_MAX_CONN) - static int isert_debug_level; module_param_named(debug_level, isert_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)"); @@ -82,53 +75,34 @@ isert_qp_event_callback(struct ib_event *e, void *context) } } -static struct isert_comp * -isert_comp_get(struct isert_conn *isert_conn) -{ - struct isert_device *device = isert_conn->device; - struct isert_comp *comp; - int i, min = 0; - - mutex_lock(&device_list_mutex); - for (i = 0; i < device->comps_used; i++) - if (device->comps[i].active_qps < - device->comps[min].active_qps) - min = i; - comp = &device->comps[min]; - comp->active_qps++; - mutex_unlock(&device_list_mutex); - - isert_info("conn %p, using comp %p min_index: %d\n", - isert_conn, comp, min); - - return comp; -} - -static void -isert_comp_put(struct isert_comp *comp) -{ - mutex_lock(&device_list_mutex); - comp->active_qps--; - mutex_unlock(&device_list_mutex); -} - static struct ib_qp * isert_create_qp(struct isert_conn *isert_conn, - struct isert_comp *comp, struct rdma_cm_id *cma_id) { + u32 cq_size = ISERT_QP_MAX_REQ_DTOS + ISERT_QP_MAX_RECV_DTOS + 2; struct isert_device *device = isert_conn->device; + struct ib_device *ib_dev = device->ib_device; struct ib_qp_init_attr attr; - int ret; + int ret, factor; + + isert_conn->cq = ib_cq_pool_get(ib_dev, cq_size, -1, IB_POLL_WORKQUEUE); + if (IS_ERR(isert_conn->cq)) { + isert_err("Unable to allocate cq\n"); + ret = PTR_ERR(isert_conn->cq); + return ERR_PTR(ret); + } + isert_conn->cq_size = cq_size; memset(&attr, 0, sizeof(struct ib_qp_init_attr)); attr.event_handler = isert_qp_event_callback; attr.qp_context = isert_conn; - attr.send_cq = comp->cq; - attr.recv_cq = comp->cq; + attr.send_cq = isert_conn->cq; + attr.recv_cq = isert_conn->cq; attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; - attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; + factor = rdma_rw_mr_factor(device->ib_device, cma_id->port_num, + ISCSI_ISER_MAX_SG_TABLESIZE); + attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX * factor; attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; @@ -139,6 +113,8 @@ isert_create_qp(struct isert_conn *isert_conn, ret = rdma_create_qp(cma_id, device->pd, &attr); if (ret) { isert_err("rdma_create_qp failed for cma_id %d\n", ret); + ib_cq_pool_put(isert_conn->cq, isert_conn->cq_size); + return ERR_PTR(ret); } @@ -146,25 +122,6 @@ isert_create_qp(struct isert_conn *isert_conn, } static int -isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) -{ - struct isert_comp *comp; - int ret; - - comp = isert_comp_get(isert_conn); - isert_conn->qp = isert_create_qp(isert_conn, comp, cma_id); - if (IS_ERR(isert_conn->qp)) { - ret = PTR_ERR(isert_conn->qp); - goto err; - } - - return 0; -err: - isert_comp_put(comp); - return ret; -} - -static int isert_alloc_rx_descriptors(struct isert_conn *isert_conn) { struct isert_device *device = isert_conn->device; @@ -231,61 +188,6 @@ isert_free_rx_descriptors(struct isert_conn *isert_conn) isert_conn->rx_descs = NULL; } -static void -isert_free_comps(struct isert_device *device) -{ - int i; - - for (i = 0; i < device->comps_used; i++) { - struct isert_comp *comp = &device->comps[i]; - - if (comp->cq) - ib_free_cq(comp->cq); - } - kfree(device->comps); -} - -static int -isert_alloc_comps(struct isert_device *device) -{ - int i, max_cqe, ret = 0; - - device->comps_used = min(ISERT_MAX_CQ, min_t(int, num_online_cpus(), - device->ib_device->num_comp_vectors)); - - isert_info("Using %d CQs, %s supports %d vectors support " - "pi_capable %d\n", - device->comps_used, dev_name(&device->ib_device->dev), - device->ib_device->num_comp_vectors, - device->pi_capable); - - device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp), - GFP_KERNEL); - if (!device->comps) - return -ENOMEM; - - max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe); - - for (i = 0; i < device->comps_used; i++) { - struct isert_comp *comp = &device->comps[i]; - - comp->device = device; - comp->cq = ib_alloc_cq(device->ib_device, comp, max_cqe, i, - IB_POLL_WORKQUEUE); - if (IS_ERR(comp->cq)) { - isert_err("Unable to allocate cq\n"); - ret = PTR_ERR(comp->cq); - comp->cq = NULL; - goto out_cq; - } - } - - return 0; -out_cq: - isert_free_comps(device); - return ret; -} - static int isert_create_device_ib_res(struct isert_device *device) { @@ -296,16 +198,12 @@ isert_create_device_ib_res(struct isert_device *device) ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge); isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd); - ret = isert_alloc_comps(device); - if (ret) - goto out; - device->pd = ib_alloc_pd(ib_dev, 0); if (IS_ERR(device->pd)) { ret = PTR_ERR(device->pd); isert_err("failed to allocate pd, device %p, ret=%d\n", device, ret); - goto out_cq; + return ret; } /* Check signature cap */ @@ -313,13 +211,6 @@ isert_create_device_ib_res(struct isert_device *device) IB_DEVICE_INTEGRITY_HANDOVER ? true : false; return 0; - -out_cq: - isert_free_comps(device); -out: - if (ret > 0) - ret = -EINVAL; - return ret; } static void @@ -328,7 +219,6 @@ isert_free_device_ib_res(struct isert_device *device) isert_info("device %p\n", device); ib_dealloc_pd(device->pd); - isert_free_comps(device); } static void @@ -490,6 +380,13 @@ isert_set_nego_params(struct isert_conn *isert_conn, } } +static void +isert_destroy_qp(struct isert_conn *isert_conn) +{ + ib_destroy_qp(isert_conn->qp); + ib_cq_pool_put(isert_conn->cq, isert_conn->cq_size); +} + static int isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { @@ -530,17 +427,19 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_set_nego_params(isert_conn, &event->param.conn); - ret = isert_conn_setup_qp(isert_conn, cma_id); - if (ret) + isert_conn->qp = isert_create_qp(isert_conn, cma_id); + if (IS_ERR(isert_conn->qp)) { + ret = PTR_ERR(isert_conn->qp); goto out_conn_dev; + } ret = isert_login_post_recv(isert_conn); if (ret) - goto out_conn_dev; + goto out_destroy_qp; ret = isert_rdma_accept(isert_conn); if (ret) - goto out_conn_dev; + goto out_destroy_qp; mutex_lock(&isert_np->mutex); list_add_tail(&isert_conn->node, &isert_np->accepted); @@ -548,6 +447,8 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) return 0; +out_destroy_qp: + isert_destroy_qp(isert_conn); out_conn_dev: isert_device_put(device); out_rsp_dma_map: @@ -572,12 +473,8 @@ isert_connect_release(struct isert_conn *isert_conn) !isert_conn->dev_removed) rdma_destroy_id(isert_conn->cm_id); - if (isert_conn->qp) { - struct isert_comp *comp = isert_conn->qp->recv_cq->cq_context; - - isert_comp_put(comp); - ib_destroy_qp(isert_conn->qp); - } + if (isert_conn->qp) + isert_destroy_qp(isert_conn); if (isert_conn->login_req_buf) isert_free_login_buf(isert_conn); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 3b296bac4f60..c55f7d9bfced 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -63,7 +63,8 @@ (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \ sizeof(struct ib_cqe) + sizeof(bool))) -#define ISCSI_ISER_SG_TABLESIZE 256 +/* Maximum support is 16MB I/O size */ +#define ISCSI_ISER_MAX_SG_TABLESIZE 4096 enum isert_desc_type { ISCSI_TX_CONTROL, @@ -155,6 +156,8 @@ struct isert_conn { struct iser_tx_desc login_tx_desc; struct rdma_cm_id *cm_id; struct ib_qp *qp; + struct ib_cq *cq; + u32 cq_size; struct isert_device *device; struct mutex mutex; struct kref kref; @@ -165,22 +168,6 @@ struct isert_conn { bool dev_removed; }; -#define ISERT_MAX_CQ 64 - -/** - * struct isert_comp - iSER completion context - * - * @device: pointer to device handle - * @cq: completion queue - * @active_qps: Number of active QPs attached - * to completion context - */ -struct isert_comp { - struct isert_device *device; - struct ib_cq *cq; - int active_qps; -}; - struct isert_device { bool pi_capable; int refcount; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h index d324312a373c..f64519872297 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -118,12 +118,17 @@ * struct opa_vesw_info - OPA vnic switch information * @fabric_id: 10-bit fabric id * @vesw_id: 12-bit virtual ethernet switch id + * @rsvd0: reserved bytes * @def_port_mask: bitmask of default ports + * @rsvd1: reserved bytes * @pkey: partition key + * @rsvd2: reserved bytes * @u_mcast_dlid: unknown multicast dlid * @u_ucast_dlid: array of unknown unicast dlids + * @rsvd3: reserved bytes * @rc: routing control * @eth_mtu: Ethernet MTU + * @rsvd4: reserved bytes */ struct opa_vesw_info { __be16 fabric_id; @@ -150,12 +155,14 @@ struct opa_vesw_info { * struct opa_per_veswport_info - OPA vnic per port information * @port_num: port number * @eth_link_status: current ethernet link state + * @rsvd0: reserved bytes * @base_mac_addr: base mac address * @config_state: configured port state * @oper_state: operational port state * @max_mac_tbl_ent: max number of mac table entries * @max_smac_ent: max smac entries in mac table * @mac_tbl_digest: mac table digest + * @rsvd1: reserved bytes * @encap_slid: base slid for the port * @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets * @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets @@ -165,8 +172,10 @@ struct opa_vesw_info { * @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets * @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets * @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets + * @rsvd2: reserved bytes * @uc_macs_gen_count: generation count for unicast macs list * @mc_macs_gen_count: generation count for multicast macs list + * @rsvd3: reserved bytes */ struct opa_per_veswport_info { __be32 port_num; @@ -294,6 +303,7 @@ struct opa_veswport_mactable { * @rx_512_1023: received packet length is >=512 and < 1023 bytes * @rx_1024_1518: received packet length is >=1024 and < 1518 bytes * @rx_1519_max: received packet length >= 1519 bytes + * @reserved: reserved bytes * * All the above are counters of corresponding conditions. */ @@ -347,16 +357,26 @@ struct opa_veswport_summary_counters { * @veswport_num: virtual ethernet switch port number * @tx_errors: transmit errors * @rx_errors: receive errors + * @rsvd0: reserved bytes * @tx_smac_filt: smac filter errors + * @rsvd1: reserved bytes + * @rsvd2: reserved bytes + * @rsvd3: reserved bytes * @tx_dlid_zero: transmit packets with invalid dlid + * @rsvd4: reserved bytes * @tx_logic: other transmit errors + * @rsvd5: reserved bytes * @tx_drop_state: packet tansmission in non-forward port state * @rx_bad_veswid: received packet with invalid vesw id + * @rsvd6: reserved bytes * @rx_runt: received ethernet packet with length < 64 bytes * @rx_oversize: received ethernet packet with length > MTU size + * @rsvd7: reserved bytes * @rx_eth_down: received packets when interface is down * @rx_drop_state: received packets in non-forwarding port state * @rx_logic: other receive errors + * @rsvd8: reserved bytes + * @rsvd9: reserved bytes * * All the above are counters of corresponding error conditions. */ @@ -447,6 +467,7 @@ struct opa_veswport_iface_macs { * struct opa_vnic_vema_mad - Generic VEMA MAD * @mad_hdr: Generic MAD header * @rmpp_hdr: RMPP header for vendor specific MADs + * @reserved: reserved bytes * @oui: Unique org identifier * @data: MAD data */ @@ -467,6 +488,7 @@ struct opa_vnic_vema_mad { * @trap_num: Trap number * @toggle_count: Notice toggle bit and count value * @issuer_lid: Trap issuer's lid + * @reserved: reserved bytes * @issuer_gid: Issuer GID (only if Report method) * @raw_data: Trap message body */ @@ -487,6 +509,7 @@ struct opa_vnic_notice_attr { * struct opa_vnic_vema_mad_trap - Generic VEMA MAD Trap * @mad_hdr: Generic MAD header * @rmpp_hdr: RMPP header for vendor specific MADs + * @reserved: reserved bytes * @oui: Unique org identifier * @notice: Notice structure */ diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 564388a85603..776e89231c52 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/rculist.h> +#include <linux/random.h> #include "rtrs-clt.h" #include "rtrs-log.h" @@ -23,6 +24,12 @@ * leads to "false positives" failed reconnect attempts */ #define RTRS_RECONNECT_BACKOFF 1000 +/* + * Wait for additional random time between 0 and 8 seconds + * before starting to reconnect to avoid clients reconnecting + * all at once in case of a major network outage + */ +#define RTRS_RECONNECT_SEED 8 MODULE_DESCRIPTION("RDMA Transport Client"); MODULE_LICENSE("GPL"); @@ -306,7 +313,8 @@ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) */ delay_ms = clt->reconnect_delay_sec * 1000; queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms)); + msecs_to_jiffies(delay_ms + + prandom_u32() % RTRS_RECONNECT_SEED)); } else { /* * Error can happen just on establishing new connection, @@ -2503,7 +2511,9 @@ reconnect_again: sess->stats->reconnects.fail_cnt++; delay_ms = clt->reconnect_delay_sec * 1000; queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms)); + msecs_to_jiffies(delay_ms + + prandom_u32() % + RTRS_RECONNECT_SEED)); } } @@ -2972,7 +2982,7 @@ static int __init rtrs_client_init(void) pr_err("Failed to create rtrs-client dev class\n"); return PTR_ERR(rtrs_clt_dev_class); } - rtrs_wq = alloc_workqueue("rtrs_client_wq", WQ_MEM_RECLAIM, 0); + rtrs_wq = alloc_workqueue("rtrs_client_wq", 0, 0); if (!rtrs_wq) { class_destroy(rtrs_clt_dev_class); return -ENOMEM; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 0d9241f5d9e6..a219bd1bdbc2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -2150,7 +2150,7 @@ static int __init rtrs_server_init(void) err = PTR_ERR(rtrs_dev_class); goto out_chunk_pool; } - rtrs_wq = alloc_workqueue("rtrs_server_wq", WQ_MEM_RECLAIM, 0); + rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0); if (!rtrs_wq) { err = -ENOMEM; goto out_dev_class; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index ef7fcd3e8e15..0065eb17ae36 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -869,7 +869,7 @@ static int srpt_zerolength_write(struct srpt_rdma_ch *ch) static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_rdma_ch *ch = wc->qp->qp_context; pr_debug("%s-%d wc->status %d\n", ch->sess_name, ch->qp->qp_num, wc->status); @@ -1322,7 +1322,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) */ static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_rdma_ch *ch = wc->qp->qp_context; struct srpt_send_ioctx *ioctx = container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); @@ -1683,7 +1683,7 @@ push: static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_rdma_ch *ch = wc->qp->qp_context; struct srpt_recv_ioctx *ioctx = container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe); @@ -1744,7 +1744,7 @@ static void srpt_process_wait_list(struct srpt_rdma_ch *ch) */ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_rdma_ch *ch = wc->qp->qp_context; struct srpt_send_ioctx *ioctx = container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe); enum srpt_command_state state; @@ -1791,7 +1791,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) goto out; retry: - ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size, + ch->cq = ib_cq_pool_get(sdev->device, ch->rq_size + sq_size, -1, IB_POLL_WORKQUEUE); if (IS_ERR(ch->cq)) { ret = PTR_ERR(ch->cq); @@ -1799,6 +1799,7 @@ retry: ch->rq_size + sq_size, ret); goto out; } + ch->cq_size = ch->rq_size + sq_size; qp_init->qp_context = (void *)ch; qp_init->event_handler @@ -1843,7 +1844,7 @@ retry: if (retry) { pr_debug("failed to create queue pair with sq_size = %d (%d) - retrying\n", sq_size, ret); - ib_free_cq(ch->cq); + ib_cq_pool_put(ch->cq, ch->cq_size); sq_size = max(sq_size / 2, MIN_SRPT_SQ_SIZE); goto retry; } else { @@ -1869,14 +1870,14 @@ out: err_destroy_cq: ch->qp = NULL; - ib_free_cq(ch->cq); + ib_cq_pool_put(ch->cq, ch->cq_size); goto out; } static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) { ib_destroy_qp(ch->qp); - ib_free_cq(ch->cq); + ib_cq_pool_put(ch->cq, ch->cq_size); } /** @@ -2156,9 +2157,6 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, WARN_ON_ONCE(irqs_disabled()); - if (WARN_ON(!sdev || !req)) - return -EINVAL; - it_iu_len = be32_to_cpu(req->req_it_iu_len); pr_info("Received SRP_LOGIN_REQ with i_port_id %pI6, t_port_id %pI6 and it_iu_len %d on port %d (guid=%pI6); pkey %#04x\n", diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index f31c349d07a1..41435a699b53 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -300,6 +300,7 @@ struct srpt_rdma_ch { } rdma_cm; }; struct ib_cq *cq; + u32 cq_size; struct ib_cqe zw_cqe; struct rcu_head rcu; struct kref kref; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 42198e64a7f4..8db4b5f0f963 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -299,11 +299,18 @@ void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas) } EXPORT_SYMBOL_GPL(mlx5_fill_page_array); -void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas) +void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm) { int i; + WARN_ON(perm & 0xfc); for (i = 0; i < buf->npages; i++) - pas[i] = cpu_to_be64(buf->frags[i].map); + pas[i] = cpu_to_be64(buf->frags[i].map | perm); +} +EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array_perm); + +void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas) +{ + mlx5_fill_page_frag_array_perm(buf, pas, 0); } EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 7e70a8178a46..9ccec5f8b92a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1577,6 +1577,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, static bool counter_is_valid(u32 action) { return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 5763965d5ef3..97b5fcb1f406 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -226,13 +226,20 @@ void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) int mlx5i_create_underlay_qp(struct mlx5e_priv *priv) { + unsigned char *dev_addr = priv->netdev->dev_addr; u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {}; struct mlx5i_priv *ipriv = priv->ppriv; void *addr_path; + int qpn = 0; int ret = 0; void *qpc; + if (MLX5_CAP_GEN(priv->mdev, mkey_by_name)) { + qpn = (dev_addr[1] << 16) + (dev_addr[2] << 8) + dev_addr[3]; + MLX5_SET(create_qp_in, in, input_qpn, qpn); + } + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e32d46c33701..ce43e3feccd9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -557,6 +557,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN_MAX(dev, release_all_pages)) MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1); + if (MLX5_CAP_GEN_MAX(dev, mkey_by_name)) + MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1); + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } |