diff options
Diffstat (limited to 'drivers/infiniband')
115 files changed, 5494 insertions, 4522 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 17bfedd24cc3..717b798cddad 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -46,7 +46,7 @@ struct ib_pkey_cache { int table_len; - u16 table[0]; + u16 table[]; }; struct ib_update_work { @@ -973,6 +973,23 @@ done: EXPORT_SYMBOL(rdma_query_gid); /** + * rdma_read_gid_hw_context - Read the HW GID context from GID attribute + * @attr: Potinter to the GID attribute + * + * rdma_read_gid_hw_context() reads the drivers GID HW context corresponding + * to the SGID attr. Callers are required to already be holding the reference + * to an existing GID entry. + * + * Returns the HW GID context + * + */ +void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr) +{ + return container_of(attr, struct ib_gid_table_entry, attr)->context; +} +EXPORT_SYMBOL(rdma_read_gid_hw_context); + +/** * rdma_find_gid - Returns SGID attributes if the matching GID is found. * @device: The device to query. * @gid: The GID value to search for. diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 68cc1b2d6824..4794113ecd59 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -80,8 +80,19 @@ const char *__attribute_const__ ibcm_reject_msg(int reason) } EXPORT_SYMBOL(ibcm_reject_msg); +struct cm_id_private; static void cm_add_one(struct ib_device *device); static void cm_remove_one(struct ib_device *device, void *client_data); +static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, + struct ib_cm_sidr_rep_param *param); +static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, + const void *private_data, u8 private_data_len); +static int cm_send_drep_locked(struct cm_id_private *cm_id_priv, + void *private_data, u8 private_data_len); +static int cm_send_rej_locked(struct cm_id_private *cm_id_priv, + enum ib_cm_rej_reason reason, void *ari, + u8 ari_length, const void *private_data, + u8 private_data_len); static struct ib_client cm_client = { .name = "cm", @@ -197,7 +208,7 @@ struct cm_device { struct ib_device *ib_device; u8 ack_delay; int going_down; - struct cm_port *port[0]; + struct cm_port *port[]; }; struct cm_av { @@ -216,7 +227,7 @@ struct cm_work { __be32 local_id; /* Established / timewait */ __be32 remote_id; struct ib_cm_event cm_event; - struct sa_path_rec path[0]; + struct sa_path_rec path[]; }; struct cm_timewait_info { @@ -261,7 +272,6 @@ struct cm_id_private { __be16 pkey; u8 private_data_len; u8 max_cm_retries; - u8 peer_to_peer; u8 responder_resources; u8 initiator_depth; u8 retry_count; @@ -572,18 +582,6 @@ static int cm_init_av_by_path(struct sa_path_rec *path, return 0; } -static int cm_alloc_id(struct cm_id_private *cm_id_priv) -{ - int err; - u32 id; - - err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv, - xa_limit_32b, &cm.local_id_next, GFP_KERNEL); - - cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; - return err; -} - static u32 cm_local_id(__be32 local_id) { return (__force u32) (local_id ^ cm.random_id_operand); @@ -633,22 +631,44 @@ static int be64_gt(__be64 a, __be64 b) return (__force u64) a > (__force u64) b; } -static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) +/* + * Inserts a new cm_id_priv into the listen_service_table. Returns cm_id_priv + * if the new ID was inserted, NULL if it could not be inserted due to a + * collision, or the existing cm_id_priv ready for shared usage. + */ +static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, + ib_cm_handler shared_handler) { struct rb_node **link = &cm.listen_service_table.rb_node; struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; __be64 service_mask = cm_id_priv->id.service_mask; + unsigned long flags; + spin_lock_irqsave(&cm.lock, flags); while (*link) { parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); if ((cur_cm_id_priv->id.service_mask & service_id) == (service_mask & cur_cm_id_priv->id.service_id) && - (cm_id_priv->id.device == cur_cm_id_priv->id.device)) + (cm_id_priv->id.device == cur_cm_id_priv->id.device)) { + /* + * Sharing an ib_cm_id with different handlers is not + * supported + */ + if (cur_cm_id_priv->id.cm_handler != shared_handler || + cur_cm_id_priv->id.context || + WARN_ON(!cur_cm_id_priv->id.cm_handler)) { + spin_unlock_irqrestore(&cm.lock, flags); + return NULL; + } + refcount_inc(&cur_cm_id_priv->refcount); + cur_cm_id_priv->listen_sharecount++; + spin_unlock_irqrestore(&cm.lock, flags); return cur_cm_id_priv; + } if (cm_id_priv->id.device < cur_cm_id_priv->id.device) link = &(*link)->rb_left; @@ -661,9 +681,11 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) else link = &(*link)->rb_right; } + cm_id_priv->listen_sharecount++; rb_link_node(&cm_id_priv->service_node, parent, link); rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table); - return NULL; + spin_unlock_irqrestore(&cm.lock, flags); + return cm_id_priv; } static struct cm_id_private * cm_find_listen(struct ib_device *device, @@ -810,21 +832,12 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private return NULL; } -static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, - enum ib_cm_sidr_status status) -{ - struct ib_cm_sidr_rep_param param; - - memset(¶m, 0, sizeof param); - param.status = status; - ib_send_cm_sidr_rep(&cm_id_priv->id, ¶m); -} - -struct ib_cm_id *ib_create_cm_id(struct ib_device *device, - ib_cm_handler cm_handler, - void *context) +static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device, + ib_cm_handler cm_handler, + void *context) { struct cm_id_private *cm_id_priv; + u32 id; int ret; cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL); @@ -836,10 +849,9 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.remote_cm_qpn = 1; - ret = cm_alloc_id(cm_id_priv); - if (ret) - goto error; + RB_CLEAR_NODE(&cm_id_priv->service_node); + RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); spin_lock_init(&cm_id_priv->lock); init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); @@ -847,11 +859,42 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); refcount_set(&cm_id_priv->refcount, 1); - return &cm_id_priv->id; + + ret = xa_alloc_cyclic_irq(&cm.local_id_table, &id, NULL, xa_limit_32b, + &cm.local_id_next, GFP_KERNEL); + if (ret) + goto error; + cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; + + return cm_id_priv; error: kfree(cm_id_priv); - return ERR_PTR(-ENOMEM); + return ERR_PTR(ret); +} + +/* + * Make the ID visible to the MAD handlers and other threads that use the + * xarray. + */ +static void cm_finalize_id(struct cm_id_private *cm_id_priv) +{ + xa_store_irq(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id), + cm_id_priv, GFP_KERNEL); +} + +struct ib_cm_id *ib_create_cm_id(struct ib_device *device, + ib_cm_handler cm_handler, + void *context) +{ + struct cm_id_private *cm_id_priv; + + cm_id_priv = cm_alloc_id_priv(device, cm_handler, context); + if (IS_ERR(cm_id_priv)) + return ERR_CAST(cm_id_priv); + + cm_finalize_id(cm_id_priv); + return &cm_id_priv->id; } EXPORT_SYMBOL(ib_create_cm_id); @@ -932,6 +975,8 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) unsigned long flags; struct cm_device *cm_dev; + lockdep_assert_held(&cm_id_priv->lock); + cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client); if (!cm_dev) return; @@ -963,6 +1008,8 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) { unsigned long flags; + lockdep_assert_held(&cm_id_priv->lock); + cm_id_priv->id.state = IB_CM_IDLE; if (cm_id_priv->timewait_info) { spin_lock_irqsave(&cm.lock, flags); @@ -979,54 +1026,51 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) struct cm_work *work; cm_id_priv = container_of(cm_id, struct cm_id_private, id); -retest: spin_lock_irq(&cm_id_priv->lock); +retest: switch (cm_id->state) { case IB_CM_LISTEN: - spin_unlock_irq(&cm_id_priv->lock); - - spin_lock_irq(&cm.lock); + spin_lock(&cm.lock); if (--cm_id_priv->listen_sharecount > 0) { /* The id is still shared. */ + WARN_ON(refcount_read(&cm_id_priv->refcount) == 1); + spin_unlock(&cm.lock); + spin_unlock_irq(&cm_id_priv->lock); cm_deref_id(cm_id_priv); - spin_unlock_irq(&cm.lock); return; } + cm_id->state = IB_CM_IDLE; rb_erase(&cm_id_priv->service_node, &cm.listen_service_table); - spin_unlock_irq(&cm.lock); + RB_CLEAR_NODE(&cm_id_priv->service_node); + spin_unlock(&cm.lock); break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - spin_unlock_irq(&cm_id_priv->lock); break; case IB_CM_SIDR_REQ_RCVD: - spin_unlock_irq(&cm_id_priv->lock); - cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); - spin_lock_irq(&cm.lock); - if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) - rb_erase(&cm_id_priv->sidr_id_node, - &cm.remote_sidr_table); - spin_unlock_irq(&cm.lock); + cm_send_sidr_rep_locked(cm_id_priv, + &(struct ib_cm_sidr_rep_param){ + .status = IB_SIDR_REJECT }); + /* cm_send_sidr_rep_locked will not move to IDLE if it fails */ + cm_id->state = IB_CM_IDLE; break; case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - spin_unlock_irq(&cm_id_priv->lock); - ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, - &cm_id_priv->id.device->node_guid, - sizeof cm_id_priv->id.device->node_guid, - NULL, 0); + cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT, + &cm_id_priv->id.device->node_guid, + sizeof(cm_id_priv->id.device->node_guid), + NULL, 0); break; case IB_CM_REQ_RCVD: if (err == -ENOMEM) { /* Do not reject to allow future retries. */ cm_reset_to_idle(cm_id_priv); - spin_unlock_irq(&cm_id_priv->lock); } else { - spin_unlock_irq(&cm_id_priv->lock); - ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, - NULL, 0, NULL, 0); + cm_send_rej_locked(cm_id_priv, + IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, + NULL, 0); } break; case IB_CM_REP_SENT: @@ -1036,38 +1080,56 @@ retest: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: - spin_unlock_irq(&cm_id_priv->lock); - ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, - NULL, 0, NULL, 0); + cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL, + 0, NULL, 0); break; case IB_CM_ESTABLISHED: - spin_unlock_irq(&cm_id_priv->lock); - if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) + if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) { + cm_id->state = IB_CM_IDLE; break; - ib_send_cm_dreq(cm_id, NULL, 0); + } + cm_send_dreq_locked(cm_id_priv, NULL, 0); goto retest; case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_enter_timewait(cm_id_priv); - spin_unlock_irq(&cm_id_priv->lock); - break; + goto retest; case IB_CM_DREQ_RCVD: - spin_unlock_irq(&cm_id_priv->lock); - ib_send_cm_drep(cm_id, NULL, 0); + cm_send_drep_locked(cm_id_priv, NULL, 0); + WARN_ON(cm_id->state != IB_CM_TIMEWAIT); + goto retest; + case IB_CM_TIMEWAIT: + /* + * The cm_acquire_id in cm_timewait_handler will stop working + * once we do cm_free_id() below, so just move to idle here for + * consistency. + */ + cm_id->state = IB_CM_IDLE; break; - default: - spin_unlock_irq(&cm_id_priv->lock); + case IB_CM_IDLE: break; } + WARN_ON(cm_id->state != IB_CM_IDLE); - spin_lock_irq(&cm.lock); + spin_lock(&cm.lock); + /* Required for cleanup paths related cm_req_handler() */ + if (cm_id_priv->timewait_info) { + cm_cleanup_timewait(cm_id_priv->timewait_info); + kfree(cm_id_priv->timewait_info); + cm_id_priv->timewait_info = NULL; + } if (!list_empty(&cm_id_priv->altr_list) && (!cm_id_priv->altr_send_port_not_ready)) list_del(&cm_id_priv->altr_list); if (!list_empty(&cm_id_priv->prim_list) && (!cm_id_priv->prim_send_port_not_ready)) list_del(&cm_id_priv->prim_list); - spin_unlock_irq(&cm.lock); + WARN_ON(cm_id_priv->listen_sharecount); + WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node)); + if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) + rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); + spin_unlock(&cm.lock); + spin_unlock_irq(&cm_id_priv->lock); cm_free_id(cm_id->local_id); cm_deref_id(cm_id_priv); @@ -1087,8 +1149,27 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id) } EXPORT_SYMBOL(ib_destroy_cm_id); +static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id, + __be64 service_mask) +{ + service_mask = service_mask ? service_mask : ~cpu_to_be64(0); + service_id &= service_mask; + if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && + (service_id != IB_CM_ASSIGN_SERVICE_ID)) + return -EINVAL; + + if (service_id == IB_CM_ASSIGN_SERVICE_ID) { + cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++); + cm_id_priv->id.service_mask = ~cpu_to_be64(0); + } else { + cm_id_priv->id.service_id = service_id; + cm_id_priv->id.service_mask = service_mask; + } + return 0; +} + /** - * __ib_cm_listen - Initiates listening on the specified service ID for + * ib_cm_listen - Initiates listening on the specified service ID for * connection and service ID resolution requests. * @cm_id: Connection identifier associated with the listen request. * @service_id: Service identifier matched against incoming connection @@ -1100,51 +1181,33 @@ EXPORT_SYMBOL(ib_destroy_cm_id); * exactly. This parameter is ignored if %service_id is set to * IB_CM_ASSIGN_SERVICE_ID. */ -static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, - __be64 service_mask) +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) { - struct cm_id_private *cm_id_priv, *cur_cm_id_priv; - int ret = 0; - - service_mask = service_mask ? service_mask : ~cpu_to_be64(0); - service_id &= service_mask; - if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && - (service_id != IB_CM_ASSIGN_SERVICE_ID)) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - if (cm_id->state != IB_CM_IDLE) - return -EINVAL; - - cm_id->state = IB_CM_LISTEN; - ++cm_id_priv->listen_sharecount; + struct cm_id_private *cm_id_priv = + container_of(cm_id, struct cm_id_private, id); + unsigned long flags; + int ret; - if (service_id == IB_CM_ASSIGN_SERVICE_ID) { - cm_id->service_id = cpu_to_be64(cm.listen_service_id++); - cm_id->service_mask = ~cpu_to_be64(0); - } else { - cm_id->service_id = service_id; - cm_id->service_mask = service_mask; + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->id.state != IB_CM_IDLE) { + ret = -EINVAL; + goto out; } - cur_cm_id_priv = cm_insert_listen(cm_id_priv); - if (cur_cm_id_priv) { - cm_id->state = IB_CM_IDLE; - --cm_id_priv->listen_sharecount; + ret = cm_init_listen(cm_id_priv, service_id, service_mask); + if (ret) + goto out; + + if (!cm_insert_listen(cm_id_priv, NULL)) { ret = -EBUSY; + goto out; } - return ret; -} -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&cm.lock, flags); - ret = __ib_cm_listen(cm_id, service_id, service_mask); - spin_unlock_irqrestore(&cm.lock, flags); + cm_id_priv->id.state = IB_CM_LISTEN; + ret = 0; +out: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_cm_listen); @@ -1169,50 +1232,38 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, ib_cm_handler cm_handler, __be64 service_id) { + struct cm_id_private *listen_id_priv; struct cm_id_private *cm_id_priv; - struct ib_cm_id *cm_id; - unsigned long flags; int err = 0; /* Create an ID in advance, since the creation may sleep */ - cm_id = ib_create_cm_id(device, cm_handler, NULL); - if (IS_ERR(cm_id)) - return cm_id; + cm_id_priv = cm_alloc_id_priv(device, cm_handler, NULL); + if (IS_ERR(cm_id_priv)) + return ERR_CAST(cm_id_priv); - spin_lock_irqsave(&cm.lock, flags); - - if (service_id == IB_CM_ASSIGN_SERVICE_ID) - goto new_id; + err = cm_init_listen(cm_id_priv, service_id, 0); + if (err) + return ERR_PTR(err); - /* Find an existing ID */ - cm_id_priv = cm_find_listen(device, service_id); - if (cm_id_priv) { - if (cm_id->cm_handler != cm_handler || cm_id->context) { - /* Sharing an ib_cm_id with different handlers is not - * supported */ - spin_unlock_irqrestore(&cm.lock, flags); + spin_lock_irq(&cm_id_priv->lock); + listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler); + if (listen_id_priv != cm_id_priv) { + spin_unlock_irq(&cm_id_priv->lock); + ib_destroy_cm_id(&cm_id_priv->id); + if (!listen_id_priv) return ERR_PTR(-EINVAL); - } - refcount_inc(&cm_id_priv->refcount); - ++cm_id_priv->listen_sharecount; - spin_unlock_irqrestore(&cm.lock, flags); - - ib_destroy_cm_id(cm_id); - cm_id = &cm_id_priv->id; - return cm_id; + return &listen_id_priv->id; } + cm_id_priv->id.state = IB_CM_LISTEN; + spin_unlock_irq(&cm_id_priv->lock); -new_id: - /* Use newly created ID */ - err = __ib_cm_listen(cm_id, service_id, 0); - - spin_unlock_irqrestore(&cm.lock, flags); + /* + * A listen ID does not need to be in the xarray since it does not + * receive mads, is not placed in the remote_id or remote_qpn rbtree, + * and does not enter timewait. + */ - if (err) { - ib_destroy_cm_id(cm_id); - return ERR_PTR(err); - } - return cm_id; + return &cm_id_priv->id; } EXPORT_SYMBOL(ib_cm_insert_listen); @@ -1380,10 +1431,6 @@ static void cm_format_req(struct cm_req_msg *req_msg, static int cm_validate_req_param(struct ib_cm_req_param *param) { - /* peer-to-peer not supported */ - if (param->peer_to_peer) - return -EINVAL; - if (!param->primary_path) return -EINVAL; @@ -1418,7 +1465,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, /* Verify that we're not in timewait. */ cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_IDLE) { + if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = -EINVAL; goto out; @@ -1436,12 +1483,12 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, param->ppath_sgid_attr, &cm_id_priv->av, cm_id_priv); if (ret) - goto error1; + goto out; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, cm_id_priv); if (ret) - goto error1; + goto out; } cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); @@ -1459,7 +1506,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); if (ret) - goto error1; + goto out; req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; cm_format_req(req_msg, cm_id_priv, param); @@ -1482,7 +1529,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, return 0; error2: cm_free_msg(cm_id_priv->msg); -error1: kfree(cm_id_priv->timewait_info); out: return ret; } EXPORT_SYMBOL(ib_send_cm_req); @@ -1788,6 +1834,8 @@ static void cm_format_rej(struct cm_rej_msg *rej_msg, const void *private_data, u8 private_data_len) { + lockdep_assert_held(&cm_id_priv->lock); + cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid); IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg, be32_to_cpu(cm_id_priv->id.remote_id)); @@ -1837,8 +1885,12 @@ static void cm_dup_req_handler(struct cm_work *work, counter[CM_REQ_COUNTER]); /* Quick state check to discard duplicate REQs. */ - if (cm_id_priv->id.state == IB_CM_REQ_RCVD) + spin_lock_irq(&cm_id_priv->lock); + if (cm_id_priv->id.state == IB_CM_REQ_RCVD) { + spin_unlock_irq(&cm_id_priv->lock); return; + } + spin_unlock_irq(&cm_id_priv->lock); ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) @@ -1923,14 +1975,10 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, NULL, 0); - goto out; + return NULL; } refcount_inc(&listen_cm_id_priv->refcount); - refcount_inc(&cm_id_priv->refcount); - cm_id_priv->id.state = IB_CM_REQ_RCVD; - atomic_inc(&cm_id_priv->work_count); spin_unlock_irq(&cm.lock); -out: return listen_cm_id_priv; } @@ -1972,7 +2020,6 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) static int cm_req_handler(struct cm_work *work) { - struct ib_cm_id *cm_id; struct cm_id_private *cm_id_priv, *listen_cm_id_priv; struct cm_req_msg *req_msg; const struct ib_global_route *grh; @@ -1981,13 +2028,33 @@ static int cm_req_handler(struct cm_work *work) req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); - if (IS_ERR(cm_id)) - return PTR_ERR(cm_id); + cm_id_priv = + cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL); + if (IS_ERR(cm_id_priv)) + return PTR_ERR(cm_id_priv); - cm_id_priv = container_of(cm_id, struct cm_id_private, id); cm_id_priv->id.remote_id = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg)); + cm_id_priv->id.service_id = + cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); + cm_id_priv->id.service_mask = ~cpu_to_be64(0); + cm_id_priv->tid = req_msg->hdr.tid; + cm_id_priv->timeout_ms = cm_convert_to_ms( + IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg)); + cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg); + cm_id_priv->remote_qpn = + cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); + cm_id_priv->initiator_depth = + IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg); + cm_id_priv->responder_resources = + IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg); + cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg); + cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg)); + cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); + cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg); + cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg); + cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); + ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); @@ -1999,27 +2066,26 @@ static int cm_req_handler(struct cm_work *work) ret = PTR_ERR(cm_id_priv->timewait_info); goto destroy; } - cm_id_priv->timewait_info->work.remote_id = - cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg)); + cm_id_priv->timewait_info->work.remote_id = cm_id_priv->id.remote_id; cm_id_priv->timewait_info->remote_ca_guid = cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg)); - cm_id_priv->timewait_info->remote_qpn = - cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); + cm_id_priv->timewait_info->remote_qpn = cm_id_priv->remote_qpn; + + /* + * Note that the ID pointer is not in the xarray at this point, + * so this set is only visible to the local thread. + */ + cm_id_priv->id.state = IB_CM_REQ_RCVD; listen_cm_id_priv = cm_match_req(work, cm_id_priv); if (!listen_cm_id_priv) { pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__, - be32_to_cpu(cm_id->local_id)); + be32_to_cpu(cm_id_priv->id.local_id)); + cm_id_priv->id.state = IB_CM_IDLE; ret = -EINVAL; - goto free_timeinfo; + goto destroy; } - cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; - cm_id_priv->id.context = listen_cm_id_priv->id.context; - cm_id_priv->id.service_id = - cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); - cm_process_routed_req(req_msg, work->mad_recv_wc->wc); memset(&work->path[0], 0, sizeof(work->path[0])); @@ -2057,10 +2123,10 @@ static int cm_req_handler(struct cm_work *work) work->port->port_num, 0, &work->path[0].sgid); if (err) - ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, + ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID, NULL, 0, NULL, 0); else - ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, + ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID, &work->path[0].sgid, sizeof(work->path[0].sgid), NULL, 0); @@ -2070,41 +2136,40 @@ static int cm_req_handler(struct cm_work *work) ret = cm_init_av_by_path(&work->path[1], NULL, &cm_id_priv->alt_av, cm_id_priv); if (ret) { - ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, + ib_send_cm_rej(&cm_id_priv->id, + IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, sizeof(work->path[0].sgid), NULL, 0); goto rejected; } } - cm_id_priv->tid = req_msg->hdr.tid; - cm_id_priv->timeout_ms = cm_convert_to_ms( - IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg)); - cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg); - cm_id_priv->remote_qpn = - cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); - cm_id_priv->initiator_depth = - IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg); - cm_id_priv->responder_resources = - IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg); - cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg); - cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg)); - cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); - cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg); - cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg); - cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); + cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; + cm_id_priv->id.context = listen_cm_id_priv->id.context; cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); + + /* Now MAD handlers can see the new ID */ + spin_lock_irq(&cm_id_priv->lock); + cm_finalize_id(cm_id_priv); + + /* Refcount belongs to the event, pairs with cm_process_work() */ + refcount_inc(&cm_id_priv->refcount); + atomic_inc(&cm_id_priv->work_count); + spin_unlock_irq(&cm_id_priv->lock); cm_process_work(cm_id_priv, work); + /* + * Since this ID was just created and was not made visible to other MAD + * handlers until the cm_finalize_id() above we know that the + * cm_process_work() will deliver the event and the listen_cm_id + * embedded in the event can be derefed here. + */ cm_deref_id(listen_cm_id_priv); return 0; rejected: - refcount_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); -free_timeinfo: - kfree(cm_id_priv->timewait_info); destroy: - ib_destroy_cm_id(cm_id); + ib_destroy_cm_id(&cm_id_priv->id); return ret; } @@ -2188,6 +2253,9 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg)); + WARN_ONCE(param->qp_num & 0xFF000000, + "IBTA declares QPN to be 24 bits, but it is 0x%X\n", + param->qp_num); cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -2358,13 +2426,13 @@ static int cm_rep_handler(struct cm_work *work) case IB_CM_MRA_REQ_RCVD: break; default: - spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; pr_debug( "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n", __func__, cm_id_priv->id.state, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg), IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); + spin_unlock_irq(&cm_id_priv->lock); goto error; } @@ -2433,8 +2501,6 @@ static int cm_rep_handler(struct cm_work *work) cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->alt_av.timeout - 1); - /* todo: handle peer_to_peer */ - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) @@ -2545,35 +2611,32 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, private_data_len); } -int ib_send_cm_dreq(struct ib_cm_id *cm_id, - const void *private_data, - u8 private_data_len) +static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, + const void *private_data, u8 private_data_len) { - struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - unsigned long flags; int ret; + lockdep_assert_held(&cm_id_priv->lock); + if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE) return -EINVAL; - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_ESTABLISHED) { + if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__, - be32_to_cpu(cm_id->local_id), cm_id->state); - ret = -EINVAL; - goto out; + be32_to_cpu(cm_id_priv->id.local_id), + cm_id_priv->id.state); + return -EINVAL; } - if (cm_id->lap_state == IB_CM_LAP_SENT || - cm_id->lap_state == IB_CM_MRA_LAP_RCVD) + if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || + cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) { cm_enter_timewait(cm_id_priv); - goto out; + return ret; } cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, @@ -2584,14 +2647,26 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id, ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } - cm_id->state = IB_CM_DREQ_SENT; + cm_id_priv->id.state = IB_CM_DREQ_SENT; cm_id_priv->msg = msg; -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; +} + +int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv = + container_of(cm_id, struct cm_id_private, id); + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + ret = cm_send_dreq_locked(cm_id_priv, private_data, private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_dreq); @@ -2612,51 +2687,60 @@ static void cm_format_drep(struct cm_drep_msg *drep_msg, private_data_len); } -int ib_send_cm_drep(struct ib_cm_id *cm_id, - const void *private_data, - u8 private_data_len) +static int cm_send_drep_locked(struct cm_id_private *cm_id_priv, + void *private_data, u8 private_data_len) { - struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - unsigned long flags; - void *data; int ret; + lockdep_assert_held(&cm_id_priv->lock); + if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE) return -EINVAL; - data = cm_copy_private_data(private_data, private_data_len); - if (IS_ERR(data)) - return PTR_ERR(data); - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_DREQ_RCVD) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - kfree(data); - pr_debug("%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n", - __func__, be32_to_cpu(cm_id->local_id), cm_id->state); + if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) { + pr_debug( + "%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n", + __func__, be32_to_cpu(cm_id_priv->id.local_id), + cm_id_priv->id.state); + kfree(private_data); return -EINVAL; } - cm_set_private_data(cm_id_priv, data, private_data_len); + cm_set_private_data(cm_id_priv, private_data, private_data_len); cm_enter_timewait(cm_id_priv); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) - goto out; + return ret; cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } + return 0; +} -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv = + container_of(cm_id, struct cm_id_private, id); + unsigned long flags; + void *data; + int ret; + + data = cm_copy_private_data(private_data, private_data_len); + if (IS_ERR(data)) + return PTR_ERR(data); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + ret = cm_send_drep_locked(cm_id_priv, data, private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_drep); @@ -2815,65 +2899,72 @@ out: return -EINVAL; } -int ib_send_cm_rej(struct ib_cm_id *cm_id, - enum ib_cm_rej_reason reason, - void *ari, - u8 ari_length, - const void *private_data, - u8 private_data_len) +static int cm_send_rej_locked(struct cm_id_private *cm_id_priv, + enum ib_cm_rej_reason reason, void *ari, + u8 ari_length, const void *private_data, + u8 private_data_len) { - struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - unsigned long flags; int ret; + lockdep_assert_held(&cm_id_priv->lock); + if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) || (ari && ari_length > IB_CM_REJ_ARI_LENGTH)) return -EINVAL; - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - - spin_lock_irqsave(&cm_id_priv->lock, flags); - switch (cm_id->state) { + switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: - ret = cm_alloc_msg(cm_id_priv, &msg); - if (!ret) - cm_format_rej((struct cm_rej_msg *) msg->mad, - cm_id_priv, reason, ari, ari_length, - private_data, private_data_len); - cm_reset_to_idle(cm_id_priv); + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + return ret; + cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason, + ari, ari_length, private_data, private_data_len); break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ret = cm_alloc_msg(cm_id_priv, &msg); - if (!ret) - cm_format_rej((struct cm_rej_msg *) msg->mad, - cm_id_priv, reason, ari, ari_length, - private_data, private_data_len); - cm_enter_timewait(cm_id_priv); + ret = cm_alloc_msg(cm_id_priv, &msg); + if (ret) + return ret; + cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason, + ari, ari_length, private_data, private_data_len); break; default: pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__, - be32_to_cpu(cm_id_priv->id.local_id), cm_id->state); - ret = -EINVAL; - goto out; + be32_to_cpu(cm_id_priv->id.local_id), + cm_id_priv->id.state); + return -EINVAL; } - if (ret) - goto out; - ret = ib_post_send_mad(msg, NULL); - if (ret) + if (ret) { cm_free_msg(msg); + return ret; + } -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; +} + +int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason, + void *ari, u8 ari_length, const void *private_data, + u8 private_data_len) +{ + struct cm_id_private *cm_id_priv = + container_of(cm_id, struct cm_id_private, id); + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + ret = cm_send_rej_locked(cm_id_priv, reason, ari, ari_length, + private_data, private_data_len); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_rej); @@ -2971,10 +3062,10 @@ static int cm_rej_handler(struct cm_work *work) } /* fall through */ default: - spin_unlock_irq(&cm_id_priv->lock); pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n", __func__, be32_to_cpu(cm_id_priv->id.local_id), cm_id_priv->id.state); + spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto out; } @@ -3501,20 +3592,27 @@ static void cm_format_sidr_req_event(struct cm_work *work, static int cm_sidr_req_handler(struct cm_work *work) { - struct ib_cm_id *cm_id; - struct cm_id_private *cm_id_priv, *cur_cm_id_priv; + struct cm_id_private *cm_id_priv, *listen_cm_id_priv; struct cm_sidr_req_msg *sidr_req_msg; struct ib_wc *wc; int ret; - cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); - if (IS_ERR(cm_id)) - return PTR_ERR(cm_id); - cm_id_priv = container_of(cm_id, struct cm_id_private, id); + cm_id_priv = + cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL); + if (IS_ERR(cm_id_priv)) + return PTR_ERR(cm_id_priv); /* Record SGID/SLID and request ID for lookup. */ sidr_req_msg = (struct cm_sidr_req_msg *) work->mad_recv_wc->recv_buf.mad; + + cm_id_priv->id.remote_id = + cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg)); + cm_id_priv->id.service_id = + cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)); + cm_id_priv->id.service_mask = ~cpu_to_be64(0); + cm_id_priv->tid = sidr_req_msg->hdr.tid; + wc = work->mad_recv_wc->wc; cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); cm_id_priv->av.dgid.global.interface_id = 0; @@ -3524,41 +3622,46 @@ static int cm_sidr_req_handler(struct cm_work *work) if (ret) goto out; - cm_id_priv->id.remote_id = - cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg)); - cm_id_priv->tid = sidr_req_msg->hdr.tid; - atomic_inc(&cm_id_priv->work_count); - spin_lock_irq(&cm.lock); - cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); - if (cur_cm_id_priv) { + listen_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); + if (listen_cm_id_priv) { spin_unlock_irq(&cm.lock); atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_SIDR_REQ_COUNTER]); goto out; /* Duplicate message. */ } cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; - cur_cm_id_priv = cm_find_listen( - cm_id->device, - cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg))); - if (!cur_cm_id_priv) { + listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, + cm_id_priv->id.service_id); + if (!listen_cm_id_priv) { spin_unlock_irq(&cm.lock); - cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); + ib_send_cm_sidr_rep(&cm_id_priv->id, + &(struct ib_cm_sidr_rep_param){ + .status = IB_SIDR_UNSUPPORTED }); goto out; /* No match. */ } - refcount_inc(&cur_cm_id_priv->refcount); - refcount_inc(&cm_id_priv->refcount); + refcount_inc(&listen_cm_id_priv->refcount); spin_unlock_irq(&cm.lock); - cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; - cm_id_priv->id.context = cur_cm_id_priv->id.context; - cm_id_priv->id.service_id = - cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); + cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; + cm_id_priv->id.context = listen_cm_id_priv->id.context; - cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id); - cm_process_work(cm_id_priv, work); - cm_deref_id(cur_cm_id_priv); + /* + * A SIDR ID does not need to be in the xarray since it does not receive + * mads, is not placed in the remote_id or remote_qpn rbtree, and does + * not enter timewait. + */ + + cm_format_sidr_req_event(work, cm_id_priv, &listen_cm_id_priv->id); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); + cm_free_work(work); + /* + * A pointer to the listen_cm_id is held in the event, so this deref + * must be after the event is delivered above. + */ + cm_deref_id(listen_cm_id_priv); + if (ret) + cm_destroy_id(&cm_id_priv->id, ret); return 0; out: ib_destroy_cm_id(&cm_id_priv->id); @@ -3588,50 +3691,52 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, param->private_data, param->private_data_len); } -int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, - struct ib_cm_sidr_rep_param *param) +static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, + struct ib_cm_sidr_rep_param *param) { - struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - unsigned long flags; int ret; + lockdep_assert_held(&cm_id_priv->lock); + if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) || (param->private_data && param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE)) return -EINVAL; - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_SIDR_REQ_RCVD) { - ret = -EINVAL; - goto error; - } + if (cm_id_priv->id.state != IB_CM_SIDR_REQ_RCVD) + return -EINVAL; ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) - goto error; + return ret; cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); ret = ib_post_send_mad(msg, NULL); if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } - cm_id->state = IB_CM_IDLE; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - - spin_lock_irqsave(&cm.lock, flags); + cm_id_priv->id.state = IB_CM_IDLE; if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); } - spin_unlock_irqrestore(&cm.lock, flags); return 0; +} -error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, + struct ib_cm_sidr_rep_param *param) +{ + struct cm_id_private *cm_id_priv = + container_of(cm_id, struct cm_id_private, id); + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + ret = cm_send_sidr_rep_locked(cm_id_priv, param); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_sidr_rep); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 72f032160c4b..26e6f7df247b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -199,7 +199,7 @@ struct cma_device { struct list_head list; struct ib_device *device; struct completion comp; - atomic_t refcount; + refcount_t refcount; struct list_head id_list; enum ib_gid_type *default_gid_type; u8 *default_roce_tos; @@ -247,9 +247,15 @@ enum { CMA_OPTION_AFONLY, }; -void cma_ref_dev(struct cma_device *cma_dev) +void cma_dev_get(struct cma_device *cma_dev) { - atomic_inc(&cma_dev->refcount); + refcount_inc(&cma_dev->refcount); +} + +void cma_dev_put(struct cma_device *cma_dev) +{ + if (refcount_dec_and_test(&cma_dev->refcount)) + complete(&cma_dev->comp); } struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, @@ -267,7 +273,7 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, } if (found_cma_dev) - cma_ref_dev(found_cma_dev); + cma_dev_get(found_cma_dev); mutex_unlock(&lock); return found_cma_dev; } @@ -463,7 +469,7 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join) static void _cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { - cma_ref_dev(cma_dev); + cma_dev_get(cma_dev); id_priv->cma_dev = cma_dev; id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = @@ -484,12 +490,6 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv, rdma_start_port(cma_dev->device)]; } -void cma_deref_dev(struct cma_device *cma_dev) -{ - if (atomic_dec_and_test(&cma_dev->refcount)) - complete(&cma_dev->comp); -} - static inline void release_mc(struct kref *kref) { struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); @@ -502,7 +502,7 @@ static void cma_release_dev(struct rdma_id_private *id_priv) { mutex_lock(&lock); list_del(&id_priv->list); - cma_deref_dev(id_priv->cma_dev); + cma_dev_put(id_priv->cma_dev); id_priv->cma_dev = NULL; mutex_unlock(&lock); } @@ -728,8 +728,8 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev; enum ib_gid_type gid_type; int ret = -ENODEV; + unsigned int port; union ib_gid gid; - u8 port; if (dev_addr->dev_type != ARPHRD_INFINIBAND && id_priv->id.ps == RDMA_PS_IPOIB) @@ -753,7 +753,7 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv, } list_for_each_entry(cma_dev, &dev_list, list) { - for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { + rdma_for_each_port (cma_dev->device, port) { if (listen_id_priv->cma_dev == cma_dev && listen_id_priv->id.port_num == port) continue; @@ -786,8 +786,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) struct cma_device *cma_dev, *cur_dev; struct sockaddr_ib *addr; union ib_gid gid, sgid, *dgid; + unsigned int p; u16 pkey, index; - u8 p; enum ib_port_state port_state; int i; @@ -798,7 +798,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) mutex_lock(&lock); list_for_each_entry(cur_dev, &dev_list, list) { - for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { + rdma_for_each_port (cur_dev->device, p) { if (!rdma_cap_af_ib(cur_dev->device, p)) continue; @@ -840,9 +840,14 @@ found: return 0; } -static void cma_deref_id(struct rdma_id_private *id_priv) +static void cma_id_get(struct rdma_id_private *id_priv) +{ + refcount_inc(&id_priv->refcount); +} + +static void cma_id_put(struct rdma_id_private *id_priv) { - if (atomic_dec_and_test(&id_priv->refcount)) + if (refcount_dec_and_test(&id_priv->refcount)) complete(&id_priv->comp); } @@ -870,7 +875,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, spin_lock_init(&id_priv->lock); mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); - atomic_set(&id_priv->refcount, 1); + refcount_set(&id_priv->refcount, 1); mutex_init(&id_priv->handler_mutex); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); @@ -1846,11 +1851,11 @@ void rdma_destroy_id(struct rdma_cm_id *id) } cma_release_port(id_priv); - cma_deref_id(id_priv); + cma_id_put(id_priv); wait_for_completion(&id_priv->comp); if (id_priv->internal_id) - cma_deref_id(id_priv->id.context); + cma_id_put(id_priv->id.context); kfree(id_priv->id.route.path_rec); @@ -2187,7 +2192,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, * Protect against the user destroying conn_id from another thread * until we're done accessing it. */ - atomic_inc(&conn_id->refcount); + cma_id_get(conn_id); ret = cma_cm_event_handler(conn_id, &event); if (ret) goto err3; @@ -2204,13 +2209,13 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); - cma_deref_id(conn_id); + cma_id_put(conn_id); if (net_dev) dev_put(net_dev); return 0; err3: - cma_deref_id(conn_id); + cma_id_put(conn_id); /* Destroy the CM ID by returning a non-zero value. */ conn_id->cm_id.ib = NULL; err2: @@ -2391,7 +2396,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, * Protect against the user destroying conn_id from another thread * until we're done accessing it. */ - atomic_inc(&conn_id->refcount); + cma_id_get(conn_id); ret = cma_cm_event_handler(conn_id, &event); if (ret) { /* User wants to destroy the CM ID */ @@ -2399,13 +2404,13 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); - cma_deref_id(conn_id); + cma_id_put(conn_id); rdma_destroy_id(&conn_id->id); return ret; } mutex_unlock(&conn_id->handler_mutex); - cma_deref_id(conn_id); + cma_id_put(conn_id); out: mutex_unlock(&listen_id->handler_mutex); @@ -2492,7 +2497,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, _cma_attach_to_dev(dev_id_priv, cma_dev); list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); - atomic_inc(&id_priv->refcount); + cma_id_get(id_priv); dev_id_priv->internal_id = 1; dev_id_priv->afonly = id_priv->afonly; dev_id_priv->tos_set = id_priv->tos_set; @@ -2647,7 +2652,7 @@ static void cma_work_handler(struct work_struct *_work) } out: mutex_unlock(&id_priv->handler_mutex); - cma_deref_id(id_priv); + cma_id_put(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); kfree(work); @@ -2671,7 +2676,7 @@ static void cma_ndev_work_handler(struct work_struct *_work) out: mutex_unlock(&id_priv->handler_mutex); - cma_deref_id(id_priv); + cma_id_put(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); kfree(work); @@ -2687,14 +2692,19 @@ static void cma_init_resolve_route_work(struct cma_work *work, work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; } -static void cma_init_resolve_addr_work(struct cma_work *work, - struct rdma_id_private *id_priv) +static void enqueue_resolve_addr_work(struct cma_work *work, + struct rdma_id_private *id_priv) { + /* Balances with cma_id_put() in cma_work_handler */ + cma_id_get(id_priv); + work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ADDR_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + + queue_work(cma_wq, &work->work); } static int cma_resolve_ib_route(struct rdma_id_private *id_priv, @@ -2968,6 +2978,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) err2: kfree(route->path_rec); route->path_rec = NULL; + route->num_paths = 0; err1: kfree(work); return ret; @@ -2982,7 +2993,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms) if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) return -EINVAL; - atomic_inc(&id_priv->refcount); + cma_id_get(id_priv); if (rdma_cap_ib_sa(id->device, id->port_num)) ret = cma_resolve_ib_route(id_priv, timeout_ms); else if (rdma_protocol_roce(id->device, id->port_num)) @@ -2998,7 +3009,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms) return 0; err: cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); - cma_deref_id(id_priv); + cma_id_put(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_route); @@ -3025,9 +3036,9 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) struct cma_device *cma_dev, *cur_dev; union ib_gid gid; enum ib_port_state port_state; + unsigned int p; u16 pkey; int ret; - u8 p; cma_dev = NULL; mutex_lock(&lock); @@ -3039,7 +3050,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) if (!cma_dev) cma_dev = cur_dev; - for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { + rdma_for_each_port (cur_dev->device, p) { if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) && port_state == IB_PORT_ACTIVE) { cma_dev = cur_dev; @@ -3148,9 +3159,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); - atomic_inc(&id_priv->refcount); - cma_init_resolve_addr_work(work, id_priv); - queue_work(cma_wq, &work->work); + enqueue_resolve_addr_work(work, id_priv); return 0; err: kfree(work); @@ -3175,9 +3184,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); - atomic_inc(&id_priv->refcount); - cma_init_resolve_addr_work(work, id_priv); - queue_work(cma_wq, &work->work); + enqueue_resolve_addr_work(work, id_priv); return 0; err: kfree(work); @@ -3212,19 +3219,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, int ret; id_priv = container_of(id, struct rdma_id_private, id); + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) + if (ret) { + memset(cma_dst_addr(id_priv), 0, + rdma_addr_size(dst_addr)); return ret; + } } - if (cma_family(id_priv) != dst_addr->sa_family) + if (cma_family(id_priv) != dst_addr->sa_family) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); } else { @@ -4581,7 +4595,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id INIT_WORK(&work->work, cma_ndev_work_handler); work->id = id_priv; work->event.event = RDMA_CM_EVENT_ADDR_CHANGE; - atomic_inc(&id_priv->refcount); + cma_id_get(id_priv); queue_work(cma_wq, &work->work); } @@ -4656,7 +4670,7 @@ static void cma_add_one(struct ib_device *device) } init_completion(&cma_dev->comp); - atomic_set(&cma_dev->refcount, 1); + refcount_set(&cma_dev->refcount, 1); INIT_LIST_HEAD(&cma_dev->id_list); ib_set_client_data(device, &cma_client, cma_dev); @@ -4715,11 +4729,11 @@ static void cma_process_remove(struct cma_device *cma_dev) list_del(&id_priv->listen_list); list_del_init(&id_priv->list); - atomic_inc(&id_priv->refcount); + cma_id_get(id_priv); mutex_unlock(&lock); ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv); - cma_deref_id(id_priv); + cma_id_put(id_priv); if (ret) rdma_destroy_id(&id_priv->id); @@ -4727,7 +4741,7 @@ static void cma_process_remove(struct cma_device *cma_dev) } mutex_unlock(&lock); - cma_deref_dev(cma_dev); + cma_dev_put(cma_dev); wait_for_completion(&cma_dev->comp); } @@ -4783,6 +4797,19 @@ static int __init cma_init(void) { int ret; + /* + * There is a rare lock ordering dependency in cma_netdev_callback() + * that only happens when bonding is enabled. Teach lockdep that rtnl + * must never be nested under lock so it can find these without having + * to test with bonding. + */ + if (IS_ENABLED(CONFIG_LOCKDEP)) { + rtnl_lock(); + mutex_lock(&lock); + mutex_unlock(&lock); + rtnl_unlock(); + } + cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); if (!cma_wq) return -ENOMEM; diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 8b0b5ae22e4c..c672a4978bfd 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -94,7 +94,7 @@ static int cma_configfs_params_get(struct config_item *item, static void cma_configfs_params_put(struct cma_device *cma_dev) { - cma_deref_dev(cma_dev); + cma_dev_put(cma_dev); } static ssize_t default_roce_mode_show(struct config_item *item, @@ -312,12 +312,12 @@ static struct config_group *make_cma_dev(struct config_group *group, configfs_add_default_group(&cma_dev_group->ports_group, &cma_dev_group->device_group); - cma_deref_dev(cma_dev); + cma_dev_put(cma_dev); return &cma_dev_group->device_group; fail: if (cma_dev) - cma_deref_dev(cma_dev); + cma_dev_put(cma_dev); kfree(cma_dev_group); return ERR_PTR(err); } diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index ca7307277518..5edcf44a9307 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -66,7 +66,7 @@ struct rdma_id_private { struct mutex qp_mutex; struct completion comp; - atomic_t refcount; + refcount_t refcount; struct mutex handler_mutex; int backlog; @@ -111,8 +111,8 @@ static inline void cma_configfs_exit(void) } #endif -void cma_ref_dev(struct cma_device *dev); -void cma_deref_dev(struct cma_device *dev); +void cma_dev_get(struct cma_device *dev); +void cma_dev_put(struct cma_device *dev); typedef bool (*cma_device_filter)(struct ib_device *, void *); struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, void *cookie); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index b1457b3464d3..cf42acca4a3a 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -338,6 +338,20 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->pd = pd; qp->uobject = uobj; qp->real_qp = qp; + + qp->qp_type = attr->qp_type; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->send_cq = attr->send_cq; + qp->recv_cq = attr->recv_cq; + qp->srq = attr->srq; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->event_handler = attr->event_handler; + + atomic_set(&qp->usecnt, 0); + spin_lock_init(&qp->mr_lock); + INIT_LIST_HEAD(&qp->rdma_mrs); + INIT_LIST_HEAD(&qp->sig_mrs); + /* * We don't track XRC QPs for now, because they don't have PD * and more importantly they are created internaly by driver, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f6c255202d7f..d0b3d35ad3e4 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -896,7 +896,9 @@ static int add_one_compat_dev(struct ib_device *device, cdev->dev.parent = device->dev.parent; rdma_init_coredev(cdev, device, read_pnet(&rnet->net)); cdev->dev.release = compatdev_release; - dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); + ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); + if (ret) + goto add_err; ret = device_add(&cdev->dev); if (ret) diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index ade71823370f..da8adadf4755 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -159,8 +159,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; - list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { + list_del(e); kfree(list_entry(e, struct iwcm_work, free_list)); + } } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 956b3a7dfed7..403d8673a2f9 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -79,13 +79,13 @@ struct ib_mad_private { struct ib_mad_private_header header; size_t mad_size; struct ib_grh grh; - u8 mad[0]; + u8 mad[]; } __packed; struct ib_rmpp_segment { struct list_head list; u32 num; - u8 data[0]; + u8 data[]; }; struct ib_mad_agent_private { diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index cd338ddc4a39..9c2d8b7f1af9 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -71,7 +71,7 @@ struct mcast_device { struct ib_event_handler event_handler; int start_port; int end_port; - struct mcast_port port[0]; + struct mcast_port port[]; }; enum mcast_state { diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 37b433aa7306..9eec26d10d7b 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -918,6 +918,10 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], IB_DEVICE_NAME_MAX); + if (strlen(name) == 0) { + err = -EINVAL; + goto done; + } err = ib_device_rename(device, name); goto done; } @@ -1514,7 +1518,7 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(ibdev_name)); - if (strchr(ibdev_name, '%')) + if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0) return -EINVAL; nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); @@ -1757,6 +1761,8 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (ret) goto err_msg; } else { + if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) + goto err_msg; qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 4fad732f9b3c..557efbf29197 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -273,6 +273,23 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return 1; } +static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, + u32 sg_cnt, enum dma_data_direction dir) +{ + if (is_pci_p2pdma_page(sg_page(sg))) + pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir); + else + ib_dma_unmap_sg(dev, sg, sg_cnt, dir); +} + +static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, + u32 sg_cnt, enum dma_data_direction dir) +{ + if (is_pci_p2pdma_page(sg_page(sg))) + return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + return ib_dma_map_sg(dev, sg, sg_cnt, dir); +} + /** * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context * @ctx: context to initialize @@ -295,11 +312,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct ib_device *dev = qp->pd->device; int ret; - if (is_pci_p2pdma_page(sg_page(sg))) - ret = pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); - else - ret = ib_dma_map_sg(dev, sg, sg_cnt, dir); - + ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); if (!ret) return -ENOMEM; sg_cnt = ret; @@ -338,7 +351,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, return ret; out_unmap_sg: - ib_dma_unmap_sg(dev, sg, sg_cnt, dir); + rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_init); @@ -378,13 +391,13 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return -EINVAL; } - ret = ib_dma_map_sg(dev, sg, sg_cnt, dir); + ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); if (!ret) return -ENOMEM; sg_cnt = ret; if (prot_sg_cnt) { - ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir); + ret = rdma_rw_map_sg(dev, prot_sg, prot_sg_cnt, dir); if (!ret) { ret = -ENOMEM; goto out_unmap_sg; @@ -453,9 +466,9 @@ out_free_ctx: kfree(ctx->reg); out_unmap_prot_sg: if (prot_sg_cnt) - ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); + rdma_rw_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); out_unmap_sg: - ib_dma_unmap_sg(dev, sg, sg_cnt, dir); + rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_signature_init); @@ -588,11 +601,7 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, break; } - if (is_pci_p2pdma_page(sg_page(sg))) - pci_p2pdma_unmap_sg(qp->pd->device->dma_device, sg, - sg_cnt, dir); - else - ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy); @@ -619,9 +628,9 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr); kfree(ctx->reg); - ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); if (prot_sg_cnt) - ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); + rdma_rw_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); + rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 30d4c126a2db..74e0058fcf9e 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -101,7 +101,7 @@ struct ib_sa_port { struct ib_sa_device { int start_port, end_port; struct ib_event_handler event_handler; - struct ib_sa_port port[0]; + struct ib_sa_port port[]; }; struct ib_sa_query { diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 2b4d80393bd0..75e7ec017836 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -340,20 +340,19 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, return NULL; if (qp_attr_mask & IB_QP_PORT) - new_pps->main.port_num = - (qp_pps) ? qp_pps->main.port_num : qp_attr->port_num; - if (qp_attr_mask & IB_QP_PKEY_INDEX) - new_pps->main.pkey_index = (qp_pps) ? qp_pps->main.pkey_index : - qp_attr->pkey_index; - if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) - new_pps->main.state = IB_PORT_PKEY_VALID; - - if (!(qp_attr_mask & (IB_QP_PKEY_INDEX || IB_QP_PORT)) && qp_pps) { + new_pps->main.port_num = qp_attr->port_num; + else if (qp_pps) new_pps->main.port_num = qp_pps->main.port_num; + + if (qp_attr_mask & IB_QP_PKEY_INDEX) + new_pps->main.pkey_index = qp_attr->pkey_index; + else if (qp_pps) new_pps->main.pkey_index = qp_pps->main.pkey_index; - if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) - new_pps->main.state = IB_PORT_PKEY_VALID; - } + + if (((qp_attr_mask & IB_QP_PKEY_INDEX) && + (qp_attr_mask & IB_QP_PORT)) || + (qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)) + new_pps->main.state = IB_PORT_PKEY_VALID; if (qp_attr_mask & IB_QP_ALT_PATH) { new_pps->alt.port_num = qp_attr->alt_port_num; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 0274e9b704be..16b6cf57fa85 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -85,12 +85,13 @@ struct ucma_file { struct ucma_context { u32 id; struct completion comp; - atomic_t ref; + refcount_t ref; int events_reported; int backlog; struct ucma_file *file; struct rdma_cm_id *cm_id; + struct mutex mutex; u64 uid; struct list_head list; @@ -152,7 +153,7 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) if (ctx->closing) ctx = ERR_PTR(-EIO); else - atomic_inc(&ctx->ref); + refcount_inc(&ctx->ref); } xa_unlock(&ctx_table); return ctx; @@ -160,7 +161,7 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) static void ucma_put_ctx(struct ucma_context *ctx) { - if (atomic_dec_and_test(&ctx->ref)) + if (refcount_dec_and_test(&ctx->ref)) complete(&ctx->comp); } @@ -212,10 +213,11 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) return NULL; INIT_WORK(&ctx->close_work, ucma_close_id); - atomic_set(&ctx->ref, 1); + refcount_set(&ctx->ref, 1); init_completion(&ctx->comp); INIT_LIST_HEAD(&ctx->mc_list); ctx->file = file; + mutex_init(&ctx->mutex); if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL)) goto error; @@ -589,6 +591,7 @@ static int ucma_free_ctx(struct ucma_context *ctx) } events_reported = ctx->events_reported; + mutex_destroy(&ctx->mutex); kfree(ctx); return events_reported; } @@ -658,7 +661,10 @@ static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); + mutex_unlock(&ctx->mutex); + ucma_put_ctx(ctx); return ret; } @@ -681,7 +687,9 @@ static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -705,8 +713,10 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -731,8 +741,10 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -752,7 +764,9 @@ static ssize_t ucma_resolve_route(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -841,6 +855,7 @@ static ssize_t ucma_query_route(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); memset(&resp, 0, sizeof resp); addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? @@ -864,6 +879,7 @@ static ssize_t ucma_query_route(struct ucma_file *file, ucma_copy_iw_route(&resp, &ctx->cm_id->route); out: + mutex_unlock(&ctx->mutex); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; @@ -1014,6 +1030,7 @@ static ssize_t ucma_query(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); switch (cmd.option) { case RDMA_USER_CM_QUERY_ADDR: ret = ucma_query_addr(ctx, response, out_len); @@ -1028,6 +1045,7 @@ static ssize_t ucma_query(struct ucma_file *file, ret = -ENOSYS; break; } + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; @@ -1045,7 +1063,7 @@ static void ucma_copy_conn_param(struct rdma_cm_id *id, dst->retry_count = src->retry_count; dst->rnr_retry_count = src->rnr_retry_count; dst->srq = src->srq; - dst->qp_num = src->qp_num; + dst->qp_num = src->qp_num & 0xFFFFFF; dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0; } @@ -1068,7 +1086,9 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, return PTR_ERR(ctx); ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); + mutex_lock(&ctx->mutex); ret = rdma_connect(ctx->cm_id, &conn_param); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1089,7 +1109,9 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ? cmd.backlog : max_backlog; + mutex_lock(&ctx->mutex); ret = rdma_listen(ctx->cm_id, ctx->backlog); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1112,13 +1134,17 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, if (cmd.conn_param.valid) { ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&file->mut); + mutex_lock(&ctx->mutex); ret = __rdma_accept(ctx->cm_id, &conn_param, NULL); + mutex_unlock(&ctx->mutex); if (!ret) ctx->uid = cmd.uid; mutex_unlock(&file->mut); - } else + } else { + mutex_lock(&ctx->mutex); ret = __rdma_accept(ctx->cm_id, NULL, NULL); - + mutex_unlock(&ctx->mutex); + } ucma_put_ctx(ctx); return ret; } @@ -1137,7 +1163,9 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1156,7 +1184,9 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_disconnect(ctx->cm_id); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1187,7 +1217,9 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file, resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; + mutex_lock(&ctx->mutex); ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); + mutex_unlock(&ctx->mutex); if (ret) goto out; @@ -1273,9 +1305,13 @@ static int ucma_set_ib_path(struct ucma_context *ctx, struct sa_path_rec opa; sa_convert_path_ib_to_opa(&opa, &sa_path); + mutex_lock(&ctx->mutex); ret = rdma_set_ib_path(ctx->cm_id, &opa); + mutex_unlock(&ctx->mutex); } else { + mutex_lock(&ctx->mutex); ret = rdma_set_ib_path(ctx->cm_id, &sa_path); + mutex_unlock(&ctx->mutex); } if (ret) return ret; @@ -1308,7 +1344,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level, switch (level) { case RDMA_OPTION_ID: + mutex_lock(&ctx->mutex); ret = ucma_set_option_id(ctx, optname, optval, optlen); + mutex_unlock(&ctx->mutex); break; case RDMA_OPTION_IB: ret = ucma_set_option_ib(ctx, optname, optval, optlen); @@ -1368,8 +1406,10 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); if (ctx->cm_id->device) ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; @@ -1412,8 +1452,10 @@ static ssize_t ucma_process_join(struct ucma_file *file, mc->join_state = join_state; mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); + mutex_lock(&ctx->mutex); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, join_state, mc); + mutex_unlock(&ctx->mutex); if (ret) goto err2; @@ -1502,7 +1544,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, mc = ERR_PTR(-ENOENT); else if (mc->ctx->file != file) mc = ERR_PTR(-EINVAL); - else if (!atomic_inc_not_zero(&mc->ctx->ref)) + else if (!refcount_inc_not_zero(&mc->ctx->ref)) mc = ERR_PTR(-ENXIO); else __xa_erase(&multicast_table, mc->id); @@ -1513,7 +1555,10 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, goto out; } + mutex_lock(&mc->ctx->mutex); rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); + mutex_unlock(&mc->ctx->mutex); + mutex_lock(&mc->ctx->file->mut); ucma_cleanup_mc_events(mc); list_del(&mc->list); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 06b6125b5ae1..82455a1392f1 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -197,6 +197,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, unsigned long lock_limit; unsigned long new_pinned; unsigned long cur_base; + unsigned long dma_attr = 0; struct mm_struct *mm; unsigned long npages; int ret; @@ -278,10 +279,12 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, sg_mark_end(sg); - umem->nmap = ib_dma_map_sg(device, - umem->sg_head.sgl, - umem->sg_nents, - DMA_BIDIRECTIONAL); + if (access & IB_ACCESS_RELAXED_ORDERING) + dma_attr |= DMA_ATTR_WEAK_ORDERING; + + umem->nmap = + ib_dma_map_sg_attrs(device, umem->sg_head.sgl, umem->sg_nents, + DMA_BIDIRECTIONAL, dma_attr); if (!umem->nmap) { ret = -ENOMEM; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index b8c657b28380..3b1e627d9a8d 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -181,14 +181,28 @@ ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr, odp_data->page_shift = PAGE_SHIFT; odp_data->notifier.ops = ops; + /* + * A mmget must be held when registering a notifier, the owming_mm only + * has a mm_grab at this point. + */ + if (!mmget_not_zero(umem->owning_mm)) { + ret = -EFAULT; + goto out_free; + } + odp_data->tgid = get_pid(root->tgid); ret = ib_init_umem_odp(odp_data, ops); - if (ret) { - put_pid(odp_data->tgid); - kfree(odp_data); - return ERR_PTR(ret); - } + if (ret) + goto out_tgid; + mmput(umem->owning_mm); return odp_data; + +out_tgid: + put_pid(odp_data->tgid); + mmput(umem->owning_mm); +out_free: + kfree(odp_data); + return ERR_PTR(ret); } EXPORT_SYMBOL(ib_umem_odp_alloc_child); @@ -261,8 +275,8 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) mmu_interval_notifier_remove(&umem_odp->notifier); kvfree(umem_odp->dma_list); kvfree(umem_odp->page_list); - put_pid(umem_odp->tgid); } + put_pid(umem_odp->tgid); kfree(umem_odp); } EXPORT_SYMBOL(ib_umem_odp_release); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 1235ffb2389b..da229eab5903 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1129,17 +1129,30 @@ static const struct file_operations umad_sm_fops = { .llseek = no_llseek, }; +static struct ib_umad_port *get_port(struct ib_device *ibdev, + struct ib_umad_device *umad_dev, + unsigned int port) +{ + if (!umad_dev) + return ERR_PTR(-EOPNOTSUPP); + if (!rdma_is_port_valid(ibdev, port)) + return ERR_PTR(-EINVAL); + if (!rdma_cap_ib_mad(ibdev, port)) + return ERR_PTR(-EOPNOTSUPP); + + return &umad_dev->ports[port - rdma_start_port(ibdev)]; +} + static int ib_umad_get_nl_info(struct ib_device *ibdev, void *client_data, struct ib_client_nl_info *res) { - struct ib_umad_device *umad_dev = client_data; + struct ib_umad_port *port = get_port(ibdev, client_data, res->port); - if (!rdma_is_port_valid(ibdev, res->port)) - return -EINVAL; + if (IS_ERR(port)) + return PTR_ERR(port); res->abi = IB_USER_MAD_ABI_VERSION; - res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].dev; - + res->cdev = &port->dev; return 0; } @@ -1154,15 +1167,13 @@ MODULE_ALIAS_RDMA_CLIENT("umad"); static int ib_issm_get_nl_info(struct ib_device *ibdev, void *client_data, struct ib_client_nl_info *res) { - struct ib_umad_device *umad_dev = - ib_get_client_data(ibdev, &umad_client); + struct ib_umad_port *port = get_port(ibdev, client_data, res->port); - if (!rdma_is_port_valid(ibdev, res->port)) - return -EINVAL; + if (IS_ERR(port)) + return PTR_ERR(port); res->abi = IB_USER_MAD_ABI_VERSION; - res->cdev = &umad_dev->ports[res->port - rdma_start_port(ibdev)].sm_dev; - + res->cdev = &port->sm_dev; return 0; } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 025933752e1d..060b4ebbd2ba 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1445,16 +1445,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - qp->pd = pd; - qp->send_cq = attr.send_cq; - qp->recv_cq = attr.recv_cq; - qp->srq = attr.srq; - qp->rwq_ind_tbl = ind_tbl; - qp->event_handler = attr.event_handler; - qp->qp_type = attr.qp_type; - atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); - qp->port = 0; if (attr.send_cq) atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3ebae3b65c28..56a71337112c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -54,8 +54,6 @@ #include "core_priv.h" #include <trace/events/rdma_core.h> -#include <trace/events/rdma_core.h> - static int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr); @@ -1127,8 +1125,7 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, EXPORT_SYMBOL(ib_open_qp); static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, - struct ib_qp_init_attr *qp_init_attr, - struct ib_udata *udata) + struct ib_qp_init_attr *qp_init_attr) { struct ib_qp *real_qp = qp; @@ -1150,9 +1147,18 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, return qp; } -struct ib_qp *ib_create_qp_user(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr, - struct ib_udata *udata) +/** + * ib_create_qp - Creates a kernel QP associated with the specified protection + * domain. + * @pd: The protection domain associated with the QP. + * @qp_init_attr: A list of initial attributes required to create the + * QP. If QP creation succeeds, then the attributes are updated to + * the actual capabilities of the created QP. + * + * NOTE: for user qp use ib_create_qp_user with valid udata! + */ +struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr) { struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device; struct ib_qp *qp; @@ -1185,19 +1191,9 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, if (ret) goto err; - qp->qp_type = qp_init_attr->qp_type; - qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; - - atomic_set(&qp->usecnt, 0); - qp->mrs_used = 0; - spin_lock_init(&qp->mr_lock); - INIT_LIST_HEAD(&qp->rdma_mrs); - INIT_LIST_HEAD(&qp->sig_mrs); - qp->port = 0; - if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { struct ib_qp *xrc_qp = - create_xrc_qp_user(qp, qp_init_attr, udata); + create_xrc_qp_user(qp, qp_init_attr); if (IS_ERR(xrc_qp)) { ret = PTR_ERR(xrc_qp); @@ -1253,7 +1249,7 @@ err: return ERR_PTR(ret); } -EXPORT_SYMBOL(ib_create_qp_user); +EXPORT_SYMBOL(ib_create_qp); static const struct { int valid; diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 725b2350e349..a300588634c5 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -89,6 +89,15 @@ #define BNXT_RE_DEFAULT_ACK_DELAY 16 +struct bnxt_re_ring_attr { + dma_addr_t *dma_arr; + int pages; + int type; + u32 depth; + u32 lrid; /* Logical ring id */ + u8 mode; +}; + struct bnxt_re_work { struct work_struct work; unsigned long event; @@ -104,6 +113,14 @@ struct bnxt_re_sqp_entries { struct bnxt_re_qp *qp1_qp; }; +#define BNXT_RE_MAX_GSI_SQP_ENTRIES 1024 +struct bnxt_re_gsi_context { + struct bnxt_re_qp *gsi_qp; + struct bnxt_re_qp *gsi_sqp; + struct bnxt_re_ah *gsi_sah; + struct bnxt_re_sqp_entries *sqp_tbl; +}; + #define BNXT_RE_MIN_MSIX 2 #define BNXT_RE_MAX_MSIX 9 #define BNXT_RE_AEQ_IDX 0 @@ -115,7 +132,6 @@ struct bnxt_re_dev { struct list_head list; unsigned long flags; #define BNXT_RE_FLAG_NETDEV_REGISTERED 0 -#define BNXT_RE_FLAG_IBDEV_REGISTERED 1 #define BNXT_RE_FLAG_GOT_MSIX 2 #define BNXT_RE_FLAG_HAVE_L2_REF 3 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 @@ -125,7 +141,7 @@ struct bnxt_re_dev { #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 struct net_device *netdev; unsigned int version, major, minor; - struct bnxt_qplib_chip_ctx chip_ctx; + struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX]; int num_msix; @@ -160,15 +176,11 @@ struct bnxt_re_dev { atomic_t srq_count; atomic_t mr_count; atomic_t mw_count; - atomic_t sched_count; /* Max of 2 lossless traffic class supported per port */ u16 cosq[2]; /* QP for for handling QP1 packets */ - u32 sqp_id; - struct bnxt_re_qp *qp1_sqp; - struct bnxt_re_ah *sqp_ah; - struct bnxt_re_sqp_entries sqp_tbl[1024]; + struct bnxt_re_gsi_context gsi_ctx; atomic_t nq_alloc_cnt; u32 is_virtfn; u32 num_vfs; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 52b6a4d85460..95f6d493d1b9 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -312,9 +312,9 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context) */ if (ctx->idx == 0 && rdma_link_local_addr((struct in6_addr *)gid_to_del) && - ctx->refcnt == 1 && rdev->qp1_sqp) { - dev_dbg(rdev_to_dev(rdev), - "Trying to delete GID0 while QP1 is alive\n"); + ctx->refcnt == 1 && rdev->gsi_ctx.gsi_sqp) { + ibdev_dbg(&rdev->ibdev, + "Trying to delete GID0 while QP1 is alive\n"); return -EFAULT; } ctx->refcnt--; @@ -322,8 +322,8 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context) rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del, vlan_id, true); if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to remove GID: %#x", rc); + ibdev_err(&rdev->ibdev, + "Failed to remove GID: %#x", rc); } else { ctx_tbl = sgid_tbl->ctx; ctx_tbl[ctx->idx] = NULL; @@ -360,7 +360,7 @@ int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context) } if (rc < 0) { - dev_err(rdev_to_dev(rdev), "Failed to add GID: %#x", rc); + ibdev_err(&rdev->ibdev, "Failed to add GID: %#x", rc); return rc; } @@ -423,12 +423,12 @@ static int bnxt_re_bind_fence_mw(struct bnxt_qplib_qp *qplib_qp) wqe.bind.r_key = fence->bind_rkey; fence->bind_rkey = ib_inc_rkey(fence->bind_rkey); - dev_dbg(rdev_to_dev(qp->rdev), - "Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n", + ibdev_dbg(&qp->rdev->ibdev, + "Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n", wqe.bind.r_key, qp->qplib_qp.id, pd); rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe); if (rc) { - dev_err(rdev_to_dev(qp->rdev), "Failed to bind fence-WQE\n"); + ibdev_err(&qp->rdev->ibdev, "Failed to bind fence-WQE\n"); return rc; } bnxt_qplib_post_send_db(&qp->qplib_qp); @@ -479,7 +479,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) DMA_BIDIRECTIONAL); rc = dma_mapping_error(dev, dma_addr); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to dma-map fence-MR-mem\n"); + ibdev_err(&rdev->ibdev, "Failed to dma-map fence-MR-mem\n"); rc = -EIO; fence->dma_addr = 0; goto fail; @@ -499,7 +499,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags); rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to alloc fence-HW-MR\n"); + ibdev_err(&rdev->ibdev, "Failed to alloc fence-HW-MR\n"); goto fail; } @@ -511,7 +511,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl, BNXT_RE_FENCE_PBL_SIZE, false, PAGE_SIZE); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n"); + ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n"); goto fail; } mr->ib_mr.rkey = mr->qplib_mr.rkey; @@ -519,8 +519,8 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) /* Create a fence MW only for kernel consumers */ mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL); if (IS_ERR(mw)) { - dev_err(rdev_to_dev(rdev), - "Failed to create fence-MW for PD: %p\n", pd); + ibdev_err(&rdev->ibdev, + "Failed to create fence-MW for PD: %p\n", pd); rc = PTR_ERR(mw); goto fail; } @@ -558,7 +558,7 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) pd->rdev = rdev; if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) { - dev_err(rdev_to_dev(rdev), "Failed to allocate HW PD"); + ibdev_err(&rdev->ibdev, "Failed to allocate HW PD"); rc = -ENOMEM; goto fail; } @@ -585,16 +585,16 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to copy user response\n"); + ibdev_err(&rdev->ibdev, + "Failed to copy user response\n"); goto dbfail; } } if (!udata) if (bnxt_re_create_fence_mr(pd)) - dev_warn(rdev_to_dev(rdev), - "Failed to create Fence-MR\n"); + ibdev_warn(&rdev->ibdev, + "Failed to create Fence-MR\n"); return 0; dbfail: bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, @@ -639,12 +639,13 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); struct bnxt_re_dev *rdev = pd->rdev; const struct ib_gid_attr *sgid_attr; + struct bnxt_re_gid_ctx *ctx; struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); u8 nw_type; int rc; if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) { - dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set"); + ibdev_err(&rdev->ibdev, "Failed to alloc AH: GRH not set"); return -EINVAL; } @@ -654,19 +655,18 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, /* Supply the configuration for the HW */ memcpy(ah->qplib_ah.dgid.data, grh->dgid.raw, sizeof(union ib_gid)); - /* - * If RoCE V2 is enabled, stack will have two entries for - * each GID entry. Avoiding this duplicte entry in HW. Dividing - * the GID index by 2 for RoCE V2 + sgid_attr = grh->sgid_attr; + /* Get the HW context of the GID. The reference + * of GID table entry is already taken by the caller. */ - ah->qplib_ah.sgid_index = grh->sgid_index / 2; + ctx = rdma_read_gid_hw_context(sgid_attr); + ah->qplib_ah.sgid_index = ctx->idx; ah->qplib_ah.host_sgid_index = grh->sgid_index; ah->qplib_ah.traffic_class = grh->traffic_class; ah->qplib_ah.flow_label = grh->flow_label; ah->qplib_ah.hop_limit = grh->hop_limit; ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr); - sgid_attr = grh->sgid_attr; /* Get network header type for this GID */ nw_type = rdma_gid_attr_network_type(sgid_attr); ah->qplib_ah.nw_type = bnxt_re_stack_to_dev_nw_type(nw_type); @@ -675,7 +675,7 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, !(flags & RDMA_CREATE_AH_SLEEPABLE)); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH"); + ibdev_err(&rdev->ibdev, "Failed to allocate HW AH"); return rc; } @@ -742,6 +742,49 @@ void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, spin_unlock_irqrestore(&qp->scq->cq_lock, flags); } +static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) +{ + struct bnxt_re_qp *gsi_sqp; + struct bnxt_re_ah *gsi_sah; + struct bnxt_re_dev *rdev; + int rc = 0; + + rdev = qp->rdev; + gsi_sqp = rdev->gsi_ctx.gsi_sqp; + gsi_sah = rdev->gsi_ctx.gsi_sah; + + /* remove from active qp list */ + mutex_lock(&rdev->qp_lock); + list_del(&gsi_sqp->list); + mutex_unlock(&rdev->qp_lock); + atomic_dec(&rdev->qp_count); + + ibdev_dbg(&rdev->ibdev, "Destroy the shadow AH\n"); + bnxt_qplib_destroy_ah(&rdev->qplib_res, + &gsi_sah->qplib_ah, + true); + bnxt_qplib_clean_qp(&qp->qplib_qp); + + ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n"); + rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &gsi_sqp->qplib_qp); + if (rc) { + ibdev_err(&rdev->ibdev, "Destroy Shadow QP failed"); + goto fail; + } + bnxt_qplib_free_qp_res(&rdev->qplib_res, &gsi_sqp->qplib_qp); + + kfree(rdev->gsi_ctx.sqp_tbl); + kfree(gsi_sah); + kfree(gsi_sqp); + rdev->gsi_ctx.gsi_sqp = NULL; + rdev->gsi_ctx.gsi_sah = NULL; + rdev->gsi_ctx.sqp_tbl = NULL; + + return 0; +fail: + return rc; +} + /* Queue Pairs */ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) { @@ -750,10 +793,16 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) unsigned int flags; int rc; + mutex_lock(&rdev->qp_lock); + list_del(&qp->list); + mutex_unlock(&rdev->qp_lock); + atomic_dec(&rdev->qp_count); + bnxt_qplib_flush_cqn_wq(&qp->qplib_qp); + rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP"); + ibdev_err(&rdev->ibdev, "Failed to destroy HW QP"); return rc; } @@ -765,40 +814,19 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp); - if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) { - bnxt_qplib_destroy_ah(&rdev->qplib_res, &rdev->sqp_ah->qplib_ah, - false); - - bnxt_qplib_clean_qp(&qp->qplib_qp); - rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, - &rdev->qp1_sqp->qplib_qp); - if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to destroy Shadow QP"); - return rc; - } - bnxt_qplib_free_qp_res(&rdev->qplib_res, - &rdev->qp1_sqp->qplib_qp); - mutex_lock(&rdev->qp_lock); - list_del(&rdev->qp1_sqp->list); - atomic_dec(&rdev->qp_count); - mutex_unlock(&rdev->qp_lock); - - kfree(rdev->sqp_ah); - kfree(rdev->qp1_sqp); - rdev->qp1_sqp = NULL; - rdev->sqp_ah = NULL; + if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_sqp) { + rc = bnxt_re_destroy_gsi_sqp(qp); + if (rc) + goto sh_fail; } ib_umem_release(qp->rumem); ib_umem_release(qp->sumem); - mutex_lock(&rdev->qp_lock); - list_del(&qp->list); - atomic_dec(&rdev->qp_count); - mutex_unlock(&rdev->qp_lock); kfree(qp); return 0; +sh_fail: + return rc; } static u8 __from_ib_qp_type(enum ib_qp_type type) @@ -831,7 +859,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE); /* Consider mapping PSN search memory only for RC QPs. */ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { - psn_sz = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? + psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? sizeof(struct sq_psn_search_ext) : sizeof(struct sq_psn_search); bytes += (qplib_qp->sq.max_wqe * psn_sz); @@ -843,9 +871,11 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, return PTR_ERR(umem); qp->sumem = umem; - qplib_qp->sq.sg_info.sglist = umem->sg_head.sgl; + qplib_qp->sq.sg_info.sghead = umem->sg_head.sgl; qplib_qp->sq.sg_info.npages = ib_umem_num_pages(umem); qplib_qp->sq.sg_info.nmap = umem->nmap; + qplib_qp->sq.sg_info.pgsize = PAGE_SIZE; + qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT; qplib_qp->qp_handle = ureq.qp_handle; if (!qp->qplib_qp.srq) { @@ -856,9 +886,11 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, if (IS_ERR(umem)) goto rqfail; qp->rumem = umem; - qplib_qp->rq.sg_info.sglist = umem->sg_head.sgl; + qplib_qp->rq.sg_info.sghead = umem->sg_head.sgl; qplib_qp->rq.sg_info.npages = ib_umem_num_pages(umem); qplib_qp->rq.sg_info.nmap = umem->nmap; + qplib_qp->rq.sg_info.pgsize = PAGE_SIZE; + qplib_qp->rq.sg_info.pgshft = PAGE_SHIFT; } qplib_qp->dpi = &cntx->dpi; @@ -906,8 +938,8 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, false); if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to allocate HW AH for Shadow QP"); + ibdev_err(&rdev->ibdev, + "Failed to allocate HW AH for Shadow QP"); goto fail; } @@ -948,6 +980,8 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.sq.max_sge = 2; /* Q full delta can be 1 since it is internal QP */ qp->qplib_qp.sq.q_full_delta = 1; + qp->qplib_qp.sq.sg_info.pgsize = PAGE_SIZE; + qp->qplib_qp.sq.sg_info.pgshft = PAGE_SHIFT; qp->qplib_qp.scq = qp1_qp->scq; qp->qplib_qp.rcq = qp1_qp->rcq; @@ -956,6 +990,8 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge; /* Q full delta can be 1 since it is internal QP */ qp->qplib_qp.rq.q_full_delta = 1; + qp->qplib_qp.rq.sg_info.pgsize = PAGE_SIZE; + qp->qplib_qp.rq.sg_info.pgshft = PAGE_SHIFT; qp->qplib_qp.mtu = qp1_qp->mtu; @@ -967,8 +1003,6 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp if (rc) goto fail; - rdev->sqp_id = qp->qplib_qp.id; - spin_lock_init(&qp->sq_lock); INIT_LIST_HEAD(&qp->list); mutex_lock(&rdev->qp_lock); @@ -981,205 +1015,378 @@ fail: return NULL; } -struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, - struct ib_qp_init_attr *qp_init_attr, - struct ib_udata *udata) +static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, + struct ib_qp_init_attr *init_attr) { - struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); - struct bnxt_re_dev *rdev = pd->rdev; - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; - struct bnxt_re_qp *qp; - struct bnxt_re_cq *cq; - struct bnxt_re_srq *srq; - int rc, entries; + struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_qplib_qp *qplqp; + struct bnxt_re_dev *rdev; + int entries; - if ((qp_init_attr->cap.max_send_wr > dev_attr->max_qp_wqes) || - (qp_init_attr->cap.max_recv_wr > dev_attr->max_qp_wqes) || - (qp_init_attr->cap.max_send_sge > dev_attr->max_qp_sges) || - (qp_init_attr->cap.max_recv_sge > dev_attr->max_qp_sges) || - (qp_init_attr->cap.max_inline_data > dev_attr->max_inline_data)) - return ERR_PTR(-EINVAL); + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + dev_attr = &rdev->dev_attr; - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); + if (init_attr->srq) { + struct bnxt_re_srq *srq; - qp->rdev = rdev; - ether_addr_copy(qp->qplib_qp.smac, rdev->netdev->dev_addr); - qp->qplib_qp.pd = &pd->qplib_pd; - qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp); - qp->qplib_qp.type = __from_ib_qp_type(qp_init_attr->qp_type); + srq = container_of(init_attr->srq, struct bnxt_re_srq, ib_srq); + if (!srq) { + ibdev_err(&rdev->ibdev, "SRQ not found"); + return -EINVAL; + } + qplqp->srq = &srq->qplib_srq; + qplqp->rq.max_wqe = 0; + } else { + /* Allocate 1 more than what's provided so posting max doesn't + * mean empty. + */ + entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1); + qplqp->rq.max_wqe = min_t(u32, entries, + dev_attr->max_qp_wqes + 1); - if (qp_init_attr->qp_type == IB_QPT_GSI && - bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) - qp->qplib_qp.type = CMDQ_CREATE_QP_TYPE_GSI; - if (qp->qplib_qp.type == IB_QPT_MAX) { - dev_err(rdev_to_dev(rdev), "QP type 0x%x not supported", - qp->qplib_qp.type); - rc = -EINVAL; - goto fail; + qplqp->rq.q_full_delta = qplqp->rq.max_wqe - + init_attr->cap.max_recv_wr; + qplqp->rq.max_sge = init_attr->cap.max_recv_sge; + if (qplqp->rq.max_sge > dev_attr->max_qp_sges) + qplqp->rq.max_sge = dev_attr->max_qp_sges; + } + qplqp->rq.sg_info.pgsize = PAGE_SIZE; + qplqp->rq.sg_info.pgshft = PAGE_SHIFT; + + return 0; +} + +static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) +{ + struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_qplib_qp *qplqp; + struct bnxt_re_dev *rdev; + + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + dev_attr = &rdev->dev_attr; + + qplqp->rq.max_sge = dev_attr->max_qp_sges; + if (qplqp->rq.max_sge > dev_attr->max_qp_sges) + qplqp->rq.max_sge = dev_attr->max_qp_sges; + qplqp->rq.max_sge = 6; +} + +static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_qplib_qp *qplqp; + struct bnxt_re_dev *rdev; + int entries; + + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + dev_attr = &rdev->dev_attr; + + qplqp->sq.max_sge = init_attr->cap.max_send_sge; + if (qplqp->sq.max_sge > dev_attr->max_qp_sges) + qplqp->sq.max_sge = dev_attr->max_qp_sges; + /* + * Change the SQ depth if user has requested minimum using + * configfs. Only supported for kernel consumers + */ + entries = init_attr->cap.max_send_wr; + /* Allocate 128 + 1 more than what's provided */ + entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1); + qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + + BNXT_QPLIB_RESERVED_QP_WRS + 1); + qplqp->sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; + /* + * Reserving one slot for Phantom WQE. Application can + * post one extra entry in this case. But allowing this to avoid + * unexpected Queue full condition + */ + qplqp->sq.q_full_delta -= 1; + qplqp->sq.sg_info.pgsize = PAGE_SIZE; + qplqp->sq.sg_info.pgshft = PAGE_SHIFT; +} + +static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, + struct ib_qp_init_attr *init_attr) +{ + struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_qplib_qp *qplqp; + struct bnxt_re_dev *rdev; + int entries; + + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + dev_attr = &rdev->dev_attr; + + entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); + qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); + qplqp->sq.q_full_delta = qplqp->sq.max_wqe - + init_attr->cap.max_send_wr; + qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ + if (qplqp->sq.max_sge > dev_attr->max_qp_sges) + qplqp->sq.max_sge = dev_attr->max_qp_sges; +} + +static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev, + struct ib_qp_init_attr *init_attr) +{ + struct bnxt_qplib_chip_ctx *chip_ctx; + int qptype; + + chip_ctx = rdev->chip_ctx; + + qptype = __from_ib_qp_type(init_attr->qp_type); + if (qptype == IB_QPT_MAX) { + ibdev_err(&rdev->ibdev, "QP type 0x%x not supported", qptype); + qptype = -EOPNOTSUPP; + goto out; } - qp->qplib_qp.max_inline_data = qp_init_attr->cap.max_inline_data; - qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type == - IB_SIGNAL_ALL_WR) ? true : false); + if (bnxt_qplib_is_chip_gen_p5(chip_ctx) && + init_attr->qp_type == IB_QPT_GSI) + qptype = CMDQ_CREATE_QP_TYPE_GSI; +out: + return qptype; +} - qp->qplib_qp.sq.max_sge = qp_init_attr->cap.max_send_sge; - if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges) - qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges; +static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_qplib_qp *qplqp; + struct bnxt_re_dev *rdev; + struct bnxt_re_cq *cq; + int rc = 0, qptype; + + rdev = qp->rdev; + qplqp = &qp->qplib_qp; + dev_attr = &rdev->dev_attr; + + /* Setup misc params */ + ether_addr_copy(qplqp->smac, rdev->netdev->dev_addr); + qplqp->pd = &pd->qplib_pd; + qplqp->qp_handle = (u64)qplqp; + qplqp->max_inline_data = init_attr->cap.max_inline_data; + qplqp->sig_type = ((init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? + true : false); + qptype = bnxt_re_init_qp_type(rdev, init_attr); + if (qptype < 0) { + rc = qptype; + goto out; + } + qplqp->type = (u8)qptype; + + if (init_attr->qp_type == IB_QPT_RC) { + qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom; + qplqp->max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom; + } + qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); + qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */ + if (init_attr->create_flags) + ibdev_dbg(&rdev->ibdev, + "QP create flags 0x%x not supported", + init_attr->create_flags); - if (qp_init_attr->send_cq) { - cq = container_of(qp_init_attr->send_cq, struct bnxt_re_cq, - ib_cq); + /* Setup CQs */ + if (init_attr->send_cq) { + cq = container_of(init_attr->send_cq, struct bnxt_re_cq, ib_cq); if (!cq) { - dev_err(rdev_to_dev(rdev), "Send CQ not found"); + ibdev_err(&rdev->ibdev, "Send CQ not found"); rc = -EINVAL; - goto fail; + goto out; } - qp->qplib_qp.scq = &cq->qplib_cq; + qplqp->scq = &cq->qplib_cq; qp->scq = cq; } - if (qp_init_attr->recv_cq) { - cq = container_of(qp_init_attr->recv_cq, struct bnxt_re_cq, - ib_cq); + if (init_attr->recv_cq) { + cq = container_of(init_attr->recv_cq, struct bnxt_re_cq, ib_cq); if (!cq) { - dev_err(rdev_to_dev(rdev), "Receive CQ not found"); + ibdev_err(&rdev->ibdev, "Receive CQ not found"); rc = -EINVAL; - goto fail; + goto out; } - qp->qplib_qp.rcq = &cq->qplib_cq; + qplqp->rcq = &cq->qplib_cq; qp->rcq = cq; } - if (qp_init_attr->srq) { - srq = container_of(qp_init_attr->srq, struct bnxt_re_srq, - ib_srq); - if (!srq) { - dev_err(rdev_to_dev(rdev), "SRQ not found"); - rc = -EINVAL; - goto fail; - } - qp->qplib_qp.srq = &srq->qplib_srq; - qp->qplib_qp.rq.max_wqe = 0; - } else { - /* Allocate 1 more than what's provided so posting max doesn't - * mean empty - */ - entries = roundup_pow_of_two(qp_init_attr->cap.max_recv_wr + 1); - qp->qplib_qp.rq.max_wqe = min_t(u32, entries, - dev_attr->max_qp_wqes + 1); + /* Setup RQ/SRQ */ + rc = bnxt_re_init_rq_attr(qp, init_attr); + if (rc) + goto out; + if (init_attr->qp_type == IB_QPT_GSI) + bnxt_re_adjust_gsi_rq_attr(qp); - qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe - - qp_init_attr->cap.max_recv_wr; + /* Setup SQ */ + bnxt_re_init_sq_attr(qp, init_attr, udata); + if (init_attr->qp_type == IB_QPT_GSI) + bnxt_re_adjust_gsi_sq_attr(qp, init_attr); - qp->qplib_qp.rq.max_sge = qp_init_attr->cap.max_recv_sge; - if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges) - qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges; + if (udata) /* This will update DPI and qp_handle */ + rc = bnxt_re_init_user_qp(rdev, pd, qp, udata); +out: + return rc; +} + +static int bnxt_re_create_shadow_gsi(struct bnxt_re_qp *qp, + struct bnxt_re_pd *pd) +{ + struct bnxt_re_sqp_entries *sqp_tbl = NULL; + struct bnxt_re_dev *rdev; + struct bnxt_re_qp *sqp; + struct bnxt_re_ah *sah; + int rc = 0; + + rdev = qp->rdev; + /* Create a shadow QP to handle the QP1 traffic */ + sqp_tbl = kzalloc(sizeof(*sqp_tbl) * BNXT_RE_MAX_GSI_SQP_ENTRIES, + GFP_KERNEL); + if (!sqp_tbl) + return -ENOMEM; + rdev->gsi_ctx.sqp_tbl = sqp_tbl; + + sqp = bnxt_re_create_shadow_qp(pd, &rdev->qplib_res, &qp->qplib_qp); + if (!sqp) { + rc = -ENODEV; + ibdev_err(&rdev->ibdev, "Failed to create Shadow QP for QP1"); + goto out; + } + rdev->gsi_ctx.gsi_sqp = sqp; + + sqp->rcq = qp->rcq; + sqp->scq = qp->scq; + sah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res, + &qp->qplib_qp); + if (!sah) { + bnxt_qplib_destroy_qp(&rdev->qplib_res, + &sqp->qplib_qp); + rc = -ENODEV; + ibdev_err(&rdev->ibdev, + "Failed to create AH entry for ShadowQP"); + goto out; } + rdev->gsi_ctx.gsi_sah = sah; - qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); + return 0; +out: + kfree(sqp_tbl); + return rc; +} - if (qp_init_attr->qp_type == IB_QPT_GSI && - !(bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))) { - /* Allocate 1 more than what's provided */ - entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1); - qp->qplib_qp.sq.max_wqe = min_t(u32, entries, - dev_attr->max_qp_wqes + 1); - qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe - - qp_init_attr->cap.max_send_wr; - qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges; - if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges) - qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges; - qp->qplib_qp.sq.max_sge++; - if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges) - qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges; - - qp->qplib_qp.rq_hdr_buf_size = - BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2; - - qp->qplib_qp.sq_hdr_buf_size = - BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2; - qp->qplib_qp.dpi = &rdev->dpi_privileged; - rc = bnxt_qplib_create_qp1(&rdev->qplib_res, &qp->qplib_qp); - if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to create HW QP1"); - goto fail; - } - /* Create a shadow QP to handle the QP1 traffic */ - rdev->qp1_sqp = bnxt_re_create_shadow_qp(pd, &rdev->qplib_res, - &qp->qplib_qp); - if (!rdev->qp1_sqp) { - rc = -EINVAL; - dev_err(rdev_to_dev(rdev), - "Failed to create Shadow QP for QP1"); - goto qp_destroy; - } - rdev->sqp_ah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res, - &qp->qplib_qp); - if (!rdev->sqp_ah) { - bnxt_qplib_destroy_qp(&rdev->qplib_res, - &rdev->qp1_sqp->qplib_qp); - rc = -EINVAL; - dev_err(rdev_to_dev(rdev), - "Failed to create AH entry for ShadowQP"); - goto qp_destroy; - } +static int bnxt_re_create_gsi_qp(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, + struct ib_qp_init_attr *init_attr) +{ + struct bnxt_re_dev *rdev; + struct bnxt_qplib_qp *qplqp; + int rc = 0; - } else { - /* Allocate 128 + 1 more than what's provided */ - entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + - BNXT_QPLIB_RESERVED_QP_WRS + 1); - qp->qplib_qp.sq.max_wqe = min_t(u32, entries, - dev_attr->max_qp_wqes + - BNXT_QPLIB_RESERVED_QP_WRS + 1); - qp->qplib_qp.sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; + rdev = qp->rdev; + qplqp = &qp->qplib_qp; - /* - * Reserving one slot for Phantom WQE. Application can - * post one extra entry in this case. But allowing this to avoid - * unexpected Queue full condition - */ + qplqp->rq_hdr_buf_size = BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2; + qplqp->sq_hdr_buf_size = BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2; - qp->qplib_qp.sq.q_full_delta -= 1; + rc = bnxt_qplib_create_qp1(&rdev->qplib_res, qplqp); + if (rc) { + ibdev_err(&rdev->ibdev, "create HW QP1 failed!"); + goto out; + } - qp->qplib_qp.max_rd_atomic = dev_attr->max_qp_rd_atom; - qp->qplib_qp.max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom; - if (udata) { - rc = bnxt_re_init_user_qp(rdev, pd, qp, udata); - if (rc) - goto fail; - } else { - qp->qplib_qp.dpi = &rdev->dpi_privileged; - } + rc = bnxt_re_create_shadow_gsi(qp, pd); +out: + return rc; +} + +static bool bnxt_re_test_qp_limits(struct bnxt_re_dev *rdev, + struct ib_qp_init_attr *init_attr, + struct bnxt_qplib_dev_attr *dev_attr) +{ + bool rc = true; + + if (init_attr->cap.max_send_wr > dev_attr->max_qp_wqes || + init_attr->cap.max_recv_wr > dev_attr->max_qp_wqes || + init_attr->cap.max_send_sge > dev_attr->max_qp_sges || + init_attr->cap.max_recv_sge > dev_attr->max_qp_sges || + init_attr->cap.max_inline_data > dev_attr->max_inline_data) { + ibdev_err(&rdev->ibdev, + "Create QP failed - max exceeded! 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x", + init_attr->cap.max_send_wr, dev_attr->max_qp_wqes, + init_attr->cap.max_recv_wr, dev_attr->max_qp_wqes, + init_attr->cap.max_send_sge, dev_attr->max_qp_sges, + init_attr->cap.max_recv_sge, dev_attr->max_qp_sges, + init_attr->cap.max_inline_data, + dev_attr->max_inline_data); + rc = false; + } + return rc; +} +struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata) +{ + struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + struct bnxt_re_dev *rdev = pd->rdev; + struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_re_qp *qp; + int rc; + + rc = bnxt_re_test_qp_limits(rdev, qp_init_attr, dev_attr); + if (!rc) { + rc = -EINVAL; + goto exit; + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + rc = -ENOMEM; + goto exit; + } + qp->rdev = rdev; + rc = bnxt_re_init_qp_attr(qp, pd, qp_init_attr, udata); + if (rc) + goto fail; + + if (qp_init_attr->qp_type == IB_QPT_GSI && + !(bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))) { + rc = bnxt_re_create_gsi_qp(qp, pd, qp_init_attr); + if (rc == -ENODEV) + goto qp_destroy; + if (rc) + goto fail; + } else { rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to create HW QP"); + ibdev_err(&rdev->ibdev, "Failed to create HW QP"); goto free_umem; } + if (udata) { + struct bnxt_re_qp_resp resp; + + resp.qpid = qp->qplib_qp.id; + resp.rsvd = 0; + rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (rc) { + ibdev_err(&rdev->ibdev, "Failed to copy QP udata"); + goto qp_destroy; + } + } } qp->ib_qp.qp_num = qp->qplib_qp.id; + if (qp_init_attr->qp_type == IB_QPT_GSI) + rdev->gsi_ctx.gsi_qp = qp; spin_lock_init(&qp->sq_lock); spin_lock_init(&qp->rq_lock); - - if (udata) { - struct bnxt_re_qp_resp resp; - - resp.qpid = qp->ib_qp.qp_num; - resp.rsvd = 0; - rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); - if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to copy QP udata"); - goto qp_destroy; - } - } INIT_LIST_HEAD(&qp->list); mutex_lock(&rdev->qp_lock); list_add_tail(&qp->list, &rdev->qp_list); - atomic_inc(&rdev->qp_count); mutex_unlock(&rdev->qp_lock); + atomic_inc(&rdev->qp_count); return &qp->ib_qp; qp_destroy: @@ -1189,6 +1396,7 @@ free_umem: ib_umem_release(qp->sumem); fail: kfree(qp); +exit: return ERR_PTR(rc); } @@ -1311,9 +1519,11 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, return PTR_ERR(umem); srq->umem = umem; - qplib_srq->sg_info.sglist = umem->sg_head.sgl; + qplib_srq->sg_info.sghead = umem->sg_head.sgl; qplib_srq->sg_info.npages = ib_umem_num_pages(umem); qplib_srq->sg_info.nmap = umem->nmap; + qplib_srq->sg_info.pgsize = PAGE_SIZE; + qplib_srq->sg_info.pgshft = PAGE_SHIFT; qplib_srq->srq_handle = ureq.srq_handle; qplib_srq->dpi = &cntx->dpi; @@ -1334,7 +1544,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, int rc, entries; if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) { - dev_err(rdev_to_dev(rdev), "Create CQ failed - max exceeded"); + ibdev_err(&rdev->ibdev, "Create CQ failed - max exceeded"); rc = -EINVAL; goto exit; } @@ -1369,7 +1579,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, rc = bnxt_qplib_create_srq(&rdev->qplib_res, &srq->qplib_srq); if (rc) { - dev_err(rdev_to_dev(rdev), "Create HW SRQ failed!"); + ibdev_err(&rdev->ibdev, "Create HW SRQ failed!"); goto fail; } @@ -1379,7 +1589,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, resp.srqid = srq->qplib_srq.id; rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (rc) { - dev_err(rdev_to_dev(rdev), "SRQ copy to udata failed!"); + ibdev_err(&rdev->ibdev, "SRQ copy to udata failed!"); bnxt_qplib_destroy_srq(&rdev->qplib_res, &srq->qplib_srq); goto fail; @@ -1418,7 +1628,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, srq->qplib_srq.threshold = srq_attr->srq_limit; rc = bnxt_qplib_modify_srq(&rdev->qplib_res, &srq->qplib_srq); if (rc) { - dev_err(rdev_to_dev(rdev), "Modify HW SRQ failed!"); + ibdev_err(&rdev->ibdev, "Modify HW SRQ failed!"); return rc; } /* On success, update the shadow */ @@ -1426,8 +1636,8 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, /* No need to Build and send response back to udata */ break; default: - dev_err(rdev_to_dev(rdev), - "Unsupported srq_attr_mask 0x%x", srq_attr_mask); + ibdev_err(&rdev->ibdev, + "Unsupported srq_attr_mask 0x%x", srq_attr_mask); return -EINVAL; } return 0; @@ -1445,7 +1655,7 @@ int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) tsrq.qplib_srq.id = srq->qplib_srq.id; rc = bnxt_qplib_query_srq(&rdev->qplib_res, &tsrq.qplib_srq); if (rc) { - dev_err(rdev_to_dev(rdev), "Query HW SRQ failed!"); + ibdev_err(&rdev->ibdev, "Query HW SRQ failed!"); return rc; } srq_attr->max_wr = srq->qplib_srq.max_wqe; @@ -1487,7 +1697,7 @@ static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp1_qp, int qp_attr_mask) { - struct bnxt_re_qp *qp = rdev->qp1_sqp; + struct bnxt_re_qp *qp = rdev->gsi_ctx.gsi_sqp; int rc = 0; if (qp_attr_mask & IB_QP_STATE) { @@ -1511,8 +1721,7 @@ static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev, rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) - dev_err(rdev_to_dev(rdev), - "Failed to modify Shadow QP for QP1"); + ibdev_err(&rdev->ibdev, "Failed to modify Shadow QP for QP1"); return rc; } @@ -1533,15 +1742,15 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, new_qp_state = qp_attr->qp_state; if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state, ib_qp->qp_type, qp_attr_mask)) { - dev_err(rdev_to_dev(rdev), - "Invalid attribute mask: %#x specified ", - qp_attr_mask); - dev_err(rdev_to_dev(rdev), - "for qpn: %#x type: %#x", - ib_qp->qp_num, ib_qp->qp_type); - dev_err(rdev_to_dev(rdev), - "curr_qp_state=0x%x, new_qp_state=0x%x\n", - curr_qp_state, new_qp_state); + ibdev_err(&rdev->ibdev, + "Invalid attribute mask: %#x specified ", + qp_attr_mask); + ibdev_err(&rdev->ibdev, + "for qpn: %#x type: %#x", + ib_qp->qp_num, ib_qp->qp_type); + ibdev_err(&rdev->ibdev, + "curr_qp_state=0x%x, new_qp_state=0x%x\n", + curr_qp_state, new_qp_state); return -EINVAL; } qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE; @@ -1549,18 +1758,16 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, if (!qp->sumem && qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { - dev_dbg(rdev_to_dev(rdev), - "Move QP = %p to flush list\n", - qp); + ibdev_dbg(&rdev->ibdev, + "Move QP = %p to flush list\n", qp); flags = bnxt_re_lock_cqs(qp); bnxt_qplib_add_flush_qp(&qp->qplib_qp); bnxt_re_unlock_cqs(qp, flags); } if (!qp->sumem && qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { - dev_dbg(rdev_to_dev(rdev), - "Move QP = %p out of flush list\n", - qp); + ibdev_dbg(&rdev->ibdev, + "Move QP = %p out of flush list\n", qp); flags = bnxt_re_lock_cqs(qp); bnxt_qplib_clean_qp(&qp->qplib_qp); bnxt_re_unlock_cqs(qp, flags); @@ -1593,6 +1800,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, const struct ib_global_route *grh = rdma_ah_read_grh(&qp_attr->ah_attr); const struct ib_gid_attr *sgid_attr; + struct bnxt_re_gid_ctx *ctx; qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID | CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL | @@ -1604,11 +1812,12 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, memcpy(qp->qplib_qp.ah.dgid.data, grh->dgid.raw, sizeof(qp->qplib_qp.ah.dgid.data)); qp->qplib_qp.ah.flow_label = grh->flow_label; - /* If RoCE V2 is enabled, stack will have two entries for - * each GID entry. Avoiding this duplicte entry in HW. Dividing - * the GID index by 2 for RoCE V2 + sgid_attr = grh->sgid_attr; + /* Get the HW context of the GID. The reference + * of GID table entry is already taken by the caller. */ - qp->qplib_qp.ah.sgid_index = grh->sgid_index / 2; + ctx = rdma_read_gid_hw_context(sgid_attr); + qp->qplib_qp.ah.sgid_index = ctx->idx; qp->qplib_qp.ah.host_sgid_index = grh->sgid_index; qp->qplib_qp.ah.hop_limit = grh->hop_limit; qp->qplib_qp.ah.traffic_class = grh->traffic_class; @@ -1616,7 +1825,6 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, ether_addr_copy(qp->qplib_qp.ah.dmac, qp_attr->ah_attr.roce.dmac); - sgid_attr = qp_attr->ah_attr.grh.sgid_attr; rc = rdma_read_gid_l2_fields(sgid_attr, NULL, &qp->qplib_qp.smac[0]); if (rc) @@ -1690,10 +1898,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, if (qp_attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { if (qp_attr->max_dest_rd_atomic > dev_attr->max_qp_init_rd_atom) { - dev_err(rdev_to_dev(rdev), - "max_dest_rd_atomic requested%d is > dev_max%d", - qp_attr->max_dest_rd_atomic, - dev_attr->max_qp_init_rd_atom); + ibdev_err(&rdev->ibdev, + "max_dest_rd_atomic requested%d is > dev_max%d", + qp_attr->max_dest_rd_atomic, + dev_attr->max_qp_init_rd_atom); return -EINVAL; } @@ -1714,8 +1922,8 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, (qp_attr->cap.max_recv_sge >= dev_attr->max_qp_sges) || (qp_attr->cap.max_inline_data >= dev_attr->max_inline_data)) { - dev_err(rdev_to_dev(rdev), - "Create QP failed - max exceeded"); + ibdev_err(&rdev->ibdev, + "Create QP failed - max exceeded"); return -EINVAL; } entries = roundup_pow_of_two(qp_attr->cap.max_send_wr); @@ -1748,10 +1956,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, } rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to modify HW QP"); + ibdev_err(&rdev->ibdev, "Failed to modify HW QP"); return rc; } - if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) + if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_sqp) rc = bnxt_re_modify_shadow_qp(rdev, qp, qp_attr_mask); return rc; } @@ -1773,7 +1981,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, rc = bnxt_qplib_query_qp(&rdev->qplib_res, qplib_qp); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to query HW QP"); + ibdev_err(&rdev->ibdev, "Failed to query HW QP"); goto out; } qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state); @@ -1978,7 +2186,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, wqe->num_sge++; } else { - dev_err(rdev_to_dev(qp->rdev), "QP1 buffer is empty!"); + ibdev_err(&qp->rdev->ibdev, "QP1 buffer is empty!"); rc = -ENOMEM; } return rc; @@ -1995,9 +2203,12 @@ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp, struct bnxt_qplib_swqe *wqe, int payload_size) { + struct bnxt_re_sqp_entries *sqp_entry; struct bnxt_qplib_sge ref, sge; + struct bnxt_re_dev *rdev; u32 rq_prod_index; - struct bnxt_re_sqp_entries *sqp_entry; + + rdev = qp->rdev; rq_prod_index = bnxt_qplib_get_rq_prod_index(&qp->qplib_qp); @@ -2012,7 +2223,7 @@ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp, ref.lkey = wqe->sg_list[0].lkey; ref.size = wqe->sg_list[0].size; - sqp_entry = &qp->rdev->sqp_tbl[rq_prod_index]; + sqp_entry = &rdev->gsi_ctx.sqp_tbl[rq_prod_index]; /* SGE 1 */ wqe->sg_list[0].addr = sge.addr; @@ -2164,7 +2375,7 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr, wqe->frmr.pbl_dma_ptr = qplib_frpl->hwq.pbl_dma_ptr[0]; wqe->frmr.page_list = mr->pages; wqe->frmr.page_list_len = mr->npages; - wqe->frmr.levels = qplib_frpl->hwq.level + 1; + wqe->frmr.levels = qplib_frpl->hwq.level; wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR; /* Need unconditional fence for reg_mr @@ -2211,8 +2422,8 @@ static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev, if ((sge_len + wqe->inline_len) > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { - dev_err(rdev_to_dev(rdev), - "Inline data size requested > supported value"); + ibdev_err(&rdev->ibdev, + "Inline data size requested > supported value"); return -EINVAL; } sge_len = wr->sg_list[i].length; @@ -2259,21 +2470,18 @@ static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp, const struct ib_send_wr *wr) { - struct bnxt_qplib_swqe wqe; int rc = 0, payload_sz = 0; unsigned long flags; spin_lock_irqsave(&qp->sq_lock, flags); - memset(&wqe, 0, sizeof(wqe)); while (wr) { - /* House keeping */ - memset(&wqe, 0, sizeof(wqe)); + struct bnxt_qplib_swqe wqe = {}; /* Common */ wqe.num_sge = wr->num_sge; if (wr->num_sge > qp->qplib_qp.sq.max_sge) { - dev_err(rdev_to_dev(rdev), - "Limit exceeded for Send SGEs"); + ibdev_err(&rdev->ibdev, + "Limit exceeded for Send SGEs"); rc = -EINVAL; goto bad; } @@ -2292,9 +2500,9 @@ static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev, rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe); bad: if (rc) { - dev_err(rdev_to_dev(rdev), - "Post send failed opcode = %#x rc = %d", - wr->opcode, rc); + ibdev_err(&rdev->ibdev, + "Post send failed opcode = %#x rc = %d", + wr->opcode, rc); break; } wr = wr->next; @@ -2321,8 +2529,8 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, /* Common */ wqe.num_sge = wr->num_sge; if (wr->num_sge > qp->qplib_qp.sq.max_sge) { - dev_err(rdev_to_dev(qp->rdev), - "Limit exceeded for Send SGEs"); + ibdev_err(&qp->rdev->ibdev, + "Limit exceeded for Send SGEs"); rc = -EINVAL; goto bad; } @@ -2367,8 +2575,8 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, rc = bnxt_re_build_atomic_wqe(wr, &wqe); break; case IB_WR_RDMA_READ_WITH_INV: - dev_err(rdev_to_dev(qp->rdev), - "RDMA Read with Invalidate is not supported"); + ibdev_err(&qp->rdev->ibdev, + "RDMA Read with Invalidate is not supported"); rc = -EINVAL; goto bad; case IB_WR_LOCAL_INV: @@ -2379,8 +2587,8 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, break; default: /* Unsupported WRs */ - dev_err(rdev_to_dev(qp->rdev), - "WR (%#x) is not supported", wr->opcode); + ibdev_err(&qp->rdev->ibdev, + "WR (%#x) is not supported", wr->opcode); rc = -EINVAL; goto bad; } @@ -2388,9 +2596,9 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe); bad: if (rc) { - dev_err(rdev_to_dev(qp->rdev), - "post_send failed op:%#x qps = %#x rc = %d\n", - wr->opcode, qp->qplib_qp.state, rc); + ibdev_err(&qp->rdev->ibdev, + "post_send failed op:%#x qps = %#x rc = %d\n", + wr->opcode, qp->qplib_qp.state, rc); *bad_wr = wr; break; } @@ -2418,8 +2626,8 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, /* Common */ wqe.num_sge = wr->num_sge; if (wr->num_sge > qp->qplib_qp.rq.max_sge) { - dev_err(rdev_to_dev(rdev), - "Limit exceeded for Receive SGEs"); + ibdev_err(&rdev->ibdev, + "Limit exceeded for Receive SGEs"); rc = -EINVAL; break; } @@ -2455,8 +2663,8 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, /* Common */ wqe.num_sge = wr->num_sge; if (wr->num_sge > qp->qplib_qp.rq.max_sge) { - dev_err(rdev_to_dev(qp->rdev), - "Limit exceeded for Receive SGEs"); + ibdev_err(&qp->rdev->ibdev, + "Limit exceeded for Receive SGEs"); rc = -EINVAL; *bad_wr = wr; break; @@ -2527,7 +2735,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, /* Validate CQ fields */ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) { - dev_err(rdev_to_dev(rdev), "Failed to create CQ -max exceeded"); + ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded"); return -EINVAL; } @@ -2538,6 +2746,8 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (entries > dev_attr->max_cq_wqes + 1) entries = dev_attr->max_cq_wqes + 1; + cq->qplib_cq.sg_info.pgsize = PAGE_SIZE; + cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT; if (udata) { struct bnxt_re_cq_req req; struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context( @@ -2554,7 +2764,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rc = PTR_ERR(cq->umem); goto fail; } - cq->qplib_cq.sg_info.sglist = cq->umem->sg_head.sgl; + cq->qplib_cq.sg_info.sghead = cq->umem->sg_head.sgl; cq->qplib_cq.sg_info.npages = ib_umem_num_pages(cq->umem); cq->qplib_cq.sg_info.nmap = cq->umem->nmap; cq->qplib_cq.dpi = &uctx->dpi; @@ -2581,7 +2791,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to create HW CQ"); + ibdev_err(&rdev->ibdev, "Failed to create HW CQ"); goto fail; } @@ -2601,7 +2811,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, resp.rsvd = 0; rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to copy CQ udata"); + ibdev_err(&rdev->ibdev, "Failed to copy CQ udata"); bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); goto c2fail; } @@ -2832,12 +3042,13 @@ static bool bnxt_re_is_loopback_packet(struct bnxt_re_dev *rdev, return rc; } -static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp, +static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp, struct bnxt_qplib_cqe *cqe) { - struct bnxt_re_dev *rdev = qp1_qp->rdev; + struct bnxt_re_dev *rdev = gsi_qp->rdev; struct bnxt_re_sqp_entries *sqp_entry = NULL; - struct bnxt_re_qp *qp = rdev->qp1_sqp; + struct bnxt_re_qp *gsi_sqp = rdev->gsi_ctx.gsi_sqp; + struct bnxt_re_ah *gsi_sah; struct ib_send_wr *swr; struct ib_ud_wr udwr; struct ib_recv_wr rwr; @@ -2860,26 +3071,26 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp, swr = &udwr.wr; tbl_idx = cqe->wr_id; - rq_hdr_buf = qp1_qp->qplib_qp.rq_hdr_buf + - (tbl_idx * qp1_qp->qplib_qp.rq_hdr_buf_size); - rq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&qp1_qp->qplib_qp, + rq_hdr_buf = gsi_qp->qplib_qp.rq_hdr_buf + + (tbl_idx * gsi_qp->qplib_qp.rq_hdr_buf_size); + rq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&gsi_qp->qplib_qp, tbl_idx); /* Shadow QP header buffer */ - shrq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&qp->qplib_qp, + shrq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&gsi_qp->qplib_qp, tbl_idx); - sqp_entry = &rdev->sqp_tbl[tbl_idx]; + sqp_entry = &rdev->gsi_ctx.sqp_tbl[tbl_idx]; /* Store this cqe */ memcpy(&sqp_entry->cqe, cqe, sizeof(struct bnxt_qplib_cqe)); - sqp_entry->qp1_qp = qp1_qp; + sqp_entry->qp1_qp = gsi_qp; /* Find packet type from the cqe */ pkt_type = bnxt_re_check_packet_type(cqe->raweth_qp1_flags, cqe->raweth_qp1_flags2); if (pkt_type < 0) { - dev_err(rdev_to_dev(rdev), "Invalid packet\n"); + ibdev_err(&rdev->ibdev, "Invalid packet\n"); return -EINVAL; } @@ -2926,10 +3137,10 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp, rwr.wr_id = tbl_idx; rwr.next = NULL; - rc = bnxt_re_post_recv_shadow_qp(rdev, qp, &rwr); + rc = bnxt_re_post_recv_shadow_qp(rdev, gsi_sqp, &rwr); if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to post Rx buffers to shadow QP"); + ibdev_err(&rdev->ibdev, + "Failed to post Rx buffers to shadow QP"); return -ENOMEM; } @@ -2938,13 +3149,13 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp, swr->wr_id = tbl_idx; swr->opcode = IB_WR_SEND; swr->next = NULL; - - udwr.ah = &rdev->sqp_ah->ib_ah; - udwr.remote_qpn = rdev->qp1_sqp->qplib_qp.id; - udwr.remote_qkey = rdev->qp1_sqp->qplib_qp.qkey; + gsi_sah = rdev->gsi_ctx.gsi_sah; + udwr.ah = &gsi_sah->ib_ah; + udwr.remote_qpn = gsi_sqp->qplib_qp.id; + udwr.remote_qkey = gsi_sqp->qplib_qp.qkey; /* post data received in the send queue */ - rc = bnxt_re_post_send_shadow_qp(rdev, qp, swr); + rc = bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr); return 0; } @@ -2998,12 +3209,12 @@ static void bnxt_re_process_res_rc_wc(struct ib_wc *wc, wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; } -static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp, +static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *gsi_sqp, struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { - struct bnxt_re_dev *rdev = qp->rdev; - struct bnxt_re_qp *qp1_qp = NULL; + struct bnxt_re_dev *rdev = gsi_sqp->rdev; + struct bnxt_re_qp *gsi_qp = NULL; struct bnxt_qplib_cqe *orig_cqe = NULL; struct bnxt_re_sqp_entries *sqp_entry = NULL; int nw_type; @@ -3013,13 +3224,13 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp, tbl_idx = cqe->wr_id; - sqp_entry = &rdev->sqp_tbl[tbl_idx]; - qp1_qp = sqp_entry->qp1_qp; + sqp_entry = &rdev->gsi_ctx.sqp_tbl[tbl_idx]; + gsi_qp = sqp_entry->qp1_qp; orig_cqe = &sqp_entry->cqe; wc->wr_id = sqp_entry->wrid; wc->byte_len = orig_cqe->length; - wc->qp = &qp1_qp->ib_qp; + wc->qp = &gsi_qp->ib_qp; wc->ex.imm_data = orig_cqe->immdata; wc->src_qp = orig_cqe->src_qp; @@ -3084,11 +3295,11 @@ static int send_phantom_wqe(struct bnxt_re_qp *qp) rc = bnxt_re_bind_fence_mw(lib_qp); if (!rc) { lib_qp->sq.phantom_wqe_cnt++; - dev_dbg(&lib_qp->sq.hwq.pdev->dev, - "qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n", - lib_qp->id, lib_qp->sq.hwq.prod, - HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq), - lib_qp->sq.phantom_wqe_cnt); + ibdev_dbg(&qp->rdev->ibdev, + "qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n", + lib_qp->id, lib_qp->sq.hwq.prod, + HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq), + lib_qp->sq.phantom_wqe_cnt); } spin_unlock_irqrestore(&qp->sq_lock, flags); @@ -3098,7 +3309,7 @@ static int send_phantom_wqe(struct bnxt_re_qp *qp) int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) { struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); - struct bnxt_re_qp *qp; + struct bnxt_re_qp *qp, *sh_qp; struct bnxt_qplib_cqe *cqe; int i, ncqe, budget; struct bnxt_qplib_q *sq; @@ -3111,7 +3322,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) budget = min_t(u32, num_entries, cq->max_cql); num_entries = budget; if (!cq->cql) { - dev_err(rdev_to_dev(cq->rdev), "POLL CQ : no CQL to use"); + ibdev_err(&cq->rdev->ibdev, "POLL CQ : no CQL to use"); goto exit; } cqe = &cq->cql[0]; @@ -3124,8 +3335,8 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) qp = container_of(lib_qp, struct bnxt_re_qp, qplib_qp); if (send_phantom_wqe(qp) == -ENOMEM) - dev_err(rdev_to_dev(cq->rdev), - "Phantom failed! Scheduled to send again\n"); + ibdev_err(&cq->rdev->ibdev, + "Phantom failed! Scheduled to send again\n"); else sq->send_phantom = false; } @@ -3149,8 +3360,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) (unsigned long)(cqe->qp_handle), struct bnxt_re_qp, qplib_qp); if (!qp) { - dev_err(rdev_to_dev(cq->rdev), - "POLL CQ : bad QP handle"); + ibdev_err(&cq->rdev->ibdev, "POLL CQ : bad QP handle"); continue; } wc->qp = &qp->ib_qp; @@ -3162,8 +3372,9 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) switch (cqe->opcode) { case CQ_BASE_CQE_TYPE_REQ: - if (qp->rdev->qp1_sqp && qp->qplib_qp.id == - qp->rdev->qp1_sqp->qplib_qp.id) { + sh_qp = qp->rdev->gsi_ctx.gsi_sqp; + if (sh_qp && + qp->qplib_qp.id == sh_qp->qplib_qp.id) { /* Handle this completion with * the stored completion */ @@ -3189,7 +3400,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) * stored in the table */ tbl_idx = cqe->wr_id; - sqp_entry = &cq->rdev->sqp_tbl[tbl_idx]; + sqp_entry = &cq->rdev->gsi_ctx.sqp_tbl[tbl_idx]; wc->wr_id = sqp_entry->wrid; bnxt_re_process_res_rawqp1_wc(wc, cqe); break; @@ -3197,8 +3408,9 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) bnxt_re_process_res_rc_wc(wc, cqe); break; case CQ_BASE_CQE_TYPE_RES_UD: - if (qp->rdev->qp1_sqp && qp->qplib_qp.id == - qp->rdev->qp1_sqp->qplib_qp.id) { + sh_qp = qp->rdev->gsi_ctx.gsi_sqp; + if (sh_qp && + qp->qplib_qp.id == sh_qp->qplib_qp.id) { /* Handle this completion with * the stored completion */ @@ -3213,9 +3425,9 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) bnxt_re_process_res_ud_wc(qp, wc, cqe); break; default: - dev_err(rdev_to_dev(cq->rdev), - "POLL CQ : type 0x%x not handled", - cqe->opcode); + ibdev_err(&cq->rdev->ibdev, + "POLL CQ : type 0x%x not handled", + cqe->opcode); continue; } wc++; @@ -3308,7 +3520,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { - dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc); + ibdev_err(&rdev->ibdev, "Dereg MR failed: %#x\n", rc); return rc; } @@ -3355,7 +3567,7 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, int rc; if (type != IB_MR_TYPE_MEM_REG) { - dev_dbg(rdev_to_dev(rdev), "MR type 0x%x not supported", type); + ibdev_dbg(&rdev->ibdev, "MR type 0x%x not supported", type); return ERR_PTR(-EINVAL); } if (max_num_sg > MAX_PBL_LVL_1_PGS) @@ -3385,8 +3597,8 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, rc = bnxt_qplib_alloc_fast_reg_page_list(&rdev->qplib_res, &mr->qplib_frpl, max_num_sg); if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to allocate HW FR page list"); + ibdev_err(&rdev->ibdev, + "Failed to allocate HW FR page list"); goto fail_mr; } @@ -3421,7 +3633,7 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B); rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mw->qplib_mw); if (rc) { - dev_err(rdev_to_dev(rdev), "Allocate MW failed!"); + ibdev_err(&rdev->ibdev, "Allocate MW failed!"); goto fail; } mw->ib_mw.rkey = mw->qplib_mw.rkey; @@ -3442,7 +3654,7 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mw->qplib_mw); if (rc) { - dev_err(rdev_to_dev(rdev), "Free MW failed: %#x\n", rc); + ibdev_err(&rdev->ibdev, "Free MW failed: %#x\n", rc); return rc; } @@ -3494,8 +3706,8 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, int umem_pgs, page_shift, rc; if (length > BNXT_RE_MAX_MR_SIZE) { - dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%lld\n", - length, BNXT_RE_MAX_MR_SIZE); + ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n", + length, BNXT_RE_MAX_MR_SIZE); return ERR_PTR(-ENOMEM); } @@ -3510,7 +3722,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to allocate MR"); + ibdev_err(&rdev->ibdev, "Failed to allocate MR"); goto free_mr; } /* The fixed portion of the rkey is the same as the lkey */ @@ -3518,7 +3730,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags); if (IS_ERR(umem)) { - dev_err(rdev_to_dev(rdev), "Failed to get umem"); + ibdev_err(&rdev->ibdev, "Failed to get umem"); rc = -EFAULT; goto free_mrw; } @@ -3527,7 +3739,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, mr->qplib_mr.va = virt_addr; umem_pgs = ib_umem_page_count(umem); if (!umem_pgs) { - dev_err(rdev_to_dev(rdev), "umem is invalid!"); + ibdev_err(&rdev->ibdev, "umem is invalid!"); rc = -EINVAL; goto free_umem; } @@ -3544,15 +3756,15 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, virt_addr)); if (!bnxt_re_page_size_ok(page_shift)) { - dev_err(rdev_to_dev(rdev), "umem page size unsupported!"); + ibdev_err(&rdev->ibdev, "umem page size unsupported!"); rc = -EFAULT; goto fail; } if (page_shift == BNXT_RE_PAGE_SHIFT_4K && length > BNXT_RE_MAX_MR_SIZE_LOW) { - dev_err(rdev_to_dev(rdev), "Requested MR Sz:%llu Max sup:%llu", - length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); + ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu", + length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); rc = -EINVAL; goto fail; } @@ -3562,7 +3774,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl, umem_pgs, false, 1 << page_shift); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to register user MR"); + ibdev_err(&rdev->ibdev, "Failed to register user MR"); goto fail; } @@ -3595,12 +3807,11 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) u32 chip_met_rev_num = 0; int rc; - dev_dbg(rdev_to_dev(rdev), "ABI version requested %u", - ibdev->ops.uverbs_abi_ver); + ibdev_dbg(ibdev, "ABI version requested %u", ibdev->ops.uverbs_abi_ver); if (ibdev->ops.uverbs_abi_ver != BNXT_RE_ABI_VERSION) { - dev_dbg(rdev_to_dev(rdev), " is different from the device %d ", - BNXT_RE_ABI_VERSION); + ibdev_dbg(ibdev, " is different from the device %d ", + BNXT_RE_ABI_VERSION); return -EPERM; } @@ -3614,10 +3825,10 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) spin_lock_init(&uctx->sh_lock); resp.comp_mask = BNXT_RE_UCNTX_CMASK_HAVE_CCTX; - chip_met_rev_num = rdev->chip_ctx.chip_num; - chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_rev & 0xFF) << + chip_met_rev_num = rdev->chip_ctx->chip_num; + chip_met_rev_num |= ((u32)rdev->chip_ctx->chip_rev & 0xFF) << BNXT_RE_CHIP_ID0_CHIP_REV_SFT; - chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_metal & 0xFF) << + chip_met_rev_num |= ((u32)rdev->chip_ctx->chip_metal & 0xFF) << BNXT_RE_CHIP_ID0_CHIP_MET_SFT; resp.chip_id0 = chip_met_rev_num; /* Future extension of chip info */ @@ -3632,7 +3843,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to copy user context"); + ibdev_err(ibdev, "Failed to copy user context"); rc = -EFAULT; goto cfail; } @@ -3682,15 +3893,14 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, PAGE_SIZE, vma->vm_page_prot)) { - dev_err(rdev_to_dev(rdev), "Failed to map DPI"); + ibdev_err(&rdev->ibdev, "Failed to map DPI"); return -EAGAIN; } } else { pfn = virt_to_phys(uctx->shpg) >> PAGE_SHIFT; if (remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot)) { - dev_err(rdev_to_dev(rdev), - "Failed to map shared page"); + ibdev_err(&rdev->ibdev, "Failed to map shared page"); return -EAGAIN; } } diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 793c97251588..b12fbc857f94 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -78,26 +78,43 @@ static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list); /* Mutex to protect the list of bnxt_re devices added */ static DEFINE_MUTEX(bnxt_re_dev_lock); static struct workqueue_struct *bnxt_re_wq; -static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev); +static void bnxt_re_remove_device(struct bnxt_re_dev *rdev); +static void bnxt_re_dealloc_driver(struct ib_device *ib_dev); +static void bnxt_re_stop_irq(void *handle); static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) { + struct bnxt_qplib_chip_ctx *chip_ctx; + + if (!rdev->chip_ctx) + return; + chip_ctx = rdev->chip_ctx; + rdev->chip_ctx = NULL; rdev->rcfw.res = NULL; rdev->qplib_res.cctx = NULL; + rdev->qplib_res.pdev = NULL; + rdev->qplib_res.netdev = NULL; + kfree(chip_ctx); } static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) { + struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; struct bnxt *bp; en_dev = rdev->en_dev; bp = netdev_priv(en_dev->net); - rdev->chip_ctx.chip_num = bp->chip_num; + chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL); + if (!chip_ctx) + return -ENOMEM; + chip_ctx->chip_num = bp->chip_num; + + rdev->chip_ctx = chip_ctx; /* rest members to follow eventually */ - rdev->qplib_res.cctx = &rdev->chip_ctx; + rdev->qplib_res.cctx = rdev->chip_ctx; rdev->rcfw.res = &rdev->qplib_res; return 0; @@ -136,9 +153,9 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, attr->max_srq); ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq); - if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) - rdev->qplib_ctx.tqm_count[i] = + rdev->qplib_ctx.tqm_ctx.qcount[i] = rdev->dev_attr.tqm_alloc_reqs[i]; } @@ -185,7 +202,7 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); bnxt_re_limit_pf_res(rdev); - num_vfs = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? + num_vfs = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs; if (num_vfs) bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs); @@ -208,7 +225,7 @@ static void bnxt_re_sriov_config(void *p, int num_vfs) return; rdev->num_vfs = num_vfs; - if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) { + if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { bnxt_re_set_resource_limits(rdev); bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, &rdev->qplib_ctx); @@ -221,8 +238,10 @@ static void bnxt_re_shutdown(void *p) if (!rdev) return; - - bnxt_re_ib_unreg(rdev); + ASSERT_RTNL(); + /* Release the MSIx vectors before queuing unregister */ + bnxt_re_stop_irq(rdev); + ib_unregister_device_queued(&rdev->ibdev); } static void bnxt_re_stop_irq(void *handle) @@ -254,7 +273,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) * to f/w will timeout and that will set the * timeout bit. */ - dev_err(rdev_to_dev(rdev), "Failed to re-start IRQs\n"); + ibdev_err(&rdev->ibdev, "Failed to re-start IRQs\n"); return; } @@ -271,8 +290,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) rc = bnxt_qplib_nq_start_irq(nq, indx - 1, msix_ent[indx].vector, false); if (rc) - dev_warn(rdev_to_dev(rdev), - "Failed to reinit NQ index %d\n", indx - 1); + ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n", + indx - 1); } } @@ -358,9 +377,9 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) goto done; } if (num_msix_got != num_msix_want) { - dev_warn(rdev_to_dev(rdev), - "Requested %d MSI-X vectors, got %d\n", - num_msix_want, num_msix_got); + ibdev_warn(&rdev->ibdev, + "Requested %d MSI-X vectors, got %d\n", + num_msix_want, num_msix_got); } rdev->num_msix = num_msix_got; done: @@ -407,14 +426,14 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) - dev_err(rdev_to_dev(rdev), - "Failed to free HW ring:%d :%#x", req.ring_id, rc); + ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x", + req.ring_id, rc); return rc; } -static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr, - int pages, int type, u32 ring_mask, - u32 map_index, u16 *fw_ring_id) +static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, + struct bnxt_re_ring_attr *ring_attr, + u16 *fw_ring_id) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_ring_alloc_input req = {0}; @@ -428,18 +447,18 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr, memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1); req.enables = 0; - req.page_tbl_addr = cpu_to_le64(dma_arr[0]); - if (pages > 1) { + req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]); + if (ring_attr->pages > 1) { /* Page size is in log2 units */ req.page_size = BNXT_PAGE_SHIFT; req.page_tbl_depth = 1; } req.fbo = 0; /* Association of ring index with doorbell index and MSIX number */ - req.logical_id = cpu_to_le16(map_index); - req.length = cpu_to_le32(ring_mask + 1); - req.ring_type = type; - req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX; + req.logical_id = cpu_to_le16(ring_attr->lrid); + req.length = cpu_to_le32(ring_attr->depth + 1); + req.ring_type = ring_attr->type; + req.int_mode = ring_attr->mode; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); @@ -468,8 +487,8 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, sizeof(req), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) - dev_err(rdev_to_dev(rdev), - "Failed to free HW stats context %#x", rc); + ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x", + rc); return rc; } @@ -524,17 +543,12 @@ static bool is_bnxt_re_dev(struct net_device *netdev) static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev) { - struct bnxt_re_dev *rdev; + struct ib_device *ibdev = + ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE); + if (!ibdev) + return NULL; - rcu_read_lock(); - list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) { - if (rdev->netdev == netdev) { - rcu_read_unlock(); - return rdev; - } - } - rcu_read_unlock(); - return NULL; + return container_of(ibdev, struct bnxt_re_dev, ibdev); } static void bnxt_re_dev_unprobe(struct net_device *netdev, @@ -608,11 +622,6 @@ static const struct attribute_group bnxt_re_dev_attr_group = { .attrs = bnxt_re_attributes, }; -static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev) -{ - ib_unregister_device(&rdev->ibdev); -} - static const struct ib_device_ops bnxt_re_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_BNXT_RE, @@ -627,6 +636,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .create_cq = bnxt_re_create_cq, .create_qp = bnxt_re_create_qp, .create_srq = bnxt_re_create_srq, + .dealloc_driver = bnxt_re_dealloc_driver, .dealloc_pd = bnxt_re_dealloc_pd, .dealloc_ucontext = bnxt_re_dealloc_ucontext, .del_gid = bnxt_re_del_gid, @@ -723,15 +733,11 @@ static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev) { dev_put(rdev->netdev); rdev->netdev = NULL; - mutex_lock(&bnxt_re_dev_lock); list_del_rcu(&rdev->list); mutex_unlock(&bnxt_re_dev_lock); synchronize_rcu(); - - ib_dealloc_device(&rdev->ibdev); - /* rdev is gone */ } static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev, @@ -742,8 +748,8 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev, /* Allocate bnxt_re_dev instance here */ rdev = ib_alloc_device(bnxt_re_dev, ibdev); if (!rdev) { - dev_err(NULL, "%s: bnxt_re_dev allocation failure!", - ROCE_DRV_MODULE_NAME); + ibdev_err(NULL, "%s: bnxt_re_dev allocation failure!", + ROCE_DRV_MODULE_NAME); return NULL; } /* Default values */ @@ -872,8 +878,8 @@ static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq, int rc = 0; if (!srq) { - dev_err(NULL, "%s: SRQ is NULL, SRQN not handled", - ROCE_DRV_MODULE_NAME); + ibdev_err(NULL, "%s: SRQ is NULL, SRQN not handled", + ROCE_DRV_MODULE_NAME); rc = -EINVAL; goto done; } @@ -900,8 +906,8 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, qplib_cq); if (!cq) { - dev_err(NULL, "%s: CQ is NULL, CQN not handled", - ROCE_DRV_MODULE_NAME); + ibdev_err(NULL, "%s: CQ is NULL, CQN not handled", + ROCE_DRV_MODULE_NAME); return -EINVAL; } if (cq->ib_cq.comp_handler) { @@ -916,7 +922,7 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, #define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000 static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) { - return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? + return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB : BNXT_RE_GEN_P5_PF_NQ_DB) : rdev->msix_entries[indx].db_offset; @@ -948,8 +954,8 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) db_offt, &bnxt_re_cqn_handler, &bnxt_re_srqn_handler); if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to enable NQ with rc = 0x%x", rc); + ibdev_err(&rdev->ibdev, + "Failed to enable NQ with rc = 0x%x", rc); goto fail; } num_vec_enabled++; @@ -967,10 +973,10 @@ static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev) int i; for (i = 0; i < rdev->num_msix - 1; i++) { - type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + type = bnxt_qplib_get_ring_type(rdev->chip_ctx); bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); - rdev->nq[i].res = NULL; bnxt_qplib_free_nq(&rdev->nq[i]); + rdev->nq[i].res = NULL; } } @@ -991,10 +997,10 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev) static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) { + struct bnxt_re_ring_attr rattr = {}; + struct bnxt_qplib_ctx *qplib_ctx; int num_vec_created = 0; - dma_addr_t *pg_map; int rc = 0, i; - int pages; u8 type; /* Configure and allocate resources for qplib */ @@ -1015,27 +1021,31 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) if (rc) goto dealloc_res; + qplib_ctx = &rdev->qplib_ctx; for (i = 0; i < rdev->num_msix - 1; i++) { - rdev->nq[i].res = &rdev->qplib_res; - rdev->nq[i].hwq.max_elements = BNXT_RE_MAX_CQ_COUNT + - BNXT_RE_MAX_SRQC_COUNT + 2; - rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq[i]); + struct bnxt_qplib_nq *nq; + + nq = &rdev->nq[i]; + nq->hwq.max_elements = (qplib_ctx->cq_count + + qplib_ctx->srqc_count + 2); + rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]); if (rc) { - dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x", - i, rc); + ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x", + i, rc); goto free_nq; } - type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); - pg_map = rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr; - pages = rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count; - rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type, - BNXT_QPLIB_NQE_MAX_CNT - 1, - rdev->msix_entries[i + 1].ring_idx, - &rdev->nq[i].ring_id); + type = bnxt_qplib_get_ring_type(rdev->chip_ctx); + rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr; + rattr.pages = nq->hwq.pbl[rdev->nq[i].hwq.level].pg_count; + rattr.type = type; + rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; + rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1; + rattr.lrid = rdev->msix_entries[i + 1].ring_idx; + rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id); if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to allocate NQ fw id with rc = 0x%x", - rc); + ibdev_err(&rdev->ibdev, + "Failed to allocate NQ fw id with rc = 0x%x", + rc); bnxt_qplib_free_nq(&rdev->nq[i]); goto free_nq; } @@ -1043,8 +1053,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) } return 0; free_nq: - for (i = num_vec_created; i >= 0; i--) { - type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); + for (i = num_vec_created - 1; i >= 0; i--) { + type = bnxt_qplib_get_ring_type(rdev->chip_ctx); bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); bnxt_qplib_free_nq(&rdev->nq[i]); } @@ -1109,10 +1119,10 @@ static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir, return rc; if (resp.queue_cfg_info) { - dev_warn(rdev_to_dev(rdev), - "Asymmetric cos queue configuration detected"); - dev_warn(rdev_to_dev(rdev), - " on device, QoS may not be fully functional\n"); + ibdev_warn(&rdev->ibdev, + "Asymmetric cos queue configuration detected"); + ibdev_warn(&rdev->ibdev, + " on device, QoS may not be fully functional\n"); } qcfgmap = &resp.pri0_cos_queue_id; tmp_map = (u8 *)cid_map; @@ -1125,7 +1135,8 @@ static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir, static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp) { - return (qp->ib_qp.qp_type == IB_QPT_GSI) || (qp == rdev->qp1_sqp); + return (qp->ib_qp.qp_type == IB_QPT_GSI) || + (qp == rdev->gsi_ctx.gsi_sqp); } static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev) @@ -1160,12 +1171,13 @@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) u16 gid_idx, index; int rc = 0; - if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) + if (!ib_device_try_get(&rdev->ibdev)) return 0; if (!sgid_tbl) { - dev_err(rdev_to_dev(rdev), "QPLIB: SGID table not allocated"); - return -EINVAL; + ibdev_err(&rdev->ibdev, "QPLIB: SGID table not allocated"); + rc = -EINVAL; + goto out; } for (index = 0; index < sgid_tbl->active; index++) { @@ -1185,7 +1197,8 @@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx, rdev->qplib_res.netdev->dev_addr); } - +out: + ib_device_put(&rdev->ibdev); return rc; } @@ -1241,7 +1254,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) /* Get cosq id for this priority */ rc = bnxt_re_query_hwrm_pri2cos(rdev, 0, &cid_map); if (rc) { - dev_warn(rdev_to_dev(rdev), "no cos for p_mask %x\n", prio_map); + ibdev_warn(&rdev->ibdev, "no cos for p_mask %x\n", prio_map); return rc; } /* Parse CoS IDs for app priority */ @@ -1250,8 +1263,8 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) /* Config BONO. */ rc = bnxt_qplib_map_tc2cos(&rdev->qplib_res, rdev->cosq); if (rc) { - dev_warn(rdev_to_dev(rdev), "no tc for cos{%x, %x}\n", - rdev->cosq[0], rdev->cosq[1]); + ibdev_warn(&rdev->ibdev, "no tc for cos{%x, %x}\n", + rdev->cosq[0], rdev->cosq[1]); return rc; } @@ -1286,8 +1299,8 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to query HW version, rc = 0x%x", rc); + ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x", + rc); return; } rdev->qplib_ctx.hwrm_intf_ver = @@ -1297,15 +1310,35 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) le16_to_cpu(resp.hwrm_intf_patch); } -static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) +static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) +{ + int rc = 0; + u32 event; + + /* Register ib dev */ + rc = bnxt_re_register_ib(rdev); + if (rc) { + pr_err("Failed to register with IB: %#x\n", rc); + return rc; + } + dev_info(rdev_to_dev(rdev), "Device registered successfully"); + ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, + &rdev->active_width); + set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); + + event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ? + IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + + bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, event); + + return rc; +} + +static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev) { u8 type; int rc; - if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) { - /* Cleanup ib dev */ - bnxt_re_unregister_ib(rdev); - } if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) cancel_delayed_work_sync(&rdev->worker); @@ -1318,28 +1351,28 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) { rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw); if (rc) - dev_warn(rdev_to_dev(rdev), - "Failed to deinitialize RCFW: %#x", rc); + ibdev_warn(&rdev->ibdev, + "Failed to deinitialize RCFW: %#x", rc); bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); - bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx); + bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx); bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); - type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); - bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type); + type = bnxt_qplib_get_ring_type(rdev->chip_ctx); + bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type); bnxt_qplib_free_rcfw_channel(&rdev->rcfw); } if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) { rc = bnxt_re_free_msix(rdev); if (rc) - dev_warn(rdev_to_dev(rdev), - "Failed to free MSI-X vectors: %#x", rc); + ibdev_warn(&rdev->ibdev, + "Failed to free MSI-X vectors: %#x", rc); } bnxt_re_destroy_chip_ctx(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) { rc = bnxt_re_unregister_netdev(rdev); if (rc) - dev_warn(rdev_to_dev(rdev), - "Failed to unregister with netdev: %#x", rc); + ibdev_warn(&rdev->ibdev, + "Failed to unregister with netdev: %#x", rc); } } @@ -1353,31 +1386,29 @@ static void bnxt_re_worker(struct work_struct *work) schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); } -static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) +static int bnxt_re_dev_init(struct bnxt_re_dev *rdev) { - dma_addr_t *pg_map; - u32 db_offt, ridx; - int pages, vid; - bool locked; + struct bnxt_qplib_creq_ctx *creq; + struct bnxt_re_ring_attr rattr; + u32 db_offt; + int vid; u8 type; int rc; - /* Acquire rtnl lock through out this function */ - rtnl_lock(); - locked = true; - /* Registered a new RoCE device instance to netdev */ + memset(&rattr, 0, sizeof(rattr)); rc = bnxt_re_register_netdev(rdev); if (rc) { rtnl_unlock(); - pr_err("Failed to register with netedev: %#x\n", rc); + ibdev_err(&rdev->ibdev, + "Failed to register with netedev: %#x\n", rc); return -EINVAL; } set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); rc = bnxt_re_setup_chip_ctx(rdev); if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to get chip context\n"); + ibdev_err(&rdev->ibdev, "Failed to get chip context\n"); return -EINVAL; } @@ -1386,7 +1417,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) rc = bnxt_re_request_msix(rdev); if (rc) { - pr_err("Failed to get MSI-X vectors: %#x\n", rc); + ibdev_err(&rdev->ibdev, + "Failed to get MSI-X vectors: %#x\n", rc); rc = -EINVAL; goto fail; } @@ -1397,31 +1429,36 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) /* Establish RCFW Communication Channel to initialize the context * memory for the function and all child VFs */ - rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw, + rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw, &rdev->qplib_ctx, BNXT_RE_MAX_QPC_COUNT); if (rc) { - pr_err("Failed to allocate RCFW Channel: %#x\n", rc); + ibdev_err(&rdev->ibdev, + "Failed to allocate RCFW Channel: %#x\n", rc); goto fail; } - type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); - pg_map = rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr; - pages = rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count; - ridx = rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; - rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type, - BNXT_QPLIB_CREQE_MAX_CNT - 1, - ridx, &rdev->rcfw.creq_ring_id); + + type = bnxt_qplib_get_ring_type(rdev->chip_ctx); + creq = &rdev->rcfw.creq; + rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr; + rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count; + rattr.type = type; + rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; + rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1; + rattr.lrid = rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; + rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id); if (rc) { - pr_err("Failed to allocate CREQ: %#x\n", rc); + ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc); goto free_rcfw; } db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX); vid = rdev->msix_entries[BNXT_RE_AEQ_IDX].vector; - rc = bnxt_qplib_enable_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw, + rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, vid, db_offt, rdev->is_virtfn, &bnxt_re_aeq_handler); if (rc) { - pr_err("Failed to enable RCFW channel: %#x\n", rc); + ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n", + rc); goto free_ring; } @@ -1432,24 +1469,27 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) bnxt_re_set_resource_limits(rdev); - rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0, - bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)); + rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0, + bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)); if (rc) { - pr_err("Failed to allocate QPLIB context: %#x\n", rc); + ibdev_err(&rdev->ibdev, + "Failed to allocate QPLIB context: %#x\n", rc); goto disable_rcfw; } rc = bnxt_re_net_stats_ctx_alloc(rdev, rdev->qplib_ctx.stats.dma_map, &rdev->qplib_ctx.stats.fw_id); if (rc) { - pr_err("Failed to allocate stats context: %#x\n", rc); + ibdev_err(&rdev->ibdev, + "Failed to allocate stats context: %#x\n", rc); goto free_ctx; } rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx, rdev->is_virtfn); if (rc) { - pr_err("Failed to initialize RCFW: %#x\n", rc); + ibdev_err(&rdev->ibdev, + "Failed to initialize RCFW: %#x\n", rc); goto free_sctx; } set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags); @@ -1457,13 +1497,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) /* Resources based on the 'new' device caps */ rc = bnxt_re_alloc_res(rdev); if (rc) { - pr_err("Failed to allocate resources: %#x\n", rc); + ibdev_err(&rdev->ibdev, + "Failed to allocate resources: %#x\n", rc); goto fail; } set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags); rc = bnxt_re_init_res(rdev); if (rc) { - pr_err("Failed to initialize resources: %#x\n", rc); + ibdev_err(&rdev->ibdev, + "Failed to initialize resources: %#x\n", rc); goto fail; } @@ -1472,46 +1514,28 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) if (!rdev->is_virtfn) { rc = bnxt_re_setup_qos(rdev); if (rc) - pr_info("RoCE priority not yet configured\n"); + ibdev_info(&rdev->ibdev, + "RoCE priority not yet configured\n"); INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker); set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags); schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); } - rtnl_unlock(); - locked = false; - - /* Register ib dev */ - rc = bnxt_re_register_ib(rdev); - if (rc) { - pr_err("Failed to register with IB: %#x\n", rc); - goto fail; - } - set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); - dev_info(rdev_to_dev(rdev), "Device registered successfully"); - ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, - &rdev->active_width); - set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); - bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE); - return 0; free_sctx: bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); free_ctx: - bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx); + bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx); disable_rcfw: bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); free_ring: - type = bnxt_qplib_get_ring_type(&rdev->chip_ctx); - bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type); + type = bnxt_qplib_get_ring_type(rdev->chip_ctx); + bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type); free_rcfw: bnxt_qplib_free_rcfw_channel(&rdev->rcfw); fail: - if (!locked) - rtnl_lock(); - bnxt_re_ib_unreg(rdev); - rtnl_unlock(); + bnxt_re_dev_uninit(rdev); return rc; } @@ -1538,7 +1562,8 @@ static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev) en_dev = bnxt_re_dev_probe(netdev); if (IS_ERR(en_dev)) { if (en_dev != ERR_PTR(-ENODEV)) - pr_err("%s: Failed to probe\n", ROCE_DRV_MODULE_NAME); + ibdev_err(&(*rdev)->ibdev, "%s: Failed to probe\n", + ROCE_DRV_MODULE_NAME); rc = PTR_ERR(en_dev); goto exit; } @@ -1552,9 +1577,47 @@ exit: return rc; } -static void bnxt_re_remove_one(struct bnxt_re_dev *rdev) +static void bnxt_re_remove_device(struct bnxt_re_dev *rdev) { + bnxt_re_dev_uninit(rdev); pci_dev_put(rdev->en_dev->pdev); + bnxt_re_dev_unreg(rdev); +} + +static int bnxt_re_add_device(struct bnxt_re_dev **rdev, + struct net_device *netdev) +{ + int rc; + + rc = bnxt_re_dev_reg(rdev, netdev); + if (rc == -ENODEV) + return rc; + if (rc) { + pr_err("Failed to register with the device %s: %#x\n", + netdev->name, rc); + return rc; + } + + pci_dev_get((*rdev)->en_dev->pdev); + rc = bnxt_re_dev_init(*rdev); + if (rc) { + pci_dev_put((*rdev)->en_dev->pdev); + bnxt_re_dev_unreg(*rdev); + } + + return rc; +} + +static void bnxt_re_dealloc_driver(struct ib_device *ib_dev) +{ + struct bnxt_re_dev *rdev = + container_of(ib_dev, struct bnxt_re_dev, ibdev); + + dev_info(rdev_to_dev(rdev), "Unregistering Device"); + + rtnl_lock(); + bnxt_re_remove_device(rdev); + rtnl_unlock(); } /* Handle all deferred netevents tasks */ @@ -1567,21 +1630,23 @@ static void bnxt_re_task(struct work_struct *work) re_work = container_of(work, struct bnxt_re_work, work); rdev = re_work->rdev; - if (re_work->event != NETDEV_REGISTER && - !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) - return; - - switch (re_work->event) { - case NETDEV_REGISTER: - rc = bnxt_re_ib_reg(rdev); + if (re_work->event == NETDEV_REGISTER) { + rc = bnxt_re_ib_init(rdev); if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to register with IB: %#x", rc); - bnxt_re_remove_one(rdev); - bnxt_re_dev_unreg(rdev); + ibdev_err(&rdev->ibdev, + "Failed to register with IB: %#x", rc); + rtnl_lock(); + bnxt_re_remove_device(rdev); + rtnl_unlock(); goto exit; } - break; + goto exit; + } + + if (!ib_device_try_get(&rdev->ibdev)) + goto exit; + + switch (re_work->event) { case NETDEV_UP: bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE); @@ -1601,17 +1666,12 @@ static void bnxt_re_task(struct work_struct *work) default: break; } - smp_mb__before_atomic(); - atomic_dec(&rdev->sched_count); + ib_device_put(&rdev->ibdev); exit: + put_device(&rdev->ibdev.dev); kfree(re_work); } -static void bnxt_re_init_one(struct bnxt_re_dev *rdev) -{ - pci_dev_get(rdev->en_dev->pdev); -} - /* * "Notifier chain callback can be invoked for the same chain from * different CPUs at the same time". @@ -1634,6 +1694,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, struct bnxt_re_dev *rdev; int rc = 0; bool sch_work = false; + bool release = true; real_dev = rdma_vlan_dev_real_dev(netdev); if (!real_dev) @@ -1641,7 +1702,8 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, rdev = bnxt_re_from_netdev(real_dev); if (!rdev && event != NETDEV_REGISTER) - goto exit; + return NOTIFY_OK; + if (real_dev != netdev) goto exit; @@ -1649,27 +1711,14 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, case NETDEV_REGISTER: if (rdev) break; - rc = bnxt_re_dev_reg(&rdev, real_dev); - if (rc == -ENODEV) - break; - if (rc) { - pr_err("Failed to register with the device %s: %#x\n", - real_dev->name, rc); - break; - } - bnxt_re_init_one(rdev); - sch_work = true; + rc = bnxt_re_add_device(&rdev, real_dev); + if (!rc) + sch_work = true; + release = false; break; case NETDEV_UNREGISTER: - /* netdev notifier will call NETDEV_UNREGISTER again later since - * we are still holding the reference to the netdev - */ - if (atomic_read(&rdev->sched_count) > 0) - goto exit; - bnxt_re_ib_unreg(rdev); - bnxt_re_remove_one(rdev); - bnxt_re_dev_unreg(rdev); + ib_unregister_device_queued(&rdev->ibdev); break; default: @@ -1680,17 +1729,19 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, /* Allocate for the deferred task */ re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC); if (re_work) { + get_device(&rdev->ibdev.dev); re_work->rdev = rdev; re_work->event = event; re_work->vlan_dev = (real_dev == netdev ? NULL : netdev); INIT_WORK(&re_work->work, bnxt_re_task); - atomic_inc(&rdev->sched_count); queue_work(bnxt_re_wq, &re_work->work); } } exit: + if (rdev && release) + ib_device_put(&rdev->ibdev); return NOTIFY_DONE; } @@ -1726,36 +1777,21 @@ err_netdev: static void __exit bnxt_re_mod_exit(void) { - struct bnxt_re_dev *rdev, *next; - LIST_HEAD(to_be_deleted); + struct bnxt_re_dev *rdev; - mutex_lock(&bnxt_re_dev_lock); - /* Free all adapter allocated resources */ - if (!list_empty(&bnxt_re_dev_list)) - list_splice_init(&bnxt_re_dev_list, &to_be_deleted); - mutex_unlock(&bnxt_re_dev_lock); - /* - * Cleanup the devices in reverse order so that the VF device - * cleanup is done before PF cleanup - */ - list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) { - dev_info(rdev_to_dev(rdev), "Unregistering Device"); - /* - * Flush out any scheduled tasks before destroying the - * resources - */ - flush_workqueue(bnxt_re_wq); - bnxt_re_dev_stop(rdev); - /* Acquire the rtnl_lock as the L2 resources are freed here */ - rtnl_lock(); - bnxt_re_ib_unreg(rdev); - rtnl_unlock(); - bnxt_re_remove_one(rdev); - bnxt_re_dev_unreg(rdev); - } unregister_netdevice_notifier(&bnxt_re_netdev_notifier); if (bnxt_re_wq) destroy_workqueue(bnxt_re_wq); + list_for_each_entry(rdev, &bnxt_re_dev_list, list) { + /* VF device removal should be called before the removal + * of PF device. Queue VFs unregister first, so that VFs + * shall be removed before the PF during the call of + * ib_unregister_driver. + */ + if (rdev->is_virtfn) + ib_unregister_device(&rdev->ibdev); + } + ib_unregister_driver(RDMA_DRIVER_BNXT_RE); } module_init(bnxt_re_mod_init); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 020f70e6865e..899a5d2c100e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -43,6 +43,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/pci.h> +#include <linux/delay.h> #include <linux/prefetch.h> #include <linux/if_ether.h> @@ -53,9 +54,7 @@ #include "qplib_sp.h" #include "qplib_fp.h" -static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq); static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp); -static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type); static void bnxt_qplib_cancel_phantom_processing(struct bnxt_qplib_qp *qp) { @@ -233,6 +232,70 @@ fail: return rc; } +static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq) +{ + struct bnxt_qplib_hwq *hwq = &nq->hwq; + struct nq_base *nqe, **nq_ptr; + int budget = nq->budget; + u32 sw_cons, raw_cons; + uintptr_t q_handle; + u16 type; + + spin_lock_bh(&hwq->lock); + /* Service the NQ until empty */ + raw_cons = hwq->cons; + while (budget--) { + sw_cons = HWQ_CMP(raw_cons, hwq); + nq_ptr = (struct nq_base **)hwq->pbl_ptr; + nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]; + if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements)) + break; + + /* + * The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); + + type = le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK; + switch (type) { + case NQ_BASE_TYPE_CQ_NOTIFICATION: + { + struct nq_cn *nqcne = (struct nq_cn *)nqe; + + q_handle = le32_to_cpu(nqcne->cq_handle_low); + q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high) + << 32; + if ((unsigned long)cq == q_handle) { + nqcne->cq_handle_low = 0; + nqcne->cq_handle_high = 0; + cq->cnq_events++; + } + break; + } + default: + break; + } + raw_cons++; + } + spin_unlock_bh(&hwq->lock); +} + +/* Wait for receiving all NQEs for this CQ and clean the NQEs associated with + * this CQ. + */ +static void __wait_for_all_nqes(struct bnxt_qplib_cq *cq, u16 cnq_events) +{ + u32 retry_cnt = 100; + + while (retry_cnt--) { + if (cnq_events == cq->cnq_events) + return; + usleep_range(50, 100); + clean_nq(cq->nq, cq); + } +} + static void bnxt_qplib_service_nq(unsigned long data) { struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data; @@ -241,12 +304,12 @@ static void bnxt_qplib_service_nq(unsigned long data) struct bnxt_qplib_cq *cq; int num_cqne_processed = 0; int num_srqne_processed = 0; - u32 sw_cons, raw_cons; - u16 type; int budget = nq->budget; + u32 sw_cons, raw_cons; uintptr_t q_handle; - bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx); + u16 type; + spin_lock_bh(&hwq->lock); /* Service the NQ until empty */ raw_cons = hwq->cons; while (budget--) { @@ -272,7 +335,10 @@ static void bnxt_qplib_service_nq(unsigned long data) q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high) << 32; cq = (struct bnxt_qplib_cq *)(unsigned long)q_handle; - bnxt_qplib_arm_cq_enable(cq); + if (!cq) + break; + bnxt_qplib_armen_db(&cq->dbinfo, + DBC_DBC_TYPE_CQ_ARMENA); spin_lock_bh(&cq->compl_lock); atomic_set(&cq->arm_state, 0); if (!nq->cqn_handler(nq, (cq))) @@ -280,19 +346,22 @@ static void bnxt_qplib_service_nq(unsigned long data) else dev_warn(&nq->pdev->dev, "cqn - type 0x%x not handled\n", type); + cq->cnq_events++; spin_unlock_bh(&cq->compl_lock); break; } case NQ_BASE_TYPE_SRQ_EVENT: { + struct bnxt_qplib_srq *srq; struct nq_srq_event *nqsrqe = (struct nq_srq_event *)nqe; q_handle = le32_to_cpu(nqsrqe->srq_handle_low); q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high) << 32; - bnxt_qplib_arm_srq((struct bnxt_qplib_srq *)q_handle, - DBC_DBC_TYPE_SRQ_ARMENA); + srq = (struct bnxt_qplib_srq *)q_handle; + bnxt_qplib_armen_db(&srq->dbinfo, + DBC_DBC_TYPE_SRQ_ARMENA); if (!nq->srqn_handler(nq, (struct bnxt_qplib_srq *)q_handle, nqsrqe->event)) @@ -314,10 +383,9 @@ static void bnxt_qplib_service_nq(unsigned long data) } if (hwq->cons != raw_cons) { hwq->cons = raw_cons; - bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, hwq->cons, - hwq->max_elements, nq->ring_id, - gen_p5); + bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true); } + spin_unlock_bh(&hwq->lock); } static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) @@ -333,25 +401,23 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) prefetch(&nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]); /* Fan out to CPU affinitized kthreads? */ - tasklet_schedule(&nq->worker); + tasklet_schedule(&nq->nq_tasklet); return IRQ_HANDLED; } void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill) { - bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx); - tasklet_disable(&nq->worker); + tasklet_disable(&nq->nq_tasklet); /* Mask h/w interrupt */ - bnxt_qplib_ring_nq_db(nq->bar_reg_iomem, nq->hwq.cons, - nq->hwq.max_elements, nq->ring_id, gen_p5); + bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, false); /* Sync with last running IRQ handler */ - synchronize_irq(nq->vector); + synchronize_irq(nq->msix_vec); if (kill) - tasklet_kill(&nq->worker); + tasklet_kill(&nq->nq_tasklet); if (nq->requested) { - irq_set_affinity_hint(nq->vector, NULL); - free_irq(nq->vector, nq); + irq_set_affinity_hint(nq->msix_vec, NULL); + free_irq(nq->msix_vec, nq); nq->requested = false; } } @@ -364,89 +430,108 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) } /* Make sure the HW is stopped! */ - if (nq->requested) - bnxt_qplib_nq_stop_irq(nq, true); + bnxt_qplib_nq_stop_irq(nq, true); - if (nq->bar_reg_iomem) - iounmap(nq->bar_reg_iomem); - nq->bar_reg_iomem = NULL; + if (nq->nq_db.reg.bar_reg) { + iounmap(nq->nq_db.reg.bar_reg); + nq->nq_db.reg.bar_reg = NULL; + } nq->cqn_handler = NULL; nq->srqn_handler = NULL; - nq->vector = 0; + nq->msix_vec = 0; } int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, int msix_vector, bool need_init) { - bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx); int rc; if (nq->requested) return -EFAULT; - nq->vector = msix_vector; + nq->msix_vec = msix_vector; if (need_init) - tasklet_init(&nq->worker, bnxt_qplib_service_nq, + tasklet_init(&nq->nq_tasklet, bnxt_qplib_service_nq, (unsigned long)nq); else - tasklet_enable(&nq->worker); + tasklet_enable(&nq->nq_tasklet); snprintf(nq->name, sizeof(nq->name), "bnxt_qplib_nq-%d", nq_indx); - rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, nq->name, nq); + rc = request_irq(nq->msix_vec, bnxt_qplib_nq_irq, 0, nq->name, nq); if (rc) return rc; cpumask_clear(&nq->mask); cpumask_set_cpu(nq_indx, &nq->mask); - rc = irq_set_affinity_hint(nq->vector, &nq->mask); + rc = irq_set_affinity_hint(nq->msix_vec, &nq->mask); if (rc) { dev_warn(&nq->pdev->dev, "set affinity failed; vector: %d nq_idx: %d\n", - nq->vector, nq_indx); + nq->msix_vec, nq_indx); } nq->requested = true; - bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, nq->hwq.cons, - nq->hwq.max_elements, nq->ring_id, gen_p5); + bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true); + + return rc; +} + +static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt) +{ + resource_size_t reg_base; + struct bnxt_qplib_nq_db *nq_db; + struct pci_dev *pdev; + int rc = 0; + + pdev = nq->pdev; + nq_db = &nq->nq_db; + + nq_db->reg.bar_id = NQ_CONS_PCI_BAR_REGION; + nq_db->reg.bar_base = pci_resource_start(pdev, nq_db->reg.bar_id); + if (!nq_db->reg.bar_base) { + dev_err(&pdev->dev, "QPLIB: NQ BAR region %d resc start is 0!", + nq_db->reg.bar_id); + rc = -ENOMEM; + goto fail; + } + reg_base = nq_db->reg.bar_base + reg_offt; + /* Unconditionally map 8 bytes to support 57500 series */ + nq_db->reg.len = 8; + nq_db->reg.bar_reg = ioremap(reg_base, nq_db->reg.len); + if (!nq_db->reg.bar_reg) { + dev_err(&pdev->dev, "QPLIB: NQ BAR region %d mapping failed", + nq_db->reg.bar_id); + rc = -ENOMEM; + goto fail; + } + + nq_db->dbinfo.db = nq_db->reg.bar_reg; + nq_db->dbinfo.hwq = &nq->hwq; + nq_db->dbinfo.xid = nq->ring_id; +fail: return rc; } int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, int nq_idx, int msix_vector, int bar_reg_offset, - int (*cqn_handler)(struct bnxt_qplib_nq *nq, - struct bnxt_qplib_cq *), - int (*srqn_handler)(struct bnxt_qplib_nq *nq, - struct bnxt_qplib_srq *, - u8 event)) + cqn_handler_t cqn_handler, + srqn_handler_t srqn_handler) { - resource_size_t nq_base; int rc = -1; - if (cqn_handler) - nq->cqn_handler = cqn_handler; - - if (srqn_handler) - nq->srqn_handler = srqn_handler; + nq->pdev = pdev; + nq->cqn_handler = cqn_handler; + nq->srqn_handler = srqn_handler; /* Have a task to schedule CQ notifiers in post send case */ nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq"); if (!nq->cqn_wq) return -ENOMEM; - nq->bar_reg = NQ_CONS_PCI_BAR_REGION; - nq->bar_reg_off = bar_reg_offset; - nq_base = pci_resource_start(pdev, nq->bar_reg); - if (!nq_base) { - rc = -ENOMEM; - goto fail; - } - /* Unconditionally map 8 bytes to support 57500 series */ - nq->bar_reg_iomem = ioremap(nq_base + nq->bar_reg_off, 8); - if (!nq->bar_reg_iomem) { - rc = -ENOMEM; + rc = bnxt_qplib_map_nq_db(nq, bar_reg_offset); + if (rc) goto fail; - } rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true); if (rc) { @@ -464,49 +549,38 @@ fail: void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq) { if (nq->hwq.max_elements) { - bnxt_qplib_free_hwq(nq->pdev, &nq->hwq); + bnxt_qplib_free_hwq(nq->res, &nq->hwq); nq->hwq.max_elements = 0; } } -int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq) +int bnxt_qplib_alloc_nq(struct bnxt_qplib_res *res, struct bnxt_qplib_nq *nq) { - u8 hwq_type; + struct bnxt_qplib_hwq_attr hwq_attr = {}; + struct bnxt_qplib_sg_info sginfo = {}; - nq->pdev = pdev; + nq->pdev = res->pdev; + nq->res = res; if (!nq->hwq.max_elements || nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT) nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; - hwq_type = bnxt_qplib_get_hwq_type(nq->res); - if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, - &nq->hwq.max_elements, - BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0, - PAGE_SIZE, hwq_type)) - return -ENOMEM; + sginfo.pgsize = PAGE_SIZE; + sginfo.pgshft = PAGE_SHIFT; + hwq_attr.res = res; + hwq_attr.sginfo = &sginfo; + hwq_attr.depth = nq->hwq.max_elements; + hwq_attr.stride = sizeof(struct nq_base); + hwq_attr.type = bnxt_qplib_get_hwq_type(nq->res); + if (bnxt_qplib_alloc_init_hwq(&nq->hwq, &hwq_attr)) { + dev_err(&nq->pdev->dev, "FP NQ allocation failed"); + return -ENOMEM; + } nq->budget = 8; return 0; } /* SRQ */ -static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type) -{ - struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; - void __iomem *db; - u32 sw_prod; - u64 val = 0; - - /* Ring DB */ - sw_prod = (arm_type == DBC_DBC_TYPE_SRQ_ARM) ? - srq->threshold : HWQ_CMP(srq_hwq->prod, srq_hwq); - db = (arm_type == DBC_DBC_TYPE_SRQ_ARMENA) ? srq->dbr_base : - srq->dpi->dbr; - val = ((srq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type; - val <<= 32; - val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; - writeq(val, db); -} - void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq) { @@ -526,24 +600,26 @@ void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, kfree(srq->swq); if (rc) return; - bnxt_qplib_free_hwq(res->pdev, &srq->hwq); + bnxt_qplib_free_hwq(res, &srq->hwq); } int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct cmdq_create_srq req; + struct bnxt_qplib_hwq_attr hwq_attr = {}; struct creq_create_srq_resp resp; + struct cmdq_create_srq req; struct bnxt_qplib_pbl *pbl; u16 cmd_flags = 0; int rc, idx; - srq->hwq.max_elements = srq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &srq->hwq, &srq->sg_info, - &srq->hwq.max_elements, - BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_QUEUE); + hwq_attr.res = res; + hwq_attr.sginfo = &srq->sg_info; + hwq_attr.depth = srq->max_wqe; + hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE; + hwq_attr.type = HWQ_TYPE_QUEUE; + rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr); if (rc) goto exit; @@ -595,14 +671,17 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, srq->swq[srq->last_idx].next_idx = -1; srq->id = le32_to_cpu(resp.xid); - srq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem; + srq->dbinfo.hwq = &srq->hwq; + srq->dbinfo.xid = srq->id; + srq->dbinfo.db = srq->dpi->dbr; + srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem; if (srq->threshold) - bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARMENA); + bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); srq->arm_req = false; return 0; fail: - bnxt_qplib_free_hwq(res->pdev, &srq->hwq); + bnxt_qplib_free_hwq(res, &srq->hwq); kfree(srq->swq); exit: return rc; @@ -621,7 +700,7 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, srq_hwq->max_elements - sw_cons + sw_prod; if (count > srq->threshold) { srq->arm_req = false; - bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM); + bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold); } else { /* Deferred arming */ srq->arm_req = true; @@ -709,10 +788,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, srq_hwq->max_elements - sw_cons + sw_prod; spin_unlock(&srq_hwq->lock); /* Ring DB */ - bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ); + bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ); if (srq->arm_req == true && count > srq->threshold) { srq->arm_req = false; - bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM); + bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold); } done: return rc; @@ -721,15 +800,16 @@ done: /* QP */ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { + struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct cmdq_create_qp1 req; - struct creq_create_qp1_resp resp; - struct bnxt_qplib_pbl *pbl; struct bnxt_qplib_q *sq = &qp->sq; struct bnxt_qplib_q *rq = &qp->rq; - int rc; + struct creq_create_qp1_resp resp; + struct cmdq_create_qp1 req; + struct bnxt_qplib_pbl *pbl; u16 cmd_flags = 0; u32 qp_flags = 0; + int rc; RCFW_CMD_PREP(req, CREATE_QP1, cmd_flags); @@ -739,11 +819,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) req.qp_handle = cpu_to_le64(qp->qp_handle); /* SQ */ - sq->hwq.max_elements = sq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, NULL, - &sq->hwq.max_elements, - BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_QUEUE); + hwq_attr.res = res; + hwq_attr.sginfo = &sq->sg_info; + hwq_attr.depth = sq->max_wqe; + hwq_attr.stride = BNXT_QPLIB_MAX_SQE_ENTRY_SIZE; + hwq_attr.type = HWQ_TYPE_QUEUE; + rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) goto exit; @@ -778,11 +859,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* RQ */ if (rq->max_wqe) { - rq->hwq.max_elements = qp->rq.max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, NULL, - &rq->hwq.max_elements, - BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_QUEUE); + hwq_attr.res = res; + hwq_attr.sginfo = &rq->sg_info; + hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE; + hwq_attr.depth = qp->rq.max_wqe; + hwq_attr.type = HWQ_TYPE_QUEUE; + rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) goto fail_sq; @@ -840,6 +922,15 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->id = le32_to_cpu(resp.xid); qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET; + qp->cctx = res->cctx; + sq->dbinfo.hwq = &sq->hwq; + sq->dbinfo.xid = qp->id; + sq->dbinfo.db = qp->dpi->dbr; + if (rq->max_wqe) { + rq->dbinfo.hwq = &rq->hwq; + rq->dbinfo.xid = qp->id; + rq->dbinfo.db = qp->dpi->dbr; + } rcfw->qp_tbl[qp->id].qp_id = qp->id; rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; @@ -848,10 +939,10 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) fail: bnxt_qplib_free_qp_hdr_buf(res, qp); fail_rq: - bnxt_qplib_free_hwq(res->pdev, &rq->hwq); + bnxt_qplib_free_hwq(res, &rq->hwq); kfree(rq->swq); fail_sq: - bnxt_qplib_free_hwq(res->pdev, &sq->hwq); + bnxt_qplib_free_hwq(res, &sq->hwq); kfree(sq->swq); exit: return rc; @@ -860,7 +951,9 @@ exit: int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct bnxt_qplib_hwq_attr hwq_attr = {}; unsigned long int psn_search, poff = 0; + struct bnxt_qplib_sg_info sginfo = {}; struct sq_psn_search **psn_search_ptr; struct bnxt_qplib_q *sq = &qp->sq; struct bnxt_qplib_q *rq = &qp->rq; @@ -887,12 +980,15 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sizeof(struct sq_psn_search_ext) : sizeof(struct sq_psn_search); } - sq->hwq.max_elements = sq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, &sq->sg_info, - &sq->hwq.max_elements, - BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, - psn_sz, - PAGE_SIZE, HWQ_TYPE_QUEUE); + + hwq_attr.res = res; + hwq_attr.sginfo = &sq->sg_info; + hwq_attr.stride = BNXT_QPLIB_MAX_SQE_ENTRY_SIZE; + hwq_attr.depth = sq->max_wqe; + hwq_attr.aux_stride = psn_sz; + hwq_attr.aux_depth = hwq_attr.depth; + hwq_attr.type = HWQ_TYPE_QUEUE; + rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) goto exit; @@ -956,12 +1052,14 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* RQ */ if (rq->max_wqe) { - rq->hwq.max_elements = rq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, - &rq->sg_info, - &rq->hwq.max_elements, - BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_QUEUE); + hwq_attr.res = res; + hwq_attr.sginfo = &rq->sg_info; + hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE; + hwq_attr.depth = rq->max_wqe; + hwq_attr.aux_stride = 0; + hwq_attr.aux_depth = 0; + hwq_attr.type = HWQ_TYPE_QUEUE; + rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) goto fail_sq; @@ -1029,10 +1127,17 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) req_size = xrrq->max_elements * BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE + PAGE_SIZE - 1; req_size &= ~(PAGE_SIZE - 1); - rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, - &xrrq->max_elements, - BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE, - 0, req_size, HWQ_TYPE_CTX); + sginfo.pgsize = req_size; + sginfo.pgshft = PAGE_SHIFT; + + hwq_attr.res = res; + hwq_attr.sginfo = &sginfo; + hwq_attr.depth = xrrq->max_elements; + hwq_attr.stride = BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE; + hwq_attr.aux_stride = 0; + hwq_attr.aux_depth = 0; + hwq_attr.type = HWQ_TYPE_CTX; + rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr); if (rc) goto fail_buf_free; pbl = &xrrq->pbl[PBL_LVL_0]; @@ -1044,11 +1149,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) req_size = xrrq->max_elements * BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE + PAGE_SIZE - 1; req_size &= ~(PAGE_SIZE - 1); - - rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, - &xrrq->max_elements, - BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE, - 0, req_size, HWQ_TYPE_CTX); + sginfo.pgsize = req_size; + hwq_attr.depth = xrrq->max_elements; + hwq_attr.stride = BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE; + rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr); if (rc) goto fail_orrq; @@ -1064,9 +1168,17 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->id = le32_to_cpu(resp.xid); qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET; - qp->cctx = res->cctx; INIT_LIST_HEAD(&qp->sq_flush); INIT_LIST_HEAD(&qp->rq_flush); + qp->cctx = res->cctx; + sq->dbinfo.hwq = &sq->hwq; + sq->dbinfo.xid = qp->id; + sq->dbinfo.db = qp->dpi->dbr; + if (rq->max_wqe) { + rq->dbinfo.hwq = &rq->hwq; + rq->dbinfo.xid = qp->id; + rq->dbinfo.db = qp->dpi->dbr; + } rcfw->qp_tbl[qp->id].qp_id = qp->id; rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; @@ -1074,17 +1186,17 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) fail: if (qp->irrq.max_elements) - bnxt_qplib_free_hwq(res->pdev, &qp->irrq); + bnxt_qplib_free_hwq(res, &qp->irrq); fail_orrq: if (qp->orrq.max_elements) - bnxt_qplib_free_hwq(res->pdev, &qp->orrq); + bnxt_qplib_free_hwq(res, &qp->orrq); fail_buf_free: bnxt_qplib_free_qp_hdr_buf(res, qp); fail_rq: - bnxt_qplib_free_hwq(res->pdev, &rq->hwq); + bnxt_qplib_free_hwq(res, &rq->hwq); kfree(rq->swq); fail_sq: - bnxt_qplib_free_hwq(res->pdev, &sq->hwq); + bnxt_qplib_free_hwq(res, &sq->hwq); kfree(sq->swq); exit: return rc; @@ -1440,16 +1552,16 @@ void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { bnxt_qplib_free_qp_hdr_buf(res, qp); - bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq); + bnxt_qplib_free_hwq(res, &qp->sq.hwq); kfree(qp->sq.swq); - bnxt_qplib_free_hwq(res->pdev, &qp->rq.hwq); + bnxt_qplib_free_hwq(res, &qp->rq.hwq); kfree(qp->rq.swq); if (qp->irrq.max_elements) - bnxt_qplib_free_hwq(res->pdev, &qp->irrq); + bnxt_qplib_free_hwq(res, &qp->irrq); if (qp->orrq.max_elements) - bnxt_qplib_free_hwq(res->pdev, &qp->orrq); + bnxt_qplib_free_hwq(res, &qp->orrq); } @@ -1506,16 +1618,8 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *sq = &qp->sq; - u32 sw_prod; - u64 val = 0; - val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | - DBC_DBC_TYPE_SQ); - val <<= 32; - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); - val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; - /* Flush all the WQE writes to HW */ - writeq(val, qp->dpi->dbr); + bnxt_qplib_ring_prod_db(&sq->dbinfo, DBC_DBC_TYPE_SQ); } int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, @@ -1807,16 +1911,8 @@ done: void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *rq = &qp->rq; - u32 sw_prod; - u64 val = 0; - val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | - DBC_DBC_TYPE_RQ); - val <<= 32; - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; - /* Flush the writes to HW Rx WQE before the ringing Rx DB */ - writeq(val, qp->dpi->dbr); + bnxt_qplib_ring_prod_db(&rq->dbinfo, DBC_DBC_TYPE_RQ); } int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, @@ -1896,48 +1992,22 @@ done: } /* CQ */ - -/* Spinlock must be held */ -static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq) -{ - u64 val = 0; - - val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | - DBC_DBC_TYPE_CQ_ARMENA; - val <<= 32; - /* Flush memory writes before enabling the CQ */ - writeq(val, cq->dbr_base); -} - -static void bnxt_qplib_arm_cq(struct bnxt_qplib_cq *cq, u32 arm_type) -{ - struct bnxt_qplib_hwq *cq_hwq = &cq->hwq; - u32 sw_cons; - u64 val = 0; - - /* Ring DB */ - val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type; - val <<= 32; - sw_cons = HWQ_CMP(cq_hwq->cons, cq_hwq); - val |= (sw_cons << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK; - /* flush memory writes before arming the CQ */ - writeq(val, cq->dpi->dbr); -} - int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct cmdq_create_cq req; + struct bnxt_qplib_hwq_attr hwq_attr = {}; struct creq_create_cq_resp resp; + struct cmdq_create_cq req; struct bnxt_qplib_pbl *pbl; u16 cmd_flags = 0; int rc; - cq->hwq.max_elements = cq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &cq->hwq, &cq->sg_info, - &cq->hwq.max_elements, - BNXT_QPLIB_MAX_CQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_QUEUE); + hwq_attr.res = res; + hwq_attr.depth = cq->max_wqe; + hwq_attr.stride = sizeof(struct cq_base); + hwq_attr.type = HWQ_TYPE_QUEUE; + hwq_attr.sginfo = &cq->sg_info; + rc = bnxt_qplib_alloc_init_hwq(&cq->hwq, &hwq_attr); if (rc) goto exit; @@ -1976,7 +2046,6 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) goto fail; cq->id = le32_to_cpu(resp.xid); - cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem; cq->period = BNXT_QPLIB_QUEUE_START_PERIOD; init_waitqueue_head(&cq->waitq); INIT_LIST_HEAD(&cq->sqf_head); @@ -1984,11 +2053,17 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) spin_lock_init(&cq->compl_lock); spin_lock_init(&cq->flush_lock); - bnxt_qplib_arm_cq_enable(cq); + cq->dbinfo.hwq = &cq->hwq; + cq->dbinfo.xid = cq->id; + cq->dbinfo.db = cq->dpi->dbr; + cq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem; + + bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA); + return 0; fail: - bnxt_qplib_free_hwq(res->pdev, &cq->hwq); + bnxt_qplib_free_hwq(res, &cq->hwq); exit: return rc; } @@ -1998,6 +2073,7 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_destroy_cq req; struct creq_destroy_cq_resp resp; + u16 total_cnq_events; u16 cmd_flags = 0; int rc; @@ -2008,7 +2084,9 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) (void *)&resp, NULL, 0); if (rc) return rc; - bnxt_qplib_free_hwq(res->pdev, &cq->hwq); + total_cnq_events = le16_to_cpu(resp.total_cnq_events); + __wait_for_all_nqes(cq, total_cnq_events); + bnxt_qplib_free_hwq(res, &cq->hwq); return 0; } @@ -2141,8 +2219,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, sq->send_phantom = true; /* TODO: Only ARM if the previous SQE is ARMALL */ - bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ_ARMALL); - + bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMALL); rc = -EAGAIN; goto out; } @@ -2426,7 +2503,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, } cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; - cqe->length = (u32)le16_to_cpu(hwcqe->length); + cqe->length = le16_to_cpu(hwcqe->length) & CQ_RES_UD_LENGTH_MASK; cqe->cfa_meta = le16_to_cpu(hwcqe->cfa_metadata); cqe->invrkey = le32_to_cpu(hwcqe->imm_data); cqe->flags = le16_to_cpu(hwcqe->flags); @@ -2812,7 +2889,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, } if (cq->hwq.cons != raw_cons) { cq->hwq.cons = raw_cons; - bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ); + bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ); } exit: return num_cqes - budget; @@ -2821,7 +2898,7 @@ exit: void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type) { if (arm_type) - bnxt_qplib_arm_cq(cq, arm_type); + bnxt_qplib_ring_db(&cq->dbinfo, arm_type); /* Using cq->arm_state variable to track whether to issue cq handler */ atomic_set(&cq->arm_state, 1); } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 99e0a13cbefa..7edb70b6bb16 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -42,7 +42,7 @@ struct bnxt_qplib_srq { struct bnxt_qplib_pd *pd; struct bnxt_qplib_dpi *dpi; - void __iomem *dbr_base; + struct bnxt_qplib_db_info dbinfo; u64 srq_handle; u32 id; u32 max_wqe; @@ -236,6 +236,7 @@ struct bnxt_qplib_swqe { struct bnxt_qplib_q { struct bnxt_qplib_hwq hwq; struct bnxt_qplib_swq *swq; + struct bnxt_qplib_db_info dbinfo; struct bnxt_qplib_sg_info sg_info; u32 max_wqe; u16 q_full_delta; @@ -370,7 +371,7 @@ struct bnxt_qplib_cqe { #define BNXT_QPLIB_QUEUE_START_PERIOD 0x01 struct bnxt_qplib_cq { struct bnxt_qplib_dpi *dpi; - void __iomem *dbr_base; + struct bnxt_qplib_db_info dbinfo; u32 max_wqe; u32 id; u16 count; @@ -401,6 +402,7 @@ struct bnxt_qplib_cq { * of the same QP while manipulating the flush list. */ spinlock_t flush_lock; /* QP flush management */ + u16 cnq_events; }; #define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq) @@ -433,66 +435,32 @@ struct bnxt_qplib_cq { NQ_DB_IDX_VALID | \ NQ_DB_IRQ_DIS) -static inline void bnxt_qplib_ring_nq_db64(void __iomem *db, u32 index, - u32 xid, bool arm) -{ - u64 val; - - val = xid & DBC_DBC_XID_MASK; - val |= DBC_DBC_PATH_ROCE; - val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; - val <<= 32; - val |= index & DBC_DBC_INDEX_MASK; - writeq(val, db); -} - -static inline void bnxt_qplib_ring_nq_db_rearm(void __iomem *db, u32 raw_cons, - u32 max_elements, u32 xid, - bool gen_p5) -{ - u32 index = raw_cons & (max_elements - 1); - - if (gen_p5) - bnxt_qplib_ring_nq_db64(db, index, xid, true); - else - writel(NQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK), db); -} +struct bnxt_qplib_nq_db { + struct bnxt_qplib_reg_desc reg; + struct bnxt_qplib_db_info dbinfo; +}; -static inline void bnxt_qplib_ring_nq_db(void __iomem *db, u32 raw_cons, - u32 max_elements, u32 xid, - bool gen_p5) -{ - u32 index = raw_cons & (max_elements - 1); - - if (gen_p5) - bnxt_qplib_ring_nq_db64(db, index, xid, false); - else - writel(NQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK), db); -} +typedef int (*cqn_handler_t)(struct bnxt_qplib_nq *nq, + struct bnxt_qplib_cq *cq); +typedef int (*srqn_handler_t)(struct bnxt_qplib_nq *nq, + struct bnxt_qplib_srq *srq, u8 event); struct bnxt_qplib_nq { - struct pci_dev *pdev; - struct bnxt_qplib_res *res; - - int vector; - cpumask_t mask; - int budget; - bool requested; - struct tasklet_struct worker; - struct bnxt_qplib_hwq hwq; - - u16 bar_reg; - u32 bar_reg_off; - u16 ring_id; - void __iomem *bar_reg_iomem; - - int (*cqn_handler)(struct bnxt_qplib_nq *nq, - struct bnxt_qplib_cq *cq); - int (*srqn_handler)(struct bnxt_qplib_nq *nq, - struct bnxt_qplib_srq *srq, - u8 event); - struct workqueue_struct *cqn_wq; - char name[32]; + struct pci_dev *pdev; + struct bnxt_qplib_res *res; + char name[32]; + struct bnxt_qplib_hwq hwq; + struct bnxt_qplib_nq_db nq_db; + u16 ring_id; + int msix_vec; + cpumask_t mask; + struct tasklet_struct nq_tasklet; + bool requested; + int budget; + + cqn_handler_t cqn_handler; + srqn_handler_t srqn_handler; + struct workqueue_struct *cqn_wq; }; struct bnxt_qplib_nq_work { @@ -507,11 +475,8 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, int msix_vector, bool need_init); int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, int nq_idx, int msix_vector, int bar_reg_offset, - int (*cqn_handler)(struct bnxt_qplib_nq *nq, - struct bnxt_qplib_cq *cq), - int (*srqn_handler)(struct bnxt_qplib_nq *nq, - struct bnxt_qplib_srq *srq, - u8 event)); + cqn_handler_t cqn_handler, + srqn_handler_t srq_handler); int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq); int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, @@ -550,7 +515,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq); void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type); void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq); -int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq); +int bnxt_qplib_alloc_nq(struct bnxt_qplib_res *res, struct bnxt_qplib_nq *nq); void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp); void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp, unsigned long *flags); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 1291b12287a5..f01e864bb611 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -55,12 +55,14 @@ static void bnxt_qplib_service_creq(unsigned long data); /* Hardware communication channel */ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) { + struct bnxt_qplib_cmdq_ctx *cmdq; u16 cbit; int rc; + cmdq = &rcfw->cmdq; cbit = cookie % rcfw->cmdq_depth; - rc = wait_event_timeout(rcfw->waitq, - !test_bit(cbit, rcfw->cmdq_bitmap), + rc = wait_event_timeout(cmdq->waitq, + !test_bit(cbit, cmdq->cmdq_bitmap), msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS)); return rc ? 0 : -ETIMEDOUT; }; @@ -68,15 +70,17 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) { u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT; + struct bnxt_qplib_cmdq_ctx *cmdq; u16 cbit; + cmdq = &rcfw->cmdq; cbit = cookie % rcfw->cmdq_depth; - if (!test_bit(cbit, rcfw->cmdq_bitmap)) + if (!test_bit(cbit, cmdq->cmdq_bitmap)) goto done; do { mdelay(1); /* 1m sec */ bnxt_qplib_service_creq((unsigned long)rcfw); - } while (test_bit(cbit, rcfw->cmdq_bitmap) && --count); + } while (test_bit(cbit, cmdq->cmdq_bitmap) && --count); done: return count ? 0 : -ETIMEDOUT; }; @@ -84,56 +88,60 @@ done: static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, struct creq_base *resp, void *sb, u8 is_block) { - struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr; - struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq; + struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq; + struct bnxt_qplib_cmdqe *cmdqe, **hwq_ptr; + struct bnxt_qplib_hwq *hwq = &cmdq->hwq; + struct bnxt_qplib_crsqe *crsqe; u32 cmdq_depth = rcfw->cmdq_depth; - struct bnxt_qplib_crsq *crsqe; u32 sw_prod, cmdq_prod; + struct pci_dev *pdev; unsigned long flags; u32 size, opcode; u16 cookie, cbit; u8 *preq; + pdev = rcfw->pdev; + opcode = req->opcode; - if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) && + if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) && (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC && opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW && opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) { - dev_err(&rcfw->pdev->dev, + dev_err(&pdev->dev, "RCFW not initialized, reject opcode 0x%x\n", opcode); return -EINVAL; } - if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) && + if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) && opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) { - dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n"); + dev_err(&pdev->dev, "RCFW already initialized!\n"); return -EINVAL; } - if (test_bit(FIRMWARE_TIMED_OUT, &rcfw->flags)) + if (test_bit(FIRMWARE_TIMED_OUT, &cmdq->flags)) return -ETIMEDOUT; /* Cmdq are in 16-byte units, each request can consume 1 or more * cmdqe */ - spin_lock_irqsave(&cmdq->lock, flags); - if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) { - dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n"); - spin_unlock_irqrestore(&cmdq->lock, flags); + spin_lock_irqsave(&hwq->lock, flags); + if (req->cmd_size >= HWQ_FREE_SLOTS(hwq)) { + dev_err(&pdev->dev, "RCFW: CMDQ is full!\n"); + spin_unlock_irqrestore(&hwq->lock, flags); return -EAGAIN; } - cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE; + cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE; cbit = cookie % rcfw->cmdq_depth; if (is_block) cookie |= RCFW_CMD_IS_BLOCKING; - set_bit(cbit, rcfw->cmdq_bitmap); + set_bit(cbit, cmdq->cmdq_bitmap); req->cookie = cpu_to_le16(cookie); crsqe = &rcfw->crsqe_tbl[cbit]; if (crsqe->resp) { - spin_unlock_irqrestore(&cmdq->lock, flags); + spin_unlock_irqrestore(&hwq->lock, flags); return -EBUSY; } @@ -155,15 +163,15 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, BNXT_QPLIB_CMDQE_UNITS; } - cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr; + hwq_ptr = (struct bnxt_qplib_cmdqe **)hwq->pbl_ptr; preq = (u8 *)req; do { /* Locate the next cmdq slot */ - sw_prod = HWQ_CMP(cmdq->prod, cmdq); - cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)] + sw_prod = HWQ_CMP(hwq->prod, hwq); + cmdqe = &hwq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)] [get_cmdq_idx(sw_prod, cmdq_depth)]; if (!cmdqe) { - dev_err(&rcfw->pdev->dev, + dev_err(&pdev->dev, "RCFW request failed with no cmdqe!\n"); goto done; } @@ -172,31 +180,27 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, memcpy(cmdqe, preq, min_t(u32, size, sizeof(*cmdqe))); preq += min_t(u32, size, sizeof(*cmdqe)); size -= min_t(u32, size, sizeof(*cmdqe)); - cmdq->prod++; - rcfw->seq_num++; + hwq->prod++; } while (size > 0); + cmdq->seq_num++; - rcfw->seq_num++; - - cmdq_prod = cmdq->prod; - if (test_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags)) { + cmdq_prod = hwq->prod; + if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) { /* The very first doorbell write * is required to set this flag * which prompts the FW to reset * its internal pointers */ cmdq_prod |= BIT(FIRMWARE_FIRST_FLAG); - clear_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags); + clear_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags); } /* ring CMDQ DB */ wmb(); - writel(cmdq_prod, rcfw->cmdq_bar_reg_iomem + - rcfw->cmdq_bar_reg_prod_off); - writel(RCFW_CMDQ_TRIG_VAL, rcfw->cmdq_bar_reg_iomem + - rcfw->cmdq_bar_reg_trig_off); + writel(cmdq_prod, cmdq->cmdq_mbox.prod); + writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db); done: - spin_unlock_irqrestore(&cmdq->lock, flags); + spin_unlock_irqrestore(&hwq->lock, flags); /* Return the CREQ response pointer */ return 0; } @@ -236,7 +240,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, /* timed out */ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n", cookie, opcode, RCFW_CMD_WAIT_TIME_MS); - set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags); + set_bit(FIRMWARE_TIMED_OUT, &rcfw->cmdq.flags); return rc; } @@ -253,6 +257,8 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw, struct creq_func_event *func_event) { + int rc; + switch (func_event->event) { case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR: break; @@ -286,37 +292,41 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw, default: return -EINVAL; } - return 0; + + rc = rcfw->creq.aeq_handler(rcfw, (void *)func_event, NULL); + return rc; } static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, struct creq_qp_event *qp_event) { - struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq; struct creq_qp_error_notification *err_event; - struct bnxt_qplib_crsq *crsqe; - unsigned long flags; + struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq; + struct bnxt_qplib_crsqe *crsqe; struct bnxt_qplib_qp *qp; u16 cbit, blocked = 0; - u16 cookie; + struct pci_dev *pdev; + unsigned long flags; __le16 mcookie; + u16 cookie; + int rc = 0; u32 qp_id; + pdev = rcfw->pdev; switch (qp_event->event) { case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION: err_event = (struct creq_qp_error_notification *)qp_event; qp_id = le32_to_cpu(err_event->xid); qp = rcfw->qp_tbl[qp_id].qp_handle; - dev_dbg(&rcfw->pdev->dev, - "Received QP error notification\n"); - dev_dbg(&rcfw->pdev->dev, + dev_dbg(&pdev->dev, "Received QP error notification\n"); + dev_dbg(&pdev->dev, "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", qp_id, err_event->req_err_state_reason, err_event->res_err_state_reason); if (!qp) break; bnxt_qplib_mark_qp_error(qp); - rcfw->aeq_handler(rcfw, qp_event, qp); + rc = rcfw->creq.aeq_handler(rcfw, qp_event, qp); break; default: /* @@ -328,7 +338,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, * */ - spin_lock_irqsave_nested(&cmdq->lock, flags, + spin_lock_irqsave_nested(&hwq->lock, flags, SINGLE_DEPTH_NESTING); cookie = le16_to_cpu(qp_event->cookie); mcookie = qp_event->cookie; @@ -342,44 +352,44 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, crsqe->resp = NULL; } else { if (crsqe->resp && crsqe->resp->cookie) - dev_err(&rcfw->pdev->dev, + dev_err(&pdev->dev, "CMD %s cookie sent=%#x, recd=%#x\n", crsqe->resp ? "mismatch" : "collision", crsqe->resp ? crsqe->resp->cookie : 0, mcookie); } - if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap)) - dev_warn(&rcfw->pdev->dev, + if (!test_and_clear_bit(cbit, rcfw->cmdq.cmdq_bitmap)) + dev_warn(&pdev->dev, "CMD bit %d was not requested\n", cbit); - cmdq->cons += crsqe->req_size; + hwq->cons += crsqe->req_size; crsqe->req_size = 0; if (!blocked) - wake_up(&rcfw->waitq); - spin_unlock_irqrestore(&cmdq->lock, flags); + wake_up(&rcfw->cmdq.waitq); + spin_unlock_irqrestore(&hwq->lock, flags); } - return 0; + return rc; } /* SP - CREQ Completion handlers */ static void bnxt_qplib_service_creq(unsigned long data) { struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data; - bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx); - struct bnxt_qplib_hwq *creq = &rcfw->creq; + struct bnxt_qplib_creq_ctx *creq = &rcfw->creq; u32 type, budget = CREQ_ENTRY_POLL_BUDGET; - struct creq_base *creqe, **creq_ptr; + struct bnxt_qplib_hwq *hwq = &creq->hwq; + struct creq_base *creqe, **hwq_ptr; u32 sw_cons, raw_cons; unsigned long flags; /* Service the CREQ until budget is over */ - spin_lock_irqsave(&creq->lock, flags); - raw_cons = creq->cons; + spin_lock_irqsave(&hwq->lock, flags); + raw_cons = hwq->cons; while (budget > 0) { - sw_cons = HWQ_CMP(raw_cons, creq); - creq_ptr = (struct creq_base **)creq->pbl_ptr; - creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]; - if (!CREQ_CMP_VALID(creqe, raw_cons, creq->max_elements)) + sw_cons = HWQ_CMP(raw_cons, hwq); + hwq_ptr = (struct creq_base **)hwq->pbl_ptr; + creqe = &hwq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]; + if (!CREQ_CMP_VALID(creqe, raw_cons, hwq->max_elements)) break; /* The valid test of the entry must be done first before * reading any further. @@ -391,12 +401,12 @@ static void bnxt_qplib_service_creq(unsigned long data) case CREQ_BASE_TYPE_QP_EVENT: bnxt_qplib_process_qp_event (rcfw, (struct creq_qp_event *)creqe); - rcfw->creq_qp_event_processed++; + creq->stats.creq_qp_event_processed++; break; case CREQ_BASE_TYPE_FUNC_EVENT: if (!bnxt_qplib_process_func_event (rcfw, (struct creq_func_event *)creqe)) - rcfw->creq_func_event_processed++; + creq->stats.creq_func_event_processed++; else dev_warn(&rcfw->pdev->dev, "aeqe:%#x Not handled\n", type); @@ -412,28 +422,30 @@ static void bnxt_qplib_service_creq(unsigned long data) budget--; } - if (creq->cons != raw_cons) { - creq->cons = raw_cons; - bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem, - raw_cons, creq->max_elements, - rcfw->creq_ring_id, gen_p5); + if (hwq->cons != raw_cons) { + hwq->cons = raw_cons; + bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, + rcfw->res->cctx, true); } - spin_unlock_irqrestore(&creq->lock, flags); + spin_unlock_irqrestore(&hwq->lock, flags); } static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance) { struct bnxt_qplib_rcfw *rcfw = dev_instance; - struct bnxt_qplib_hwq *creq = &rcfw->creq; + struct bnxt_qplib_creq_ctx *creq; struct creq_base **creq_ptr; + struct bnxt_qplib_hwq *hwq; u32 sw_cons; + creq = &rcfw->creq; + hwq = &creq->hwq; /* Prefetch the CREQ element */ - sw_cons = HWQ_CMP(creq->cons, creq); - creq_ptr = (struct creq_base **)rcfw->creq.pbl_ptr; + sw_cons = HWQ_CMP(hwq->cons, hwq); + creq_ptr = (struct creq_base **)creq->hwq.pbl_ptr; prefetch(&creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]); - tasklet_schedule(&rcfw->worker); + tasklet_schedule(&creq->creq_tasklet); return IRQ_HANDLED; } @@ -452,7 +464,7 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw) if (rc) return rc; - clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags); + clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags); return 0; } @@ -520,9 +532,10 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, level = ctx->tim_tbl.level; req.tim_pg_size_tim_lvl = (level << CMDQ_INITIALIZE_FW_TIM_LVL_SFT) | __get_pbl_pg_idx(&ctx->tim_tbl.pbl[level]); - level = ctx->tqm_pde_level; - req.tqm_pg_size_tqm_lvl = (level << CMDQ_INITIALIZE_FW_TQM_LVL_SFT) | - __get_pbl_pg_idx(&ctx->tqm_pde.pbl[level]); + level = ctx->tqm_ctx.pde.level; + req.tqm_pg_size_tqm_lvl = + (level << CMDQ_INITIALIZE_FW_TQM_LVL_SFT) | + __get_pbl_pg_idx(&ctx->tqm_ctx.pde.pbl[level]); req.qpc_page_dir = cpu_to_le64(ctx->qpc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]); @@ -535,7 +548,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, req.tim_page_dir = cpu_to_le64(ctx->tim_tbl.pbl[PBL_LVL_0].pg_map_arr[0]); req.tqm_page_dir = - cpu_to_le64(ctx->tqm_pde.pbl[PBL_LVL_0].pg_map_arr[0]); + cpu_to_le64(ctx->tqm_ctx.pde.pbl[PBL_LVL_0].pg_map_arr[0]); req.number_of_qp = cpu_to_le32(ctx->qpc_tbl.max_elements); req.number_of_mrw = cpu_to_le32(ctx->mrw_tbl.max_elements); @@ -555,33 +568,46 @@ skip_ctx_setup: NULL, 0); if (rc) return rc; - set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags); + set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags); return 0; } void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) { + kfree(rcfw->cmdq.cmdq_bitmap); kfree(rcfw->qp_tbl); kfree(rcfw->crsqe_tbl); - bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq); - bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq); + bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq); + bnxt_qplib_free_hwq(rcfw->res, &rcfw->creq.hwq); rcfw->pdev = NULL; } -int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, +int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx, int qp_tbl_sz) { - u8 hwq_type; - - rcfw->pdev = pdev; - rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT; - hwq_type = bnxt_qplib_get_hwq_type(rcfw->res); - if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, - &rcfw->creq.max_elements, - BNXT_QPLIB_CREQE_UNITS, - 0, PAGE_SIZE, hwq_type)) { + struct bnxt_qplib_hwq_attr hwq_attr = {}; + struct bnxt_qplib_sg_info sginfo = {}; + struct bnxt_qplib_cmdq_ctx *cmdq; + struct bnxt_qplib_creq_ctx *creq; + u32 bmap_size = 0; + + rcfw->pdev = res->pdev; + cmdq = &rcfw->cmdq; + creq = &rcfw->creq; + rcfw->res = res; + + sginfo.pgsize = PAGE_SIZE; + sginfo.pgshft = PAGE_SHIFT; + + hwq_attr.sginfo = &sginfo; + hwq_attr.res = rcfw->res; + hwq_attr.depth = BNXT_QPLIB_CREQE_MAX_CNT; + hwq_attr.stride = BNXT_QPLIB_CREQE_UNITS; + hwq_attr.type = bnxt_qplib_get_hwq_type(res); + + if (bnxt_qplib_alloc_init_hwq(&creq->hwq, &hwq_attr)) { dev_err(&rcfw->pdev->dev, "HW channel CREQ allocation failed\n"); goto fail; @@ -591,23 +617,28 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, else rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192; - rcfw->cmdq.max_elements = rcfw->cmdq_depth; - if (bnxt_qplib_alloc_init_hwq - (rcfw->pdev, &rcfw->cmdq, NULL, - &rcfw->cmdq.max_elements, - BNXT_QPLIB_CMDQE_UNITS, 0, - bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth), - HWQ_TYPE_CTX)) { + sginfo.pgsize = bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth); + hwq_attr.depth = rcfw->cmdq_depth; + hwq_attr.stride = BNXT_QPLIB_CMDQE_UNITS; + hwq_attr.type = HWQ_TYPE_CTX; + if (bnxt_qplib_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) { dev_err(&rcfw->pdev->dev, "HW channel CMDQ allocation failed\n"); goto fail; } - rcfw->crsqe_tbl = kcalloc(rcfw->cmdq.max_elements, + rcfw->crsqe_tbl = kcalloc(cmdq->hwq.max_elements, sizeof(*rcfw->crsqe_tbl), GFP_KERNEL); if (!rcfw->crsqe_tbl) goto fail; + bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth) * sizeof(unsigned long); + cmdq->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL); + if (!cmdq->cmdq_bitmap) + goto fail; + + cmdq->bmap_size = bmap_size; + rcfw->qp_tbl_size = qp_tbl_sz; rcfw->qp_tbl = kcalloc(qp_tbl_sz, sizeof(struct bnxt_qplib_qp_node), GFP_KERNEL); @@ -623,137 +654,199 @@ fail: void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill) { - bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx); + struct bnxt_qplib_creq_ctx *creq; - tasklet_disable(&rcfw->worker); + creq = &rcfw->creq; + tasklet_disable(&creq->creq_tasklet); /* Mask h/w interrupts */ - bnxt_qplib_ring_creq_db(rcfw->creq_bar_reg_iomem, rcfw->creq.cons, - rcfw->creq.max_elements, rcfw->creq_ring_id, - gen_p5); + bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false); /* Sync with last running IRQ-handler */ - synchronize_irq(rcfw->vector); + synchronize_irq(creq->msix_vec); if (kill) - tasklet_kill(&rcfw->worker); + tasklet_kill(&creq->creq_tasklet); - if (rcfw->requested) { - free_irq(rcfw->vector, rcfw); - rcfw->requested = false; + if (creq->requested) { + free_irq(creq->msix_vec, rcfw); + creq->requested = false; } } void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) { + struct bnxt_qplib_creq_ctx *creq; + struct bnxt_qplib_cmdq_ctx *cmdq; unsigned long indx; + creq = &rcfw->creq; + cmdq = &rcfw->cmdq; + /* Make sure the HW channel is stopped! */ bnxt_qplib_rcfw_stop_irq(rcfw, true); - iounmap(rcfw->cmdq_bar_reg_iomem); - iounmap(rcfw->creq_bar_reg_iomem); + iounmap(cmdq->cmdq_mbox.reg.bar_reg); + iounmap(creq->creq_db.reg.bar_reg); - indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size); - if (indx != rcfw->bmap_size) + indx = find_first_bit(cmdq->cmdq_bitmap, cmdq->bmap_size); + if (indx != cmdq->bmap_size) dev_err(&rcfw->pdev->dev, "disabling RCFW with pending cmd-bit %lx\n", indx); - kfree(rcfw->cmdq_bitmap); - rcfw->bmap_size = 0; - rcfw->cmdq_bar_reg_iomem = NULL; - rcfw->creq_bar_reg_iomem = NULL; - rcfw->aeq_handler = NULL; - rcfw->vector = 0; + cmdq->cmdq_mbox.reg.bar_reg = NULL; + creq->creq_db.reg.bar_reg = NULL; + creq->aeq_handler = NULL; + creq->msix_vec = 0; } int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, bool need_init) { - bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx); + struct bnxt_qplib_creq_ctx *creq; int rc; - if (rcfw->requested) + creq = &rcfw->creq; + + if (creq->requested) return -EFAULT; - rcfw->vector = msix_vector; + creq->msix_vec = msix_vector; if (need_init) - tasklet_init(&rcfw->worker, + tasklet_init(&creq->creq_tasklet, bnxt_qplib_service_creq, (unsigned long)rcfw); else - tasklet_enable(&rcfw->worker); - rc = request_irq(rcfw->vector, bnxt_qplib_creq_irq, 0, + tasklet_enable(&creq->creq_tasklet); + rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0, "bnxt_qplib_creq", rcfw); if (rc) return rc; - rcfw->requested = true; - bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem, - rcfw->creq.cons, rcfw->creq.max_elements, - rcfw->creq_ring_id, gen_p5); + creq->requested = true; + + bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, true); return 0; } -int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, - struct bnxt_qplib_rcfw *rcfw, - int msix_vector, - int cp_bar_reg_off, int virt_fn, - int (*aeq_handler)(struct bnxt_qplib_rcfw *, - void *, void *)) +static int bnxt_qplib_map_cmdq_mbox(struct bnxt_qplib_rcfw *rcfw, bool is_vf) { - resource_size_t res_base; - struct cmdq_init init; - u16 bmap_size; - int rc; + struct bnxt_qplib_cmdq_mbox *mbox; + resource_size_t bar_reg; + struct pci_dev *pdev; + u16 prod_offt; + int rc = 0; - /* General */ - rcfw->seq_num = 0; - set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags); - bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth) * sizeof(unsigned long); - rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL); - if (!rcfw->cmdq_bitmap) - return -ENOMEM; - rcfw->bmap_size = bmap_size; + pdev = rcfw->pdev; + mbox = &rcfw->cmdq.cmdq_mbox; - /* CMDQ */ - rcfw->cmdq_bar_reg = RCFW_COMM_PCI_BAR_REGION; - res_base = pci_resource_start(pdev, rcfw->cmdq_bar_reg); - if (!res_base) + mbox->reg.bar_id = RCFW_COMM_PCI_BAR_REGION; + mbox->reg.len = RCFW_COMM_SIZE; + mbox->reg.bar_base = pci_resource_start(pdev, mbox->reg.bar_id); + if (!mbox->reg.bar_base) { + dev_err(&pdev->dev, + "QPLIB: CMDQ BAR region %d resc start is 0!\n", + mbox->reg.bar_id); return -ENOMEM; + } - rcfw->cmdq_bar_reg_iomem = ioremap(res_base + - RCFW_COMM_BASE_OFFSET, - RCFW_COMM_SIZE); - if (!rcfw->cmdq_bar_reg_iomem) { - dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n", - rcfw->cmdq_bar_reg); + bar_reg = mbox->reg.bar_base + RCFW_COMM_BASE_OFFSET; + mbox->reg.len = RCFW_COMM_SIZE; + mbox->reg.bar_reg = ioremap(bar_reg, mbox->reg.len); + if (!mbox->reg.bar_reg) { + dev_err(&pdev->dev, + "QPLIB: CMDQ BAR region %d mapping failed\n", + mbox->reg.bar_id); return -ENOMEM; } - rcfw->cmdq_bar_reg_prod_off = virt_fn ? RCFW_VF_COMM_PROD_OFFSET : - RCFW_PF_COMM_PROD_OFFSET; + prod_offt = is_vf ? RCFW_VF_COMM_PROD_OFFSET : + RCFW_PF_COMM_PROD_OFFSET; + mbox->prod = (void __iomem *)(mbox->reg.bar_reg + prod_offt); + mbox->db = (void __iomem *)(mbox->reg.bar_reg + RCFW_COMM_TRIG_OFFSET); + return rc; +} - rcfw->cmdq_bar_reg_trig_off = RCFW_COMM_TRIG_OFFSET; +static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt) +{ + struct bnxt_qplib_creq_db *creq_db; + resource_size_t bar_reg; + struct pci_dev *pdev; - /* CREQ */ - rcfw->creq_bar_reg = RCFW_COMM_CONS_PCI_BAR_REGION; - res_base = pci_resource_start(pdev, rcfw->creq_bar_reg); - if (!res_base) - dev_err(&rcfw->pdev->dev, - "CREQ BAR region %d resc start is 0!\n", - rcfw->creq_bar_reg); + pdev = rcfw->pdev; + creq_db = &rcfw->creq.creq_db; + + creq_db->reg.bar_id = RCFW_COMM_CONS_PCI_BAR_REGION; + creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id); + if (!creq_db->reg.bar_id) + dev_err(&pdev->dev, + "QPLIB: CREQ BAR region %d resc start is 0!", + creq_db->reg.bar_id); + + bar_reg = creq_db->reg.bar_base + reg_offt; /* Unconditionally map 8 bytes to support 57500 series */ - rcfw->creq_bar_reg_iomem = ioremap(res_base + cp_bar_reg_off, - 8); - if (!rcfw->creq_bar_reg_iomem) { - dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n", - rcfw->creq_bar_reg); - iounmap(rcfw->cmdq_bar_reg_iomem); - rcfw->cmdq_bar_reg_iomem = NULL; + creq_db->reg.len = 8; + creq_db->reg.bar_reg = ioremap(bar_reg, creq_db->reg.len); + if (!creq_db->reg.bar_reg) { + dev_err(&pdev->dev, + "QPLIB: CREQ BAR region %d mapping failed", + creq_db->reg.bar_id); return -ENOMEM; } - rcfw->creq_qp_event_processed = 0; - rcfw->creq_func_event_processed = 0; + creq_db->dbinfo.db = creq_db->reg.bar_reg; + creq_db->dbinfo.hwq = &rcfw->creq.hwq; + creq_db->dbinfo.xid = rcfw->creq.ring_id; + return 0; +} - if (aeq_handler) - rcfw->aeq_handler = aeq_handler; - init_waitqueue_head(&rcfw->waitq); +static void bnxt_qplib_start_rcfw(struct bnxt_qplib_rcfw *rcfw) +{ + struct bnxt_qplib_cmdq_ctx *cmdq; + struct bnxt_qplib_creq_ctx *creq; + struct bnxt_qplib_cmdq_mbox *mbox; + struct cmdq_init init = {0}; + + cmdq = &rcfw->cmdq; + creq = &rcfw->creq; + mbox = &cmdq->cmdq_mbox; + + init.cmdq_pbl = cpu_to_le64(cmdq->hwq.pbl[PBL_LVL_0].pg_map_arr[0]); + init.cmdq_size_cmdq_lvl = + cpu_to_le16(((rcfw->cmdq_depth << + CMDQ_INIT_CMDQ_SIZE_SFT) & + CMDQ_INIT_CMDQ_SIZE_MASK) | + ((cmdq->hwq.level << + CMDQ_INIT_CMDQ_LVL_SFT) & + CMDQ_INIT_CMDQ_LVL_MASK)); + init.creq_ring_id = cpu_to_le16(creq->ring_id); + /* Write to the Bono mailbox register */ + __iowrite32_copy(mbox->reg.bar_reg, &init, sizeof(init) / 4); +} + +int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw, + int msix_vector, + int cp_bar_reg_off, int virt_fn, + aeq_handler_t aeq_handler) +{ + struct bnxt_qplib_cmdq_ctx *cmdq; + struct bnxt_qplib_creq_ctx *creq; + int rc; + + cmdq = &rcfw->cmdq; + creq = &rcfw->creq; + + /* Clear to defaults */ + + cmdq->seq_num = 0; + set_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags); + init_waitqueue_head(&cmdq->waitq); + + creq->stats.creq_qp_event_processed = 0; + creq->stats.creq_func_event_processed = 0; + creq->aeq_handler = aeq_handler; + + rc = bnxt_qplib_map_cmdq_mbox(rcfw, virt_fn); + if (rc) + return rc; + + rc = bnxt_qplib_map_creq_db(rcfw, cp_bar_reg_off); + if (rc) + return rc; rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true); if (rc) { @@ -763,16 +856,8 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, return rc; } - init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]); - init.cmdq_size_cmdq_lvl = cpu_to_le16( - ((rcfw->cmdq_depth << CMDQ_INIT_CMDQ_SIZE_SFT) & - CMDQ_INIT_CMDQ_SIZE_MASK) | - ((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) & - CMDQ_INIT_CMDQ_LVL_MASK)); - init.creq_ring_id = cpu_to_le16(rcfw->creq_ring_id); + bnxt_qplib_start_rcfw(rcfw); - /* Write to the Bono mailbox register */ - __iowrite32_copy(rcfw->cmdq_bar_reg_iomem, &init, sizeof(init) / 4); return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index dfeadc192e17..411fce3493b6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -206,8 +206,9 @@ static inline void bnxt_qplib_ring_creq_db(void __iomem *db, u32 raw_cons, #define CREQ_ENTRY_POLL_BUDGET 0x100 /* HWQ */ +typedef int (*aeq_handler_t)(struct bnxt_qplib_rcfw *, void *, void *); -struct bnxt_qplib_crsq { +struct bnxt_qplib_crsqe { struct creq_qp_event *resp; u32 req_size; }; @@ -225,41 +226,53 @@ struct bnxt_qplib_qp_node { #define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF +#define FIRMWARE_INITIALIZED_FLAG (0) +#define FIRMWARE_FIRST_FLAG (31) +#define FIRMWARE_TIMED_OUT (3) +struct bnxt_qplib_cmdq_mbox { + struct bnxt_qplib_reg_desc reg; + void __iomem *prod; + void __iomem *db; +}; + +struct bnxt_qplib_cmdq_ctx { + struct bnxt_qplib_hwq hwq; + struct bnxt_qplib_cmdq_mbox cmdq_mbox; + wait_queue_head_t waitq; + unsigned long flags; + unsigned long *cmdq_bitmap; + u32 bmap_size; + u32 seq_num; +}; + +struct bnxt_qplib_creq_db { + struct bnxt_qplib_reg_desc reg; + struct bnxt_qplib_db_info dbinfo; +}; + +struct bnxt_qplib_creq_stat { + u64 creq_qp_event_processed; + u64 creq_func_event_processed; +}; + +struct bnxt_qplib_creq_ctx { + struct bnxt_qplib_hwq hwq; + struct bnxt_qplib_creq_db creq_db; + struct bnxt_qplib_creq_stat stats; + struct tasklet_struct creq_tasklet; + aeq_handler_t aeq_handler; + u16 ring_id; + int msix_vec; + bool requested; /*irq handler installed */ +}; + /* RCFW Communication Channels */ struct bnxt_qplib_rcfw { struct pci_dev *pdev; struct bnxt_qplib_res *res; - int vector; - struct tasklet_struct worker; - bool requested; - unsigned long *cmdq_bitmap; - u32 bmap_size; - unsigned long flags; -#define FIRMWARE_INITIALIZED_FLAG 0 -#define FIRMWARE_FIRST_FLAG 31 -#define FIRMWARE_TIMED_OUT 3 - wait_queue_head_t waitq; - int (*aeq_handler)(struct bnxt_qplib_rcfw *, - void *, void *); - u32 seq_num; - - /* Bar region info */ - void __iomem *cmdq_bar_reg_iomem; - u16 cmdq_bar_reg; - u16 cmdq_bar_reg_prod_off; - u16 cmdq_bar_reg_trig_off; - u16 creq_ring_id; - u16 creq_bar_reg; - void __iomem *creq_bar_reg_iomem; - - /* Cmd-Resp and Async Event notification queue */ - struct bnxt_qplib_hwq creq; - u64 creq_qp_event_processed; - u64 creq_func_event_processed; - - /* Actual Cmd and Resp Queues */ - struct bnxt_qplib_hwq cmdq; - struct bnxt_qplib_crsq *crsqe_tbl; + struct bnxt_qplib_cmdq_ctx cmdq; + struct bnxt_qplib_creq_ctx creq; + struct bnxt_qplib_crsqe *crsqe_tbl; int qp_tbl_size; struct bnxt_qplib_qp_node *qp_tbl; u64 oos_prev; @@ -268,7 +281,7 @@ struct bnxt_qplib_rcfw { }; void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); -int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, +int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx, int qp_tbl_sz); @@ -276,12 +289,10 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill); void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, bool need_init); -int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, - struct bnxt_qplib_rcfw *rcfw, +int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw, int msix_vector, int cp_bar_reg_off, int virt_fn, - int (*aeq_handler)(struct bnxt_qplib_rcfw *, - void *aeqe, void *obj)); + aeq_handler_t aeq_handler); struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf( struct bnxt_qplib_rcfw *rcfw, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 60ea1b924b67..cab1adf1fed9 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -44,6 +44,7 @@ #include <linux/inetdevice.h> #include <linux/dma-mapping.h> #include <linux/if_vlan.h> +#include <linux/vmalloc.h> #include "roce_hsi.h" #include "qplib_res.h" #include "qplib_sp.h" @@ -55,9 +56,10 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, struct bnxt_qplib_stats *stats); /* PBL */ -static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, +static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl, bool is_umem) { + struct pci_dev *pdev = res->pdev; int i; if (!is_umem) { @@ -74,35 +76,56 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, pbl->pg_arr[i] = NULL; } } - kfree(pbl->pg_arr); + vfree(pbl->pg_arr); pbl->pg_arr = NULL; - kfree(pbl->pg_map_arr); + vfree(pbl->pg_map_arr); pbl->pg_map_arr = NULL; pbl->pg_count = 0; pbl->pg_size = 0; } -static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, - struct scatterlist *sghead, u32 pages, - u32 nmaps, u32 pg_size) +static void bnxt_qplib_fill_user_dma_pages(struct bnxt_qplib_pbl *pbl, + struct bnxt_qplib_sg_info *sginfo) { + struct scatterlist *sghead = sginfo->sghead; struct sg_dma_page_iter sg_iter; + int i = 0; + + for_each_sg_dma_page(sghead, &sg_iter, sginfo->nmap, 0) { + pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter); + pbl->pg_arr[i] = NULL; + pbl->pg_count++; + i++; + } +} + +static int __alloc_pbl(struct bnxt_qplib_res *res, + struct bnxt_qplib_pbl *pbl, + struct bnxt_qplib_sg_info *sginfo) +{ + struct pci_dev *pdev = res->pdev; + struct scatterlist *sghead; bool is_umem = false; + u32 pages; int i; + if (sginfo->nopte) + return 0; + pages = sginfo->npages; + sghead = sginfo->sghead; /* page ptr arrays */ - pbl->pg_arr = kcalloc(pages, sizeof(void *), GFP_KERNEL); + pbl->pg_arr = vmalloc(pages * sizeof(void *)); if (!pbl->pg_arr) return -ENOMEM; - pbl->pg_map_arr = kcalloc(pages, sizeof(dma_addr_t), GFP_KERNEL); + pbl->pg_map_arr = vmalloc(pages * sizeof(dma_addr_t)); if (!pbl->pg_map_arr) { - kfree(pbl->pg_arr); + vfree(pbl->pg_arr); pbl->pg_arr = NULL; return -ENOMEM; } pbl->pg_count = 0; - pbl->pg_size = pg_size; + pbl->pg_size = sginfo->pgsize; if (!sghead) { for (i = 0; i < pages; i++) { @@ -115,25 +138,19 @@ static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, pbl->pg_count++; } } else { - i = 0; is_umem = true; - for_each_sg_dma_page(sghead, &sg_iter, nmaps, 0) { - pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter); - pbl->pg_arr[i] = NULL; - pbl->pg_count++; - i++; - } + bnxt_qplib_fill_user_dma_pages(pbl, sginfo); } return 0; - fail: - __free_pbl(pdev, pbl, is_umem); + __free_pbl(res, pbl, is_umem); return -ENOMEM; } /* HWQ */ -void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq) +void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res, + struct bnxt_qplib_hwq *hwq) { int i; @@ -144,9 +161,9 @@ void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq) for (i = 0; i < hwq->level + 1; i++) { if (i == hwq->level) - __free_pbl(pdev, &hwq->pbl[i], hwq->is_user); + __free_pbl(res, &hwq->pbl[i], hwq->is_user); else - __free_pbl(pdev, &hwq->pbl[i], false); + __free_pbl(res, &hwq->pbl[i], false); } hwq->level = PBL_LVL_MAX; @@ -158,79 +175,113 @@ void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq) } /* All HWQs are power of 2 in size */ -int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, - struct bnxt_qplib_sg_info *sg_info, - u32 *elements, u32 element_size, u32 aux, - u32 pg_size, enum bnxt_qplib_hwq_type hwq_type) + +int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, + struct bnxt_qplib_hwq_attr *hwq_attr) { - u32 pages, maps, slots, size, aux_pages = 0, aux_size = 0; + u32 npages, aux_slots, pg_size, aux_pages = 0, aux_size = 0; + struct bnxt_qplib_sg_info sginfo = {}; + u32 depth, stride, npbl, npde; dma_addr_t *src_phys_ptr, **dst_virt_ptr; struct scatterlist *sghead = NULL; - int i, rc; - + struct bnxt_qplib_res *res; + struct pci_dev *pdev; + int i, rc, lvl; + + res = hwq_attr->res; + pdev = res->pdev; + sghead = hwq_attr->sginfo->sghead; + pg_size = hwq_attr->sginfo->pgsize; hwq->level = PBL_LVL_MAX; - slots = roundup_pow_of_two(*elements); - if (aux) { - aux_size = roundup_pow_of_two(aux); - aux_pages = (slots * aux_size) / pg_size; - if ((slots * aux_size) % pg_size) + depth = roundup_pow_of_two(hwq_attr->depth); + stride = roundup_pow_of_two(hwq_attr->stride); + if (hwq_attr->aux_depth) { + aux_slots = hwq_attr->aux_depth; + aux_size = roundup_pow_of_two(hwq_attr->aux_stride); + aux_pages = (aux_slots * aux_size) / pg_size; + if ((aux_slots * aux_size) % pg_size) aux_pages++; } - size = roundup_pow_of_two(element_size); - - if (sg_info) - sghead = sg_info->sglist; if (!sghead) { hwq->is_user = false; - pages = (slots * size) / pg_size + aux_pages; - if ((slots * size) % pg_size) - pages++; - if (!pages) + npages = (depth * stride) / pg_size + aux_pages; + if ((depth * stride) % pg_size) + npages++; + if (!npages) return -EINVAL; - maps = 0; + hwq_attr->sginfo->npages = npages; } else { hwq->is_user = true; - pages = sg_info->npages; - maps = sg_info->nmap; + npages = hwq_attr->sginfo->npages; + npages = (npages * PAGE_SIZE) / + BIT_ULL(hwq_attr->sginfo->pgshft); + if ((hwq_attr->sginfo->npages * PAGE_SIZE) % + BIT_ULL(hwq_attr->sginfo->pgshft)) + if (!npages) + npages++; } - /* Alloc the 1st memory block; can be a PDL/PTL/PBL */ - if (sghead && (pages == MAX_PBL_LVL_0_PGS)) - rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], sghead, - pages, maps, pg_size); - else - rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL, - 1, 0, pg_size); - if (rc) - goto fail; - - hwq->level = PBL_LVL_0; + if (npages == MAX_PBL_LVL_0_PGS) { + /* This request is Level 0, map PTE */ + rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], hwq_attr->sginfo); + if (rc) + goto fail; + hwq->level = PBL_LVL_0; + } - if (pages > MAX_PBL_LVL_0_PGS) { - if (pages > MAX_PBL_LVL_1_PGS) { + if (npages > MAX_PBL_LVL_0_PGS) { + if (npages > MAX_PBL_LVL_1_PGS) { + u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ? + 0 : PTU_PTE_VALID; /* 2 levels of indirection */ - rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], NULL, - MAX_PBL_LVL_1_PGS_FOR_LVL_2, - 0, pg_size); + npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT; + if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT)) + npbl++; + npde = npbl >> MAX_PDL_LVL_SHIFT; + if (npbl % BIT(MAX_PDL_LVL_SHIFT)) + npde++; + /* Alloc PDE pages */ + sginfo.pgsize = npde * pg_size; + sginfo.npages = 1; + rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo); + + /* Alloc PBL pages */ + sginfo.npages = npbl; + sginfo.pgsize = PAGE_SIZE; + rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_1], &sginfo); if (rc) goto fail; - /* Fill in lvl0 PBL */ + /* Fill PDL with PBL page pointers */ dst_virt_ptr = (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr; src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr; - for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) - dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] = - src_phys_ptr[i] | PTU_PDE_VALID; - hwq->level = PBL_LVL_1; - - rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_2], sghead, - pages, maps, pg_size); + if (hwq_attr->type == HWQ_TYPE_MR) { + /* For MR it is expected that we supply only 1 contigous + * page i.e only 1 entry in the PDL that will contain + * all the PBLs for the user supplied memory region + */ + for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; + i++) + dst_virt_ptr[0][i] = src_phys_ptr[i] | + flag; + } else { + for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; + i++) + dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] = + src_phys_ptr[i] | + PTU_PDE_VALID; + } + /* Alloc or init PTEs */ + rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_2], + hwq_attr->sginfo); if (rc) goto fail; - - /* Fill in lvl1 PBL */ + hwq->level = PBL_LVL_2; + if (hwq_attr->sginfo->nopte) + goto done; + /* Fill PBLs with PTE pointers */ dst_virt_ptr = (dma_addr_t **)hwq->pbl[PBL_LVL_1].pg_arr; src_phys_ptr = hwq->pbl[PBL_LVL_2].pg_map_arr; @@ -238,7 +289,7 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] = src_phys_ptr[i] | PTU_PTE_VALID; } - if (hwq_type == HWQ_TYPE_QUEUE) { + if (hwq_attr->type == HWQ_TYPE_QUEUE) { /* Find the last pg of the size */ i = hwq->pbl[PBL_LVL_2].pg_count; dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |= @@ -248,25 +299,36 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, [PTR_IDX(i - 2)] |= PTU_PTE_NEXT_TO_LAST; } - hwq->level = PBL_LVL_2; - } else { - u32 flag = hwq_type == HWQ_TYPE_L2_CMPL ? 0 : - PTU_PTE_VALID; + } else { /* pages < 512 npbl = 1, npde = 0 */ + u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ? + 0 : PTU_PTE_VALID; /* 1 level of indirection */ - rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], sghead, - pages, maps, pg_size); + npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT; + if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT)) + npbl++; + sginfo.npages = npbl; + sginfo.pgsize = PAGE_SIZE; + /* Alloc PBL page */ + rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo); if (rc) goto fail; - /* Fill in lvl0 PBL */ + /* Alloc or init PTEs */ + rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_1], + hwq_attr->sginfo); + if (rc) + goto fail; + hwq->level = PBL_LVL_1; + if (hwq_attr->sginfo->nopte) + goto done; + /* Fill PBL with PTE pointers */ dst_virt_ptr = (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr; src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr; - for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) { + for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] = src_phys_ptr[i] | flag; - } - if (hwq_type == HWQ_TYPE_QUEUE) { + if (hwq_attr->type == HWQ_TYPE_QUEUE) { /* Find the last pg of the size */ i = hwq->pbl[PBL_LVL_1].pg_count; dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |= @@ -276,42 +338,141 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, [PTR_IDX(i - 2)] |= PTU_PTE_NEXT_TO_LAST; } - hwq->level = PBL_LVL_1; } } - hwq->pdev = pdev; - spin_lock_init(&hwq->lock); +done: hwq->prod = 0; hwq->cons = 0; - *elements = hwq->max_elements = slots; - hwq->element_size = size; - + hwq->pdev = pdev; + hwq->depth = hwq_attr->depth; + hwq->max_elements = depth; + hwq->element_size = stride; /* For direct access to the elements */ - hwq->pbl_ptr = hwq->pbl[hwq->level].pg_arr; - hwq->pbl_dma_ptr = hwq->pbl[hwq->level].pg_map_arr; + lvl = hwq->level; + if (hwq_attr->sginfo->nopte && hwq->level) + lvl = hwq->level - 1; + hwq->pbl_ptr = hwq->pbl[lvl].pg_arr; + hwq->pbl_dma_ptr = hwq->pbl[lvl].pg_map_arr; + spin_lock_init(&hwq->lock); return 0; - fail: - bnxt_qplib_free_hwq(pdev, hwq); + bnxt_qplib_free_hwq(res, hwq); return -ENOMEM; } /* Context Tables */ -void bnxt_qplib_free_ctx(struct pci_dev *pdev, +void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx) { int i; - bnxt_qplib_free_hwq(pdev, &ctx->qpc_tbl); - bnxt_qplib_free_hwq(pdev, &ctx->mrw_tbl); - bnxt_qplib_free_hwq(pdev, &ctx->srqc_tbl); - bnxt_qplib_free_hwq(pdev, &ctx->cq_tbl); - bnxt_qplib_free_hwq(pdev, &ctx->tim_tbl); + bnxt_qplib_free_hwq(res, &ctx->qpc_tbl); + bnxt_qplib_free_hwq(res, &ctx->mrw_tbl); + bnxt_qplib_free_hwq(res, &ctx->srqc_tbl); + bnxt_qplib_free_hwq(res, &ctx->cq_tbl); + bnxt_qplib_free_hwq(res, &ctx->tim_tbl); for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) - bnxt_qplib_free_hwq(pdev, &ctx->tqm_tbl[i]); - bnxt_qplib_free_hwq(pdev, &ctx->tqm_pde); - bnxt_qplib_free_stats_ctx(pdev, &ctx->stats); + bnxt_qplib_free_hwq(res, &ctx->tqm_ctx.qtbl[i]); + /* restore original pde level before destroy */ + ctx->tqm_ctx.pde.level = ctx->tqm_ctx.pde_level; + bnxt_qplib_free_hwq(res, &ctx->tqm_ctx.pde); + bnxt_qplib_free_stats_ctx(res->pdev, &ctx->stats); +} + +static int bnxt_qplib_alloc_tqm_rings(struct bnxt_qplib_res *res, + struct bnxt_qplib_ctx *ctx) +{ + struct bnxt_qplib_hwq_attr hwq_attr = {}; + struct bnxt_qplib_sg_info sginfo = {}; + struct bnxt_qplib_tqm_ctx *tqmctx; + int rc = 0; + int i; + + tqmctx = &ctx->tqm_ctx; + + sginfo.pgsize = PAGE_SIZE; + sginfo.pgshft = PAGE_SHIFT; + hwq_attr.sginfo = &sginfo; + hwq_attr.res = res; + hwq_attr.type = HWQ_TYPE_CTX; + hwq_attr.depth = 512; + hwq_attr.stride = sizeof(u64); + /* Alloc pdl buffer */ + rc = bnxt_qplib_alloc_init_hwq(&tqmctx->pde, &hwq_attr); + if (rc) + goto out; + /* Save original pdl level */ + tqmctx->pde_level = tqmctx->pde.level; + + hwq_attr.stride = 1; + for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) { + if (!tqmctx->qcount[i]) + continue; + hwq_attr.depth = ctx->qpc_count * tqmctx->qcount[i]; + rc = bnxt_qplib_alloc_init_hwq(&tqmctx->qtbl[i], &hwq_attr); + if (rc) + goto out; + } +out: + return rc; +} + +static void bnxt_qplib_map_tqm_pgtbl(struct bnxt_qplib_tqm_ctx *ctx) +{ + struct bnxt_qplib_hwq *tbl; + dma_addr_t *dma_ptr; + __le64 **pbl_ptr, *ptr; + int i, j, k; + int fnz_idx = -1; + int pg_count; + + pbl_ptr = (__le64 **)ctx->pde.pbl_ptr; + + for (i = 0, j = 0; i < MAX_TQM_ALLOC_REQ; + i++, j += MAX_TQM_ALLOC_BLK_SIZE) { + tbl = &ctx->qtbl[i]; + if (!tbl->max_elements) + continue; + if (fnz_idx == -1) + fnz_idx = i; /* first non-zero index */ + switch (tbl->level) { + case PBL_LVL_2: + pg_count = tbl->pbl[PBL_LVL_1].pg_count; + for (k = 0; k < pg_count; k++) { + ptr = &pbl_ptr[PTR_PG(j + k)][PTR_IDX(j + k)]; + dma_ptr = &tbl->pbl[PBL_LVL_1].pg_map_arr[k]; + *ptr = cpu_to_le64(*dma_ptr | PTU_PTE_VALID); + } + break; + case PBL_LVL_1: + case PBL_LVL_0: + default: + ptr = &pbl_ptr[PTR_PG(j)][PTR_IDX(j)]; + *ptr = cpu_to_le64(tbl->pbl[PBL_LVL_0].pg_map_arr[0] | + PTU_PTE_VALID); + break; + } + } + if (fnz_idx == -1) + fnz_idx = 0; + /* update pde level as per page table programming */ + ctx->pde.level = (ctx->qtbl[fnz_idx].level == PBL_LVL_2) ? PBL_LVL_2 : + ctx->qtbl[fnz_idx].level + 1; +} + +static int bnxt_qplib_setup_tqm_rings(struct bnxt_qplib_res *res, + struct bnxt_qplib_ctx *ctx) +{ + int rc = 0; + + rc = bnxt_qplib_alloc_tqm_rings(res, ctx); + if (rc) + goto fail; + + bnxt_qplib_map_tqm_pgtbl(&ctx->tqm_ctx); +fail: + return rc; } /* @@ -335,120 +496,72 @@ void bnxt_qplib_free_ctx(struct pci_dev *pdev, * Returns: * 0 if success, else -ERRORS */ -int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, +int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx, bool virt_fn, bool is_p5) { - int i, j, k, rc = 0; - int fnz_idx = -1; - __le64 **pbl_ptr; + struct bnxt_qplib_hwq_attr hwq_attr = {}; + struct bnxt_qplib_sg_info sginfo = {}; + int rc = 0; if (virt_fn || is_p5) goto stats_alloc; /* QPC Tables */ - ctx->qpc_tbl.max_elements = ctx->qpc_count; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->qpc_tbl, NULL, - &ctx->qpc_tbl.max_elements, - BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_CTX); + sginfo.pgsize = PAGE_SIZE; + sginfo.pgshft = PAGE_SHIFT; + hwq_attr.sginfo = &sginfo; + + hwq_attr.res = res; + hwq_attr.depth = ctx->qpc_count; + hwq_attr.stride = BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE; + hwq_attr.type = HWQ_TYPE_CTX; + rc = bnxt_qplib_alloc_init_hwq(&ctx->qpc_tbl, &hwq_attr); if (rc) goto fail; /* MRW Tables */ - ctx->mrw_tbl.max_elements = ctx->mrw_count; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->mrw_tbl, NULL, - &ctx->mrw_tbl.max_elements, - BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_CTX); + hwq_attr.depth = ctx->mrw_count; + hwq_attr.stride = BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE; + rc = bnxt_qplib_alloc_init_hwq(&ctx->mrw_tbl, &hwq_attr); if (rc) goto fail; /* SRQ Tables */ - ctx->srqc_tbl.max_elements = ctx->srqc_count; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->srqc_tbl, NULL, - &ctx->srqc_tbl.max_elements, - BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_CTX); + hwq_attr.depth = ctx->srqc_count; + hwq_attr.stride = BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE; + rc = bnxt_qplib_alloc_init_hwq(&ctx->srqc_tbl, &hwq_attr); if (rc) goto fail; /* CQ Tables */ - ctx->cq_tbl.max_elements = ctx->cq_count; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->cq_tbl, NULL, - &ctx->cq_tbl.max_elements, - BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_CTX); + hwq_attr.depth = ctx->cq_count; + hwq_attr.stride = BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE; + rc = bnxt_qplib_alloc_init_hwq(&ctx->cq_tbl, &hwq_attr); if (rc) goto fail; /* TQM Buffer */ - ctx->tqm_pde.max_elements = 512; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_pde, NULL, - &ctx->tqm_pde.max_elements, sizeof(u64), - 0, PAGE_SIZE, HWQ_TYPE_CTX); + rc = bnxt_qplib_setup_tqm_rings(res, ctx); if (rc) goto fail; - - for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) { - if (!ctx->tqm_count[i]) - continue; - ctx->tqm_tbl[i].max_elements = ctx->qpc_count * - ctx->tqm_count[i]; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_tbl[i], NULL, - &ctx->tqm_tbl[i].max_elements, 1, - 0, PAGE_SIZE, HWQ_TYPE_CTX); - if (rc) - goto fail; - } - pbl_ptr = (__le64 **)ctx->tqm_pde.pbl_ptr; - for (i = 0, j = 0; i < MAX_TQM_ALLOC_REQ; - i++, j += MAX_TQM_ALLOC_BLK_SIZE) { - if (!ctx->tqm_tbl[i].max_elements) - continue; - if (fnz_idx == -1) - fnz_idx = i; - switch (ctx->tqm_tbl[i].level) { - case PBL_LVL_2: - for (k = 0; k < ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_count; - k++) - pbl_ptr[PTR_PG(j + k)][PTR_IDX(j + k)] = - cpu_to_le64( - ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_map_arr[k] - | PTU_PTE_VALID); - break; - case PBL_LVL_1: - case PBL_LVL_0: - default: - pbl_ptr[PTR_PG(j)][PTR_IDX(j)] = cpu_to_le64( - ctx->tqm_tbl[i].pbl[PBL_LVL_0].pg_map_arr[0] | - PTU_PTE_VALID); - break; - } - } - if (fnz_idx == -1) - fnz_idx = 0; - ctx->tqm_pde_level = ctx->tqm_tbl[fnz_idx].level == PBL_LVL_2 ? - PBL_LVL_2 : ctx->tqm_tbl[fnz_idx].level + 1; - /* TIM Buffer */ ctx->tim_tbl.max_elements = ctx->qpc_count * 16; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tim_tbl, NULL, - &ctx->tim_tbl.max_elements, 1, - 0, PAGE_SIZE, HWQ_TYPE_CTX); + hwq_attr.depth = ctx->qpc_count * 16; + hwq_attr.stride = 1; + rc = bnxt_qplib_alloc_init_hwq(&ctx->tim_tbl, &hwq_attr); if (rc) goto fail; - stats_alloc: /* Stats */ - rc = bnxt_qplib_alloc_stats_ctx(pdev, &ctx->stats); + rc = bnxt_qplib_alloc_stats_ctx(res->pdev, &ctx->stats); if (rc) goto fail; return 0; fail: - bnxt_qplib_free_ctx(pdev, ctx); + bnxt_qplib_free_ctx(res, ctx); return rc; } @@ -808,9 +921,6 @@ void bnxt_qplib_free_res(struct bnxt_qplib_res *res) bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl); bnxt_qplib_free_pd_tbl(&res->pd_tbl); bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl); - - res->netdev = NULL; - res->pdev = NULL; } int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index aaa76d792185..95b645dbbc2d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -55,7 +55,8 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; enum bnxt_qplib_hwq_type { HWQ_TYPE_CTX, HWQ_TYPE_QUEUE, - HWQ_TYPE_L2_CMPL + HWQ_TYPE_L2_CMPL, + HWQ_TYPE_MR }; #define MAX_PBL_LVL_0_PGS 1 @@ -63,6 +64,7 @@ enum bnxt_qplib_hwq_type { #define MAX_PBL_LVL_1_PGS_SHIFT 9 #define MAX_PBL_LVL_1_PGS_FOR_LVL_2 256 #define MAX_PBL_LVL_2_PGS (256 * 512) +#define MAX_PDL_LVL_SHIFT 9 enum bnxt_qplib_pbl_lvl { PBL_LVL_0, @@ -78,6 +80,13 @@ enum bnxt_qplib_pbl_lvl { #define ROCE_PG_SIZE_8M (8 * 1024 * 1024) #define ROCE_PG_SIZE_1G (1024 * 1024 * 1024) +struct bnxt_qplib_reg_desc { + u8 bar_id; + resource_size_t bar_base; + void __iomem *bar_reg; + size_t len; +}; + struct bnxt_qplib_pbl { u32 pg_count; u32 pg_size; @@ -85,17 +94,37 @@ struct bnxt_qplib_pbl { dma_addr_t *pg_map_arr; }; +struct bnxt_qplib_sg_info { + struct scatterlist *sghead; + u32 nmap; + u32 npages; + u32 pgshft; + u32 pgsize; + bool nopte; +}; + +struct bnxt_qplib_hwq_attr { + struct bnxt_qplib_res *res; + struct bnxt_qplib_sg_info *sginfo; + enum bnxt_qplib_hwq_type type; + u32 depth; + u32 stride; + u32 aux_stride; + u32 aux_depth; +}; + struct bnxt_qplib_hwq { struct pci_dev *pdev; /* lock to protect qplib_hwq */ spinlock_t lock; - struct bnxt_qplib_pbl pbl[PBL_LVL_MAX]; + struct bnxt_qplib_pbl pbl[PBL_LVL_MAX + 1]; enum bnxt_qplib_pbl_lvl level; /* 0, 1, or 2 */ /* ptr for easy access to the PBL entries */ void **pbl_ptr; /* ptr for easy access to the dma_addr */ dma_addr_t *pbl_dma_ptr; u32 max_elements; + u32 depth; u16 element_size; /* Size of each entry */ u32 prod; /* raw */ @@ -104,6 +133,13 @@ struct bnxt_qplib_hwq { u8 is_user; }; +struct bnxt_qplib_db_info { + void __iomem *db; + void __iomem *priv_db; + struct bnxt_qplib_hwq *hwq; + u32 xid; +}; + /* Tables */ struct bnxt_qplib_pd_tbl { unsigned long *tbl; @@ -159,6 +195,15 @@ struct bnxt_qplib_vf_res { #define BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE 64 #define BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE 128 +#define MAX_TQM_ALLOC_REQ 48 +#define MAX_TQM_ALLOC_BLK_SIZE 8 +struct bnxt_qplib_tqm_ctx { + struct bnxt_qplib_hwq pde; + u8 pde_level; /* Original level */ + struct bnxt_qplib_hwq qtbl[MAX_TQM_ALLOC_REQ]; + u8 qcount[MAX_TQM_ALLOC_REQ]; +}; + struct bnxt_qplib_ctx { u32 qpc_count; struct bnxt_qplib_hwq qpc_tbl; @@ -169,12 +214,7 @@ struct bnxt_qplib_ctx { u32 cq_count; struct bnxt_qplib_hwq cq_tbl; struct bnxt_qplib_hwq tim_tbl; -#define MAX_TQM_ALLOC_REQ 48 -#define MAX_TQM_ALLOC_BLK_SIZE 8 - u8 tqm_count[MAX_TQM_ALLOC_REQ]; - struct bnxt_qplib_hwq tqm_pde; - u32 tqm_pde_level; - struct bnxt_qplib_hwq tqm_tbl[MAX_TQM_ALLOC_REQ]; + struct bnxt_qplib_tqm_ctx tqm_ctx; struct bnxt_qplib_stats stats; struct bnxt_qplib_vf_res vf_res; u64 hwrm_intf_ver; @@ -223,11 +263,6 @@ static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx) RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL; } -struct bnxt_qplib_sg_info { - struct scatterlist *sglist; - u32 nmap; - u32 npages; -}; #define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) @@ -235,11 +270,10 @@ struct bnxt_qplib_sg_info { struct bnxt_qplib_pd; struct bnxt_qplib_dev_attr; -void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq); -int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, - struct bnxt_qplib_sg_info *sg_info, u32 *elements, - u32 elements_per_page, u32 aux, u32 pg_size, - enum bnxt_qplib_hwq_type hwq_type); +void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res, + struct bnxt_qplib_hwq *hwq); +int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, + struct bnxt_qplib_hwq_attr *hwq_attr); void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid); int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl, struct bnxt_qplib_pd *pd); @@ -258,9 +292,80 @@ void bnxt_qplib_free_res(struct bnxt_qplib_res *res); int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, struct net_device *netdev, struct bnxt_qplib_dev_attr *dev_attr); -void bnxt_qplib_free_ctx(struct pci_dev *pdev, +void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx); -int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, +int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx, bool virt_fn, bool is_p5); + +static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info, + bool arm) +{ + u32 key; + + key = info->hwq->cons & (info->hwq->max_elements - 1); + key |= (CMPL_DOORBELL_IDX_VALID | + (CMPL_DOORBELL_KEY_CMPL & CMPL_DOORBELL_KEY_MASK)); + if (!arm) + key |= CMPL_DOORBELL_MASK; + writel(key, info->db); +} + +static inline void bnxt_qplib_ring_db(struct bnxt_qplib_db_info *info, + u32 type) +{ + u64 key = 0; + + key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; + key <<= 32; + key |= (info->hwq->cons & (info->hwq->max_elements - 1)) & + DBC_DBC_INDEX_MASK; + writeq(key, info->db); +} + +static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info, + u32 type) +{ + u64 key = 0; + + key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; + key <<= 32; + key |= (info->hwq->prod & (info->hwq->max_elements - 1)) & + DBC_DBC_INDEX_MASK; + writeq(key, info->db); +} + +static inline void bnxt_qplib_armen_db(struct bnxt_qplib_db_info *info, + u32 type) +{ + u64 key = 0; + + key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; + key <<= 32; + writeq(key, info->priv_db); +} + +static inline void bnxt_qplib_srq_arm_db(struct bnxt_qplib_db_info *info, + u32 th) +{ + u64 key = 0; + + key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | th; + key <<= 32; + key |= th & DBC_DBC_INDEX_MASK; + writeq(key, info->priv_db); +} + +static inline void bnxt_qplib_ring_nq_db(struct bnxt_qplib_db_info *info, + struct bnxt_qplib_chip_ctx *cctx, + bool arm) +{ + u32 type; + + type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; + if (bnxt_qplib_is_chip_gen_p5(cctx)) + bnxt_qplib_ring_db(info, type); + else + bnxt_qplib_ring_db32(info, arm); +} #endif /* __BNXT_QPLIB_RES_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 40296b97d21e..66954ff6a2f2 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -585,7 +585,7 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw) /* Free the qplib's MRW memory */ if (mrw->hwq.max_elements) - bnxt_qplib_free_hwq(res->pdev, &mrw->hwq); + bnxt_qplib_free_hwq(res, &mrw->hwq); return 0; } @@ -646,7 +646,7 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, if (mrw->hwq.max_elements) { mrw->va = 0; mrw->total_size = 0; - bnxt_qplib_free_hwq(res->pdev, &mrw->hwq); + bnxt_qplib_free_hwq(res, &mrw->hwq); } return 0; @@ -656,10 +656,12 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct cmdq_register_mr req; + struct bnxt_qplib_hwq_attr hwq_attr = {}; + struct bnxt_qplib_sg_info sginfo = {}; struct creq_register_mr_resp resp; - u16 cmd_flags = 0, level; + struct cmdq_register_mr req; int pg_ptrs, pages, i, rc; + u16 cmd_flags = 0, level; dma_addr_t **pbl_ptr; u32 pg_size; @@ -674,20 +676,23 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, if (pages > MAX_PBL_LVL_1_PGS) { dev_err(&res->pdev->dev, - "SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n", + "SP: Reg MR: pages requested (0x%x) exceeded max (0x%x)\n", pages, MAX_PBL_LVL_1_PGS); return -ENOMEM; } /* Free the hwq if it already exist, must be a rereg */ if (mr->hwq.max_elements) - bnxt_qplib_free_hwq(res->pdev, &mr->hwq); - - mr->hwq.max_elements = pages; + bnxt_qplib_free_hwq(res, &mr->hwq); /* Use system PAGE_SIZE */ - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, - &mr->hwq.max_elements, - PAGE_SIZE, 0, PAGE_SIZE, - HWQ_TYPE_CTX); + hwq_attr.res = res; + hwq_attr.depth = pages; + hwq_attr.stride = PAGE_SIZE; + hwq_attr.type = HWQ_TYPE_MR; + hwq_attr.sginfo = &sginfo; + hwq_attr.sginfo->npages = pages; + hwq_attr.sginfo->pgsize = PAGE_SIZE; + hwq_attr.sginfo->pgshft = PAGE_SHIFT; + rc = bnxt_qplib_alloc_init_hwq(&mr->hwq, &hwq_attr); if (rc) { dev_err(&res->pdev->dev, "SP: Reg MR memory allocation failed\n"); @@ -734,7 +739,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, fail: if (mr->hwq.max_elements) - bnxt_qplib_free_hwq(res->pdev, &mr->hwq); + bnxt_qplib_free_hwq(res, &mr->hwq); return rc; } @@ -742,6 +747,8 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res, struct bnxt_qplib_frpl *frpl, int max_pg_ptrs) { + struct bnxt_qplib_hwq_attr hwq_attr = {}; + struct bnxt_qplib_sg_info sginfo = {}; int pg_ptrs, pages, rc; /* Re-calculate the max to fit the HWQ allocation model */ @@ -753,10 +760,15 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res, if (pages > MAX_PBL_LVL_1_PGS) return -ENOMEM; - frpl->hwq.max_elements = pages; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &frpl->hwq, NULL, - &frpl->hwq.max_elements, PAGE_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_CTX); + sginfo.pgsize = PAGE_SIZE; + sginfo.nopte = true; + + hwq_attr.res = res; + hwq_attr.depth = pg_ptrs; + hwq_attr.stride = PAGE_SIZE; + hwq_attr.sginfo = &sginfo; + hwq_attr.type = HWQ_TYPE_CTX; + rc = bnxt_qplib_alloc_init_hwq(&frpl->hwq, &hwq_attr); if (!rc) frpl->max_pg_ptrs = pg_ptrs; @@ -766,7 +778,7 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res, int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res, struct bnxt_qplib_frpl *frpl) { - bnxt_qplib_free_hwq(res->pdev, &frpl->hwq); + bnxt_qplib_free_hwq(res, &frpl->hwq); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7d06b0f8d49a..e8e11bd95e42 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -707,7 +707,7 @@ struct mpa_message { u8 flags; u8 revision; __be16 private_data_size; - u8 private_data[0]; + u8 private_data[]; }; struct mpa_v2_conn_params { @@ -719,7 +719,7 @@ struct terminate_message { u8 layer_etype; u8 ecode; __be16 hdrct_rsvd; - u8 len_hdrs[0]; + u8 len_hdrs[]; }; #define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 89ac2f9ae6dd..ac48012c992f 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2127,7 +2127,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, pr_debug("ib_pd %p\n", pd); if (attrs->qp_type != IB_QPT_RC) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); php = to_c4iw_pd(pd); rhp = php->rhp; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index cbdb300a4794..a2f5e29ef226 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -123,7 +123,7 @@ struct fw_ri_dsgl { __be32 len0; __be64 addr0; #ifndef C99_NOT_SUPPORTED - struct fw_ri_dsge_pair sge[0]; + struct fw_ri_dsge_pair sge[]; #endif }; @@ -139,7 +139,7 @@ struct fw_ri_isgl { __be16 nsge; __be32 r2; #ifndef C99_NOT_SUPPORTED - struct fw_ri_sge sge[0]; + struct fw_ri_sge sge[]; #endif }; @@ -149,7 +149,7 @@ struct fw_ri_immd { __be16 r2; __be32 immdlen; #ifndef C99_NOT_SUPPORTED - __u8 data[0]; + __u8 data[]; #endif }; @@ -321,7 +321,7 @@ struct fw_ri_res_wr { __be32 len16_pkd; __u64 cookie; #ifndef C99_NOT_SUPPORTED - struct fw_ri_res res[0]; + struct fw_ri_res res[]; #endif }; diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 74b787a90660..96b104ab5415 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_CMDS_H_ @@ -801,21 +801,16 @@ struct efa_admin_mmio_req_read_less_resp { /* create_qp_cmd */ #define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0) -#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_SHIFT 1 #define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1) /* reg_mr_cmd */ #define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0) -#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT 7 #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) #define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0) -#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_SHIFT 2 #define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2) /* create_cq_cmd */ -#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) -#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_SHIFT 6 #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h index c8e0c8b905be..29d53ed63b3e 100644 --- a/drivers/infiniband/hw/efa/efa_admin_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_H_ @@ -121,9 +121,7 @@ struct efa_admin_aenq_entry { /* aq_common_desc */ #define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) #define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) -#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1 #define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1) -#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2 #define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2) /* acq_common_desc */ diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 0778f4f7dccd..7fce69f5568f 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "efa_com.h" @@ -16,26 +16,13 @@ #define EFA_ASYNC_QUEUE_DEPTH 16 #define EFA_ADMIN_QUEUE_DEPTH 32 -#define MIN_EFA_VER\ - ((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \ - (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK)) - #define EFA_CTRL_MAJOR 0 #define EFA_CTRL_MINOR 0 #define EFA_CTRL_SUB_MINOR 1 -#define MIN_EFA_CTRL_VER \ - (((EFA_CTRL_MAJOR) << \ - (EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \ - ((EFA_CTRL_MINOR) << \ - (EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \ - (EFA_CTRL_SUB_MINOR)) - #define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) #define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) -#define EFA_REGS_ADMIN_INTR_MASK 1 - enum efa_cmd_status { EFA_CMD_SUBMITTED, EFA_CMD_COMPLETED, @@ -84,7 +71,7 @@ static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) struct efa_com_mmio_read *mmio_read = &edev->mmio_read; struct efa_admin_mmio_req_read_less_resp *read_resp; unsigned long exp_time; - u32 mmio_read_reg; + u32 mmio_read_reg = 0; u32 err; read_resp = mmio_read->read_resp; @@ -94,10 +81,9 @@ static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) /* trash DMA req_id to identify when hardware is done */ read_resp->req_id = mmio_read->seq_num + 0x9aL; - mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) & - EFA_REGS_MMIO_REG_READ_REG_OFF_MASK; - mmio_read_reg |= mmio_read->seq_num & - EFA_REGS_MMIO_REG_READ_REQ_ID_MASK; + EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REG_OFF, offset); + EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REQ_ID, + mmio_read->seq_num); writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF); @@ -137,9 +123,9 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev) struct efa_com_admin_queue *aq = &edev->aq; struct efa_com_admin_sq *sq = &aq->sq; u16 size = aq->depth * sizeof(*sq->entries); + u32 aq_caps = 0; u32 addr_high; u32 addr_low; - u32 aq_caps; sq->entries = dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL); @@ -160,10 +146,9 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev) writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF); - aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK; - aq_caps |= (sizeof(struct efa_admin_aq_entry) << - EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) & - EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK; + EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_DEPTH, aq->depth); + EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE, + sizeof(struct efa_admin_aq_entry)); writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF); @@ -175,9 +160,9 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev) struct efa_com_admin_queue *aq = &edev->aq; struct efa_com_admin_cq *cq = &aq->cq; u16 size = aq->depth * sizeof(*cq->entries); + u32 acq_caps = 0; u32 addr_high; u32 addr_low; - u32 acq_caps; cq->entries = dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL); @@ -195,13 +180,11 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev) writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF); - acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK; - acq_caps |= (sizeof(struct efa_admin_acq_entry) << - EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) & - EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK; - acq_caps |= (aq->msix_vector_idx << - EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) & - EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK; + EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_DEPTH, aq->depth); + EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE, + sizeof(struct efa_admin_acq_entry)); + EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR, + aq->msix_vector_idx); writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF); @@ -212,7 +195,8 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev, struct efa_aenq_handlers *aenq_handlers) { struct efa_com_aenq *aenq = &edev->aenq; - u32 addr_low, addr_high, aenq_caps; + u32 addr_low, addr_high; + u32 aenq_caps = 0; u16 size; if (!aenq_handlers) { @@ -237,13 +221,11 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev, writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF); - aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; - aenq_caps |= (sizeof(struct efa_admin_aenq_entry) << - EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & - EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; - aenq_caps |= (aenq->msix_vector_idx - << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) & - EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK; + EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_DEPTH, aenq->depth); + EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE, + sizeof(struct efa_admin_aenq_entry)); + EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR, + aenq->msix_vector_idx); writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF); /* @@ -280,8 +262,8 @@ static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq, static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq, struct efa_comp_ctx *comp_ctx) { - u16 cmd_id = comp_ctx->user_cqe->acq_common_descriptor.command & - EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + u16 cmd_id = EFA_GET(&comp_ctx->user_cqe->acq_common_descriptor.command, + EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID); u16 ctx_id = cmd_id & (aq->depth - 1); ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id); @@ -335,8 +317,8 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK; cmd->aq_common_descriptor.command_id = cmd_id; - cmd->aq_common_descriptor.flags |= aq->sq.phase & - EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK; + EFA_SET(&cmd->aq_common_descriptor.flags, + EFA_ADMIN_AQ_COMMON_DESC_PHASE, aq->sq.phase); comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true); if (!comp_ctx) { @@ -427,8 +409,8 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a struct efa_comp_ctx *comp_ctx; u16 cmd_id; - cmd_id = cqe->acq_common_descriptor.command & - EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + cmd_id = EFA_GET(&cqe->acq_common_descriptor.command, + EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID); comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false); if (!comp_ctx) { @@ -705,7 +687,7 @@ void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling) u32 mask_value = 0; if (polling) - mask_value = EFA_REGS_ADMIN_INTR_MASK; + EFA_SET(&mask_value, EFA_REGS_INTR_MASK_EN, 1); writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF); if (polling) @@ -743,7 +725,7 @@ int efa_com_admin_init(struct efa_com_dev *edev, int err; dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); - if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) { + if (!EFA_GET(&dev_sts, EFA_REGS_DEV_STS_READY)) { ibdev_err(edev->efa_dev, "Device isn't ready, abort com init %#x\n", dev_sts); return -ENODEV; @@ -778,8 +760,7 @@ int efa_com_admin_init(struct efa_com_dev *edev, goto err_destroy_cq; cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); - timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >> - EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT; + timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO); if (timeout) /* the resolution of timeout reg is 100ms */ aq->completion_timeout = timeout * 100000; @@ -940,7 +921,9 @@ void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev) int efa_com_validate_version(struct efa_com_dev *edev) { + u32 min_ctrl_ver = 0; u32 ctrl_ver_masked; + u32 min_ver = 0; u32 ctrl_ver; u32 ver; @@ -953,33 +936,42 @@ int efa_com_validate_version(struct efa_com_dev *edev) EFA_REGS_CONTROLLER_VERSION_OFF); ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n", - (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >> - EFA_REGS_VERSION_MAJOR_VERSION_SHIFT, - ver & EFA_REGS_VERSION_MINOR_VERSION_MASK); - - if (ver < MIN_EFA_VER) { + EFA_GET(&ver, EFA_REGS_VERSION_MAJOR_VERSION), + EFA_GET(&ver, EFA_REGS_VERSION_MINOR_VERSION)); + + EFA_SET(&min_ver, EFA_REGS_VERSION_MAJOR_VERSION, + EFA_ADMIN_API_VERSION_MAJOR); + EFA_SET(&min_ver, EFA_REGS_VERSION_MINOR_VERSION, + EFA_ADMIN_API_VERSION_MINOR); + if (ver < min_ver) { ibdev_err(edev->efa_dev, "EFA version is lower than the minimal version the driver supports\n"); return -EOPNOTSUPP; } - ibdev_dbg(edev->efa_dev, - "efa controller version: %d.%d.%d implementation version %d\n", - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> - EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> - EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT, - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK), - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >> - EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT); + ibdev_dbg( + edev->efa_dev, + "efa controller version: %d.%d.%d implementation version %d\n", + EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION), + EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION), + EFA_GET(&ctrl_ver, + EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION), + EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_IMPL_ID)); ctrl_ver_masked = - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) | - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) | - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK); - + EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION) | + EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION) | + EFA_GET(&ctrl_ver, + EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION); + + EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION, + EFA_CTRL_MAJOR); + EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION, + EFA_CTRL_MINOR); + EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION, + EFA_CTRL_SUB_MINOR); /* Validate the ctrl version without the implementation ID */ - if (ctrl_ver_masked < MIN_EFA_CTRL_VER) { + if (ctrl_ver_masked < min_ctrl_ver) { ibdev_err(edev->efa_dev, "EFA ctrl version is lower than the minimal ctrl version the driver supports\n"); return -EOPNOTSUPP; @@ -1002,8 +994,7 @@ int efa_com_get_dma_width(struct efa_com_dev *edev) u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); int width; - width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >> - EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT; + width = EFA_GET(&caps, EFA_REGS_CAPS_DMA_ADDR_WIDTH); ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width); @@ -1017,16 +1008,14 @@ int efa_com_get_dma_width(struct efa_com_dev *edev) return width; } -static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, - u16 exp_state) +static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, int on) { u32 val, i; for (i = 0; i < timeout; i++) { val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); - if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) == - exp_state) + if (EFA_GET(&val, EFA_REGS_DEV_STS_RESET_IN_PROGRESS) == on) return 0; ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val); @@ -1046,36 +1035,34 @@ static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, int efa_com_dev_reset(struct efa_com_dev *edev, enum efa_regs_reset_reason_types reset_reason) { - u32 stat, timeout, cap, reset_val; + u32 stat, timeout, cap; + u32 reset_val = 0; int err; stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); - if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) { + if (!EFA_GET(&stat, EFA_REGS_DEV_STS_READY)) { ibdev_err(edev->efa_dev, "Device isn't ready, can't reset device\n"); return -EINVAL; } - timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >> - EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT; + timeout = EFA_GET(&cap, EFA_REGS_CAPS_RESET_TIMEOUT); if (!timeout) { ibdev_err(edev->efa_dev, "Invalid timeout value\n"); return -EINVAL; } /* start reset */ - reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK; - reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) & - EFA_REGS_DEV_CTL_RESET_REASON_MASK; + EFA_SET(&reset_val, EFA_REGS_DEV_CTL_DEV_RESET, 1); + EFA_SET(&reset_val, EFA_REGS_DEV_CTL_RESET_REASON, reset_reason); writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF); /* reset clears the mmio readless address, restore it */ efa_com_mmio_reg_read_resp_addr_init(edev); - err = wait_for_reset_state(edev, timeout, - EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK); + err = wait_for_reset_state(edev, timeout, 1); if (err) { ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n"); return err; @@ -1089,8 +1076,7 @@ int efa_com_dev_reset(struct efa_com_dev *edev, return err; } - timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >> - EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT; + timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO); if (timeout) /* the resolution of timeout reg is 100ms */ edev->aq.completion_timeout = timeout * 100000; diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index e20bd84a1014..eea5574a62e8 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "efa_com.h" @@ -161,8 +161,9 @@ int efa_com_create_cq(struct efa_com_dev *edev, int err; create_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_CQ; - create_cmd.cq_caps_2 = (params->entry_size_in_bytes / 4) & - EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK; + EFA_SET(&create_cmd.cq_caps_2, + EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS, + params->entry_size_in_bytes / 4); create_cmd.cq_depth = params->cq_depth; create_cmd.num_sub_cqs = params->num_sub_cqs; create_cmd.uar = params->uarn; @@ -227,8 +228,8 @@ int efa_com_register_mr(struct efa_com_dev *edev, mr_cmd.aq_common_desc.opcode = EFA_ADMIN_REG_MR; mr_cmd.pd = params->pd; mr_cmd.mr_length = params->mr_length_in_bytes; - mr_cmd.flags |= params->page_shift & - EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK; + EFA_SET(&mr_cmd.flags, EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT, + params->page_shift); mr_cmd.iova = params->iova; mr_cmd.permissions = params->permissions; @@ -242,11 +243,11 @@ int efa_com_register_mr(struct efa_com_dev *edev, params->pbl.pbl.address.mem_addr_low; mr_cmd.pbl.pbl.address.mem_addr_high = params->pbl.pbl.address.mem_addr_high; - mr_cmd.aq_common_desc.flags |= - EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK; + EFA_SET(&mr_cmd.aq_common_desc.flags, + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1); if (params->indirect) - mr_cmd.aq_common_desc.flags |= - EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + EFA_SET(&mr_cmd.aq_common_desc.flags, + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT, 1); } err = efa_com_cmd_exec(aq, @@ -386,9 +387,8 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev, get_cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_FEATURE; if (control_buff_size) - get_cmd.aq_common_descriptor.flags = - EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; - + EFA_SET(&get_cmd.aq_common_descriptor.flags, + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT, 1); efa_com_set_dma_addr(control_buf_dma_addr, &get_cmd.control_buffer.address.mem_addr_high, @@ -538,8 +538,9 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev, set_cmd->aq_common_descriptor.opcode = EFA_ADMIN_SET_FEATURE; if (control_buff_size) { - set_cmd->aq_common_descriptor.flags = - EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + set_cmd->aq_common_descriptor.flags = 0; + EFA_SET(&set_cmd->aq_common_descriptor.flags, + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT, 1); efa_com_set_dma_addr(control_buf_dma_addr, &set_cmd->control_buffer.address.mem_addr_high, &set_cmd->control_buffer.address.mem_addr_low); diff --git a/drivers/infiniband/hw/efa/efa_common_defs.h b/drivers/infiniband/hw/efa/efa_common_defs.h index c559ec08898e..90af1c82c9c6 100644 --- a/drivers/infiniband/hw/efa/efa_common_defs.h +++ b/drivers/infiniband/hw/efa/efa_common_defs.h @@ -1,14 +1,25 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COMMON_H_ #define _EFA_COMMON_H_ +#include <linux/bitfield.h> + #define EFA_COMMON_SPEC_VERSION_MAJOR 2 #define EFA_COMMON_SPEC_VERSION_MINOR 0 +#define EFA_GET(ptr, mask) FIELD_GET(mask##_MASK, *(ptr)) + +#define EFA_SET(ptr, mask, value) \ + ({ \ + typeof(ptr) _ptr = ptr; \ + *_ptr = (*_ptr & ~(mask##_MASK)) | \ + FIELD_PREP(mask##_MASK, value); \ + }) + struct efa_common_mem_addr { u32 mem_addr_low; diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h index bb9cad3d6a15..4017982fe13b 100644 --- a/drivers/infiniband/hw/efa/efa_regs_defs.h +++ b/drivers/infiniband/hw/efa/efa_regs_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_REGS_H_ @@ -45,69 +45,52 @@ enum efa_regs_reset_reason_types { /* version register */ #define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff -#define EFA_REGS_VERSION_MAJOR_VERSION_SHIFT 8 #define EFA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00 /* controller_version register */ #define EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff -#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8 #define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00 -#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16 #define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000 -#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24 #define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000 /* caps register */ #define EFA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1 -#define EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1 #define EFA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e -#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8 #define EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00 -#define EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16 #define EFA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000 /* aq_caps register */ #define EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff -#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16 #define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000 /* acq_caps register */ #define EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff -#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16 #define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xff0000 -#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT 24 #define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK 0xff000000 /* aenq_caps register */ #define EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff -#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16 #define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xff0000 -#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT 24 #define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK 0xff000000 +/* intr_mask register */ +#define EFA_REGS_INTR_MASK_EN_MASK 0x1 + /* dev_ctl register */ #define EFA_REGS_DEV_CTL_DEV_RESET_MASK 0x1 -#define EFA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1 #define EFA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2 -#define EFA_REGS_DEV_CTL_RESET_REASON_SHIFT 28 #define EFA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000 /* dev_sts register */ #define EFA_REGS_DEV_STS_READY_MASK 0x1 -#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1 #define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2 -#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2 #define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4 -#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3 #define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8 -#define EFA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4 #define EFA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10 -#define EFA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5 #define EFA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20 /* mmio_reg_read register */ #define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff -#define EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16 #define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 #endif /* _EFA_REGS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index ec5545870554..5c57098a4aee 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include <linux/vmalloc.h> @@ -144,9 +144,6 @@ static inline bool is_rdma_read_cap(struct efa_dev *dev) return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK; } -#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ - sizeof_field(typeof(x), fld) <= (sz)) - #define is_reserved_cleared(reserved) \ !memchr_inv(reserved, 0, sizeof(reserved)) @@ -169,6 +166,14 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr, return addr; } +static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr, + dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir) +{ + dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir); + free_pages_exact(cpu_addr, size); +} + int efa_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata) @@ -402,6 +407,9 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) int err; ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num); + + efa_qp_user_mmap_entries_remove(qp); + err = efa_destroy_qp_handle(dev, qp->qp_handle); if (err) return err; @@ -411,11 +419,10 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n", qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr); - dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, - DMA_TO_DEVICE); + efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr, + qp->rq_size, DMA_TO_DEVICE); } - efa_qp_user_mmap_entries_remove(qp); kfree(qp); return 0; } @@ -599,7 +606,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, if (err) goto err_out; - if (!field_avail(cmd, driver_qp_type, udata->inlen)) { + if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, no input udata\n"); err = -EINVAL; @@ -720,13 +727,9 @@ err_remove_mmap_entries: err_destroy_qp: efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); err_free_mapped: - if (qp->rq_size) { - dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, - DMA_TO_DEVICE); - - if (!qp->rq_mmap_entry) - free_pages_exact(qp->rq_cpu_addr, qp->rq_size); - } + if (qp->rq_size) + efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr, + qp->rq_size, DMA_TO_DEVICE); err_free_qp: kfree(qp); err_out: @@ -845,10 +848,10 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); - efa_destroy_cq_idx(dev, cq->cq_idx); - dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, - DMA_FROM_DEVICE); rdma_user_mmap_entry_remove(cq->mmap_entry); + efa_destroy_cq_idx(dev, cq->cq_idx); + efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, + DMA_FROM_DEVICE); } static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, @@ -890,7 +893,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_out; } - if (!field_avail(cmd, num_sub_cqs, udata->inlen)) { + if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) { ibdev_dbg(ibdev, "Incompatible ABI params, no input udata\n"); err = -EINVAL; @@ -985,10 +988,8 @@ err_remove_mmap: err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); err_free_mapped: - dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, - DMA_FROM_DEVICE); - if (!cq->mmap_entry) - free_pages_exact(cq->cpu_addr, cq->size); + efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, + DMA_FROM_DEVICE); err_out: atomic64_inc(&dev->stats.sw_stats.create_cq_err); @@ -1550,10 +1551,6 @@ void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { struct efa_user_mmap_entry *entry = to_emmap(rdma_entry); - /* DMA mapping is already gone, now free the pages */ - if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) - free_pages_exact(phys_to_virt(entry->address), - entry->rdma_entry.npages * PAGE_SIZE); kfree(entry); } diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c index d106d23016ba..c22ab7b5163b 100644 --- a/drivers/infiniband/hw/hfi1/efivar.c +++ b/drivers/infiniband/hw/hfi1/efivar.c @@ -78,7 +78,7 @@ static int read_efi_var(const char *name, unsigned long *size, *size = 0; *return_data = NULL; - if (!efi_enabled(EFI_RUNTIME_SERVICES)) + if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) return -EOPNOTSUPP; uni_name = kcalloc(strlen(name) + 1, sizeof(efi_char16_t), GFP_KERNEL); diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index 986c12153e62..0dfbcfb048ca 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -222,11 +222,11 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf, while (bit < bitsize) { zero = find_next_zero_bit(fault->opcodes, bitsize, bit); if (zero - 1 != bit) - size += snprintf(data + size, + size += scnprintf(data + size, datalen - size - 1, "0x%lx-0x%lx,", bit, zero - 1); else - size += snprintf(data + size, + size += scnprintf(data + size, datalen - size - 1, "0x%lx,", bit); bit = find_next_bit(fault->opcodes, bitsize, zero); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 259115886d35..e7fdd70c6e78 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -209,7 +209,6 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd->mm = current->mm; mmgrab(fd->mm); fd->dd = dd; - kobject_get(&fd->dd->kobj); fp->private_data = fd; return 0; nomem: @@ -713,7 +712,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) deallocate_ctxt(uctxt); done: mmdrop(fdata->mm); - kobject_put(&dd->kobj); if (atomic_dec_and_test(&dd->user_refcount)) complete(&dd->user_comp); @@ -1696,7 +1694,7 @@ static int user_add(struct hfi1_devdata *dd) snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit); ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops, &dd->user_cdev, &dd->user_device, - true, &dd->kobj); + true, &dd->verbs_dev.rdi.ibdev.dev.kobj); if (ret) user_remove(dd); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index cae12f416ca0..b06c2594105a 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1413,8 +1413,6 @@ struct hfi1_devdata { bool aspm_enabled; /* ASPM state: enabled/disabled */ struct rhashtable *sdma_rht; - struct kobject kobj; - /* vnic data */ struct hfi1_vnic_data vnic; /* Lock to protect IRQ SRC register access */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e3acda7a0800..3759d9233a1c 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1198,13 +1198,13 @@ static void finalize_asic_data(struct hfi1_devdata *dd, } /** - * hfi1_clean_devdata - cleans up per-unit data structure + * hfi1_free_devdata - cleans up and frees per-unit data structure * @dd: pointer to a valid devdata structure * - * It cleans up all data structures set up by + * It cleans up and frees all data structures set up by * by hfi1_alloc_devdata(). */ -static void hfi1_clean_devdata(struct hfi1_devdata *dd) +void hfi1_free_devdata(struct hfi1_devdata *dd) { struct hfi1_asic_data *ad; unsigned long flags; @@ -1231,23 +1231,6 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd) rvt_dealloc_device(&dd->verbs_dev.rdi); } -static void __hfi1_free_devdata(struct kobject *kobj) -{ - struct hfi1_devdata *dd = - container_of(kobj, struct hfi1_devdata, kobj); - - hfi1_clean_devdata(dd); -} - -static struct kobj_type hfi1_devdata_type = { - .release = __hfi1_free_devdata, -}; - -void hfi1_free_devdata(struct hfi1_devdata *dd) -{ - kobject_put(&dd->kobj); -} - /** * hfi1_alloc_devdata - Allocate our primary per-unit data structure. * @pdev: Valid PCI device @@ -1333,11 +1316,10 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, goto bail; } - kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; bail: - hfi1_clean_devdata(dd); + hfi1_free_devdata(dd); return ERR_PTR(ret); } diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index a51bcd2b4391..7073f237a949 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -2381,7 +2381,7 @@ struct opa_port_status_rsp { __be64 port_vl_rcv_bubble; __be64 port_vl_mark_fecn; __be64 port_vl_xmit_discards; - } vls[0]; /* real array size defined by # bits set in vl_select_mask */ + } vls[]; /* real array size defined by # bits set in vl_select_mask */ }; enum counter_selects { @@ -2423,7 +2423,7 @@ struct opa_aggregate { __be16 attr_id; __be16 err_reqlength; /* 1 bit, 8 res, 7 bit */ __be32 attr_mod; - u8 data[0]; + u8 data[]; }; #define MSK_LLI 0x000000f0 diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 2f48e6953629..889e63d3f2cc 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -165,7 +165,7 @@ struct opa_mad_notice_attr { } __packed ntc_2048; }; - u8 class_data[0]; + u8 class_data[]; }; #define IB_VLARB_LOWPRI_0_31 1 diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index c9a58b642bdd..0102262343c0 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -243,7 +243,7 @@ struct sc_config_sizes { */ struct pio_map_elem { u32 mask; - struct send_context *ksc[0]; + struct send_context *ksc[]; }; /* @@ -263,7 +263,7 @@ struct pio_vl_map { u32 mask; u8 actual_vls; u8 vls; - struct pio_map_elem *map[0]; + struct pio_map_elem *map[]; }; int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index a51525647ac8..c93ea021cf49 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -833,7 +833,7 @@ struct sdma_engine *sdma_select_engine_sc( struct sdma_rht_map_elem { u32 mask; u8 ctr; - struct sdma_engine *sde[0]; + struct sdma_engine *sde[]; }; struct sdma_rht_node { diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 1e2e40f79cb2..7a851191f987 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -1002,7 +1002,7 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status); */ struct sdma_map_elem { u32 mask; - struct sdma_engine *sde[0]; + struct sdma_engine *sde[]; }; /** @@ -1024,7 +1024,7 @@ struct sdma_vl_map { u32 mask; u8 actual_vls; u8 vls; - struct sdma_map_elem *map[0]; + struct sdma_map_elem *map[]; }; int sdma_map_init( diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 90f62c4bddba..074ec71772d2 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -674,7 +674,11 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping sc2vl sysfs info, (err %d) port %u\n", ret, port_num); - goto bail; + /* + * Based on the documentation for kobject_init_and_add(), the + * caller should call kobject_put even if this call fails. + */ + goto bail_sc2vl; } kobject_uevent(&ppd->sc2vl_kobj, KOBJ_ADD); @@ -684,7 +688,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping sl2sc sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sc2vl; + goto bail_sl2sc; } kobject_uevent(&ppd->sl2sc_kobj, KOBJ_ADD); @@ -694,7 +698,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping vl2mtu sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sl2sc; + goto bail_vl2mtu; } kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD); @@ -704,7 +708,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping Congestion Control sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_vl2mtu; + goto bail_cc; } kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); @@ -742,7 +746,6 @@ bail_sl2sc: kobject_put(&ppd->sl2sc_kobj); bail_sc2vl: kobject_put(&ppd->sc2vl_kobj); -bail: return ret; } @@ -853,8 +856,13 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) return 0; bail: - for (i = 0; i < dd->num_sdma; i++) - kobject_del(&dd->per_sdma[i].kobj); + /* + * The function kobject_put() will call kobject_del() if the kobject + * has been added successfully. The sysfs files created under the + * kobject directory will also be removed during the process. + */ + for (; i >= 0; i--) + kobject_put(&dd->per_sdma[i].kobj); return ret; } @@ -867,6 +875,10 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; int i; + /* Unwind operations in hfi1_verbs_register_sysfs() */ + for (i = 0; i < dd->num_sdma; i++) + kobject_put(&dd->per_sdma[i].kobj); + for (i = 0; i < dd->num_pports; i++) { ppd = &dd->pport[i]; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 6257eee083a1..332abb446861 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -73,7 +73,7 @@ struct tid_rb_node { dma_addr_t dma_addr; bool freed; unsigned int npages; - struct page *pages[0]; + struct page *pages[]; }; static inline int num_user_pages(unsigned long addr, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index c2f0d9ba93de..13e4203497b3 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -141,6 +141,7 @@ static int defer_packet_queue( */ xchg(&pq->state, SDMA_PKT_Q_DEFERRED); if (list_empty(&pq->busy.list)) { + pq->busy.lock = &sde->waitlock; iowait_get_priority(&pq->busy); iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); } @@ -155,6 +156,7 @@ static void activate_packet_queue(struct iowait *wait, int reason) { struct hfi1_user_sdma_pkt_q *pq = container_of(wait, struct hfi1_user_sdma_pkt_q, busy); + pq->busy.lock = NULL; xchg(&pq->state, SDMA_PKT_Q_ACTIVE); wake_up(&wait->wait_dma); }; @@ -256,6 +258,21 @@ pq_reqs_nomem: return ret; } +static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq) +{ + unsigned long flags; + seqlock_t *lock = pq->busy.lock; + + if (!lock) + return; + write_seqlock_irqsave(lock, flags); + if (!list_empty(&pq->busy.list)) { + list_del_init(&pq->busy.list); + pq->busy.lock = NULL; + } + write_sequnlock_irqrestore(lock, flags); +} + int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt) { @@ -281,6 +298,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, kfree(pq->reqs); kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); + flush_pq_iowait(pq); kfree(pq); } else { spin_unlock(&fd->pq_rcu_lock); @@ -587,11 +605,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, if (ret < 0) { if (ret != -EBUSY) goto free_req; - wait_event_interruptible_timeout( + if (wait_event_interruptible_timeout( pq->busy.wait_dma, - (pq->state == SDMA_PKT_Q_ACTIVE), + pq->state == SDMA_PKT_Q_ACTIVE, msecs_to_jiffies( - SDMA_IOWAIT_TIMEOUT)); + SDMA_IOWAIT_TIMEOUT)) <= 0) + flush_pq_iowait(pq); } } *count += idx; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 089e201d7550..2f6323ad9c59 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -515,10 +515,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, opa_get_lid(packet->dlid, 9B)); if (!mcast) goto drop; + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; if (hfi1_do_pkey_check(packet)) - goto drop; + goto unlock_drop; spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -527,6 +528,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); } + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 5ffe4c996ed3..5bfb52ffd590 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -257,8 +257,8 @@ static int create_user_cq(struct hns_roce_dev *hr_dev, return ret; } - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && - (udata->outlen >= sizeof(*resp))) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB && + udata->outlen >= offsetofend(typeof(*resp), cap_flags)) { ret = hns_roce_db_map_user(context, udata, ucmd.db_addr, &hr_cq->db); if (ret) { @@ -321,8 +321,8 @@ static void destroy_user_cq(struct hns_roce_dev *hr_dev, struct hns_roce_ucontext *context = rdma_udata_to_drv_context( udata, struct hns_roce_ucontext, ibucontext); - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && - (udata->outlen >= sizeof(*resp))) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB && + udata->outlen >= offsetofend(typeof(*resp), cap_flags)) hns_roce_db_unmap_user(context, &hr_cq->db); hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a7c4ff975c28..f6b3cf6b95d6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -641,6 +641,19 @@ struct hns_roce_rinl_buf { u32 wqe_cnt; }; +enum { + HNS_ROCE_FLUSH_FLAG = 0, +}; + +struct hns_roce_work { + struct hns_roce_dev *hr_dev; + struct work_struct work; + u32 qpn; + u32 cqn; + int event_type; + int sub_type; +}; + struct hns_roce_qp { struct ib_qp ibqp; struct hns_roce_buf hr_buf; @@ -656,11 +669,6 @@ struct hns_roce_qp { struct ib_umem *umem; struct hns_roce_mtt mtt; struct hns_roce_mtr mtr; - - /* this define must less than HNS_ROCE_MAX_BT_REGION */ -#define HNS_ROCE_WQE_REGION_MAX 3 - struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX]; - int region_cnt; int wqe_bt_pg_shift; u32 buff_size; @@ -684,6 +692,9 @@ struct hns_roce_qp { struct hns_roce_sge sge; u32 next_sge; + /* 0: flush needed, 1: unneeded */ + unsigned long flush_flag; + struct hns_roce_work flush_work; struct hns_roce_rinl_buf rq_inl_buf; struct list_head node; /* all qps are on a list */ struct list_head rq_node; /* all recv qps are on a list */ @@ -762,14 +773,8 @@ struct hns_roce_eq { int eqe_ba_pg_sz; int eqe_buf_pg_sz; int hop_num; - u64 *bt_l0; /* Base address table for L0 */ - u64 **bt_l1; /* Base address table for L1 */ - u64 **buf; - dma_addr_t l0_dma; - dma_addr_t *l1_dma; - dma_addr_t *buf_dma; - u32 l0_last_num; /* L0 last chunk num */ - u32 l1_last_num; /* L1 last chunk num */ + struct hns_roce_mtr mtr; + struct hns_roce_buf buf; int eq_max_cnt; int eq_period; int shift; @@ -881,7 +886,7 @@ struct hns_roce_caps { u32 cqc_timer_ba_pg_sz; u32 cqc_timer_buf_pg_sz; u32 cqc_timer_hop_num; - u32 cqe_ba_pg_sz; + u32 cqe_ba_pg_sz; /* page_size = 4K*(2^cqe_ba_pg_sz) */ u32 cqe_buf_pg_sz; u32 cqe_hop_num; u32 srqwqe_ba_pg_sz; @@ -906,15 +911,6 @@ struct hns_roce_caps { u16 default_ceq_arm_st; }; -struct hns_roce_work { - struct hns_roce_dev *hr_dev; - struct work_struct work; - u32 qpn; - u32 cqn; - int event_type; - int sub_type; -}; - struct hns_roce_dfx_hw { int (*query_cqc_info)(struct hns_roce_dev *hr_dev, u32 cqn, int *buffer); @@ -1237,9 +1233,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, struct ib_udata *udata); int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n); -void *get_send_wqe(struct hns_roce_qp *hr_qp, int n); -void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n); +void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); +void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n); +void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n); +void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n); bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, struct ib_cq *ib_cq); enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); @@ -1248,9 +1245,8 @@ void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq); void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); -void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); -void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, - int cnt); +void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata); __be32 send_ieth(const struct ib_send_wr *wr); int to_hr_qp_type(int qp_type); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index e82215774032..263338b90d7a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -39,6 +39,16 @@ #define DMA_ADDR_T_SHIFT 12 #define BT_BA_SHIFT 32 +#define HEM_INDEX_BUF BIT(0) +#define HEM_INDEX_L0 BIT(1) +#define HEM_INDEX_L1 BIT(2) +struct hns_roce_hem_index { + u64 buf; + u64 l0; + u64 l1; + u32 inited; /* indicate which index is available */ +}; + bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) { int hop_num = 0; @@ -84,25 +94,27 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) return hop_num ? true : false; } -static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 start_idx, - u32 bt_chunk_num, u64 hem_max_num) +static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 hem_idx, + u32 bt_chunk_num, u64 hem_max_num) { + u64 start_idx = round_down(hem_idx, bt_chunk_num); u64 check_max_num = start_idx + bt_chunk_num; u64 i; for (i = start_idx; (i < check_max_num) && (i < hem_max_num); i++) - if (hem[i]) + if (i != hem_idx && hem[i]) return false; return true; } -static bool hns_roce_check_bt_null(u64 **bt, u64 start_idx, u32 bt_chunk_num) +static bool hns_roce_check_bt_null(u64 **bt, u64 ba_idx, u32 bt_chunk_num) { + u64 start_idx = round_down(ba_idx, bt_chunk_num); int i; for (i = 0; i < bt_chunk_num; i++) - if (bt[start_idx + i]) + if (i != ba_idx && bt[start_idx + i]) return false; return true; @@ -434,178 +446,235 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, return ret; } -static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, - unsigned long obj) +static int calc_hem_config(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj, + struct hns_roce_hem_mhop *mhop, + struct hns_roce_hem_index *index) { - struct device *dev = hr_dev->dev; - struct hns_roce_hem_mhop mhop; - struct hns_roce_hem_iter iter; - u32 buf_chunk_size; - u32 bt_chunk_size; + struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned long mhop_obj = obj; + u32 l0_idx, l1_idx, l2_idx; u32 chunk_ba_num; - u32 hop_num; - u32 size; u32 bt_num; - u64 hem_idx; - u64 bt_l1_idx = 0; - u64 bt_l0_idx = 0; - u64 bt_ba; - unsigned long mhop_obj = obj; - int bt_l1_allocated = 0; - int bt_l0_allocated = 0; - int step_idx; int ret; - ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); + ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, mhop); if (ret) return ret; - buf_chunk_size = mhop.buf_chunk_size; - bt_chunk_size = mhop.bt_chunk_size; - hop_num = mhop.hop_num; - chunk_ba_num = bt_chunk_size / BA_BYTE_LEN; - - bt_num = hns_roce_get_bt_num(table->type, hop_num); + l0_idx = mhop->l0_idx; + l1_idx = mhop->l1_idx; + l2_idx = mhop->l2_idx; + chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN; + bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num); switch (bt_num) { case 3: - hem_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num + - mhop.l1_idx * chunk_ba_num + mhop.l2_idx; - bt_l1_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx; - bt_l0_idx = mhop.l0_idx; + index->l1 = l0_idx * chunk_ba_num + l1_idx; + index->l0 = l0_idx; + index->buf = l0_idx * chunk_ba_num * chunk_ba_num + + l1_idx * chunk_ba_num + l2_idx; break; case 2: - hem_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx; - bt_l0_idx = mhop.l0_idx; + index->l0 = l0_idx; + index->buf = l0_idx * chunk_ba_num + l1_idx; break; case 1: - hem_idx = mhop.l0_idx; + index->buf = l0_idx; break; default: - dev_err(dev, "Table %d not support hop_num = %d!\n", - table->type, hop_num); + ibdev_err(ibdev, "Table %d not support mhop.hop_num = %d!\n", + table->type, mhop->hop_num); return -EINVAL; } - if (unlikely(hem_idx >= table->num_hem)) { - dev_err(dev, "Table %d exceed hem limt idx = %llu,max = %lu!\n", - table->type, hem_idx, table->num_hem); + if (unlikely(index->buf >= table->num_hem)) { + ibdev_err(ibdev, "Table %d exceed hem limt idx %llu,max %lu!\n", + table->type, index->buf, table->num_hem); return -EINVAL; } - mutex_lock(&table->mutex); + return 0; +} - if (table->hem[hem_idx]) { - ++table->hem[hem_idx]->refcount; - goto out; +static void free_mhop_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + struct hns_roce_hem_mhop *mhop, + struct hns_roce_hem_index *index) +{ + u32 bt_size = mhop->bt_chunk_size; + struct device *dev = hr_dev->dev; + + if (index->inited & HEM_INDEX_BUF) { + hns_roce_free_hem(hr_dev, table->hem[index->buf]); + table->hem[index->buf] = NULL; + } + + if (index->inited & HEM_INDEX_L1) { + dma_free_coherent(dev, bt_size, table->bt_l1[index->l1], + table->bt_l1_dma_addr[index->l1]); + table->bt_l1[index->l1] = NULL; } + if (index->inited & HEM_INDEX_L0) { + dma_free_coherent(dev, bt_size, table->bt_l0[index->l0], + table->bt_l0_dma_addr[index->l0]); + table->bt_l0[index->l0] = NULL; + } +} + +static int alloc_mhop_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + struct hns_roce_hem_mhop *mhop, + struct hns_roce_hem_index *index) +{ + u32 bt_size = mhop->bt_chunk_size; + struct device *dev = hr_dev->dev; + struct hns_roce_hem_iter iter; + gfp_t flag; + u64 bt_ba; + u32 size; + int ret; + /* alloc L1 BA's chunk */ - if ((check_whether_bt_num_3(table->type, hop_num) || - check_whether_bt_num_2(table->type, hop_num)) && - !table->bt_l0[bt_l0_idx]) { - table->bt_l0[bt_l0_idx] = dma_alloc_coherent(dev, bt_chunk_size, - &(table->bt_l0_dma_addr[bt_l0_idx]), + if ((check_whether_bt_num_3(table->type, mhop->hop_num) || + check_whether_bt_num_2(table->type, mhop->hop_num)) && + !table->bt_l0[index->l0]) { + table->bt_l0[index->l0] = dma_alloc_coherent(dev, bt_size, + &table->bt_l0_dma_addr[index->l0], GFP_KERNEL); - if (!table->bt_l0[bt_l0_idx]) { + if (!table->bt_l0[index->l0]) { ret = -ENOMEM; goto out; } - bt_l0_allocated = 1; - - /* set base address to hardware */ - if (table->type < HEM_TYPE_MTT) { - step_idx = 0; - if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) { - ret = -ENODEV; - dev_err(dev, "set HEM base address to HW failed!\n"); - goto err_dma_alloc_l1; - } - } + index->inited |= HEM_INDEX_L0; } /* alloc L2 BA's chunk */ - if (check_whether_bt_num_3(table->type, hop_num) && - !table->bt_l1[bt_l1_idx]) { - table->bt_l1[bt_l1_idx] = dma_alloc_coherent(dev, bt_chunk_size, - &(table->bt_l1_dma_addr[bt_l1_idx]), + if (check_whether_bt_num_3(table->type, mhop->hop_num) && + !table->bt_l1[index->l1]) { + table->bt_l1[index->l1] = dma_alloc_coherent(dev, bt_size, + &table->bt_l1_dma_addr[index->l1], GFP_KERNEL); - if (!table->bt_l1[bt_l1_idx]) { + if (!table->bt_l1[index->l1]) { ret = -ENOMEM; - goto err_dma_alloc_l1; - } - bt_l1_allocated = 1; - *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) = - table->bt_l1_dma_addr[bt_l1_idx]; - - /* set base address to hardware */ - step_idx = 1; - if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) { - ret = -ENODEV; - dev_err(dev, "set HEM base address to HW failed!\n"); - goto err_alloc_hem_buf; + goto err_alloc_hem; } + index->inited |= HEM_INDEX_L1; + *(table->bt_l0[index->l0] + mhop->l1_idx) = + table->bt_l1_dma_addr[index->l1]; } /* * alloc buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC. * alloc bt space chunk for MTT/CQE. */ - size = table->type < HEM_TYPE_MTT ? buf_chunk_size : bt_chunk_size; - table->hem[hem_idx] = hns_roce_alloc_hem(hr_dev, - size >> PAGE_SHIFT, - size, - (table->lowmem ? GFP_KERNEL : - GFP_HIGHUSER) | __GFP_NOWARN); - if (!table->hem[hem_idx]) { + size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size; + flag = (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN; + table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT, + size, flag); + if (!table->hem[index->buf]) { ret = -ENOMEM; - goto err_alloc_hem_buf; + goto err_alloc_hem; } - hns_roce_hem_first(table->hem[hem_idx], &iter); + index->inited |= HEM_INDEX_BUF; + hns_roce_hem_first(table->hem[index->buf], &iter); bt_ba = hns_roce_hem_addr(&iter); - if (table->type < HEM_TYPE_MTT) { - if (hop_num == 2) { - *(table->bt_l1[bt_l1_idx] + mhop.l2_idx) = bt_ba; - step_idx = 2; - } else if (hop_num == 1) { - *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) = bt_ba; - step_idx = 1; - } else if (hop_num == HNS_ROCE_HOP_NUM_0) { - step_idx = 0; - } else { - ret = -EINVAL; - goto err_dma_alloc_l1; + if (mhop->hop_num == 2) + *(table->bt_l1[index->l1] + mhop->l2_idx) = bt_ba; + else if (mhop->hop_num == 1) + *(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba; + } else if (mhop->hop_num == 2) { + *(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba; + } + + return 0; +err_alloc_hem: + free_mhop_hem(hr_dev, table, mhop, index); +out: + return ret; +} + +static int set_mhop_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj, + struct hns_roce_hem_mhop *mhop, + struct hns_roce_hem_index *index) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int step_idx; + int ret = 0; + + if (index->inited & HEM_INDEX_L0) { + ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0); + if (ret) { + ibdev_err(ibdev, "set HEM step 0 failed!\n"); + goto out; } + } - /* set HEM base address to hardware */ - if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) { - ret = -ENODEV; - dev_err(dev, "set HEM base address to HW failed!\n"); - goto err_alloc_hem_buf; + if (index->inited & HEM_INDEX_L1) { + ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1); + if (ret) { + ibdev_err(ibdev, "set HEM step 1 failed!\n"); + goto out; } - } else if (hop_num == 2) { - *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) = bt_ba; } - ++table->hem[hem_idx]->refcount; - goto out; + if (index->inited & HEM_INDEX_BUF) { + if (mhop->hop_num == HNS_ROCE_HOP_NUM_0) + step_idx = 0; + else + step_idx = mhop->hop_num; + ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx); + if (ret) + ibdev_err(ibdev, "set HEM step last failed!\n"); + } +out: + return ret; +} -err_alloc_hem_buf: - if (bt_l1_allocated) { - dma_free_coherent(dev, bt_chunk_size, table->bt_l1[bt_l1_idx], - table->bt_l1_dma_addr[bt_l1_idx]); - table->bt_l1[bt_l1_idx] = NULL; +static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long obj) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_hem_index index = {}; + struct hns_roce_hem_mhop mhop = {}; + int ret; + + ret = calc_hem_config(hr_dev, table, obj, &mhop, &index); + if (ret) { + ibdev_err(ibdev, "calc hem config failed!\n"); + return ret; + } + + mutex_lock(&table->mutex); + if (table->hem[index.buf]) { + ++table->hem[index.buf]->refcount; + goto out; } -err_dma_alloc_l1: - if (bt_l0_allocated) { - dma_free_coherent(dev, bt_chunk_size, table->bt_l0[bt_l0_idx], - table->bt_l0_dma_addr[bt_l0_idx]); - table->bt_l0[bt_l0_idx] = NULL; + ret = alloc_mhop_hem(hr_dev, table, &mhop, &index); + if (ret) { + ibdev_err(ibdev, "alloc mhop hem failed!\n"); + goto out; } + /* set HEM base address to hardware */ + if (table->type < HEM_TYPE_MTT) { + ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index); + if (ret) { + ibdev_err(ibdev, "set HEM address to HW failed!\n"); + goto err_alloc; + } + } + + ++table->hem[index.buf]->refcount; + goto out; + +err_alloc: + free_mhop_hem(hr_dev, table, &mhop, &index); out: mutex_unlock(&table->mutex); return ret; @@ -656,116 +725,75 @@ out: return ret; } +static void clear_mhop_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj, + struct hns_roce_hem_mhop *mhop, + struct hns_roce_hem_index *index) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 hop_num = mhop->hop_num; + u32 chunk_ba_num; + int step_idx; + + index->inited = HEM_INDEX_BUF; + chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN; + if (check_whether_bt_num_2(table->type, hop_num)) { + if (hns_roce_check_hem_null(table->hem, index->buf, + chunk_ba_num, table->num_hem)) + index->inited |= HEM_INDEX_L0; + } else if (check_whether_bt_num_3(table->type, hop_num)) { + if (hns_roce_check_hem_null(table->hem, index->buf, + chunk_ba_num, table->num_hem)) { + index->inited |= HEM_INDEX_L1; + if (hns_roce_check_bt_null(table->bt_l1, index->l1, + chunk_ba_num)) + index->inited |= HEM_INDEX_L0; + } + } + + if (table->type < HEM_TYPE_MTT) { + if (hop_num == HNS_ROCE_HOP_NUM_0) + step_idx = 0; + else + step_idx = hop_num; + + if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx)) + ibdev_warn(ibdev, "Clear hop%d HEM failed.\n", hop_num); + + if (index->inited & HEM_INDEX_L1) + if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) + ibdev_warn(ibdev, "Clear HEM step 1 failed.\n"); + + if (index->inited & HEM_INDEX_L0) + if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) + ibdev_warn(ibdev, "Clear HEM step 0 failed.\n"); + } +} + static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj, int check_refcount) { - struct device *dev = hr_dev->dev; - struct hns_roce_hem_mhop mhop; - unsigned long mhop_obj = obj; - u32 bt_chunk_size; - u32 chunk_ba_num; - u32 hop_num; - u32 start_idx; - u32 bt_num; - u64 hem_idx; - u64 bt_l1_idx = 0; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_hem_index index = {}; + struct hns_roce_hem_mhop mhop = {}; int ret; - ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); - if (ret) - return; - - bt_chunk_size = mhop.bt_chunk_size; - hop_num = mhop.hop_num; - chunk_ba_num = bt_chunk_size / BA_BYTE_LEN; - - bt_num = hns_roce_get_bt_num(table->type, hop_num); - switch (bt_num) { - case 3: - hem_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num + - mhop.l1_idx * chunk_ba_num + mhop.l2_idx; - bt_l1_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx; - break; - case 2: - hem_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx; - break; - case 1: - hem_idx = mhop.l0_idx; - break; - default: - dev_err(dev, "Table %d not support hop_num = %d!\n", - table->type, hop_num); + ret = calc_hem_config(hr_dev, table, obj, &mhop, &index); + if (ret) { + ibdev_err(ibdev, "calc hem config failed!\n"); return; } mutex_lock(&table->mutex); - - if (check_refcount && (--table->hem[hem_idx]->refcount > 0)) { + if (check_refcount && (--table->hem[index.buf]->refcount > 0)) { mutex_unlock(&table->mutex); return; } - if (table->type < HEM_TYPE_MTT && hop_num == 1) { - if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) - dev_warn(dev, "Clear HEM base address failed.\n"); - } else if (table->type < HEM_TYPE_MTT && hop_num == 2) { - if (hr_dev->hw->clear_hem(hr_dev, table, obj, 2)) - dev_warn(dev, "Clear HEM base address failed.\n"); - } else if (table->type < HEM_TYPE_MTT && - hop_num == HNS_ROCE_HOP_NUM_0) { - if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) - dev_warn(dev, "Clear HEM base address failed.\n"); - } - - /* - * free buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC. - * free bt space chunk for MTT/CQE. - */ - hns_roce_free_hem(hr_dev, table->hem[hem_idx]); - table->hem[hem_idx] = NULL; - - if (check_whether_bt_num_2(table->type, hop_num)) { - start_idx = mhop.l0_idx * chunk_ba_num; - if (hns_roce_check_hem_null(table->hem, start_idx, - chunk_ba_num, table->num_hem)) { - if (table->type < HEM_TYPE_MTT && - hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) - dev_warn(dev, "Clear HEM base address failed.\n"); - - dma_free_coherent(dev, bt_chunk_size, - table->bt_l0[mhop.l0_idx], - table->bt_l0_dma_addr[mhop.l0_idx]); - table->bt_l0[mhop.l0_idx] = NULL; - } - } else if (check_whether_bt_num_3(table->type, hop_num)) { - start_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num + - mhop.l1_idx * chunk_ba_num; - if (hns_roce_check_hem_null(table->hem, start_idx, - chunk_ba_num, table->num_hem)) { - if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) - dev_warn(dev, "Clear HEM base address failed.\n"); - - dma_free_coherent(dev, bt_chunk_size, - table->bt_l1[bt_l1_idx], - table->bt_l1_dma_addr[bt_l1_idx]); - table->bt_l1[bt_l1_idx] = NULL; - - start_idx = mhop.l0_idx * chunk_ba_num; - if (hns_roce_check_bt_null(table->bt_l1, start_idx, - chunk_ba_num)) { - if (hr_dev->hw->clear_hem(hr_dev, table, obj, - 0)) - dev_warn(dev, "Clear HEM base address failed.\n"); - - dma_free_coherent(dev, bt_chunk_size, - table->bt_l0[mhop.l0_idx], - table->bt_l0_dma_addr[mhop.l0_idx]); - table->bt_l0[mhop.l0_idx] = NULL; - } - } - } + clear_mhop_hem(hr_dev, table, obj, &mhop, &index); + free_mhop_hem(hr_dev, table, &mhop, &index); mutex_unlock(&table->mutex); } @@ -1383,6 +1411,7 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base; u64 phy_base; int ret = 0; + int ba_num; int offset; int total; int step; @@ -1393,12 +1422,16 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, if (root_hem) return 0; + ba_num = hns_roce_hem_list_calc_root_ba(regions, region_cnt, unit); + if (ba_num < 1) + return -ENOMEM; + INIT_LIST_HEAD(&temp_root); - total = r->offset; + offset = r->offset; /* indicate to last region */ r = ®ions[region_cnt - 1]; - root_hem = hem_list_alloc_item(hr_dev, total, r->offset + r->count - 1, - unit, true, 0); + root_hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1, + ba_num, true, 0); if (!root_hem) return -ENOMEM; list_add(&root_hem->list, &temp_root); @@ -1410,7 +1443,7 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, INIT_LIST_HEAD(&temp_list[i]); total = 0; - for (i = 0; i < region_cnt && total < unit; i++) { + for (i = 0; i < region_cnt && total < ba_num; i++) { r = ®ions[i]; if (!r->count) continue; @@ -1443,7 +1476,8 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, /* if exist mid bt, link L1 to L0 */ list_for_each_entry_safe(hem, temp_hem, &hem_list->mid_bt[i][1], list) { - offset = hem->start / step * BA_BYTE_LEN; + offset = (hem->start - r->offset) / step * + BA_BYTE_LEN; hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr); total++; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index c6e66586e533..5ff028d77be3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -69,7 +69,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct hns_roce_wqe_data_seg *dseg = NULL; struct hns_roce_qp *qp = to_hr_qp(ibqp); struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_sq_db sq_db; + struct hns_roce_sq_db sq_db = {}; int ps_opcode = 0, i = 0; unsigned long flags = 0; void *wqe = NULL; @@ -106,7 +106,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, goto out; } - wqe = get_send_wqe(qp, wqe_idx); + wqe = hns_roce_get_send_wqe(qp, wqe_idx); qp->sq.wrid[wqe_idx] = wr->wr_id; /* Corresponding to the RC and RD type wqe process separately */ @@ -318,8 +318,6 @@ out: /* Memory barrier */ wmb(); - sq_db.u32_4 = 0; - sq_db.u32_8 = 0; roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M, SQ_DOORBELL_U32_4_SQ_HEAD_S, (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1))); @@ -351,7 +349,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_rq_db rq_db; + struct hns_roce_rq_db rq_db = {}; __le32 doorbell[2] = {0}; unsigned long flags = 0; unsigned int wqe_idx; @@ -380,7 +378,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, goto out; } - ctrl = get_recv_wqe(hr_qp, wqe_idx); + ctrl = hns_roce_get_recv_wqe(hr_qp, wqe_idx); roce_set_field(ctrl->rwqe_byte_12, RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M, @@ -418,9 +416,6 @@ out: ROCEE_QP1C_CFG3_0_REG + QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val); } else { - rq_db.u32_4 = 0; - rq_db.u32_8 = 0; - roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M, RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head); @@ -2289,9 +2284,10 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq, if (is_send) { /* SQ conrespond to CQE */ - sq_wqe = get_send_wqe(*cur_qp, roce_get_field(cqe->cqe_byte_4, + sq_wqe = hns_roce_get_send_wqe(*cur_qp, + roce_get_field(cqe->cqe_byte_4, CQE_BYTE_4_WQE_INDEX_M, - CQE_BYTE_4_WQE_INDEX_S)& + CQE_BYTE_4_WQE_INDEX_S) & ((*cur_qp)->sq.wqe_cnt-1)); switch (le32_to_cpu(sq_wqe->flag) & HNS_ROCE_WQE_OPCODE_MASK) { case HNS_ROCE_WQE_OPCODE_SEND: @@ -3623,26 +3619,11 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (send_cq && send_cq != recv_cq) __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL); } - hns_roce_unlock_cqs(send_cq, recv_cq); - hns_roce_qp_remove(hr_dev, hr_qp); - hns_roce_qp_free(hr_dev, hr_qp); - - /* RC QP, release QPN */ - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); - - hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); - - ib_umem_release(hr_qp->umem); - if (!udata) { - kfree(hr_qp->sq.wrid); - kfree(hr_qp->rq.wrid); + hns_roce_unlock_cqs(send_cq, recv_cq); - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); - } + hns_roce_qp_destroy(hr_dev, hr_qp, udata); - kfree(hr_qp); return 0; } @@ -3954,10 +3935,8 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, eq->cons_index++; aeqes_found = 1; - if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) { - dev_warn(dev, "cons_index overflow, set back to 0.\n"); + if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) eq->cons_index = 0; - } } set_eq_cons_index_v1(eq, 0); @@ -4007,11 +3986,8 @@ static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev, ceqes_found = 1; if (eq->cons_index > - EQ_DEPTH_COEFF * hr_dev->caps.ceqe_depth - 1) { - dev_warn(&eq->hr_dev->pdev->dev, - "cons_index overflow, set back to 0.\n"); + EQ_DEPTH_COEFF * hr_dev->caps.ceqe_depth - 1) eq->cons_index = 0; - } } set_eq_cons_index_v1(eq, 0); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 12c4cd8e9378..c3316672b70e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -56,11 +56,45 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, dseg->len = cpu_to_le32(sg->length); } +/* + * mapped-value = 1 + real-value + * The hns wr opcode real value is start from 0, In order to distinguish between + * initialized and uninitialized map values, we plus 1 to the actual value when + * defining the mapping, so that the validity can be identified by checking the + * mapped value is greater than 0. + */ +#define HR_OPC_MAP(ib_key, hr_key) \ + [IB_WR_ ## ib_key] = 1 + HNS_ROCE_V2_WQE_OP_ ## hr_key + +static const u32 hns_roce_op_code[] = { + HR_OPC_MAP(RDMA_WRITE, RDMA_WRITE), + HR_OPC_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE_WITH_IMM), + HR_OPC_MAP(SEND, SEND), + HR_OPC_MAP(SEND_WITH_IMM, SEND_WITH_IMM), + HR_OPC_MAP(RDMA_READ, RDMA_READ), + HR_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOM_CMP_AND_SWAP), + HR_OPC_MAP(ATOMIC_FETCH_AND_ADD, ATOM_FETCH_AND_ADD), + HR_OPC_MAP(SEND_WITH_INV, SEND_WITH_INV), + HR_OPC_MAP(LOCAL_INV, LOCAL_INV), + HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP, ATOM_MSK_CMP_AND_SWAP), + HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD, ATOM_MSK_FETCH_AND_ADD), + HR_OPC_MAP(REG_MR, FAST_REG_PMR), +}; + +static u32 to_hr_opcode(u32 ib_opcode) +{ + if (ib_opcode >= ARRAY_SIZE(hns_roce_op_code)) + return HNS_ROCE_V2_WQE_OP_MASK; + + return hns_roce_op_code[ib_opcode] ? hns_roce_op_code[ib_opcode] - 1 : + HNS_ROCE_V2_WQE_OP_MASK; +} + static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, - struct hns_roce_wqe_frmr_seg *fseg, - const struct ib_reg_wr *wr) + void *wqe, const struct ib_reg_wr *wr) { struct hns_roce_mr *mr = to_hr_mr(wr->mr); + struct hns_roce_wqe_frmr_seg *fseg = wqe; /* use ib_access_flags */ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S, @@ -92,16 +126,26 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); } -static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg, - const struct ib_atomic_wr *wr) +static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe, + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, + int valid_num_sge) { - if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - aseg->fetchadd_swap_data = cpu_to_le64(wr->swap); - aseg->cmp_data = cpu_to_le64(wr->compare_add); + struct hns_roce_wqe_atomic_seg *aseg; + + set_data_seg_v2(wqe, wr->sg_list); + aseg = wqe + sizeof(struct hns_roce_v2_wqe_data_seg); + + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap); + aseg->cmp_data = cpu_to_le64(atomic_wr(wr)->compare_add); } else { - aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add); + aseg->fetchadd_swap_data = + cpu_to_le64(atomic_wr(wr)->compare_add); aseg->cmp_data = 0; } + + roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); } static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, @@ -127,7 +171,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, * should calculate how many sges in the first page and the second * page. */ - dseg = get_send_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1)); + dseg = hns_roce_get_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1)); fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) - (uintptr_t)dseg) / sizeof(struct hns_roce_v2_wqe_data_seg); @@ -137,7 +181,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, set_data_seg_v2(dseg++, sg + i); (*sge_ind)++; } - dseg = get_send_extend_sge(qp, + dseg = hns_roce_get_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1)); for (i = 0; i < se_sge_num; i++) { set_data_seg_v2(dseg++, sg + fi_sge_num + i); @@ -154,11 +198,11 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, - int valid_num_sge, - const struct ib_send_wr **bad_wr) + int valid_num_sge) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = wqe; + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_qp *qp = to_hr_qp(ibqp); int j = 0; int i; @@ -166,15 +210,14 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) { if (le32_to_cpu(rc_sq_wqe->msg_len) > hr_dev->caps.max_sq_inline) { - *bad_wr = wr; - dev_err(hr_dev->dev, "inline len(1-%d)=%d, illegal", - rc_sq_wqe->msg_len, hr_dev->caps.max_sq_inline); + ibdev_err(ibdev, "inline len(1-%d)=%d, illegal", + rc_sq_wqe->msg_len, + hr_dev->caps.max_sq_inline); return -EINVAL; } if (wr->opcode == IB_WR_RDMA_READ) { - *bad_wr = wr; - dev_err(hr_dev->dev, "Not support inline data!\n"); + ibdev_err(ibdev, "Not support inline data!\n"); return -EINVAL; } @@ -220,62 +263,287 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, return 0; } -static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, - int attr_mask, enum ib_qp_state cur_state, - enum ib_qp_state new_state); - static int check_send_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { + struct ib_device *ibdev = &hr_dev->ib_dev; struct ib_qp *ibqp = &hr_qp->ibqp; - struct device *dev = hr_dev->dev; if (unlikely(ibqp->qp_type != IB_QPT_RC && ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_UD)) { - dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type); + ibdev_err(ibdev, "Not supported QP(0x%x)type!\n", + ibqp->qp_type); return -EOPNOTSUPP; } else if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || hr_qp->state == IB_QPS_RTR)) { - dev_err(dev, "Post WQE fail, QP state %d!\n", hr_qp->state); + ibdev_err(ibdev, "failed to post WQE, QP state %d!\n", + hr_qp->state); return -EINVAL; } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { - dev_err(dev, "Post WQE fail, dev state %d!\n", hr_dev->state); + ibdev_err(ibdev, "failed to post WQE, dev state %d!\n", + hr_dev->state); return -EIO; } return 0; } +static inline int calc_wr_sge_num(const struct ib_send_wr *wr, u32 *sge_len) +{ + int valid_num = 0; + u32 len = 0; + int i; + + for (i = 0; i < wr->num_sge; i++) { + if (likely(wr->sg_list[i].length)) { + len += wr->sg_list[i].length; + valid_num++; + } + } + + *sge_len = len; + return valid_num; +} + +static inline int set_ud_wqe(struct hns_roce_qp *qp, + const struct ib_send_wr *wr, + void *wqe, unsigned int *sge_idx, + unsigned int owner_bit) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); + struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); + struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe; + unsigned int curr_idx = *sge_idx; + int valid_num_sge; + u32 msg_len = 0; + bool loopback; + u8 *smac; + + valid_num_sge = calc_wr_sge_num(wr, &msg_len); + memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe)); + + roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M, + V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]); + roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M, + V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]); + roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M, + V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]); + roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M, + V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]); + roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_4_M, + V2_UD_SEND_WQE_BYTE_48_DMAC_4_S, ah->av.mac[4]); + roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_DMAC_5_M, + V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, ah->av.mac[5]); + + /* MAC loopback */ + smac = (u8 *)hr_dev->dev_addr[qp->port]; + loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0; + + roce_set_bit(ud_sq_wqe->byte_40, + V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback); + + roce_set_field(ud_sq_wqe->byte_4, + V2_UD_SEND_WQE_BYTE_4_OPCODE_M, + V2_UD_SEND_WQE_BYTE_4_OPCODE_S, + HNS_ROCE_V2_WQE_OP_SEND); + + ud_sq_wqe->msg_len = cpu_to_le32(msg_len); + + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + ud_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); + break; + default: + ud_sq_wqe->immtdata = 0; + break; + } + + /* Set sig attr */ + roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, + (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); + + /* Set se attr */ + roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, + (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); + + roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); + + roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, + V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); + + roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M, + V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + + roce_set_field(ud_sq_wqe->byte_20, + V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, + V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, + curr_idx & (qp->sge.sge_cnt - 1)); + + roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, + V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0); + ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? + qp->qkey : ud_wr(wr)->remote_qkey); + roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, + V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); + + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, + V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, + V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); + roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, + V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, + V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, + V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); + roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M, + V2_UD_SEND_WQE_BYTE_40_PORTN_S, qp->port); + + roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, + ah->av.vlan_en ? 1 : 0); + roce_set_field(ud_sq_wqe->byte_48, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, + V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, ah->av.gid_index); + + memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); + + set_extend_sge(qp, wr, &curr_idx, valid_num_sge); + + *sge_idx = curr_idx; + + return 0; +} + +static inline int set_rc_wqe(struct hns_roce_qp *qp, + const struct ib_send_wr *wr, + void *wqe, unsigned int *sge_idx, + unsigned int owner_bit) +{ + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; + unsigned int curr_idx = *sge_idx; + int valid_num_sge; + u32 msg_len = 0; + int ret = 0; + + valid_num_sge = calc_wr_sge_num(wr, &msg_len); + memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); + + rc_sq_wqe->msg_len = cpu_to_le32(msg_len); + + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + rc_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); + break; + case IB_WR_SEND_WITH_INV: + rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); + break; + default: + rc_sq_wqe->immtdata = 0; + break; + } + + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S, + (wr->send_flags & IB_SEND_FENCE) ? 1 : 0); + + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S, + (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); + + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, + (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); + + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, + owner_bit); + + wqe += sizeof(struct hns_roce_v2_rc_send_wqe); + switch (wr->opcode) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); + rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); + break; + case IB_WR_LOCAL_INV: + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); + rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); + break; + case IB_WR_REG_MR: + set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr)); + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey); + rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr); + break; + default: + break; + } + + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, + to_hr_opcode(wr->opcode)); + + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + set_atomic_seg(wr, wqe, rc_sq_wqe, valid_num_sge); + else if (wr->opcode != IB_WR_REG_MR) + ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe, + wqe, &curr_idx, valid_num_sge); + + *sge_idx = curr_idx; + + return ret; +} + +static inline void update_sq_db(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *qp) +{ + /* + * Hip08 hardware cannot flush the WQEs in SQ if the QP state + * gets into errored mode. Hence, as a workaround to this + * hardware limitation, driver needs to assist in flushing. But + * the flushing operation uses mailbox to convey the QP state to + * the hardware and which can sleep due to the mutex protection + * around the mailbox calls. Hence, use the deferred flush for + * now. + */ + if (qp->state == IB_QPS_ERR) { + if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag)) + init_flush_work(hr_dev, qp); + } else { + struct hns_roce_v2_db sq_db = {}; + + roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_TAG_M, + V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn); + roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M, + V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB); + roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M, + V2_DB_PARAMETER_IDX_S, + qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)); + roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, + V2_DB_PARAMETER_SL_S, qp->sl); + + hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l); + } +} + static int hns_roce_v2_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); - struct hns_roce_v2_ud_send_wqe *ud_sq_wqe; - struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_qp *qp = to_hr_qp(ibqp); - struct hns_roce_wqe_frmr_seg *fseg; - struct device *dev = hr_dev->dev; - struct hns_roce_v2_db sq_db; - struct ib_qp_attr attr; + unsigned long flags = 0; unsigned int owner_bit; unsigned int sge_idx; unsigned int wqe_idx; - unsigned long flags; - int valid_num_sge; void *wqe = NULL; - bool loopback; - int attr_mask; - u32 tmp_len; - u32 hr_op; - u8 *smac; int nreq; int ret; - int i; spin_lock_irqsave(&qp->sq.lock, flags); @@ -298,327 +566,37 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); if (unlikely(wr->num_sge > qp->sq.max_gs)) { - dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", - wr->num_sge, qp->sq.max_gs); + ibdev_err(ibdev, "num_sge=%d > qp->sq.max_gs=%d\n", + wr->num_sge, qp->sq.max_gs); ret = -EINVAL; *bad_wr = wr; goto out; } - wqe = get_send_wqe(qp, wqe_idx); + wqe = hns_roce_get_send_wqe(qp, wqe_idx); qp->sq.wrid[wqe_idx] = wr->wr_id; owner_bit = ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); - valid_num_sge = 0; - tmp_len = 0; - - for (i = 0; i < wr->num_sge; i++) { - if (likely(wr->sg_list[i].length)) { - tmp_len += wr->sg_list[i].length; - valid_num_sge++; - } - } /* Corresponding to the QP type, wqe process separately */ - if (ibqp->qp_type == IB_QPT_GSI) { - ud_sq_wqe = wqe; - memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe)); - - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M, - V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M, - V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M, - V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]); - roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M, - V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]); - roce_set_field(ud_sq_wqe->byte_48, - V2_UD_SEND_WQE_BYTE_48_DMAC_4_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_4_S, - ah->av.mac[4]); - roce_set_field(ud_sq_wqe->byte_48, - V2_UD_SEND_WQE_BYTE_48_DMAC_5_M, - V2_UD_SEND_WQE_BYTE_48_DMAC_5_S, - ah->av.mac[5]); - - /* MAC loopback */ - smac = (u8 *)hr_dev->dev_addr[qp->port]; - loopback = ether_addr_equal_unaligned(ah->av.mac, - smac) ? 1 : 0; - - roce_set_bit(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback); - - roce_set_field(ud_sq_wqe->byte_4, - V2_UD_SEND_WQE_BYTE_4_OPCODE_M, - V2_UD_SEND_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_V2_WQE_OP_SEND); - - ud_sq_wqe->msg_len = - cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len); - - switch (wr->opcode) { - case IB_WR_SEND_WITH_IMM: - case IB_WR_RDMA_WRITE_WITH_IMM: - ud_sq_wqe->immtdata = - cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); - break; - default: - ud_sq_wqe->immtdata = 0; - break; - } + if (ibqp->qp_type == IB_QPT_GSI) + ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit); + else if (ibqp->qp_type == IB_QPT_RC) + ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit); - /* Set sig attr */ - roce_set_bit(ud_sq_wqe->byte_4, - V2_UD_SEND_WQE_BYTE_4_CQE_S, - (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); - - /* Set se attr */ - roce_set_bit(ud_sq_wqe->byte_4, - V2_UD_SEND_WQE_BYTE_4_SE_S, - (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); - - roce_set_bit(ud_sq_wqe->byte_4, - V2_UD_SEND_WQE_BYTE_4_OWNER_S, owner_bit); - - roce_set_field(ud_sq_wqe->byte_16, - V2_UD_SEND_WQE_BYTE_16_PD_M, - V2_UD_SEND_WQE_BYTE_16_PD_S, - to_hr_pd(ibqp->pd)->pdn); - - roce_set_field(ud_sq_wqe->byte_16, - V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, - valid_num_sge); - - roce_set_field(ud_sq_wqe->byte_20, - V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, - V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, - sge_idx & (qp->sge.sge_cnt - 1)); - - roce_set_field(ud_sq_wqe->byte_24, - V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0); - ud_sq_wqe->qkey = - cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? - qp->qkey : ud_wr(wr)->remote_qkey); - roce_set_field(ud_sq_wqe->byte_32, - V2_UD_SEND_WQE_BYTE_32_DQPN_M, - V2_UD_SEND_WQE_BYTE_32_DQPN_S, - ud_wr(wr)->remote_qpn); - - roce_set_field(ud_sq_wqe->byte_36, - V2_UD_SEND_WQE_BYTE_36_VLAN_M, - V2_UD_SEND_WQE_BYTE_36_VLAN_S, - ah->av.vlan_id); - roce_set_field(ud_sq_wqe->byte_36, - V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, - V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, - ah->av.hop_limit); - roce_set_field(ud_sq_wqe->byte_36, - V2_UD_SEND_WQE_BYTE_36_TCLASS_M, - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, - ah->av.tclass); - roce_set_field(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, - ah->av.flowlabel); - roce_set_field(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_SL_M, - V2_UD_SEND_WQE_BYTE_40_SL_S, - ah->av.sl); - roce_set_field(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_PORTN_M, - V2_UD_SEND_WQE_BYTE_40_PORTN_S, - qp->port); - - roce_set_bit(ud_sq_wqe->byte_40, - V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, - ah->av.vlan_en ? 1 : 0); - roce_set_field(ud_sq_wqe->byte_48, - V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, - V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, - hns_get_gid_index(hr_dev, qp->phy_port, - ah->av.gid_index)); - - memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], - GID_LEN_V2); - - set_extend_sge(qp, wr, &sge_idx, valid_num_sge); - } else if (ibqp->qp_type == IB_QPT_RC) { - rc_sq_wqe = wqe; - memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); - - rc_sq_wqe->msg_len = - cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len); - - switch (wr->opcode) { - case IB_WR_SEND_WITH_IMM: - case IB_WR_RDMA_WRITE_WITH_IMM: - rc_sq_wqe->immtdata = - cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); - break; - case IB_WR_SEND_WITH_INV: - rc_sq_wqe->inv_key = - cpu_to_le32(wr->ex.invalidate_rkey); - break; - default: - rc_sq_wqe->immtdata = 0; - break; - } - - roce_set_bit(rc_sq_wqe->byte_4, - V2_RC_SEND_WQE_BYTE_4_FENCE_S, - (wr->send_flags & IB_SEND_FENCE) ? 1 : 0); - - roce_set_bit(rc_sq_wqe->byte_4, - V2_RC_SEND_WQE_BYTE_4_SE_S, - (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); - - roce_set_bit(rc_sq_wqe->byte_4, - V2_RC_SEND_WQE_BYTE_4_CQE_S, - (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); - - roce_set_bit(rc_sq_wqe->byte_4, - V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit); - - wqe += sizeof(struct hns_roce_v2_rc_send_wqe); - switch (wr->opcode) { - case IB_WR_RDMA_READ: - hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ; - rc_sq_wqe->rkey = - cpu_to_le32(rdma_wr(wr)->rkey); - rc_sq_wqe->va = - cpu_to_le64(rdma_wr(wr)->remote_addr); - break; - case IB_WR_RDMA_WRITE: - hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE; - rc_sq_wqe->rkey = - cpu_to_le32(rdma_wr(wr)->rkey); - rc_sq_wqe->va = - cpu_to_le64(rdma_wr(wr)->remote_addr); - break; - case IB_WR_RDMA_WRITE_WITH_IMM: - hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM; - rc_sq_wqe->rkey = - cpu_to_le32(rdma_wr(wr)->rkey); - rc_sq_wqe->va = - cpu_to_le64(rdma_wr(wr)->remote_addr); - break; - case IB_WR_SEND: - hr_op = HNS_ROCE_V2_WQE_OP_SEND; - break; - case IB_WR_SEND_WITH_INV: - hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV; - break; - case IB_WR_SEND_WITH_IMM: - hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM; - break; - case IB_WR_LOCAL_INV: - hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV; - roce_set_bit(rc_sq_wqe->byte_4, - V2_RC_SEND_WQE_BYTE_4_SO_S, 1); - rc_sq_wqe->inv_key = - cpu_to_le32(wr->ex.invalidate_rkey); - break; - case IB_WR_REG_MR: - hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR; - fseg = wqe; - set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr)); - break; - case IB_WR_ATOMIC_CMP_AND_SWP: - hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP; - rc_sq_wqe->rkey = - cpu_to_le32(atomic_wr(wr)->rkey); - rc_sq_wqe->va = - cpu_to_le64(atomic_wr(wr)->remote_addr); - break; - case IB_WR_ATOMIC_FETCH_AND_ADD: - hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD; - rc_sq_wqe->rkey = - cpu_to_le32(atomic_wr(wr)->rkey); - rc_sq_wqe->va = - cpu_to_le64(atomic_wr(wr)->remote_addr); - break; - case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: - hr_op = - HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP; - break; - case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: - hr_op = - HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD; - break; - default: - hr_op = HNS_ROCE_V2_WQE_OP_MASK; - break; - } - - roce_set_field(rc_sq_wqe->byte_4, - V2_RC_SEND_WQE_BYTE_4_OPCODE_M, - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op); - - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { - struct hns_roce_v2_wqe_data_seg *dseg; - - dseg = wqe; - set_data_seg_v2(dseg, wr->sg_list); - wqe += sizeof(struct hns_roce_v2_wqe_data_seg); - set_atomic_seg(wqe, atomic_wr(wr)); - roce_set_field(rc_sq_wqe->byte_16, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, - valid_num_sge); - } else if (wr->opcode != IB_WR_REG_MR) { - ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, - wqe, &sge_idx, - valid_num_sge, bad_wr); - if (ret) - goto out; - } - } else { - dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type); - spin_unlock_irqrestore(&qp->sq.lock, flags); + if (ret) { *bad_wr = wr; - return -EOPNOTSUPP; + goto out; } } out: if (likely(nreq)) { qp->sq.head += nreq; + qp->next_sge = sge_idx; /* Memory barrier */ wmb(); - - sq_db.byte_4 = 0; - sq_db.parameter = 0; - - roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_TAG_M, - V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn); - roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M, - V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB); - roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M, - V2_DB_PARAMETER_IDX_S, - qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)); - roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, - V2_DB_PARAMETER_SL_S, qp->sl); - - hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l); - - qp->next_sge = sge_idx; - - if (qp->state == IB_QPS_ERR) { - attr_mask = IB_QP_STATE; - attr.qp_state = IB_QPS_ERR; - - ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask, - qp->state, IB_QPS_ERR); - if (ret) { - spin_unlock_irqrestore(&qp->sq.lock, flags); - *bad_wr = wr; - return ret; - } - } + update_sq_db(hr_dev, qp); } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -643,13 +621,11 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_rinl_sge *sge_list; - struct device *dev = hr_dev->dev; - struct ib_qp_attr attr; unsigned long flags; void *wqe = NULL; - int attr_mask; u32 wqe_idx; int nreq; int ret; @@ -675,14 +651,14 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { - dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", - wr->num_sge, hr_qp->rq.max_gs); + ibdev_err(ibdev, "rq:num_sge=%d >= qp->sq.max_gs=%d\n", + wr->num_sge, hr_qp->rq.max_gs); ret = -EINVAL; *bad_wr = wr; goto out; } - wqe = get_recv_wqe(hr_qp, wqe_idx); + wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; for (i = 0; i < wr->num_sge; i++) { if (!wr->sg_list[i].length) @@ -717,20 +693,21 @@ out: /* Memory barrier */ wmb(); - *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff; - + /* + * Hip08 hardware cannot flush the WQEs in RQ if the QP state + * gets into errored mode. Hence, as a workaround to this + * hardware limitation, driver needs to assist in flushing. But + * the flushing operation uses mailbox to convey the QP state to + * the hardware and which can sleep due to the mutex protection + * around the mailbox calls. Hence, use the deferred flush for + * now. + */ if (hr_qp->state == IB_QPS_ERR) { - attr_mask = IB_QP_STATE; - attr.qp_state = IB_QPS_ERR; - - ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, - attr_mask, hr_qp->state, - IB_QPS_ERR); - if (ret) { - spin_unlock_irqrestore(&hr_qp->rq.lock, flags); - *bad_wr = wr; - return ret; - } + if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, + &hr_qp->flush_flag)) + init_flush_work(hr_dev, hr_qp); + } else { + *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff; } } spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -1448,82 +1425,63 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); else desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - - if (i == 0) { - roce_set_field(req_a->vf_qpc_bt_idx_num, - VF_RES_A_DATA_1_VF_QPC_BT_IDX_M, - VF_RES_A_DATA_1_VF_QPC_BT_IDX_S, 0); - roce_set_field(req_a->vf_qpc_bt_idx_num, - VF_RES_A_DATA_1_VF_QPC_BT_NUM_M, - VF_RES_A_DATA_1_VF_QPC_BT_NUM_S, - HNS_ROCE_VF_QPC_BT_NUM); - - roce_set_field(req_a->vf_srqc_bt_idx_num, - VF_RES_A_DATA_2_VF_SRQC_BT_IDX_M, - VF_RES_A_DATA_2_VF_SRQC_BT_IDX_S, 0); - roce_set_field(req_a->vf_srqc_bt_idx_num, - VF_RES_A_DATA_2_VF_SRQC_BT_NUM_M, - VF_RES_A_DATA_2_VF_SRQC_BT_NUM_S, - HNS_ROCE_VF_SRQC_BT_NUM); - - roce_set_field(req_a->vf_cqc_bt_idx_num, - VF_RES_A_DATA_3_VF_CQC_BT_IDX_M, - VF_RES_A_DATA_3_VF_CQC_BT_IDX_S, 0); - roce_set_field(req_a->vf_cqc_bt_idx_num, - VF_RES_A_DATA_3_VF_CQC_BT_NUM_M, - VF_RES_A_DATA_3_VF_CQC_BT_NUM_S, - HNS_ROCE_VF_CQC_BT_NUM); - - roce_set_field(req_a->vf_mpt_bt_idx_num, - VF_RES_A_DATA_4_VF_MPT_BT_IDX_M, - VF_RES_A_DATA_4_VF_MPT_BT_IDX_S, 0); - roce_set_field(req_a->vf_mpt_bt_idx_num, - VF_RES_A_DATA_4_VF_MPT_BT_NUM_M, - VF_RES_A_DATA_4_VF_MPT_BT_NUM_S, - HNS_ROCE_VF_MPT_BT_NUM); - - roce_set_field(req_a->vf_eqc_bt_idx_num, - VF_RES_A_DATA_5_VF_EQC_IDX_M, - VF_RES_A_DATA_5_VF_EQC_IDX_S, 0); - roce_set_field(req_a->vf_eqc_bt_idx_num, - VF_RES_A_DATA_5_VF_EQC_NUM_M, - VF_RES_A_DATA_5_VF_EQC_NUM_S, - HNS_ROCE_VF_EQC_NUM); - } else { - roce_set_field(req_b->vf_smac_idx_num, - VF_RES_B_DATA_1_VF_SMAC_IDX_M, - VF_RES_B_DATA_1_VF_SMAC_IDX_S, 0); - roce_set_field(req_b->vf_smac_idx_num, - VF_RES_B_DATA_1_VF_SMAC_NUM_M, - VF_RES_B_DATA_1_VF_SMAC_NUM_S, - HNS_ROCE_VF_SMAC_NUM); - - roce_set_field(req_b->vf_sgid_idx_num, - VF_RES_B_DATA_2_VF_SGID_IDX_M, - VF_RES_B_DATA_2_VF_SGID_IDX_S, 0); - roce_set_field(req_b->vf_sgid_idx_num, - VF_RES_B_DATA_2_VF_SGID_NUM_M, - VF_RES_B_DATA_2_VF_SGID_NUM_S, - HNS_ROCE_VF_SGID_NUM); - - roce_set_field(req_b->vf_qid_idx_sl_num, - VF_RES_B_DATA_3_VF_QID_IDX_M, - VF_RES_B_DATA_3_VF_QID_IDX_S, 0); - roce_set_field(req_b->vf_qid_idx_sl_num, - VF_RES_B_DATA_3_VF_SL_NUM_M, - VF_RES_B_DATA_3_VF_SL_NUM_S, - HNS_ROCE_VF_SL_NUM); - - roce_set_field(req_b->vf_sccc_idx_num, - VF_RES_B_DATA_4_VF_SCCC_BT_IDX_M, - VF_RES_B_DATA_4_VF_SCCC_BT_IDX_S, 0); - roce_set_field(req_b->vf_sccc_idx_num, - VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M, - VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S, - HNS_ROCE_VF_SCCC_BT_NUM); - } } + roce_set_field(req_a->vf_qpc_bt_idx_num, + VF_RES_A_DATA_1_VF_QPC_BT_IDX_M, + VF_RES_A_DATA_1_VF_QPC_BT_IDX_S, 0); + roce_set_field(req_a->vf_qpc_bt_idx_num, + VF_RES_A_DATA_1_VF_QPC_BT_NUM_M, + VF_RES_A_DATA_1_VF_QPC_BT_NUM_S, HNS_ROCE_VF_QPC_BT_NUM); + + roce_set_field(req_a->vf_srqc_bt_idx_num, + VF_RES_A_DATA_2_VF_SRQC_BT_IDX_M, + VF_RES_A_DATA_2_VF_SRQC_BT_IDX_S, 0); + roce_set_field(req_a->vf_srqc_bt_idx_num, + VF_RES_A_DATA_2_VF_SRQC_BT_NUM_M, + VF_RES_A_DATA_2_VF_SRQC_BT_NUM_S, + HNS_ROCE_VF_SRQC_BT_NUM); + + roce_set_field(req_a->vf_cqc_bt_idx_num, + VF_RES_A_DATA_3_VF_CQC_BT_IDX_M, + VF_RES_A_DATA_3_VF_CQC_BT_IDX_S, 0); + roce_set_field(req_a->vf_cqc_bt_idx_num, + VF_RES_A_DATA_3_VF_CQC_BT_NUM_M, + VF_RES_A_DATA_3_VF_CQC_BT_NUM_S, HNS_ROCE_VF_CQC_BT_NUM); + + roce_set_field(req_a->vf_mpt_bt_idx_num, + VF_RES_A_DATA_4_VF_MPT_BT_IDX_M, + VF_RES_A_DATA_4_VF_MPT_BT_IDX_S, 0); + roce_set_field(req_a->vf_mpt_bt_idx_num, + VF_RES_A_DATA_4_VF_MPT_BT_NUM_M, + VF_RES_A_DATA_4_VF_MPT_BT_NUM_S, HNS_ROCE_VF_MPT_BT_NUM); + + roce_set_field(req_a->vf_eqc_bt_idx_num, VF_RES_A_DATA_5_VF_EQC_IDX_M, + VF_RES_A_DATA_5_VF_EQC_IDX_S, 0); + roce_set_field(req_a->vf_eqc_bt_idx_num, VF_RES_A_DATA_5_VF_EQC_NUM_M, + VF_RES_A_DATA_5_VF_EQC_NUM_S, HNS_ROCE_VF_EQC_NUM); + + roce_set_field(req_b->vf_smac_idx_num, VF_RES_B_DATA_1_VF_SMAC_IDX_M, + VF_RES_B_DATA_1_VF_SMAC_IDX_S, 0); + roce_set_field(req_b->vf_smac_idx_num, VF_RES_B_DATA_1_VF_SMAC_NUM_M, + VF_RES_B_DATA_1_VF_SMAC_NUM_S, HNS_ROCE_VF_SMAC_NUM); + + roce_set_field(req_b->vf_sgid_idx_num, VF_RES_B_DATA_2_VF_SGID_IDX_M, + VF_RES_B_DATA_2_VF_SGID_IDX_S, 0); + roce_set_field(req_b->vf_sgid_idx_num, VF_RES_B_DATA_2_VF_SGID_NUM_M, + VF_RES_B_DATA_2_VF_SGID_NUM_S, HNS_ROCE_VF_SGID_NUM); + + roce_set_field(req_b->vf_qid_idx_sl_num, VF_RES_B_DATA_3_VF_QID_IDX_M, + VF_RES_B_DATA_3_VF_QID_IDX_S, 0); + roce_set_field(req_b->vf_qid_idx_sl_num, VF_RES_B_DATA_3_VF_SL_NUM_M, + VF_RES_B_DATA_3_VF_SL_NUM_S, HNS_ROCE_VF_SL_NUM); + + roce_set_field(req_b->vf_sccc_idx_num, VF_RES_B_DATA_4_VF_SCCC_BT_IDX_M, + VF_RES_B_DATA_4_VF_SCCC_BT_IDX_S, 0); + roce_set_field(req_b->vf_sccc_idx_num, VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M, + VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S, + HNS_ROCE_VF_SCCC_BT_NUM); + return hns_roce_cmq_send(hr_dev, desc, 2); } @@ -1691,7 +1649,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_B) { + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; @@ -1939,7 +1897,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->srqc_bt_num, &caps->srqc_buf_pg_sz, &caps->srqc_ba_pg_sz, HEM_TYPE_SRQC); - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_B) { + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { caps->sccc_hop_num = ctx_hop_num; caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; @@ -1999,7 +1957,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } - if (hr_dev->pci_dev->revision == 0x21) { + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) { ret = hns_roce_query_pf_timer_resource(hr_dev); if (ret) { dev_err(hr_dev->dev, @@ -2007,16 +1965,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) ret); return ret; } - } - - ret = hns_roce_alloc_vf_resource(hr_dev); - if (ret) { - dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n", - ret); - return ret; - } - if (hr_dev->pci_dev->revision == 0x21) { ret = hns_roce_set_vf_switch_param(hr_dev, 0); if (ret) { dev_err(hr_dev->dev, @@ -2046,6 +1995,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) if (ret) set_default_caps(hr_dev); + ret = hns_roce_alloc_vf_resource(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n", + ret); + return ret; + } + ret = hns_roce_v2_set_bt(hr_dev); if (ret) dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n", @@ -2298,7 +2254,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; - if (hr_dev->pci_dev->revision == 0x21) + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) hns_roce_function_clear(hr_dev); hns_roce_free_link_table(hr_dev, &priv->tpq); @@ -2461,7 +2417,9 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type); if (ret) - dev_err(hr_dev->dev, "Configure sgid table failed(%d)!\n", ret); + ibdev_err(&hr_dev->ib_dev, + "failed to configure sgid table, ret = %d!\n", + ret); return ret; } @@ -2757,7 +2715,7 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { - *hr_cq->set_ci_db = cons_index & 0xffffff; + *hr_cq->set_ci_db = cons_index & V2_CQ_DB_PARAMETER_CONS_IDX_M; } static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, @@ -2942,7 +2900,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, sge_list = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sg_list; sge_num = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sge_cnt; - wqe_buf = get_recv_wqe(*cur_qp, wr_cnt); + wqe_buf = hns_roce_get_recv_wqe(*cur_qp, wr_cnt); data_len = wc->byte_len; for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len); sge_cnt++) { @@ -3013,13 +2971,11 @@ out: static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_qp **cur_qp, struct ib_wc *wc) { + struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device); struct hns_roce_srq *srq = NULL; - struct hns_roce_dev *hr_dev; struct hns_roce_v2_cqe *cqe; struct hns_roce_qp *hr_qp; struct hns_roce_wq *wq; - struct ib_qp_attr attr; - int attr_mask; int is_send; u16 wqe_ctr; u32 opcode; @@ -3043,16 +2999,17 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, V2_CQE_BYTE_16_LCL_QPN_S); if (!*cur_qp || (qpn & HNS_ROCE_V2_CQE_QPN_MASK) != (*cur_qp)->qpn) { - hr_dev = to_hr_dev(hr_cq->ib_cq.device); hr_qp = __hns_roce_qp_lookup(hr_dev, qpn); if (unlikely(!hr_qp)) { - dev_err(hr_dev->dev, "CQ %06lx with entry for unknown QPN %06x\n", - hr_cq->cqn, (qpn & HNS_ROCE_V2_CQE_QPN_MASK)); + ibdev_err(&hr_dev->ib_dev, + "CQ %06lx with entry for unknown QPN %06x\n", + hr_cq->cqn, qpn & HNS_ROCE_V2_CQE_QPN_MASK); return -EINVAL; } *cur_qp = hr_qp; } + hr_qp = *cur_qp; wc->qp = &(*cur_qp)->ibqp; wc->vendor_err = 0; @@ -3137,14 +3094,24 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, break; } - /* flush cqe if wc status is error, excluding flush error */ - if ((wc->status != IB_WC_SUCCESS) && - (wc->status != IB_WC_WR_FLUSH_ERR)) { - attr_mask = IB_QP_STATE; - attr.qp_state = IB_QPS_ERR; - return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp, - &attr, attr_mask, - (*cur_qp)->state, IB_QPS_ERR); + /* + * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets + * into errored mode. Hence, as a workaround to this hardware + * limitation, driver needs to assist in flushing. But the flushing + * operation uses mailbox to convey the QP state to the hardware and + * which can sleep due to the mutex protection around the mailbox calls. + * Hence, use the deferred flush for now. Once wc error detected, the + * flushing operation is needed. + */ + if (wc->status != IB_WC_SUCCESS && + wc->status != IB_WC_WR_FLUSH_ERR) { + ibdev_err(&hr_dev->ib_dev, "error cqe status is: 0x%x\n", + status & HNS_ROCE_V2_CQE_STATUS_MASK); + + if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) + init_flush_work(hr_dev, hr_qp); + + return 0; } if (wc->status == IB_WC_WR_FLUSH_ERR) @@ -3262,14 +3229,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, wc->port_num = roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_PORTN_M, V2_CQE_BYTE_32_PORTN_S); wc->pkey_index = 0; - memcpy(wc->smac, cqe->smac, 4); - wc->smac[4] = roce_get_field(cqe->byte_28, - V2_CQE_BYTE_28_SMAC_4_M, - V2_CQE_BYTE_28_SMAC_4_S); - wc->smac[5] = roce_get_field(cqe->byte_28, - V2_CQE_BYTE_28_SMAC_5_M, - V2_CQE_BYTE_28_SMAC_5_S); - wc->wc_flags |= IB_WC_WITH_SMAC; + if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) { wc->vlan_id = (u16)roce_get_field(cqe->byte_28, V2_CQE_BYTE_28_VID_M, @@ -3567,14 +3527,9 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); - roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, 0); - roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, ilog2((unsigned int)hr_qp->sq.wqe_cnt)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0); roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, @@ -3582,9 +3537,6 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || hr_qp->ibqp.srq) ? 0 : ilog2((unsigned int)hr_qp->rq.wqe_cnt)); - - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); } static void modify_qp_reset_to_init(struct ib_qp *ibqp, @@ -3604,280 +3556,53 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, */ roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, V2_QPC_BYTE_4_TST_S, to_hr_qp_type(hr_qp->ibqp.qp_type)); - roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, - V2_QPC_BYTE_4_TST_S, 0); roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn); - roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, - V2_QPC_BYTE_4_SQPN_S, 0); roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn); - roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, - V2_QPC_BYTE_16_PD_S, 0); roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M, V2_QPC_BYTE_20_RQWS_S, ilog2(hr_qp->rq.max_gs)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M, - V2_QPC_BYTE_20_RQWS_S, 0); set_qpc_wqe_cnt(hr_qp, context, qpc_mask); /* No VLAN need to set 0xFFF */ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, V2_QPC_BYTE_24_VLAN_ID_S, 0xfff); - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, - V2_QPC_BYTE_24_VLAN_ID_S, 0); - - /* - * Set some fields in context to zero, Because the default values - * of all fields in context are zero, we need not set them to 0 again. - * but we should set the relevant fields of context mask to 0. - */ - roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_SQ_TX_ERR_S, 0); - roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_SQ_RX_ERR_S, 0); - roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0); - roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0); - roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_TEMPID_M, - V2_QPC_BYTE_60_TEMPID_S, 0); - - roce_set_field(qpc_mask->byte_60_qpst_tempid, - V2_QPC_BYTE_60_SCC_TOKEN_M, V2_QPC_BYTE_60_SCC_TOKEN_S, - 0); - roce_set_bit(qpc_mask->byte_60_qpst_tempid, - V2_QPC_BYTE_60_SQ_DB_DOING_S, 0); - roce_set_bit(qpc_mask->byte_60_qpst_tempid, - V2_QPC_BYTE_60_RQ_DB_DOING_S, 0); - roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0); - roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0); - - if (hr_qp->rdb_en) { + if (hr_qp->rdb_en) roce_set_bit(context->byte_68_rq_db, V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1); - roce_set_bit(qpc_mask->byte_68_rq_db, - V2_QPC_BYTE_68_RQ_RECORD_EN_S, 0); - } roce_set_field(context->byte_68_rq_db, V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M, V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S, ((u32)hr_qp->rdb.dma) >> 1); - roce_set_field(qpc_mask->byte_68_rq_db, - V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M, - V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S, 0); context->rq_db_record_addr = cpu_to_le32(hr_qp->rdb.dma >> 32); - qpc_mask->rq_db_record_addr = 0; roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0); roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M, V2_QPC_BYTE_80_RX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn); - roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M, - V2_QPC_BYTE_80_RX_CQN_S, 0); if (ibqp->srq) { roce_set_field(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, to_hr_srq(ibqp->srq)->srqn); - roce_set_field(qpc_mask->byte_76_srqn_op_en, - V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, 0); roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_SRQ_EN_S, 1); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, - V2_QPC_BYTE_76_SRQ_EN_S, 0); } - roce_set_field(qpc_mask->byte_84_rq_ci_pi, - V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, - V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); - roce_set_field(qpc_mask->byte_84_rq_ci_pi, - V2_QPC_BYTE_84_RQ_CONSUMER_IDX_M, - V2_QPC_BYTE_84_RQ_CONSUMER_IDX_S, 0); - - roce_set_field(qpc_mask->byte_92_srq_info, V2_QPC_BYTE_92_SRQ_INFO_M, - V2_QPC_BYTE_92_SRQ_INFO_S, 0); - - roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M, - V2_QPC_BYTE_96_RX_REQ_MSN_S, 0); - - roce_set_field(qpc_mask->byte_104_rq_sge, - V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_M, - V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_S, 0); - - roce_set_bit(qpc_mask->byte_108_rx_reqepsn, - V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0); - roce_set_field(qpc_mask->byte_108_rx_reqepsn, - V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_M, - V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_S, 0); - roce_set_bit(qpc_mask->byte_108_rx_reqepsn, - V2_QPC_BYTE_108_RX_REQ_RNR_S, 0); - - qpc_mask->rq_rnr_timer = 0; - qpc_mask->rx_msg_len = 0; - qpc_mask->rx_rkey_pkt_info = 0; - qpc_mask->rx_va = 0; - - roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_HEAD_MAX_M, - V2_QPC_BYTE_132_TRRL_HEAD_MAX_S, 0); - roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M, - V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0); - - roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S, - 0); - roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M, - V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0); - roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M, - V2_QPC_BYTE_140_RAQ_TRRL_TAIL_S, 0); - - roce_set_field(qpc_mask->byte_144_raq, - V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M, - V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0); - roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M, - V2_QPC_BYTE_144_RAQ_CREDIT_S, 0); - roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0); - - roce_set_field(qpc_mask->byte_148_raq, V2_QPC_BYTE_148_RQ_MSN_M, - V2_QPC_BYTE_148_RQ_MSN_S, 0); - roce_set_field(qpc_mask->byte_148_raq, V2_QPC_BYTE_148_RAQ_SYNDROME_M, - V2_QPC_BYTE_148_RAQ_SYNDROME_S, 0); - - roce_set_field(qpc_mask->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M, - V2_QPC_BYTE_152_RAQ_PSN_S, 0); - roce_set_field(qpc_mask->byte_152_raq, - V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_M, - V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_S, 0); - - roce_set_field(qpc_mask->byte_156_raq, V2_QPC_BYTE_156_RAQ_USE_PKTN_M, - V2_QPC_BYTE_156_RAQ_USE_PKTN_S, 0); - - roce_set_field(qpc_mask->byte_160_sq_ci_pi, - V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, - V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); - roce_set_field(qpc_mask->byte_160_sq_ci_pi, - V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M, - V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0); - - roce_set_bit(qpc_mask->byte_168_irrl_idx, - V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S, 0); - roce_set_bit(qpc_mask->byte_168_irrl_idx, - V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S, 0); - roce_set_bit(qpc_mask->byte_168_irrl_idx, - V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S, 0); - roce_set_bit(qpc_mask->byte_168_irrl_idx, - V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0); - roce_set_bit(qpc_mask->byte_168_irrl_idx, - V2_QPC_BYTE_168_SQ_INVLD_FLG_S, 0); - roce_set_field(qpc_mask->byte_168_irrl_idx, - V2_QPC_BYTE_168_IRRL_IDX_LSB_M, - V2_QPC_BYTE_168_IRRL_IDX_LSB_S, 0); - roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M, V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 4); - roce_set_field(qpc_mask->byte_172_sq_psn, - V2_QPC_BYTE_172_ACK_REQ_FREQ_M, - V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 0); - - roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S, - 0); roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1); - roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0); - - roce_set_field(qpc_mask->byte_176_msg_pktn, - V2_QPC_BYTE_176_MSG_USE_PKTN_M, - V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0); - roce_set_field(qpc_mask->byte_176_msg_pktn, - V2_QPC_BYTE_176_IRRL_HEAD_PRE_M, - V2_QPC_BYTE_176_IRRL_HEAD_PRE_S, 0); - - roce_set_field(qpc_mask->byte_184_irrl_idx, - V2_QPC_BYTE_184_IRRL_IDX_MSB_M, - V2_QPC_BYTE_184_IRRL_IDX_MSB_S, 0); - - qpc_mask->cur_sge_offset = 0; - - roce_set_field(qpc_mask->byte_192_ext_sge, - V2_QPC_BYTE_192_CUR_SGE_IDX_M, - V2_QPC_BYTE_192_CUR_SGE_IDX_S, 0); - roce_set_field(qpc_mask->byte_192_ext_sge, - V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_M, - V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_S, 0); - - roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_IRRL_HEAD_M, - V2_QPC_BYTE_196_IRRL_HEAD_S, 0); - - roce_set_field(qpc_mask->byte_200_sq_max, V2_QPC_BYTE_200_SQ_MAX_IDX_M, - V2_QPC_BYTE_200_SQ_MAX_IDX_S, 0); - roce_set_field(qpc_mask->byte_200_sq_max, - V2_QPC_BYTE_200_LCL_OPERATED_CNT_M, - V2_QPC_BYTE_200_LCL_OPERATED_CNT_S, 0); - - roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_PKT_RNR_FLG_S, 0); - roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_PKT_RTY_FLG_S, 0); - - roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_CHECK_FLG_M, - V2_QPC_BYTE_212_CHECK_FLG_S, 0); - - qpc_mask->sq_timer = 0; - - roce_set_field(qpc_mask->byte_220_retry_psn_msn, - V2_QPC_BYTE_220_RETRY_MSG_MSN_M, - V2_QPC_BYTE_220_RETRY_MSG_MSN_S, 0); - roce_set_field(qpc_mask->byte_232_irrl_sge, - V2_QPC_BYTE_232_IRRL_SGE_IDX_M, - V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0); - - roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_SO_LP_VLD_S, - 0); - roce_set_bit(qpc_mask->byte_232_irrl_sge, - V2_QPC_BYTE_232_FENCE_LP_VLD_S, 0); - roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_IRRL_LP_VLD_S, - 0); - - qpc_mask->irrl_cur_sge_offset = 0; - - roce_set_field(qpc_mask->byte_240_irrl_tail, - V2_QPC_BYTE_240_IRRL_TAIL_REAL_M, - V2_QPC_BYTE_240_IRRL_TAIL_REAL_S, 0); - roce_set_field(qpc_mask->byte_240_irrl_tail, - V2_QPC_BYTE_240_IRRL_TAIL_RD_M, - V2_QPC_BYTE_240_IRRL_TAIL_RD_S, 0); - roce_set_field(qpc_mask->byte_240_irrl_tail, - V2_QPC_BYTE_240_RX_ACK_MSN_M, - V2_QPC_BYTE_240_RX_ACK_MSN_S, 0); - - roce_set_field(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_IRRL_PSN_M, - V2_QPC_BYTE_248_IRRL_PSN_S, 0); - roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_ACK_PSN_ERR_S, - 0); - roce_set_field(qpc_mask->byte_248_ack_psn, - V2_QPC_BYTE_248_ACK_LAST_OPTYPE_M, - V2_QPC_BYTE_248_ACK_LAST_OPTYPE_S, 0); - roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_IRRL_PSN_VLD_S, - 0); - roce_set_bit(qpc_mask->byte_248_ack_psn, - V2_QPC_BYTE_248_RNR_RETRY_FLAG_S, 0); - roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_CQ_ERR_IND_S, - 0); hr_qp->access_flags = attr->qp_access_flags; roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn); - roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, - V2_QPC_BYTE_252_TX_CQN_S, 0); - - roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_ERR_TYPE_M, - V2_QPC_BYTE_252_ERR_TYPE_S, 0); - - roce_set_field(qpc_mask->byte_256_sqflush_rqcqe, - V2_QPC_BYTE_256_RQ_CQE_IDX_M, - V2_QPC_BYTE_256_RQ_CQE_IDX_S, 0); - roce_set_field(qpc_mask->byte_256_sqflush_rqcqe, - V2_QPC_BYTE_256_SQ_FLUSH_IDX_M, - V2_QPC_BYTE_256_SQ_FLUSH_IDX_S, 0); } static void modify_qp_init_to_init(struct ib_qp *ibqp, @@ -3987,21 +3712,22 @@ static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, int mtt_cnt, u32 page_size) { - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; if (hr_qp->rq.wqe_cnt < 1) return true; if (mtt_cnt < 1) { - dev_err(dev, "qp(0x%lx) rqwqe buf ba find failed\n", - hr_qp->qpn); + ibdev_err(ibdev, "failed to find RQWQE buf ba of QP(0x%lx)\n", + hr_qp->qpn); return false; } if (mtt_cnt < MTT_MIN_COUNT && (hr_qp->rq.offset + page_size) < hr_qp->buff_size) { - dev_err(dev, "qp(0x%lx) next rqwqe buf ba find failed\n", - hr_qp->qpn); + ibdev_err(ibdev, + "failed to find next RQWQE buf ba of QP(0x%lx)\n", + hr_qp->qpn); return false; } @@ -4016,7 +3742,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle_3; dma_addr_t dma_handle_2; @@ -4043,7 +3769,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, hr_qp->qpn, &dma_handle_2); if (!mtts_2) { - dev_err(dev, "qp irrl_table find failed\n"); + ibdev_err(ibdev, "failed to find QP irrl_table\n"); return -EINVAL; } @@ -4051,12 +3777,13 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, mtts_3 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.trrl_table, hr_qp->qpn, &dma_handle_3); if (!mtts_3) { - dev_err(dev, "qp trrl_table find failed\n"); + ibdev_err(ibdev, "failed to find QP trrl_table\n"); return -EINVAL; } if (attr_mask & IB_QP_ALT_PATH) { - dev_err(dev, "INIT2RTR attr_mask (0x%x) error\n", attr_mask); + ibdev_err(ibdev, "INIT2RTR attr_mask (0x%x) error\n", + attr_mask); return -EINVAL; } @@ -4201,7 +3928,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, /* mtu*(2^LP_PKTN_INI) should not bigger than 1 message length 64kb */ roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, - V2_QPC_BYTE_56_LP_PKTN_INI_S, 4); + V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); @@ -4259,7 +3986,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; u64 sge_cur_blk = 0; u64 sq_cur_blk = 0; u32 page_size; @@ -4268,7 +3995,8 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, /* Search qp buf's mtts */ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL); if (count < 1) { - dev_err(dev, "qp(0x%lx) buf pa find failed\n", hr_qp->qpn); + ibdev_err(ibdev, "failed to find buf pa of QP(0x%lx)\n", + hr_qp->qpn); return -EINVAL; } @@ -4278,16 +4006,15 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, hr_qp->sge.offset / page_size, &sge_cur_blk, 1, NULL); if (count < 1) { - dev_err(dev, "qp(0x%lx) sge pa find failed\n", - hr_qp->qpn); + ibdev_err(ibdev, "failed to find sge pa of QP(0x%lx)\n", + hr_qp->qpn); return -EINVAL; } } /* Not support alternate path and path migration */ - if ((attr_mask & IB_QP_ALT_PATH) || - (attr_mask & IB_QP_PATH_MIG_STATE)) { - dev_err(dev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask); + if (attr_mask & (IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE)) { + ibdev_err(ibdev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask); return -EINVAL; } @@ -4405,6 +4132,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct ib_device *ibdev = &hr_dev->ib_dev; const struct ib_gid_attr *gid_attr = NULL; int is_roce_protocol; u16 vlan_id = 0xffff; @@ -4446,13 +4174,13 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, V2_QPC_BYTE_24_VLAN_ID_S, 0); if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) { - dev_err(hr_dev->dev, "sgid_index(%u) too large. max is %d\n", - grh->sgid_index, hr_dev->caps.gid_table_len[hr_port]); + ibdev_err(ibdev, "sgid_index(%u) too large. max is %d\n", + grh->sgid_index, hr_dev->caps.gid_table_len[hr_port]); return -EINVAL; } if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) { - dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n"); + ibdev_err(ibdev, "ah attr is not RDMA roce type\n"); return -EINVAL; } @@ -4475,7 +4203,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - if (hr_dev->pci_dev->revision == 0x21 && is_udp) + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B && is_udp) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); else @@ -4530,7 +4258,7 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, /* Nothing */ ; } else { - dev_err(hr_dev->dev, "Illegal state for QP!\n"); + ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n"); ret = -EINVAL; goto out; } @@ -4565,8 +4293,8 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, 0); } else { - dev_warn(hr_dev->dev, - "Local ACK timeout shall be 0 to 30.\n"); + ibdev_warn(&hr_dev->ib_dev, + "Local ACK timeout shall be 0 to 30.\n"); } } @@ -4734,7 +4462,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, struct hns_roce_v2_qp_context ctx[2]; struct hns_roce_v2_qp_context *context = ctx; struct hns_roce_v2_qp_context *qpc_mask = ctx + 1; - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned long sq_flag = 0; + unsigned long rq_flag = 0; int ret; /* @@ -4752,6 +4482,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, /* When QP state is err, SQ and RQ WQE should be flushed */ if (new_state == IB_QPS_ERR) { + spin_lock_irqsave(&hr_qp->sq.lock, sq_flag); + hr_qp->state = IB_QPS_ERR; roce_set_field(context->byte_160_sq_ci_pi, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, @@ -4759,8 +4491,10 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_160_sq_ci_pi, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); + spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flag); if (!ibqp->srq) { + spin_lock_irqsave(&hr_qp->rq.lock, rq_flag); roce_set_field(context->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, @@ -4768,6 +4502,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); + spin_unlock_irqrestore(&hr_qp->rq.lock, rq_flag); } } @@ -4791,7 +4526,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, /* SW pass context to HW */ ret = hns_roce_v2_qp_modify(hr_dev, ctx, hr_qp); if (ret) { - dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret); + ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret); goto out; } @@ -4848,10 +4583,8 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0, HNS_ROCE_CMD_QUERY_QPC, HNS_ROCE_CMD_TIMEOUT_MSECS); - if (ret) { - dev_err(hr_dev->dev, "QUERY QP cmd process error\n"); + if (ret) goto out; - } memcpy(hr_context, mailbox->buf, sizeof(*hr_context)); @@ -4867,7 +4600,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_v2_qp_context context = {}; - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; int tmp_qp_state; int state; int ret; @@ -4885,7 +4618,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context); if (ret) { - dev_err(dev, "query qpc error\n"); + ibdev_err(ibdev, "failed to query QPC, ret = %d\n", ret); ret = -EINVAL; goto out; } @@ -4894,7 +4627,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S); tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state); if (tmp_qp_state == -1) { - dev_err(dev, "Illegal ib_qp_state\n"); + ibdev_err(ibdev, "Illegal ib_qp_state\n"); ret = -EINVAL; goto out; } @@ -4992,8 +4725,8 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_udata *udata) { - struct hns_roce_cq *send_cq, *recv_cq; struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_cq *send_cq, *recv_cq; unsigned long flags; int ret = 0; @@ -5002,7 +4735,9 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); if (ret) - ibdev_err(ibdev, "modify QP to Reset failed.\n"); + ibdev_err(ibdev, + "failed to modify QP to RST, ret = %d\n", + ret); } send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL; @@ -5011,10 +4746,6 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, spin_lock_irqsave(&hr_dev->qp_list_lock, flags); hns_roce_lock_cqs(send_cq, recv_cq); - list_del(&hr_qp->node); - list_del(&hr_qp->sq_node); - list_del(&hr_qp->rq_node); - if (!udata) { if (recv_cq) __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn, @@ -5032,43 +4763,6 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_unlock_cqs(send_cq, recv_cq); spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); - hns_roce_qp_free(hr_dev, hr_qp); - - /* Not special_QP, free their QPN */ - if ((hr_qp->ibqp.qp_type == IB_QPT_RC) || - (hr_qp->ibqp.qp_type == IB_QPT_UC) || - (hr_qp->ibqp.qp_type == IB_QPT_UD)) - hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); - - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); - - if (udata) { - struct hns_roce_ucontext *context = - rdma_udata_to_drv_context( - udata, - struct hns_roce_ucontext, - ibucontext); - - if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1)) - hns_roce_db_unmap_user(context, &hr_qp->sdb); - - if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) - hns_roce_db_unmap_user(context, &hr_qp->rdb); - } else { - kfree(hr_qp->sq.wrid); - kfree(hr_qp->rq.wrid); - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); - if (hr_qp->rq.wqe_cnt) - hns_roce_free_db(hr_dev, &hr_qp->rdb); - } - ib_umem_release(hr_qp->umem); - - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && - hr_qp->rq.wqe_cnt) { - kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); - kfree(hr_qp->rq_inl_buf.wqe_list); - } - return ret; } @@ -5080,17 +4774,19 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) - ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n", + ibdev_err(&hr_dev->ib_dev, + "failed to destroy QP 0x%06lx, ret = %d\n", hr_qp->qpn, ret); - kfree(hr_qp); + hns_roce_qp_destroy(hr_dev, hr_qp, udata); return 0; } static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp) + struct hns_roce_qp *hr_qp) { + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_sccc_clr_done *resp; struct hns_roce_sccc_clr *clr; struct hns_roce_cmq_desc desc; @@ -5102,7 +4798,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - dev_err(hr_dev->dev, "Reset SCC ctx failed(%d)\n", ret); + ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d\n", ret); goto out; } @@ -5112,7 +4808,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, clr->qpn = cpu_to_le32(hr_qp->qpn); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - dev_err(hr_dev->dev, "Clear SCC ctx failed(%d)\n", ret); + ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d\n", ret); goto out; } @@ -5123,7 +4819,8 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, HNS_ROCE_OPC_QUERY_SCCC, true); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - dev_err(hr_dev->dev, "Query clr cmq failed(%d)\n", ret); + ibdev_err(ibdev, "failed to query clr cmq, ret = %d\n", + ret); goto out; } @@ -5133,7 +4830,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, msleep(20); } - dev_err(hr_dev->dev, "Query SCC clr done flag overtime.\n"); + ibdev_err(ibdev, "Query SCC clr done flag overtime.\n"); ret = -ETIMEDOUT; out: @@ -5177,99 +4874,65 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) HNS_ROCE_CMD_TIMEOUT_MSECS); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) - dev_err(hr_dev->dev, "MODIFY CQ Failed to cmd mailbox.\n"); + ibdev_err(&hr_dev->ib_dev, + "failed to process cmd when modifying CQ, ret = %d\n", + ret); return ret; } -static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn) -{ - struct hns_roce_qp *hr_qp; - struct ib_qp_attr attr; - int attr_mask; - int ret; - - hr_qp = __hns_roce_qp_lookup(hr_dev, qpn); - if (!hr_qp) { - dev_warn(hr_dev->dev, "no hr_qp can be found!\n"); - return; - } - - if (hr_qp->ibqp.uobject) { - if (hr_qp->sdb_en == 1) { - hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); - if (hr_qp->rdb_en == 1) - hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); - } else { - dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n"); - return; - } - } - - attr_mask = IB_QP_STATE; - attr.qp_state = IB_QPS_ERR; - ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, attr_mask, - hr_qp->state, IB_QPS_ERR); - if (ret) - dev_err(hr_dev->dev, "failed to modify qp %d to err state.\n", - qpn); -} - static void hns_roce_irq_work_handle(struct work_struct *work) { struct hns_roce_work *irq_work = container_of(work, struct hns_roce_work, work); - struct device *dev = irq_work->hr_dev->dev; + struct ib_device *ibdev = &irq_work->hr_dev->ib_dev; u32 qpn = irq_work->qpn; u32 cqn = irq_work->cqn; switch (irq_work->event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: - dev_info(dev, "Path migrated succeeded.\n"); + ibdev_info(ibdev, "Path migrated succeeded.\n"); break; case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: - dev_warn(dev, "Path migration failed.\n"); + ibdev_warn(ibdev, "Path migration failed.\n"); break; case HNS_ROCE_EVENT_TYPE_COMM_EST: break; case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: - dev_warn(dev, "Send queue drained.\n"); + ibdev_warn(ibdev, "Send queue drained.\n"); break; case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: - dev_err(dev, "Local work queue 0x%x catas error, sub_type:%d\n", - qpn, irq_work->sub_type); - hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); + ibdev_err(ibdev, "Local work queue 0x%x catast error, sub_event type is: %d\n", + qpn, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: - dev_err(dev, "Invalid request local work queue 0x%x error.\n", - qpn); - hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); + ibdev_err(ibdev, "Invalid request local work queue 0x%x error.\n", + qpn); break; case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - dev_err(dev, "Local access violation work queue 0x%x error, sub_type:%d\n", - qpn, irq_work->sub_type); - hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); + ibdev_err(ibdev, "Local access violation work queue 0x%x error, sub_event type is: %d\n", + qpn, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: - dev_warn(dev, "SRQ limit reach.\n"); + ibdev_warn(ibdev, "SRQ limit reach.\n"); break; case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: - dev_warn(dev, "SRQ last wqe reach.\n"); + ibdev_warn(ibdev, "SRQ last wqe reach.\n"); break; case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: - dev_err(dev, "SRQ catas error.\n"); + ibdev_err(ibdev, "SRQ catas error.\n"); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: - dev_err(dev, "CQ 0x%x access err.\n", cqn); + ibdev_err(ibdev, "CQ 0x%x access err.\n", cqn); break; case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - dev_warn(dev, "CQ 0x%x overflow\n", cqn); + ibdev_warn(ibdev, "CQ 0x%x overflow\n", cqn); break; case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: - dev_warn(dev, "DB overflow.\n"); + ibdev_warn(ibdev, "DB overflow.\n"); break; case HNS_ROCE_EVENT_TYPE_FLR: - dev_warn(dev, "Function level reset.\n"); + ibdev_warn(ibdev, "Function level reset.\n"); break; default: break; @@ -5326,44 +4989,24 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq) hns_roce_write64(hr_dev, doorbell, eq->doorbell); } -static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry) +static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset) { u32 buf_chk_sz; - unsigned long off; buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); - off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE; - - return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) + - off % buf_chk_sz); -} - -static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry) -{ - u32 buf_chk_sz; - unsigned long off; - - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); - - off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE; - - if (eq->hop_num == HNS_ROCE_HOP_NUM_0) - return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) + - off % buf_chk_sz); + if (eq->buf.nbufs == 1) + return eq->buf.direct.buf + offset % buf_chk_sz; else - return (struct hns_roce_aeqe *)((u8 *) - (eq->buf[off / buf_chk_sz]) + off % buf_chk_sz); + return eq->buf.page_list[offset / buf_chk_sz].buf + + offset % buf_chk_sz; } static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_aeqe *aeqe; - if (!eq->hop_num) - aeqe = get_aeqe_v2(eq, eq->cons_index); - else - aeqe = mhop_get_aeqe(eq, eq->cons_index); - + aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) * + HNS_ROCE_AEQ_ENTRY_SIZE); return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^ !!(eq->cons_index & eq->entries)) ? aeqe : NULL; } @@ -5456,44 +5099,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, return aeqe_found; } -static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry) -{ - u32 buf_chk_sz; - unsigned long off; - - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); - off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE; - - return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) + - off % buf_chk_sz); -} - -static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry) -{ - u32 buf_chk_sz; - unsigned long off; - - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); - - off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE; - - if (eq->hop_num == HNS_ROCE_HOP_NUM_0) - return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) + - off % buf_chk_sz); - else - return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off / - buf_chk_sz]) + off % buf_chk_sz); -} - static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_ceqe *ceqe; - if (!eq->hop_num) - ceqe = get_ceqe_v2(eq, eq->cons_index); - else - ceqe = mhop_get_ceqe(eq, eq->cons_index); - + ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) * + HNS_ROCE_CEQ_ENTRY_SIZE); return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ (!!(eq->cons_index & eq->entries)) ? ceqe : NULL; } @@ -5501,7 +5112,6 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { - struct device *dev = hr_dev->dev; struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq); int ceqe_found = 0; u32 cqn; @@ -5520,10 +5130,8 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; ceqe_found = 1; - if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1)) { - dev_warn(dev, "cons_index overflow, set back to 0.\n"); + if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1)) eq->cons_index = 0; - } ceqe = next_ceqe_sw_v2(eq); } @@ -5653,90 +5261,11 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn) dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn); } -static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) -{ - struct device *dev = hr_dev->dev; - u64 idx; - u64 size; - u32 buf_chk_sz; - u32 bt_chk_sz; - u32 mhop_num; - int eqe_alloc; - int i = 0; - int j = 0; - - mhop_num = hr_dev->caps.eqe_hop_num; - buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); - bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT); - - if (mhop_num == HNS_ROCE_HOP_NUM_0) { - dma_free_coherent(dev, (unsigned int)(eq->entries * - eq->eqe_size), eq->bt_l0, eq->l0_dma); - return; - } - - dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma); - if (mhop_num == 1) { - for (i = 0; i < eq->l0_last_num; i++) { - if (i == eq->l0_last_num - 1) { - eqe_alloc = i * (buf_chk_sz / eq->eqe_size); - size = (eq->entries - eqe_alloc) * eq->eqe_size; - dma_free_coherent(dev, size, eq->buf[i], - eq->buf_dma[i]); - break; - } - dma_free_coherent(dev, buf_chk_sz, eq->buf[i], - eq->buf_dma[i]); - } - } else if (mhop_num == 2) { - for (i = 0; i < eq->l0_last_num; i++) { - dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i], - eq->l1_dma[i]); - - for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) { - idx = i * (bt_chk_sz / BA_BYTE_LEN) + j; - if ((i == eq->l0_last_num - 1) - && j == eq->l1_last_num - 1) { - eqe_alloc = (buf_chk_sz / eq->eqe_size) - * idx; - size = (eq->entries - eqe_alloc) - * eq->eqe_size; - dma_free_coherent(dev, size, - eq->buf[idx], - eq->buf_dma[idx]); - break; - } - dma_free_coherent(dev, buf_chk_sz, eq->buf[idx], - eq->buf_dma[idx]); - } - } - } - kfree(eq->buf_dma); - kfree(eq->buf); - kfree(eq->l1_dma); - kfree(eq->bt_l1); - eq->buf_dma = NULL; - eq->buf = NULL; - eq->l1_dma = NULL; - eq->bt_l1 = NULL; -} - -static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) +static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { - u32 buf_chk_sz; - - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); - - if (hr_dev->caps.eqe_hop_num) { - hns_roce_mhop_free_eq(hr_dev, eq); - return; - } - - dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf, - eq->buf_list->map); - kfree(eq->buf_list); + if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) + hns_roce_mtr_cleanup(hr_dev, &eq->mtr); + hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf); } static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, @@ -5744,6 +5273,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, void *mb_buf) { struct hns_roce_eq_context *eqc; + u64 ba[MTT_MIN_COUNT] = { 0 }; + int count; eqc = mb_buf; memset(eqc, 0, sizeof(struct hns_roce_eq_context)); @@ -5759,10 +5290,23 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz; eq->shift = ilog2((unsigned int)eq->entries); - if (!eq->hop_num) - eq->eqe_ba = eq->buf_list->map; - else - eq->eqe_ba = eq->l0_dma; + /* if not muti-hop, eqe buffer only use one trunk */ + if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) { + eq->eqe_ba = eq->buf.direct.map; + eq->cur_eqe_ba = eq->eqe_ba; + if (eq->buf.npages > 1) + eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz); + else + eq->nxt_eqe_ba = eq->eqe_ba; + } else { + count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba, + MTT_MIN_COUNT, &eq->eqe_ba); + eq->cur_eqe_ba = ba[0]; + if (count > 1) + eq->nxt_eqe_ba = ba[1]; + else + eq->nxt_eqe_ba = ba[0]; + } /* set eqc state */ roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S, @@ -5860,220 +5404,97 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44); } -static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, + u32 page_shift) { - struct device *dev = hr_dev->dev; - int eq_alloc_done = 0; - int eq_buf_cnt = 0; - int eqe_alloc; - u32 buf_chk_sz; - u32 bt_chk_sz; - u32 mhop_num; - u64 size; - u64 idx; + struct hns_roce_buf_region region = {}; + dma_addr_t *buf_list = NULL; int ba_num; - int bt_num; - int record_i; - int record_j; - int i = 0; - int j = 0; - - mhop_num = hr_dev->caps.eqe_hop_num; - buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); - bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT); + int ret; ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size), - buf_chk_sz); - bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN); - - if (mhop_num == HNS_ROCE_HOP_NUM_0) { - if (eq->entries > buf_chk_sz / eq->eqe_size) { - dev_err(dev, "eq entries %d is larger than buf_pg_sz!", - eq->entries); - return -EINVAL; - } - eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size, - &(eq->l0_dma), GFP_KERNEL); - if (!eq->bt_l0) - return -ENOMEM; + 1 << page_shift); + hns_roce_init_buf_region(®ion, hr_dev->caps.eqe_hop_num, 0, ba_num); - eq->cur_eqe_ba = eq->l0_dma; - eq->nxt_eqe_ba = 0; + /* alloc a tmp list for storing eq buf address */ + ret = hns_roce_alloc_buf_list(®ion, &buf_list, 1); + if (ret) { + dev_err(hr_dev->dev, "alloc eq buf_list error\n"); + return ret; + } - return 0; + ba_num = hns_roce_get_kmem_bufs(hr_dev, buf_list, region.count, + region.offset, &eq->buf); + if (ba_num != region.count) { + dev_err(hr_dev->dev, "get eqe buf err,expect %d,ret %d.\n", + region.count, ba_num); + ret = -ENOBUFS; + goto done; } - eq->buf_dma = kcalloc(ba_num, sizeof(*eq->buf_dma), GFP_KERNEL); - if (!eq->buf_dma) - return -ENOMEM; - eq->buf = kcalloc(ba_num, sizeof(*eq->buf), GFP_KERNEL); - if (!eq->buf) - goto err_kcalloc_buf; - - if (mhop_num == 2) { - eq->l1_dma = kcalloc(bt_num, sizeof(*eq->l1_dma), GFP_KERNEL); - if (!eq->l1_dma) - goto err_kcalloc_l1_dma; - - eq->bt_l1 = kcalloc(bt_num, sizeof(*eq->bt_l1), GFP_KERNEL); - if (!eq->bt_l1) - goto err_kcalloc_bt_l1; - } - - /* alloc L0 BT */ - eq->bt_l0 = dma_alloc_coherent(dev, bt_chk_sz, &eq->l0_dma, GFP_KERNEL); - if (!eq->bt_l0) - goto err_dma_alloc_l0; - - if (mhop_num == 1) { - if (ba_num > (bt_chk_sz / BA_BYTE_LEN)) - dev_err(dev, "ba_num %d is too large for 1 hop\n", - ba_num); - - /* alloc buf */ - for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) { - if (eq_buf_cnt + 1 < ba_num) { - size = buf_chk_sz; - } else { - eqe_alloc = i * (buf_chk_sz / eq->eqe_size); - size = (eq->entries - eqe_alloc) * eq->eqe_size; - } - eq->buf[i] = dma_alloc_coherent(dev, size, - &(eq->buf_dma[i]), - GFP_KERNEL); - if (!eq->buf[i]) - goto err_dma_alloc_buf; + hns_roce_mtr_init(&eq->mtr, PAGE_SHIFT + hr_dev->caps.eqe_ba_pg_sz, + page_shift); + ret = hns_roce_mtr_attach(hr_dev, &eq->mtr, &buf_list, ®ion, 1); + if (ret) + dev_err(hr_dev->dev, "mtr attach error for eqe\n"); - *(eq->bt_l0 + i) = eq->buf_dma[i]; + goto done; - eq_buf_cnt++; - if (eq_buf_cnt >= ba_num) - break; - } - eq->cur_eqe_ba = eq->buf_dma[0]; - if (ba_num > 1) - eq->nxt_eqe_ba = eq->buf_dma[1]; - - } else if (mhop_num == 2) { - /* alloc L1 BT and buf */ - for (i = 0; i < bt_chk_sz / BA_BYTE_LEN; i++) { - eq->bt_l1[i] = dma_alloc_coherent(dev, bt_chk_sz, - &(eq->l1_dma[i]), - GFP_KERNEL); - if (!eq->bt_l1[i]) - goto err_dma_alloc_l1; - *(eq->bt_l0 + i) = eq->l1_dma[i]; - - for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) { - idx = i * bt_chk_sz / BA_BYTE_LEN + j; - if (eq_buf_cnt + 1 < ba_num) { - size = buf_chk_sz; - } else { - eqe_alloc = (buf_chk_sz / eq->eqe_size) - * idx; - size = (eq->entries - eqe_alloc) - * eq->eqe_size; - } - eq->buf[idx] = dma_alloc_coherent(dev, size, - &(eq->buf_dma[idx]), - GFP_KERNEL); - if (!eq->buf[idx]) - goto err_dma_alloc_buf; - - *(eq->bt_l1[i] + j) = eq->buf_dma[idx]; - - eq_buf_cnt++; - if (eq_buf_cnt >= ba_num) { - eq_alloc_done = 1; - break; - } - } + hns_roce_mtr_cleanup(hr_dev, &eq->mtr); +done: + hns_roce_free_buf_list(&buf_list, 1); - if (eq_alloc_done) - break; - } - eq->cur_eqe_ba = eq->buf_dma[0]; - if (ba_num > 1) - eq->nxt_eqe_ba = eq->buf_dma[1]; - } + return ret; +} - eq->l0_last_num = i + 1; - if (mhop_num == 2) - eq->l1_last_num = j + 1; +static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) +{ + struct hns_roce_buf *buf = &eq->buf; + bool is_mhop = false; + u32 page_shift; + u32 mhop_num; + u32 max_size; + int ret; - return 0; + page_shift = PAGE_SHIFT + hr_dev->caps.eqe_buf_pg_sz; + mhop_num = hr_dev->caps.eqe_hop_num; + if (!mhop_num) { + max_size = 1 << page_shift; + buf->size = max_size; + } else if (mhop_num == HNS_ROCE_HOP_NUM_0) { + max_size = eq->entries * eq->eqe_size; + buf->size = max_size; + } else { + max_size = 1 << page_shift; + buf->size = PAGE_ALIGN(eq->entries * eq->eqe_size); + is_mhop = true; + } -err_dma_alloc_l1: - dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma); - eq->bt_l0 = NULL; - eq->l0_dma = 0; - for (i -= 1; i >= 0; i--) { - dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i], - eq->l1_dma[i]); - - for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) { - idx = i * bt_chk_sz / BA_BYTE_LEN + j; - dma_free_coherent(dev, buf_chk_sz, eq->buf[idx], - eq->buf_dma[idx]); - } + ret = hns_roce_buf_alloc(hr_dev, buf->size, max_size, buf, page_shift); + if (ret) { + dev_err(hr_dev->dev, "alloc eq buf error\n"); + return ret; } - goto err_dma_alloc_l0; - -err_dma_alloc_buf: - dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma); - eq->bt_l0 = NULL; - eq->l0_dma = 0; - - if (mhop_num == 1) - for (i -= 1; i >= 0; i--) - dma_free_coherent(dev, buf_chk_sz, eq->buf[i], - eq->buf_dma[i]); - else if (mhop_num == 2) { - record_i = i; - record_j = j; - for (; i >= 0; i--) { - dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i], - eq->l1_dma[i]); - - for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) { - if (i == record_i && j >= record_j) - break; - - idx = i * bt_chk_sz / BA_BYTE_LEN + j; - dma_free_coherent(dev, buf_chk_sz, - eq->buf[idx], - eq->buf_dma[idx]); - } + + if (is_mhop) { + ret = map_eq_buf(hr_dev, eq, page_shift); + if (ret) { + dev_err(hr_dev->dev, "map roce buf error\n"); + goto err_alloc; } } -err_dma_alloc_l0: - kfree(eq->bt_l1); - eq->bt_l1 = NULL; - -err_kcalloc_bt_l1: - kfree(eq->l1_dma); - eq->l1_dma = NULL; - -err_kcalloc_l1_dma: - kfree(eq->buf); - eq->buf = NULL; - -err_kcalloc_buf: - kfree(eq->buf_dma); - eq->buf_dma = NULL; - - return -ENOMEM; + return 0; +err_alloc: + hns_roce_buf_free(hr_dev, buf->size, buf); + return ret; } static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, unsigned int eq_cmd) { - struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; - u32 buf_chk_sz = 0; int ret; /* Allocate mailbox memory */ @@ -6081,38 +5502,17 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - if (!hr_dev->caps.eqe_hop_num) { - buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); - - eq->buf_list = kzalloc(sizeof(struct hns_roce_buf_list), - GFP_KERNEL); - if (!eq->buf_list) { - ret = -ENOMEM; - goto free_cmd_mbox; - } - - eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz, - &(eq->buf_list->map), - GFP_KERNEL); - if (!eq->buf_list->buf) { - ret = -ENOMEM; - goto err_alloc_buf; - } - - } else { - ret = hns_roce_mhop_alloc_eq(hr_dev, eq); - if (ret) { - ret = -ENOMEM; - goto free_cmd_mbox; - } + ret = alloc_eq_buf(hr_dev, eq); + if (ret) { + ret = -ENOMEM; + goto free_cmd_mbox; } - hns_roce_config_eqc(hr_dev, eq, mailbox->buf); ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0, eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS); if (ret) { - dev_err(dev, "[mailbox cmd] create eqc failed.\n"); + dev_err(hr_dev->dev, "[mailbox cmd] create eqc failed.\n"); goto err_cmd_mbox; } @@ -6121,16 +5521,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, return 0; err_cmd_mbox: - if (!hr_dev->caps.eqe_hop_num) - dma_free_coherent(dev, buf_chk_sz, eq->buf_list->buf, - eq->buf_list->map); - else { - hns_roce_mhop_free_eq(hr_dev, eq); - goto free_cmd_mbox; - } - -err_alloc_buf: - kfree(eq->buf_list); + free_eq_buf(hr_dev, eq); free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -6292,8 +5683,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) goto err_request_irq_fail; } - hr_dev->irq_workq = - create_singlethread_workqueue("hns_roce_irq_workqueue"); + hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0); if (!hr_dev->irq_workq) { dev_err(dev, "Create irq workqueue failed!\n"); ret = -ENOMEM; @@ -6310,7 +5700,7 @@ err_request_irq_fail: err_create_eq_fail: for (i -= 1; i >= 0; i--) - hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]); + free_eq_buf(hr_dev, &eq_table->eq[i]); kfree(eq_table->eq); return ret; @@ -6332,7 +5722,7 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) for (i = 0; i < eq_num; i++) { hns_roce_v2_destroy_eqc(hr_dev, i); - hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]); + free_eq_buf(hr_dev, &eq_table->eq[i]); } kfree(eq_table->eq); @@ -6472,8 +5862,9 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, HNS_ROCE_CMD_TIMEOUT_MSECS); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - dev_err(hr_dev->dev, - "MODIFY SRQ Failed to cmd mailbox.\n"); + ibdev_err(&hr_dev->ib_dev, + "failed to process cmd when modifying SRQ, ret = %d\n", + ret); return ret; } } @@ -6499,7 +5890,9 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) HNS_ROCE_CMD_QUERY_SRQC, HNS_ROCE_CMD_TIMEOUT_MSECS); if (ret) { - dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n"); + ibdev_err(&hr_dev->ib_dev, + "failed to process cmd when querying SRQ, ret = %d\n", + ret); goto out; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 2a117ff6a6be..82dd9f6f4845 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -50,15 +50,14 @@ #define HNS_ROCE_V2_MAX_WQE_NUM 0x8000 #define HNS_ROCE_V2_MAX_SRQ 0x100000 #define HNS_ROCE_V2_MAX_SRQ_WR 0x8000 -#define HNS_ROCE_V2_MAX_SRQ_SGE 0x100 +#define HNS_ROCE_V2_MAX_SRQ_SGE 64 #define HNS_ROCE_V2_MAX_CQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100 #define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQE_NUM 0x400000 #define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000 -#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 -#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff -#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM 0x100 +#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 64 +#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 64 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 #define HNS_ROCE_V2_UAR_NUM 256 @@ -163,7 +162,7 @@ enum { #define GID_LEN_V2 16 -#define HNS_ROCE_V2_CQE_QPN_MASK 0x3ffff +#define HNS_ROCE_V2_CQE_QPN_MASK 0xfffff enum { HNS_ROCE_V2_WQE_OP_SEND = 0x0, @@ -460,8 +459,8 @@ enum hns_roce_v2_qp_state { HNS_ROCE_QP_ST_INIT, HNS_ROCE_QP_ST_RTR, HNS_ROCE_QP_ST_RTS, - HNS_ROCE_QP_ST_SQER, HNS_ROCE_QP_ST_SQD, + HNS_ROCE_QP_ST_SQER, HNS_ROCE_QP_ST_ERR, HNS_ROCE_QP_ST_SQ_DRAINING, HNS_ROCE_QP_NUM_ST @@ -1056,11 +1055,6 @@ struct hns_roce_v2_mpt_entry { #define V2_DB_PARAMETER_SL_S 16 #define V2_DB_PARAMETER_SL_M GENMASK(18, 16) -struct hns_roce_v2_cq_db { - __le32 byte_4; - __le32 parameter; -}; - #define V2_CQ_DB_BYTE_4_TAG_S 0 #define V2_CQ_DB_BYTE_4_TAG_M GENMASK(23, 0) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index b9898e71655a..176f34692f88 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -243,7 +243,7 @@ int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, /* Allocate MTT entry */ ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg, mtt->mtt_type); - if (ret == -1) + if (ret) return -ENOMEM; return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 780c780fdb22..b10c50b8736e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -60,14 +60,12 @@ void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev) int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ib_dev = ibpd->device; - struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); - struct device *dev = hr_dev->dev; struct hns_roce_pd *pd = to_hr_pd(ibpd); int ret; ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn); if (ret) { - dev_err(dev, "[alloc_pd]hns_roce_pd_alloc failed!\n"); + ibdev_err(ib_dev, "failed to alloc pd, ret = %d\n", ret); return ret; } @@ -76,7 +74,7 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); - dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n"); + ibdev_err(ib_dev, "failed to copy to udata\n"); return -EFAULT; } } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 3257ad11be48..6317901c4b4f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -43,6 +43,45 @@ #define SQP_NUM (2 * HNS_ROCE_MAX_PORTS) +static void flush_work_handle(struct work_struct *work) +{ + struct hns_roce_work *flush_work = container_of(work, + struct hns_roce_work, work); + struct hns_roce_qp *hr_qp = container_of(flush_work, + struct hns_roce_qp, flush_work); + struct device *dev = flush_work->hr_dev->dev; + struct ib_qp_attr attr; + int attr_mask; + int ret; + + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + + if (test_and_clear_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) { + ret = hns_roce_modify_qp(&hr_qp->ibqp, &attr, attr_mask, NULL); + if (ret) + dev_err(dev, "Modify QP to error state failed(%d) during CQE flush\n", + ret); + } + + /* + * make sure we signal QP destroy leg that flush QP was completed + * so that it can safely proceed ahead now and destroy QP + */ + if (atomic_dec_and_test(&hr_qp->refcount)) + complete(&hr_qp->free); +} + +void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + struct hns_roce_work *flush_work = &hr_qp->flush_work; + + flush_work->hr_dev = hr_dev; + INIT_WORK(&flush_work->work, flush_work_handle); + atomic_inc(&hr_qp->refcount); + queue_work(hr_dev->irq_workq, &flush_work->work); +} + void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) { struct device *dev = hr_dev->dev; @@ -59,6 +98,15 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) return; } + if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && + (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR || + event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR || + event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR)) { + qp->state = IB_QPS_ERR; + if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag)) + init_flush_work(hr_dev, qp); + } + qp->event(qp, (enum hns_roce_event)event_type); if (atomic_dec_and_test(&qp->refcount)) @@ -108,15 +156,34 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, } } -static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt, - int align, unsigned long *base) +static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + unsigned long num = 0; + int ret; - return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, - base) ? - -ENOMEM : - 0; + if (hr_qp->ibqp.qp_type == IB_QPT_GSI) { + /* when hw version is v1, the sqpn is allocated */ + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) + num = HNS_ROCE_MAX_PORTS + + hr_dev->iboe.phy_port[hr_qp->port]; + else + num = 1; + + hr_qp->doorbell_qpn = 1; + } else { + ret = hns_roce_bitmap_alloc_range(&hr_dev->qp_table.bitmap, + 1, 1, &num); + if (ret) { + ibdev_err(&hr_dev->ib_dev, "Failed to alloc bitmap\n"); + return -ENOMEM; + } + + hr_qp->doorbell_qpn = (u32)num; + } + + hr_qp->qpn = num; + + return 0; } enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) @@ -139,50 +206,75 @@ enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) } } -static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, - struct hns_roce_qp *hr_qp) +static void add_qp_to_list(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct ib_cq *send_cq, struct ib_cq *recv_cq) +{ + struct hns_roce_cq *hr_send_cq, *hr_recv_cq; + unsigned long flags; + + hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL; + hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL; + + spin_lock_irqsave(&hr_dev->qp_list_lock, flags); + hns_roce_lock_cqs(hr_send_cq, hr_recv_cq); + + list_add_tail(&hr_qp->node, &hr_dev->qp_list); + if (hr_send_cq) + list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list); + if (hr_recv_cq) + list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list); + + hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq); + spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); +} + +static int hns_roce_qp_store(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr) { struct xarray *xa = &hr_dev->qp_table_xa; int ret; - if (!qpn) + if (!hr_qp->qpn) return -EINVAL; - hr_qp->qpn = qpn; - atomic_set(&hr_qp->refcount, 1); - init_completion(&hr_qp->free); - - ret = xa_err(xa_store_irq(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1), - hr_qp, GFP_KERNEL)); + ret = xa_err(xa_store_irq(xa, hr_qp->qpn, hr_qp, GFP_KERNEL)); if (ret) - dev_err(hr_dev->dev, "QPC xa_store failed\n"); + dev_err(hr_dev->dev, "Failed to xa store for QPC\n"); + else + /* add QP to device's QP list for softwc */ + add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq, + init_attr->recv_cq); return ret; } -static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, - struct hns_roce_qp *hr_qp) +static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; struct device *dev = hr_dev->dev; int ret; - if (!qpn) + if (!hr_qp->qpn) return -EINVAL; - hr_qp->qpn = qpn; + /* In v1 engine, GSI QP context is saved in the RoCE hw's register */ + if (hr_qp->ibqp.qp_type == IB_QPT_GSI && + hr_dev->hw_rev == HNS_ROCE_HW_VER1) + return 0; /* Alloc memory for QPC */ ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn); if (ret) { - dev_err(dev, "QPC table get failed\n"); + dev_err(dev, "Failed to get QPC table\n"); goto err_out; } /* Alloc memory for IRRL */ ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn); if (ret) { - dev_err(dev, "IRRL table get failed\n"); + dev_err(dev, "Failed to get IRRL table\n"); goto err_put_qp; } @@ -191,7 +283,7 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table, hr_qp->qpn); if (ret) { - dev_err(dev, "TRRL table get failed\n"); + dev_err(dev, "Failed to get TRRL table\n"); goto err_put_irrl; } } @@ -201,22 +293,13 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, hr_qp->qpn); if (ret) { - dev_err(dev, "SCC CTX table get failed\n"); + dev_err(dev, "Failed to get SCC CTX table\n"); goto err_put_trrl; } } - ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp); - if (ret) - goto err_put_sccc; - return 0; -err_put_sccc: - if (hr_dev->caps.sccc_entry_sz) - hns_roce_table_put(hr_dev, &qp_table->sccc_table, - hr_qp->qpn); - err_put_trrl: if (hr_dev->caps.trrl_entry_sz) hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn); @@ -236,88 +319,84 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) struct xarray *xa = &hr_dev->qp_table_xa; unsigned long flags; + list_del(&hr_qp->node); + list_del(&hr_qp->sq_node); + list_del(&hr_qp->rq_node); + xa_lock_irqsave(xa, flags); __xa_erase(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1)); xa_unlock_irqrestore(xa, flags); } -void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - if (atomic_dec_and_test(&hr_qp->refcount)) - complete(&hr_qp->free); - wait_for_completion(&hr_qp->free); + /* In v1 engine, GSI QP context is saved in the RoCE hw's register */ + if (hr_qp->ibqp.qp_type == IB_QPT_GSI && + hr_dev->hw_rev == HNS_ROCE_HW_VER1) + return; - if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) { - if (hr_dev->caps.trrl_entry_sz) - hns_roce_table_put(hr_dev, &qp_table->trrl_table, - hr_qp->qpn); - hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); - } + if (hr_dev->caps.trrl_entry_sz) + hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn); + hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); } -void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, - int cnt) +static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - if (base_qpn < hr_dev->caps.reserved_qps) + if (hr_qp->ibqp.qp_type == IB_QPT_GSI) + return; + + if (hr_qp->qpn < hr_dev->caps.reserved_qps) return; - hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR); + hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR); } -static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, +static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, bool is_user, int has_rq, struct hns_roce_qp *hr_qp) { - struct device *dev = hr_dev->dev; u32 max_cnt; - /* Check the validity of QP support capacity */ - if (cap->max_recv_wr > hr_dev->caps.max_wqes || - cap->max_recv_sge > hr_dev->caps.max_rq_sg) { - dev_err(dev, "RQ WR or sge error!max_recv_wr=%d max_recv_sge=%d\n", - cap->max_recv_wr, cap->max_recv_sge); - return -EINVAL; - } - /* If srq exist, set zero for relative number of rq */ if (!has_rq) { hr_qp->rq.wqe_cnt = 0; hr_qp->rq.max_gs = 0; cap->max_recv_wr = 0; cap->max_recv_sge = 0; - } else { - if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) { - dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n"); - return -EINVAL; - } - if (hr_dev->caps.min_wqes) - max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes); - else - max_cnt = cap->max_recv_wr; + return 0; + } - hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); + /* Check the validity of QP support capacity */ + if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes || + cap->max_recv_sge > hr_dev->caps.max_rq_sg) { + ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n", + cap->max_recv_wr, cap->max_recv_sge); + return -EINVAL; + } - if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { - dev_err(dev, "while setting rq size, rq.wqe_cnt too large\n"); - return -EINVAL; - } + max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes); - max_cnt = max(1U, cap->max_recv_sge); - hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt); - if (hr_dev->caps.max_rq_sg <= 2) - hr_qp->rq.wqe_shift = - ilog2(hr_dev->caps.max_rq_desc_sz); - else - hr_qp->rq.wqe_shift = - ilog2(hr_dev->caps.max_rq_desc_sz - * hr_qp->rq.max_gs); + hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); + if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { + ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n", + cap->max_recv_wr); + return -EINVAL; } + max_cnt = max(1U, cap->max_recv_sge); + hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt); + + if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) + hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); + else + hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * + hr_qp->rq.max_gs); + cap->max_recv_wr = hr_qp->rq.wqe_cnt; cap->max_recv_sge = hr_qp->rq.max_gs; @@ -334,12 +413,12 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, /* Sanity check SQ size before proceeding */ if (ucmd->log_sq_stride > max_sq_stride || ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { - ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n"); + ibdev_err(&hr_dev->ib_dev, "Failed to check SQ stride size\n"); return -EINVAL; } if (cap->max_send_sge > hr_dev->caps.max_sq_sg) { - ibdev_err(&hr_dev->ib_dev, "SQ sge error! max_send_sge=%d\n", + ibdev_err(&hr_dev->ib_dev, "Failed to check SQ SGE size %d\n", cap->max_send_sge); return -EINVAL; } @@ -347,10 +426,9 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, return 0; } -static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, - struct hns_roce_qp *hr_qp, - struct hns_roce_ib_create_qp *ucmd) +static int set_user_sq_size(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp, + struct hns_roce_ib_create_qp *ucmd) { u32 ex_sge_num; u32 page_size; @@ -363,27 +441,28 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); if (ret) { - ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n"); + ibdev_err(&hr_dev->ib_dev, "Failed to check user SQ size limit\n"); return ret; } hr_qp->sq.wqe_shift = ucmd->log_sq_stride; max_cnt = max(1U, cap->max_send_sge); - if (hr_dev->caps.max_sq_sg <= 2) + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); else hr_qp->sq.max_gs = max_cnt; - if (hr_qp->sq.max_gs > 2) + if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * (hr_qp->sq.max_gs - 2)); - if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) { + if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE && + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { - dev_err(hr_dev->dev, - "The extended sge cnt error! sge_cnt=%d\n", - hr_qp->sge.sge_cnt); + ibdev_err(&hr_dev->ib_dev, + "Failed to check extended SGE size limit %d\n", + hr_qp->sge.sge_cnt); return -EINVAL; } } @@ -392,7 +471,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, ex_sge_num = hr_qp->sge.sge_cnt; /* Get buf size, SQ and RQ are aligned to page_szie */ - if (hr_dev->caps.max_sq_sg <= 2) { + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), PAGE_SIZE) + round_up((hr_qp->sq.wqe_cnt << @@ -492,30 +571,6 @@ static int split_wqe_buf_region(struct hns_roce_dev *hr_dev, return region_cnt; } -static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev, - struct hns_roce_buf_region *regions, - int region_cnt) -{ - int bt_pg_shift; - int ba_num; - int ret; - - bt_pg_shift = PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz; - - /* all root ba entries must in one bt page */ - do { - ba_num = (1 << bt_pg_shift) / BA_BYTE_LEN; - ret = hns_roce_hem_list_calc_root_ba(regions, region_cnt, - ba_num); - if (ret <= ba_num) - break; - - bt_pg_shift++; - } while (ret > ba_num); - - return bt_pg_shift - PAGE_SHIFT; -} - static int set_extend_sge_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { @@ -528,13 +583,15 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, } /* ud sqwqe's sge use extend sge */ - if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) { + if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && + hr_qp->ibqp.qp_type == IB_QPT_GSI) { hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * hr_qp->sq.max_gs); hr_qp->sge.sge_shift = 4; } - if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) { + if (hr_qp->sq.max_gs > 2 && + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n", hr_qp->sge.sge_cnt); @@ -545,46 +602,43 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, return 0; } -static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, - struct hns_roce_qp *hr_qp) +static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp) { - struct device *dev = hr_dev->dev; u32 page_size; u32 max_cnt; int size; int ret; - if (cap->max_send_wr > hr_dev->caps.max_wqes || + if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || cap->max_send_sge > hr_dev->caps.max_sq_sg || cap->max_inline_data > hr_dev->caps.max_sq_inline) { - dev_err(dev, "SQ WR or sge or inline data error!\n"); + ibdev_err(&hr_dev->ib_dev, + "SQ WR or sge or inline data error!\n"); return -EINVAL; } hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); - if (hr_dev->caps.min_wqes) - max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes); - else - max_cnt = cap->max_send_wr; + max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes); hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt); if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { - dev_err(dev, "while setting kernel sq size, sq.wqe_cnt too large\n"); + ibdev_err(&hr_dev->ib_dev, + "while setting kernel sq size, sq.wqe_cnt too large\n"); return -EINVAL; } /* Get data_seg numbers */ max_cnt = max(1U, cap->max_send_sge); - if (hr_dev->caps.max_sq_sg <= 2) + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); else hr_qp->sq.max_gs = max_cnt; ret = set_extend_sge_param(hr_dev, hr_qp); if (ret) { - dev_err(dev, "set extend sge parameters fail\n"); + ibdev_err(&hr_dev->ib_dev, "set extend sge parameters fail\n"); return ret; } @@ -593,7 +647,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.offset = 0; size = round_up(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size); - if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { + if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && hr_qp->sge.sge_cnt) { hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift), (u32)hr_qp->sge.sge_cnt); hr_qp->sge.offset = size; @@ -677,362 +731,449 @@ static void free_rq_inline_buf(struct hns_roce_qp *hr_qp) kfree(hr_qp->rq_inl_buf.wqe_list); } -static void add_qp_to_list(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct ib_cq *send_cq, struct ib_cq *recv_cq) -{ - struct hns_roce_cq *hr_send_cq, *hr_recv_cq; - unsigned long flags; - - hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL; - hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL; - - spin_lock_irqsave(&hr_dev->qp_list_lock, flags); - hns_roce_lock_cqs(hr_send_cq, hr_recv_cq); - - list_add_tail(&hr_qp->node, &hr_dev->qp_list); - if (hr_send_cq) - list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list); - if (hr_recv_cq) - list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list); - - hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq); - spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); -} - -static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, - struct ib_pd *ib_pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, unsigned long sqpn, - struct hns_roce_qp *hr_qp) +static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + u32 page_shift, bool is_user) { - dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { NULL }; - struct device *dev = hr_dev->dev; - struct hns_roce_ib_create_qp ucmd; - struct hns_roce_ib_create_qp_resp resp = {}; - struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context( - udata, struct hns_roce_ucontext, ibucontext); +/* WQE buffer include 3 parts: SQ, extend SGE and RQ. */ +#define HNS_ROCE_WQE_REGION_MAX 3 + struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX] = {}; + dma_addr_t *buf_list[HNS_ROCE_WQE_REGION_MAX] = {}; + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_region *r; - unsigned long qpn = 0; - u32 page_shift; + int region_count; int buf_count; int ret; int i; - mutex_init(&hr_qp->mutex); - spin_lock_init(&hr_qp->sq.lock); - spin_lock_init(&hr_qp->rq.lock); - - hr_qp->state = IB_QPS_RESET; - - hr_qp->ibqp.qp_type = init_attr->qp_type; - - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) - hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; - else - hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; + region_count = split_wqe_buf_region(hr_dev, hr_qp, regions, + ARRAY_SIZE(regions), page_shift); - ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata, - hns_roce_qp_has_rq(init_attr), hr_qp); + /* alloc a tmp list to store WQE buffers address */ + ret = hns_roce_alloc_buf_list(regions, buf_list, region_count); if (ret) { - dev_err(dev, "hns_roce_set_rq_size failed\n"); - goto err_out; + ibdev_err(ibdev, "Failed to alloc WQE buffer list\n"); + return ret; } - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && - hns_roce_qp_has_rq(init_attr)) { - ret = alloc_rq_inline_buf(hr_qp, init_attr); - if (ret) { - dev_err(dev, "allocate receive inline buffer failed\n"); - goto err_out; + for (i = 0; i < region_count; i++) { + r = ®ions[i]; + if (is_user) + buf_count = hns_roce_get_umem_bufs(hr_dev, buf_list[i], + r->count, r->offset, hr_qp->umem, + page_shift); + else + buf_count = hns_roce_get_kmem_bufs(hr_dev, buf_list[i], + r->count, r->offset, &hr_qp->hr_buf); + + if (buf_count != r->count) { + ibdev_err(ibdev, "Failed to get %s WQE buf, expect %d = %d.\n", + is_user ? "user" : "kernel", + r->count, buf_count); + ret = -ENOBUFS; + goto done; } } - page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; - if (udata) { - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { - dev_err(dev, "ib_copy_from_udata error for create qp\n"); - ret = -EFAULT; - goto err_alloc_rq_inline_buf; - } + hr_qp->wqe_bt_pg_shift = hr_dev->caps.mtt_ba_pg_sz; + hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift, + page_shift); + ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, regions, + region_count); + if (ret) + ibdev_err(ibdev, "Failed to attach WQE's mtr\n"); + + goto done; - ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, - &ucmd); + hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); +done: + hns_roce_free_buf_list(buf_list, region_count); + + return ret; +} + +static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, unsigned long addr) +{ + u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; + struct ib_device *ibdev = &hr_dev->ib_dev; + bool is_rq_buf_inline; + int ret; + + is_rq_buf_inline = (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hns_roce_qp_has_rq(init_attr); + if (is_rq_buf_inline) { + ret = alloc_rq_inline_buf(hr_qp, init_attr); if (ret) { - dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n"); - goto err_alloc_rq_inline_buf; + ibdev_err(ibdev, "Failed to alloc inline RQ buffer\n"); + return ret; } + } - hr_qp->umem = ib_umem_get(ib_pd->device, ucmd.buf_addr, - hr_qp->buff_size, 0); + if (udata) { + hr_qp->umem = ib_umem_get(ibdev, addr, hr_qp->buff_size, 0); if (IS_ERR(hr_qp->umem)) { - dev_err(dev, "ib_umem_get error for create qp\n"); ret = PTR_ERR(hr_qp->umem); - goto err_alloc_rq_inline_buf; - } - hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp, - hr_qp->regions, ARRAY_SIZE(hr_qp->regions), - page_shift); - ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list, - hr_qp->region_cnt); - if (ret) { - dev_err(dev, "alloc buf_list error for create qp\n"); - goto err_alloc_list; + goto err_inline; } + } else { + ret = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, + (1 << page_shift) * 2, + &hr_qp->hr_buf, page_shift); + if (ret) + goto err_inline; + } - for (i = 0; i < hr_qp->region_cnt; i++) { - r = &hr_qp->regions[i]; - buf_count = hns_roce_get_umem_bufs(hr_dev, - buf_list[i], r->count, r->offset, - hr_qp->umem, page_shift); - if (buf_count != r->count) { - dev_err(dev, - "get umem buf err, expect %d,ret %d.\n", - r->count, buf_count); - ret = -ENOBUFS; - goto err_get_bufs; - } - } + ret = map_wqe_buf(hr_dev, hr_qp, page_shift, udata); + if (ret) + goto err_alloc; + + return 0; + +err_inline: + if (is_rq_buf_inline) + free_rq_inline_buf(hr_qp); - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && - (udata->inlen >= sizeof(ucmd)) && - (udata->outlen >= sizeof(resp)) && - hns_roce_qp_has_sq(init_attr)) { - ret = hns_roce_db_map_user(uctx, udata, ucmd.sdb_addr, +err_alloc: + if (udata) { + ib_umem_release(hr_qp->umem); + hr_qp->umem = NULL; + } else { + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + } + + ibdev_err(ibdev, "Failed to alloc WQE buffer, ret %d.\n", ret); + + return ret; +} + +static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); + if (hr_qp->umem) { + ib_umem_release(hr_qp->umem); + hr_qp->umem = NULL; + } + + if (hr_qp->hr_buf.nbufs > 0) + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hr_qp->rq.wqe_cnt) + free_rq_inline_buf(hr_qp); +} + +static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, + struct hns_roce_ib_create_qp_resp *resp, + struct hns_roce_ib_create_qp *ucmd) +{ + return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && + udata->outlen >= offsetofend(typeof(*resp), cap_flags) && + hns_roce_qp_has_sq(init_attr) && + udata->inlen >= offsetofend(typeof(*ucmd), sdb_addr)); +} + +static inline bool user_qp_has_rdb(struct hns_roce_dev *hr_dev, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, + struct hns_roce_ib_create_qp_resp *resp) +{ + return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && + udata->outlen >= offsetofend(typeof(*resp), cap_flags) && + hns_roce_qp_has_rq(init_attr)); +} + +static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev, + struct ib_qp_init_attr *init_attr) +{ + return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && + hns_roce_qp_has_rq(init_attr)); +} + +static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, + struct hns_roce_ib_create_qp *ucmd, + struct hns_roce_ib_create_qp_resp *resp) +{ + struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct hns_roce_ucontext, ibucontext); + struct ib_device *ibdev = &hr_dev->ib_dev; + int ret; + + if (udata) { + if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { + ret = hns_roce_db_map_user(uctx, udata, ucmd->sdb_addr, &hr_qp->sdb); if (ret) { - dev_err(dev, "sq record doorbell map failed!\n"); - goto err_get_bufs; + ibdev_err(ibdev, + "Failed to map user SQ doorbell\n"); + goto err_out; } - - /* indicate kernel supports sq record db */ - resp.cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB; hr_qp->sdb_en = 1; + resp->cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB; } - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && - (udata->outlen >= sizeof(resp)) && - hns_roce_qp_has_rq(init_attr)) { - ret = hns_roce_db_map_user(uctx, udata, ucmd.db_addr, + if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) { + ret = hns_roce_db_map_user(uctx, udata, ucmd->db_addr, &hr_qp->rdb); if (ret) { - dev_err(dev, "rq record doorbell map failed!\n"); - goto err_sq_dbmap; + ibdev_err(ibdev, + "Failed to map user RQ doorbell\n"); + goto err_sdb; } - - /* indicate kernel supports rq record db */ - resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; hr_qp->rdb_en = 1; + resp->cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; } } else { - if (init_attr->create_flags & - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - dev_err(dev, "init_attr->create_flags error!\n"); - ret = -EINVAL; - goto err_alloc_rq_inline_buf; - } - - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { - dev_err(dev, "init_attr->create_flags error!\n"); - ret = -EINVAL; - goto err_alloc_rq_inline_buf; - } - - /* Set SQ size */ - ret = hns_roce_set_kernel_sq_size(hr_dev, &init_attr->cap, - hr_qp); - if (ret) { - dev_err(dev, "hns_roce_set_kernel_sq_size error!\n"); - goto err_alloc_rq_inline_buf; - } - /* QP doorbell register address */ hr_qp->sq.db_reg_l = hr_dev->reg_base + hr_dev->sdb_offset + DB_REG_OFFSET * hr_dev->priv_uar.index; hr_qp->rq.db_reg_l = hr_dev->reg_base + hr_dev->odb_offset + DB_REG_OFFSET * hr_dev->priv_uar.index; - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && - hns_roce_qp_has_rq(init_attr)) { + if (kernel_qp_has_rdb(hr_dev, init_attr)) { ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0); if (ret) { - dev_err(dev, "rq record doorbell alloc failed!\n"); - goto err_alloc_rq_inline_buf; + ibdev_err(ibdev, + "Failed to alloc kernel RQ doorbell\n"); + goto err_out; } *hr_qp->rdb.db_record = 0; hr_qp->rdb_en = 1; } + } - /* Allocate QP buf */ - if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, - (1 << page_shift) * 2, - &hr_qp->hr_buf, page_shift)) { - dev_err(dev, "hns_roce_buf_alloc error!\n"); + return 0; +err_sdb: + if (udata && hr_qp->sdb_en) + hns_roce_db_unmap_user(uctx, &hr_qp->sdb); +err_out: + return ret; +} + +static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata) +{ + struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct hns_roce_ucontext, ibucontext); + + if (udata) { + if (hr_qp->rdb_en) + hns_roce_db_unmap_user(uctx, &hr_qp->rdb); + if (hr_qp->sdb_en) + hns_roce_db_unmap_user(uctx, &hr_qp->sdb); + } else { + if (hr_qp->rdb_en) + hns_roce_free_db(hr_dev, &hr_qp->rdb); + } +} + +static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + u64 *sq_wrid = NULL; + u64 *rq_wrid = NULL; + int ret; + + sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL); + if (ZERO_OR_NULL_PTR(sq_wrid)) { + ibdev_err(ibdev, "Failed to alloc SQ wrid\n"); + return -ENOMEM; + } + + if (hr_qp->rq.wqe_cnt) { + rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL); + if (ZERO_OR_NULL_PTR(rq_wrid)) { + ibdev_err(ibdev, "Failed to alloc RQ wrid\n"); ret = -ENOMEM; - goto err_db; - } - hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp, - hr_qp->regions, ARRAY_SIZE(hr_qp->regions), - page_shift); - ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list, - hr_qp->region_cnt); - if (ret) { - dev_err(dev, "alloc buf_list error for create qp!\n"); - goto err_alloc_list; + goto err_sq; } + } - for (i = 0; i < hr_qp->region_cnt; i++) { - r = &hr_qp->regions[i]; - buf_count = hns_roce_get_kmem_bufs(hr_dev, - buf_list[i], r->count, r->offset, - &hr_qp->hr_buf); - if (buf_count != r->count) { - dev_err(dev, - "get kmem buf err, expect %d,ret %d.\n", - r->count, buf_count); - ret = -ENOBUFS; - goto err_get_bufs; - } + hr_qp->sq.wrid = sq_wrid; + hr_qp->rq.wrid = rq_wrid; + return 0; +err_sq: + kfree(sq_wrid); + + return ret; +} + +static void free_kernel_wrid(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + kfree(hr_qp->rq.wrid); + kfree(hr_qp->sq.wrid); +} + +static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, + struct hns_roce_ib_create_qp *ucmd) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int ret; + + hr_qp->ibqp.qp_type = init_attr->qp_type; + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; + else + hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; + + ret = set_rq_size(hr_dev, &init_attr->cap, udata, + hns_roce_qp_has_rq(init_attr), hr_qp); + if (ret) { + ibdev_err(ibdev, "Failed to set user RQ size\n"); + return ret; + } + + if (udata) { + if (ib_copy_from_udata(ucmd, udata, sizeof(*ucmd))) { + ibdev_err(ibdev, "Failed to copy QP ucmd\n"); + return -EFAULT; } - hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), - GFP_KERNEL); - if (ZERO_OR_NULL_PTR(hr_qp->sq.wrid)) { - ret = -ENOMEM; - goto err_get_bufs; + ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); + if (ret) + ibdev_err(ibdev, "Failed to set user SQ size\n"); + } else { + if (init_attr->create_flags & + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { + ibdev_err(ibdev, "Failed to check multicast loopback\n"); + return -EINVAL; } - if (hr_qp->rq.wqe_cnt) { - hr_qp->rq.wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), - GFP_KERNEL); - if (ZERO_OR_NULL_PTR(hr_qp->rq.wrid)) { - ret = -ENOMEM; - goto err_sq_wrid; - } + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { + ibdev_err(ibdev, "Failed to check ipoib ud lso\n"); + return -EINVAL; } + + ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); + if (ret) + ibdev_err(ibdev, "Failed to set kernel SQ size\n"); } - if (sqpn) { - qpn = sqpn; - } else { - /* Get QPN */ - ret = hns_roce_reserve_range_qp(hr_dev, 1, 1, &qpn); + return ret; +} + +static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, + struct ib_pd *ib_pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_ib_create_qp_resp resp = {}; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_ib_create_qp ucmd; + int ret; + + mutex_init(&hr_qp->mutex); + spin_lock_init(&hr_qp->sq.lock); + spin_lock_init(&hr_qp->rq.lock); + + hr_qp->state = IB_QPS_RESET; + hr_qp->flush_flag = 0; + + ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); + if (ret) { + ibdev_err(ibdev, "Failed to set QP param\n"); + return ret; + } + + if (!udata) { + ret = alloc_kernel_wrid(hr_dev, hr_qp); if (ret) { - dev_err(dev, "hns_roce_reserve_range_qp alloc qpn error\n"); - goto err_wrid; + ibdev_err(ibdev, "Failed to alloc wrid\n"); + return ret; } } - hr_qp->wqe_bt_pg_shift = calc_wqe_bt_page_shift(hr_dev, hr_qp->regions, - hr_qp->region_cnt); - hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift, - page_shift); - ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, - hr_qp->regions, hr_qp->region_cnt); + ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp); if (ret) { - dev_err(dev, "mtr attach error for create qp\n"); - goto err_mtr; + ibdev_err(ibdev, "Failed to alloc QP doorbell\n"); + goto err_wrid; } - if (init_attr->qp_type == IB_QPT_GSI && - hr_dev->hw_rev == HNS_ROCE_HW_VER1) { - /* In v1 engine, GSI QP context in RoCE engine's register */ - ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp); - if (ret) { - dev_err(dev, "hns_roce_qp_alloc failed!\n"); - goto err_qpn; - } - } else { - ret = hns_roce_qp_alloc(hr_dev, qpn, hr_qp); - if (ret) { - dev_err(dev, "hns_roce_qp_alloc failed!\n"); - goto err_qpn; - } + ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr); + if (ret) { + ibdev_err(ibdev, "Failed to alloc QP buffer\n"); + goto err_db; } - if (sqpn) - hr_qp->doorbell_qpn = 1; - else - hr_qp->doorbell_qpn = (u32)hr_qp->qpn; + ret = alloc_qpn(hr_dev, hr_qp); + if (ret) { + ibdev_err(ibdev, "Failed to alloc QPN\n"); + goto err_buf; + } + + ret = alloc_qpc(hr_dev, hr_qp); + if (ret) { + ibdev_err(ibdev, "Failed to alloc QP context\n"); + goto err_qpn; + } + + ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr); + if (ret) { + ibdev_err(ibdev, "Failed to store QP\n"); + goto err_qpc; + } if (udata) { ret = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); - if (ret) - goto err_qp; + if (ret) { + ibdev_err(ibdev, "copy qp resp failed!\n"); + goto err_store; + } } if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) { ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp); if (ret) - goto err_qp; + goto err_store; } + hr_qp->ibqp.qp_num = hr_qp->qpn; hr_qp->event = hns_roce_ib_qp_event; - - add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq, init_attr->recv_cq); - - hns_roce_free_buf_list(buf_list, hr_qp->region_cnt); + atomic_set(&hr_qp->refcount, 1); + init_completion(&hr_qp->free); return 0; -err_qp: - if (init_attr->qp_type == IB_QPT_GSI && - hr_dev->hw_rev == HNS_ROCE_HW_VER1) - hns_roce_qp_remove(hr_dev, hr_qp); - else - hns_roce_qp_free(hr_dev, hr_qp); - +err_store: + hns_roce_qp_remove(hr_dev, hr_qp); +err_qpc: + free_qpc(hr_dev, hr_qp); err_qpn: - if (!sqpn) - hns_roce_release_range_qp(hr_dev, qpn, 1); - -err_mtr: - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); - + free_qpn(hr_dev, hr_qp); +err_buf: + free_qp_buf(hr_dev, hr_qp); +err_db: + free_qp_db(hr_dev, hr_qp, udata); err_wrid: - if (udata) { - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && - (udata->outlen >= sizeof(resp)) && - hns_roce_qp_has_rq(init_attr)) - hns_roce_db_unmap_user(uctx, &hr_qp->rdb); - } else { - if (hr_qp->rq.wqe_cnt) - kfree(hr_qp->rq.wrid); - } - -err_sq_dbmap: - if (udata) - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && - (udata->inlen >= sizeof(ucmd)) && - (udata->outlen >= sizeof(resp)) && - hns_roce_qp_has_sq(init_attr)) - hns_roce_db_unmap_user(uctx, &hr_qp->sdb); - -err_sq_wrid: - if (!udata) - kfree(hr_qp->sq.wrid); - -err_get_bufs: - hns_roce_free_buf_list(buf_list, hr_qp->region_cnt); - -err_alloc_list: - if (!hr_qp->umem) - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); - ib_umem_release(hr_qp->umem); + free_kernel_wrid(hr_dev, hr_qp); + return ret; +} -err_db: - if (!udata && hns_roce_qp_has_rq(init_attr) && - (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) - hns_roce_free_db(hr_dev, &hr_qp->rdb); +void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_udata *udata) +{ + if (atomic_dec_and_test(&hr_qp->refcount)) + complete(&hr_qp->free); + wait_for_completion(&hr_qp->free); -err_alloc_rq_inline_buf: - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && - hns_roce_qp_has_rq(init_attr)) - free_rq_inline_buf(hr_qp); + free_qpc(hr_dev, hr_qp); + free_qpn(hr_dev, hr_qp); + free_qp_buf(hr_dev, hr_qp); + free_kernel_wrid(hr_dev, hr_qp); + free_qp_db(hr_dev, hr_qp, udata); -err_out: - return ret; + kfree(hr_qp); } struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, @@ -1050,7 +1191,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, if (!hr_qp) return ERR_PTR(-ENOMEM); - ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0, + ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp); if (ret) { ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n", @@ -1059,8 +1200,6 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, return ERR_PTR(ret); } - hr_qp->ibqp.qp_num = hr_qp->qpn; - break; } case IB_QPT_GSI: { @@ -1077,15 +1216,8 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, hr_qp->port = init_attr->port_num - 1; hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; - /* when hw version is v1, the sqpn is allocated */ - if (hr_dev->caps.max_sq_sg <= 2) - hr_qp->ibqp.qp_num = HNS_ROCE_MAX_PORTS + - hr_dev->iboe.phy_port[hr_qp->port]; - else - hr_qp->ibqp.qp_num = 1; - ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, - hr_qp->ibqp.qp_num, hr_qp); + hr_qp); if (ret) { ibdev_err(ibdev, "Create GSI QP failed!\n"); kfree(hr_qp); @@ -1097,7 +1229,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, default:{ ibdev_err(ibdev, "not support QP type %d\n", init_attr->qp_type); - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } } @@ -1230,11 +1362,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; if (cur_state == new_state && cur_state == IB_QPS_RESET) { - if (hr_dev->caps.min_wqes) { + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { ret = -EPERM; ibdev_err(&hr_dev->ib_dev, - "cur_state=%d new_state=%d\n", cur_state, - new_state); + "RST2RST state is not supported\n"); } else { ret = 0; } @@ -1306,17 +1437,17 @@ static void *get_wqe(struct hns_roce_qp *hr_qp, int offset) return hns_roce_buf_offset(&hr_qp->hr_buf, offset); } -void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n) { return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift)); } -void *get_send_wqe(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n) { return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift)); } -void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n) +void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n) { return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index c6d5f06f9cde..5b3dd1a337d4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -381,7 +381,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); srq->max_gs = init_attr->attr.max_sge; - srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs)); + srq_desc_size = roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, + HNS_ROCE_SGE_SIZE * srq->max_gs)); srq->wqe_shift = ilog2(srq_desc_size); diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 8feec35f95a7..3c62c9327a9c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -67,7 +67,7 @@ #include "i40iw_user.h" #include "i40iw_puda.h" -#define I40IW_FW_VERSION 2 +#define I40IW_FW_VER_DEFAULT 2 #define I40IW_HW_VERSION 2 #define I40IW_ARP_ADD 1 @@ -326,6 +326,26 @@ struct i40iw_handler { }; /** + * i40iw_fw_major_ver - get firmware major version + * @dev: iwarp device + **/ +static inline u64 i40iw_fw_major_ver(struct i40iw_sc_dev *dev) +{ + return RS_64(dev->feature_info[I40IW_FEATURE_FW_INFO], + I40IW_FW_VER_MAJOR); +} + +/** + * i40iw_fw_minor_ver - get firmware minor version + * @dev: iwarp device + **/ +static inline u64 i40iw_fw_minor_ver(struct i40iw_sc_dev *dev) +{ + return RS_64(dev->feature_info[I40IW_FEATURE_FW_INFO], + I40IW_FW_VER_MINOR); +} + +/** * to_iwdev - get device * @ibdev: ib device **/ diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h index 66dc1ba03389..6e43e4d730f4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.h +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h @@ -85,7 +85,7 @@ struct ietf_mpa_v1 { u8 flags; u8 rev; __be16 priv_data_len; - u8 priv_data[0]; + u8 priv_data[]; }; #define ietf_mpa_req_resp_frame ietf_mpa_frame @@ -101,7 +101,7 @@ struct ietf_mpa_v2 { u8 rev; __be16 priv_data_len; struct ietf_rtr_msg rtr_msg; - u8 priv_data[0]; + u8 priv_data[]; }; struct i40iw_cm_node; diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 4d841a3c68f3..e8b4b3743661 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -1022,6 +1022,95 @@ static enum i40iw_status_code i40iw_sc_commit_fpm_values( } /** + * i40iw_sc_query_rdma_features_done - poll cqp for query features done + * @cqp: struct for cqp hw + */ +static enum i40iw_status_code +i40iw_sc_query_rdma_features_done(struct i40iw_sc_cqp *cqp) +{ + return i40iw_sc_poll_for_cqp_op_done( + cqp, I40IW_CQP_OP_QUERY_RDMA_FEATURES, NULL); +} + +/** + * i40iw_sc_query_rdma_features - query rdma features + * @cqp: struct for cqp hw + * @feat_mem: holds PA for HW to use + * @scratch: u64 saved to be used during cqp completion + */ +static enum i40iw_status_code +i40iw_sc_query_rdma_features(struct i40iw_sc_cqp *cqp, + struct i40iw_dma_mem *feat_mem, u64 scratch) +{ + u64 *wqe; + u64 header; + + wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch); + if (wqe) + return I40IW_ERR_RING_FULL; + + set_64bit_val(wqe, 32, feat_mem->pa); + + header = LS_64(I40IW_CQP_OP_QUERY_RDMA_FEATURES, I40IW_CQPSQ_OPCODE) | + LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) | feat_mem->size; + + i40iw_insert_wqe_hdr(wqe, header); + + i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY RDMA FEATURES WQE", + wqe, I40IW_CQP_WQE_SIZE * 8); + + i40iw_sc_cqp_post_sq(cqp); + + return 0; +} + +/** + * i40iw_get_rdma_features - get RDMA features + * @dev - sc device struct + */ +enum i40iw_status_code i40iw_get_rdma_features(struct i40iw_sc_dev *dev) +{ + enum i40iw_status_code ret_code; + struct i40iw_dma_mem feat_buf; + u64 temp; + u16 byte_idx, feat_type, feat_cnt; + + ret_code = i40iw_allocate_dma_mem(dev->hw, &feat_buf, + I40IW_FEATURE_BUF_SIZE, + I40IW_FEATURE_BUF_ALIGNMENT); + + if (ret_code) + return I40IW_ERR_NO_MEMORY; + + ret_code = i40iw_sc_query_rdma_features(dev->cqp, &feat_buf, 0); + if (!ret_code) + ret_code = i40iw_sc_query_rdma_features_done(dev->cqp); + + if (ret_code) + goto exit; + + get_64bit_val(feat_buf.va, 0, &temp); + feat_cnt = RS_64(temp, I40IW_FEATURE_CNT); + if (feat_cnt < I40IW_MAX_FEATURES) { + ret_code = I40IW_ERR_INVALID_FEAT_CNT; + goto exit; + } else if (feat_cnt > I40IW_MAX_FEATURES) { + i40iw_debug(dev, I40IW_DEBUG_CQP, + "features buf size insufficient\n"); + } + + for (byte_idx = 0, feat_type = 0; feat_type < I40IW_MAX_FEATURES; + feat_type++, byte_idx += 8) { + get_64bit_val((u64 *)feat_buf.va, byte_idx, &temp); + dev->feature_info[feat_type] = RS_64(temp, I40IW_FEATURE_INFO); + } +exit: + i40iw_free_dma_mem(dev->hw, &feat_buf); + + return ret_code; +} + +/** * i40iw_sc_query_fpm_values_done - poll for cqp wqe completion for query fpm * @cqp: struct for cqp hw */ @@ -4265,6 +4354,13 @@ static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev, true, I40IW_CQP_WAIT_EVENT); break; + case OP_QUERY_RDMA_FEATURES: + values_mem.pa = pcmdinfo->in.u.query_rdma_features.cap_pa; + values_mem.va = pcmdinfo->in.u.query_rdma_features.cap_va; + status = i40iw_sc_query_rdma_features( + pcmdinfo->in.u.query_rdma_features.cqp, &values_mem, + pcmdinfo->in.u.query_rdma_features.scratch); + break; default: status = I40IW_NOT_SUPPORTED; break; diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index 6ddaeec87d2f..e8367d67575d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -403,7 +403,7 @@ #define I40IW_CQP_OP_MANAGE_ARP 0x0f #define I40IW_CQP_OP_MANAGE_VF_PBLE_BP 0x10 #define I40IW_CQP_OP_MANAGE_PUSH_PAGES 0x11 -#define I40IW_CQP_OP_MANAGE_PE_TEAM 0x12 +#define I40IW_CQP_OP_QUERY_RDMA_FEATURES 0x12 #define I40IW_CQP_OP_UPLOAD_CONTEXT 0x13 #define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14 #define I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE 0x15 @@ -431,6 +431,24 @@ #define I40IW_CQP_OP_SHMC_PAGES_ALLOCATED 0x2b #define I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE 0x2d +#define I40IW_FEATURE_BUF_SIZE (8 * I40IW_MAX_FEATURES) + +#define I40IW_FW_VER_MINOR_SHIFT 0 +#define I40IW_FW_VER_MINOR_MASK \ + (0xffffULL << I40IW_FW_VER_MINOR_SHIFT) + +#define I40IW_FW_VER_MAJOR_SHIFT 16 +#define I40IW_FW_VER_MAJOR_MASK \ + (0xffffULL << I40IW_FW_VER_MAJOR_SHIFT) + +#define I40IW_FEATURE_INFO_SHIFT 0 +#define I40IW_FEATURE_INFO_MASK \ + (0xffffULL << I40IW_FEATURE_INFO_SHIFT) + +#define I40IW_FEATURE_CNT_SHIFT 32 +#define I40IW_FEATURE_CNT_MASK \ + (0xffffULL << I40IW_FEATURE_CNT_SHIFT) + #define I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT 16 #define I40IW_UDA_QPSQ_NEXT_HEADER_MASK ((u64)0xff << I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT) @@ -1529,7 +1547,8 @@ enum i40iw_alignment { I40IW_AEQ_ALIGNMENT = 0x100, I40IW_CEQ_ALIGNMENT = 0x100, I40IW_CQ0_ALIGNMENT = 0x100, - I40IW_SD_BUF_ALIGNMENT = 0x80 + I40IW_SD_BUF_ALIGNMENT = 0x80, + I40IW_FEATURE_BUF_ALIGNMENT = 0x8 }; #define I40IW_WQE_SIZE_64 64 @@ -1732,6 +1751,7 @@ enum i40iw_alignment { #define OP_REQUESTED_COMMANDS 31 #define OP_COMPLETED_COMMANDS 32 #define OP_GEN_AE 33 -#define OP_SIZE_CQP_STAT_ARRAY 34 +#define OP_QUERY_RDMA_FEATURES 34 +#define OP_SIZE_CQP_STAT_ARRAY 35 #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 238614370927..9c96ece5e7f3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1212,22 +1212,19 @@ static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev) { struct net_device *dev; struct in_device *idev; - bool got_lock = true; u32 ip_addr; - if (!rtnl_trylock()) - got_lock = false; - - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((((rdma_vlan_dev_vlan_id(dev) < 0xFFFF) && (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) || - (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) { + (dev == iwdev->netdev)) && (READ_ONCE(dev->flags) & IFF_UP)) { const struct in_ifaddr *ifa; - idev = in_dev_get(dev); + idev = __in_dev_get_rcu(dev); if (!idev) continue; - in_dev_for_each_ifa_rtnl(ifa, idev) { + in_dev_for_each_ifa_rcu(ifa, idev) { i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address, rdma_vlan_dev_vlan_id(dev), dev->dev_addr); @@ -1239,12 +1236,9 @@ static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev) true, I40IW_ARP_ADD); } - - in_dev_put(idev); } } - if (got_lock) - rtnl_unlock(); + rcu_read_unlock(); } /** @@ -1689,6 +1683,12 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) status = i40iw_setup_ceqs(iwdev, ldev); if (status) break; + + status = i40iw_get_rdma_features(dev); + if (status) + dev->feature_info[I40IW_FEATURE_FW_INFO] = + I40IW_FW_VER_DEFAULT; + iwdev->init_state = CEQ_CREATED; status = i40iw_initialize_hw_resources(iwdev); if (status) diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h index 11d3a2a72100..4c429567bbb4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_p.h +++ b/drivers/infiniband/hw/i40iw/i40iw_p.h @@ -105,6 +105,7 @@ enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(struct i40iw_sc_cqp * bool poll_registers); enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count); +enum i40iw_status_code i40iw_get_rdma_features(struct i40iw_sc_dev *dev); void free_sd_mem(struct i40iw_sc_dev *dev); diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h index f7013f11d808..d1c5855bd8c3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_status.h +++ b/drivers/infiniband/hw/i40iw/i40iw_status.h @@ -95,7 +95,8 @@ enum i40iw_status_code { I40IW_ERR_INVALID_MAC_ADDR = -65, I40IW_ERR_BAD_STAG = -66, I40IW_ERR_CQ_COMPL_ERROR = -67, - I40IW_ERR_QUEUE_DESTROYED = -68 + I40IW_ERR_QUEUE_DESTROYED = -68, + I40IW_ERR_INVALID_FEAT_CNT = -69 }; #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index adc8d2ec523d..54c323c40d96 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -234,6 +234,11 @@ enum i40iw_hw_stats_index_64b { I40IW_HW_STAT_INDEX_MAX_64 }; +enum i40iw_feature_type { + I40IW_FEATURE_FW_INFO = 0, + I40IW_MAX_FEATURES +}; + struct i40iw_dev_hw_stats_offsets { u32 stats_offset_32[I40IW_HW_STAT_INDEX_MAX_32]; u32 stats_offset_64[I40IW_HW_STAT_INDEX_MAX_64]; @@ -501,6 +506,7 @@ struct i40iw_sc_dev { const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; struct i40iw_hmc_fpm_misc hmc_fpm_misc; + u64 feature_info[I40IW_MAX_FEATURES]; u32 debug_mask; u8 hmc_fn_id; bool is_pf; @@ -1340,6 +1346,12 @@ struct cqp_info { struct i40iw_sc_qp *qp; u64 scratch; } suspend_resume; + struct { + struct i40iw_sc_cqp *cqp; + void *cap_va; + u64 cap_pa; + u64 scratch; + } query_rdma_features; } u; }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index c335de91508f..1b6fb1380961 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -64,7 +64,8 @@ static int i40iw_query_device(struct ib_device *ibdev, return -EINVAL; memset(props, 0, sizeof(*props)); ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr); - props->fw_ver = I40IW_FW_VERSION; + props->fw_ver = i40iw_fw_major_ver(&iwdev->sc_dev) << 32 | + i40iw_fw_minor_ver(&iwdev->sc_dev); props->device_cap_flags = iwdev->device_cap_flags; props->vendor_id = iwdev->ldev->pcidev->vendor; props->vendor_part_id = iwdev->ldev->pcidev->device; @@ -617,7 +618,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp; if (init_attr->qp_type != IB_QPT_RC) { - err_code = -EINVAL; + err_code = -EOPNOTSUPP; goto error; } if (iwdev->push_mode) @@ -2534,10 +2535,11 @@ static const char * const i40iw_hw_stat_names[] = { static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str) { - u32 firmware_version = I40IW_FW_VERSION; + struct i40iw_device *iwdev = to_iwdev(dev); - snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u", firmware_version, - (firmware_version & 0x000000ff)); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%llu.%llu", + i40iw_fw_major_ver(&iwdev->sc_dev), + i40iw_fw_minor_ver(&iwdev->sc_dev)); } /** diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2f5d9b181848..a66518a5c938 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -434,9 +434,6 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, return real_index; } -#define field_avail(type, fld, sz) (offsetof(type, fld) + \ - sizeof(((type *)0)->fld) <= (sz)) - static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) @@ -447,7 +444,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, int err; int have_ib_ports; struct mlx4_uverbs_ex_query_device cmd; - struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0}; + struct mlx4_uverbs_ex_query_device_resp resp = {}; struct mlx4_clock_params clock_params; if (uhw->inlen) { @@ -602,7 +599,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, sizeof(struct mlx4_wqe_data_seg); } - if (field_avail(typeof(resp), rss_caps, uhw->outlen)) { + if (offsetofend(typeof(resp), rss_caps) <= uhw->outlen) { if (props->rss_caps.supported_qpts) { resp.rss_caps.rx_hash_function = MLX4_IB_RX_HASH_FUNC_TOEPLITZ; @@ -626,7 +623,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, sizeof(resp.rss_caps); } - if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { + if (offsetofend(typeof(resp), tso_caps) <= uhw->outlen) { if (dev->dev->caps.max_gso_sz && ((mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) || diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 26425dd2d960..2f9f78912267 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1636,7 +1636,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, } default: /* Don't support raw QPs */ - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } return &qp->ibqp; diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index d0a043ccbe58..2a334800f109 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -8,3 +8,4 @@ mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += flow.o +mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += qos.o diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index 8ba439fabf7f..de4da92b81a6 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -47,6 +47,7 @@ static const char * const mlx5_ib_dbg_cc_name[] = { "rp_byte_reset", "rp_threshold", "rp_ai_rate", + "rp_max_rate", "rp_hai_rate", "rp_min_dec_fac", "rp_min_rate", @@ -56,6 +57,7 @@ static const char * const mlx5_ib_dbg_cc_name[] = { "rp_rate_reduce_monitor_period", "rp_initial_alpha_value", "rp_gd", + "np_min_time_between_cnps", "np_cnp_dscp", "np_cnp_prio_mode", "np_cnp_prio", @@ -66,6 +68,7 @@ static const char * const mlx5_ib_dbg_cc_name[] = { #define MLX5_IB_RP_TIME_RESET_ATTR BIT(3) #define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4) #define MLX5_IB_RP_THRESHOLD_ATTR BIT(5) +#define MLX5_IB_RP_MAX_RATE_ATTR BIT(6) #define MLX5_IB_RP_AI_RATE_ATTR BIT(7) #define MLX5_IB_RP_HAI_RATE_ATTR BIT(8) #define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9) @@ -77,6 +80,7 @@ static const char * const mlx5_ib_dbg_cc_name[] = { #define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15) #define MLX5_IB_RP_GD_ATTR BIT(16) +#define MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR BIT(2) #define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3) #define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4) @@ -111,6 +115,9 @@ static u32 mlx5_get_cc_param_val(void *field, int offset) case MLX5_IB_DBG_CC_RP_AI_RATE: return MLX5_GET(cong_control_r_roce_ecn_rp, field, rpg_ai_rate); + case MLX5_IB_DBG_CC_RP_MAX_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_max_rate); case MLX5_IB_DBG_CC_RP_HAI_RATE: return MLX5_GET(cong_control_r_roce_ecn_rp, field, rpg_hai_rate); @@ -138,6 +145,9 @@ static u32 mlx5_get_cc_param_val(void *field, int offset) case MLX5_IB_DBG_CC_RP_GD: return MLX5_GET(cong_control_r_roce_ecn_rp, field, rpg_gd); + case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + min_time_between_cnps); case MLX5_IB_DBG_CC_NP_CNP_DSCP: return MLX5_GET(cong_control_r_roce_ecn_np, field, cnp_dscp); @@ -186,6 +196,11 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset, MLX5_SET(cong_control_r_roce_ecn_rp, field, rpg_ai_rate, var); break; + case MLX5_IB_DBG_CC_RP_MAX_RATE: + *attr_mask |= MLX5_IB_RP_MAX_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_max_rate, var); + break; case MLX5_IB_DBG_CC_RP_HAI_RATE: *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR; MLX5_SET(cong_control_r_roce_ecn_rp, field, @@ -231,6 +246,11 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset, MLX5_SET(cong_control_r_roce_ecn_rp, field, rpg_gd, var); break; + case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS: + *attr_mask |= MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, field, + min_time_between_cnps, var); + break; case MLX5_IB_DBG_CC_NP_CNP_DSCP: *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR; MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 367a71bc5f4b..146ba2966744 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -330,6 +330,22 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, dump_cqe(dev, cqe); } +static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, + u16 tail, u16 head) +{ + u16 idx; + + do { + idx = tail & (qp->sq.wqe_cnt - 1); + if (idx == head) + break; + + tail = qp->sq.w_list[idx].next; + } while (1); + tail = qp->sq.w_list[idx].next; + qp->sq.last_poll = tail; +} + static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); @@ -368,7 +384,7 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, } static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, - int *npolled, int is_send) + int *npolled, bool is_send) { struct mlx5_ib_wq *wq; unsigned int cur; @@ -383,10 +399,16 @@ static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, return; for (i = 0; i < cur && np < num_entries; i++) { - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + unsigned int idx; + + idx = (is_send) ? wq->last_poll : wq->tail; + idx &= (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[idx]; wc->status = IB_WC_WR_FLUSH_ERR; wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; wq->tail++; + if (is_send) + wq->last_poll = wq->w_list[idx].next; np++; wc->qp = &qp->ibqp; wc++; @@ -473,6 +495,7 @@ repoll: wqe_ctr = be16_to_cpu(cqe64->wqe_counter); idx = wqe_ctr & (wq->wqe_cnt - 1); handle_good_req(wc, cqe64, wq, idx); + handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); wc->wr_id = wq->wrid[idx]; wq->tail = wq->wqe_head[idx] + 1; wc->status = IB_WC_SUCCESS; @@ -692,17 +715,19 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - ucmdlen = udata->inlen < sizeof(ucmd) ? - (sizeof(ucmd) - sizeof(ucmd.flags)) : sizeof(ucmd); + ucmdlen = min(udata->inlen, sizeof(ucmd)); + if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags)) + return -EINVAL; if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; - if (ucmdlen == sizeof(ucmd) && - (ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD))) + if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD | + MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX))) return -EINVAL; - if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) + if ((ucmd.cqe_size != 64 && ucmd.cqe_size != 128) || + ucmd.reserved0 || ucmd.reserved1) return -EINVAL; *cqe_size = ucmd.cqe_size; @@ -739,7 +764,14 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - *index = context->bfregi.sys_pages[0]; + if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) { + *index = ucmd.uar_page_index; + } else if (context->bfregi.lib_uar_dyn) { + err = -EINVAL; + goto err_cqb; + } else { + *index = context->bfregi.sys_pages[0]; + } if (ucmd.cqe_comp_en == 1) { int mini_cqe_format; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index dbee17d22d50..862b7bf3e646 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -35,6 +35,9 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX: *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX; break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX; + break; default: return -EINVAL; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e4bcfa81b70a..6679756506e6 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -39,9 +39,6 @@ #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/bitmap.h> -#if defined(CONFIG_X86) -#include <asm/memtype.h> -#endif #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> @@ -898,7 +895,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->raw_packet_caps |= IB_RAW_PACKET_CAP_CVLAN_STRIPPING; - if (field_avail(typeof(resp), tso_caps, uhw_outlen)) { + if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen) { max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); if (max_tso) { resp.tso_caps.max_tso = 1 << max_tso; @@ -908,7 +905,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), rss_caps, uhw_outlen)) { + if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen) { resp.rss_caps.rx_hash_function = MLX5_RX_HASH_FUNC_TOEPLITZ; resp.rss_caps.rx_hash_fields_mask = @@ -928,9 +925,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.response_length += sizeof(resp.rss_caps); } } else { - if (field_avail(typeof(resp), tso_caps, uhw_outlen)) + if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen) resp.response_length += sizeof(resp.tso_caps); - if (field_avail(typeof(resp), rss_caps, uhw_outlen)) + if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen) resp.response_length += sizeof(resp.rss_caps); } @@ -1072,7 +1069,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_MAX_CQ_PERIOD; } - if (field_avail(typeof(resp), cqe_comp_caps, uhw_outlen)) { + if (offsetofend(typeof(resp), cqe_comp_caps) <= uhw_outlen) { resp.response_length += sizeof(resp.cqe_comp_caps); if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) { @@ -1090,7 +1087,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), packet_pacing_caps, uhw_outlen) && + if (offsetofend(typeof(resp), packet_pacing_caps) <= uhw_outlen && raw_support) { if (MLX5_CAP_QOS(mdev, packet_pacing) && MLX5_CAP_GEN(mdev, qos)) { @@ -1108,8 +1105,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.response_length += sizeof(resp.packet_pacing_caps); } - if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, - uhw_outlen)) { + if (offsetofend(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes) <= + uhw_outlen) { if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) resp.mlx5_ib_support_multi_pkt_send_wqes = MLX5_IB_ALLOW_MPW; @@ -1122,7 +1119,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } - if (field_avail(typeof(resp), flags, uhw_outlen)) { + if (offsetofend(typeof(resp), flags) <= uhw_outlen) { resp.response_length += sizeof(resp.flags); if (MLX5_CAP_GEN(mdev, cqe_compression_128)) @@ -1138,7 +1135,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; } - if (field_avail(typeof(resp), sw_parsing_caps, uhw_outlen)) { + if (offsetofend(typeof(resp), sw_parsing_caps) <= uhw_outlen) { resp.response_length += sizeof(resp.sw_parsing_caps); if (MLX5_CAP_ETH(mdev, swp)) { resp.sw_parsing_caps.sw_parsing_offloads |= @@ -1158,7 +1155,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), striding_rq_caps, uhw_outlen) && + if (offsetofend(typeof(resp), striding_rq_caps) <= uhw_outlen && raw_support) { resp.response_length += sizeof(resp.striding_rq_caps); if (MLX5_CAP_GEN(mdev, striding_rq)) { @@ -1181,7 +1178,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), tunnel_offloads_caps, uhw_outlen)) { + if (offsetofend(typeof(resp), tunnel_offloads_caps) <= uhw_outlen) { resp.response_length += sizeof(resp.tunnel_offloads_caps); if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan)) resp.tunnel_offloads_caps |= @@ -1192,12 +1189,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_GRE; - if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_CW_MPLS_GRE) + if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE; - if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_CW_MPLS_UDP) + if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_udp)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; } @@ -1791,6 +1786,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, max_cqe_version); u32 dump_fill_mkey; bool lib_uar_4k; + bool lib_uar_dyn; if (!dev->ib_active) return -EAGAIN; @@ -1849,8 +1845,14 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, } lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; + lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR; bfregi = &context->bfregi; + if (lib_uar_dyn) { + bfregi->lib_uar_dyn = lib_uar_dyn; + goto uar_done; + } + /* updates req->total_num_bfregs */ err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi); if (err) @@ -1877,6 +1879,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, if (err) goto out_sys_pages; +uar_done: if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { err = mlx5_ib_devx_create(dev, true); if (err < 0) @@ -1898,19 +1901,19 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); - resp.tot_bfregs = req.total_num_bfregs; + resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs; resp.num_ports = dev->num_ports; - if (field_avail(typeof(resp), cqe_version, udata->outlen)) + if (offsetofend(typeof(resp), cqe_version) <= udata->outlen) resp.response_length += sizeof(resp.cqe_version); - if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) { + if (offsetofend(typeof(resp), cmds_supp_uhw) <= udata->outlen) { resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; resp.response_length += sizeof(resp.cmds_supp_uhw); } - if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) { + if (offsetofend(typeof(resp), eth_min_inline) <= udata->outlen) { if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline); resp.eth_min_inline++; @@ -1918,7 +1921,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, resp.response_length += sizeof(resp.eth_min_inline); } - if (field_avail(typeof(resp), clock_info_versions, udata->outlen)) { + if (offsetofend(typeof(resp), clock_info_versions) <= udata->outlen) { if (mdev->clock_info) resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); resp.response_length += sizeof(resp.clock_info_versions); @@ -1930,7 +1933,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, * pretend we don't support reading the HCA's core clock. This is also * forced by mmap function. */ - if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { + if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) { if (PAGE_SIZE <= 4096) { resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; @@ -1940,18 +1943,18 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, resp.response_length += sizeof(resp.hca_core_clock_offset); } - if (field_avail(typeof(resp), log_uar_size, udata->outlen)) + if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen) resp.response_length += sizeof(resp.log_uar_size); - if (field_avail(typeof(resp), num_uars_per_page, udata->outlen)) + if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen) resp.response_length += sizeof(resp.num_uars_per_page); - if (field_avail(typeof(resp), num_dyn_bfregs, udata->outlen)) { + if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) { resp.num_dyn_bfregs = bfregi->num_dyn_bfregs; resp.response_length += sizeof(resp.num_dyn_bfregs); } - if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) { + if (offsetofend(typeof(resp), dump_fill_mkey) <= udata->outlen) { if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { resp.dump_fill_mkey = dump_fill_mkey; resp.comp_mask |= @@ -2026,6 +2029,17 @@ static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page; } +static u64 uar_index2paddress(struct mlx5_ib_dev *dev, + int uar_idx) +{ + unsigned int fw_uars_per_page; + + fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? + MLX5_UARS_IN_PAGE : 1; + + return (dev->mdev->bar_addr + (uar_idx / fw_uars_per_page) * PAGE_SIZE); +} + static int get_command(unsigned long offset) { return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK; @@ -2110,6 +2124,11 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) mutex_unlock(&var_table->bitmap_lock); kfree(mentry); break; + case MLX5_IB_MMAP_TYPE_UAR_WC: + case MLX5_IB_MMAP_TYPE_UAR_NC: + mlx5_cmd_free_uar(dev->mdev, mentry->page_idx); + kfree(mentry); + break; default: WARN_ON(true); } @@ -2130,6 +2149,9 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, int max_valid_idx = dyn_uar ? bfregi->num_sys_pages : bfregi->num_static_sys_pages; + if (bfregi->lib_uar_dyn) + return -EINVAL; + if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; @@ -2147,14 +2169,6 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, switch (cmd) { case MLX5_IB_MMAP_WC_PAGE: case MLX5_IB_MMAP_ALLOC_WC: -/* Some architectures don't support WC memory */ -#if defined(CONFIG_X86) - if (!pat_enabled()) - return -EPERM; -#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU))) - return -EPERM; -#endif - /* fall through */ case MLX5_IB_MMAP_REGULAR_PAGE: /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */ prot = pgprot_writecombine(vma->vm_page_prot); @@ -2269,7 +2283,8 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev, mentry = to_mmmap(entry); pfn = (mentry->address >> PAGE_SHIFT); - if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR) + if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR || + mentry->mmap_flag == MLX5_IB_MMAP_TYPE_UAR_NC) prot = pgprot_noncached(vma->vm_page_prot); else prot = pgprot_writecombine(vma->vm_page_prot); @@ -2300,9 +2315,12 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm command = get_command(vma->vm_pgoff); switch (command) { case MLX5_IB_MMAP_WC_PAGE: + case MLX5_IB_MMAP_ALLOC_WC: + if (!dev->wc_support) + return -EPERM; + fallthrough; case MLX5_IB_MMAP_NC_PAGE: case MLX5_IB_MMAP_REGULAR_PAGE: - case MLX5_IB_MMAP_ALLOC_WC: return uar_mmap(dev, command, vma, context); case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: @@ -3570,7 +3588,8 @@ static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev, misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); - MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); } else { misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); @@ -4045,6 +4064,11 @@ _get_flow_table(struct mlx5_ib_dev *dev, BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size)); priority = fs_matcher->priority; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + log_max_ft_size)); + priority = fs_matcher->priority; } max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); @@ -4061,6 +4085,8 @@ _get_flow_table(struct mlx5_ib_dev *dev, prio = &dev->flow_db->fdb; else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) prio = &dev->flow_db->rdma_rx[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) + prio = &dev->flow_db->rdma_tx[priority]; if (!prio) return ERR_PTR(-EINVAL); @@ -5722,9 +5748,10 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port - 1); - /* Q counters are in the beginning of all counters */ return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters, + cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -6088,9 +6115,9 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } -static int var_obj_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why, - struct uverbs_attr_bundle *attrs) +static int mmap_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct mlx5_user_mmap_entry *obj = uobject->object; @@ -6098,6 +6125,16 @@ static int var_obj_cleanup(struct ib_uobject *uobject, return 0; } +static int mlx5_rdma_user_mmap_entry_insert(struct mlx5_ib_ucontext *c, + struct mlx5_user_mmap_entry *entry, + size_t length) +{ + return rdma_user_mmap_entry_insert_range( + &c->ibucontext, &entry->rdma_entry, length, + (MLX5_IB_MMAP_OFFSET_START << 16), + ((MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1)); +} + static struct mlx5_user_mmap_entry * alloc_var_entry(struct mlx5_ib_ucontext *c) { @@ -6128,10 +6165,8 @@ alloc_var_entry(struct mlx5_ib_ucontext *c) entry->page_idx = page_idx; entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR; - err = rdma_user_mmap_entry_insert_range( - &c->ibucontext, &entry->rdma_entry, var_table->stride_size, - MLX5_IB_MMAP_OFFSET_START << 16, - (MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1); + err = mlx5_rdma_user_mmap_entry_insert(c, entry, + var_table->stride_size); if (err) goto err_insert; @@ -6215,7 +6250,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( UA_MANDATORY)); DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR, - UVERBS_TYPE_ALLOC_IDR(var_obj_cleanup), + UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup), &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC), &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY)); @@ -6227,6 +6262,134 @@ static bool var_is_supported(struct ib_device *device) MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q); } +static struct mlx5_user_mmap_entry * +alloc_uar_entry(struct mlx5_ib_ucontext *c, + enum mlx5_ib_uapi_uar_alloc_type alloc_type) +{ + struct mlx5_user_mmap_entry *entry; + struct mlx5_ib_dev *dev; + u32 uar_index; + int err; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + dev = to_mdev(c->ibucontext.device); + err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index); + if (err) + goto end; + + entry->page_idx = uar_index; + entry->address = uar_index2paddress(dev, uar_index); + if (alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF) + entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_WC; + else + entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_NC; + + err = mlx5_rdma_user_mmap_entry_insert(c, entry, PAGE_SIZE); + if (err) + goto err_insert; + + return entry; + +err_insert: + mlx5_cmd_free_uar(dev->mdev, uar_index); +end: + kfree(entry); + return ERR_PTR(err); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE); + enum mlx5_ib_uapi_uar_alloc_type alloc_type; + struct mlx5_ib_ucontext *c; + struct mlx5_user_mmap_entry *entry; + u64 mmap_offset; + u32 length; + int err; + + c = to_mucontext(ib_uverbs_get_ucontext(attrs)); + if (IS_ERR(c)) + return PTR_ERR(c); + + err = uverbs_get_const(&alloc_type, attrs, + MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE); + if (err) + return err; + + if (alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF && + alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC) + return -EOPNOTSUPP; + + if (!to_mdev(c->ibucontext.device)->wc_support && + alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF) + return -EOPNOTSUPP; + + entry = alloc_uar_entry(c, alloc_type); + if (IS_ERR(entry)) + return PTR_ERR(entry); + + mmap_offset = mlx5_entry_to_mmap_offset(entry); + length = entry->rdma_entry.npages * PAGE_SIZE; + uobj->object = entry; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET, + &mmap_offset, sizeof(mmap_offset)); + if (err) + goto err; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID, + &entry->page_idx, sizeof(entry->page_idx)); + if (err) + goto err; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH, + &length, sizeof(length)); + if (err) + goto err; + + return 0; + +err: + rdma_user_mmap_entry_remove(&entry->rdma_entry); + return err; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_UAR_OBJ_ALLOC, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE, + MLX5_IB_OBJECT_UAR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE, + enum mlx5_ib_uapi_uar_alloc_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_UAR_OBJ_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_UAR, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR, + UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_ALLOC), + &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY)); + ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_dm, UVERBS_OBJECT_DM, @@ -6251,12 +6414,14 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( static const struct uapi_definition mlx5_ib_defs[] = { UAPI_DEF_CHAIN(mlx5_ib_devx_defs), UAPI_DEF_CHAIN(mlx5_ib_flow_defs), + UAPI_DEF_CHAIN(mlx5_ib_qos_defs), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_action), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR), {} }; @@ -6390,6 +6555,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->reset_flow_resource_lock); xa_init(&dev->odp_mkeys); xa_init(&dev->sig_mrs); + atomic_set(&dev->mkey_var, 0); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; @@ -6545,7 +6711,8 @@ static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev) doorbell_bar_offset); bar_size = (1ULL << log_doorbell_bar_size) * 4096; var_table->stride_size = 1ULL << log_doorbell_stride; - var_table->num_var_hw_entries = div64_u64(bar_size, var_table->stride_size); + var_table->num_var_hw_entries = div_u64(bar_size, + var_table->stride_size); mutex_init(&var_table->bitmap_lock); var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries, GFP_KERNEL); @@ -7077,6 +7244,9 @@ const struct mlx5_ib_profile raw_eth_profile = { STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, mlx5_ib_stage_counters_init, mlx5_ib_stage_counters_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, + mlx5_ib_stage_cong_debugfs_init, + mlx5_ib_stage_cong_debugfs_cleanup), STAGE_CREATE(MLX5_IB_STAGE_UAR, mlx5_ib_stage_uar_init, mlx5_ib_stage_uar_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index b90a3649e7d1..c19ec9fd8a63 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -316,7 +316,7 @@ int mlx5_ib_test_wc(struct mlx5_ib_dev *dev) if (!dev->mdev->roce.roce_en && port_type_cap == MLX5_CAP_PORT_TYPE_ETH) { if (mlx5_core_is_pf(dev->mdev)) - dev->wc_support = true; + dev->wc_support = arch_can_pci_mmap_wc(); return 0; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d9bffcc93587..a4e522385de0 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -64,8 +64,6 @@ dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ __LINE__, current->pid, ##arg) -#define field_avail(type, fld, sz) (offsetof(type, fld) + \ - sizeof(((type *)0)->fld) <= (sz)) #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) @@ -126,11 +124,27 @@ enum { enum mlx5_ib_mmap_type { MLX5_IB_MMAP_TYPE_MEMIC = 1, MLX5_IB_MMAP_TYPE_VAR = 2, + MLX5_IB_MMAP_TYPE_UAR_WC = 3, + MLX5_IB_MMAP_TYPE_UAR_NC = 4, }; -#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \ - (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) -#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) +struct mlx5_bfreg_info { + u32 *sys_pages; + int num_low_latency_bfregs; + unsigned int *count; + + /* + * protect bfreg allocation data structs + */ + struct mutex lock; + u32 ver; + u8 lib_uar_4k : 1; + u8 lib_uar_dyn : 1; + u32 num_sys_pages; + u32 num_static_sys_pages; + u32 total_num_bfregs; + u32 num_dyn_bfregs; +}; struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; @@ -203,6 +217,11 @@ struct mlx5_ib_flow_matcher { u8 match_criteria_enable; }; +struct mlx5_ib_pp { + u16 index; + struct mlx5_core_dev *mdev; +}; + struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; @@ -210,6 +229,7 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; struct mlx5_ib_flow_prio fdb; struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT]; + struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. @@ -288,6 +308,7 @@ struct mlx5_ib_wq { unsigned head; unsigned tail; u16 cur_post; + u16 last_poll; void *cur_edge; }; @@ -617,8 +638,8 @@ struct mlx5_ib_mr { struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; - int order; - bool allocated_from_cache; + unsigned int order; + struct mlx5_cache_ent *cache_ent; int npages; struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; @@ -636,6 +657,7 @@ struct mlx5_ib_mr { /* For ODP and implicit */ atomic_t num_deferred_work; + wait_queue_head_t q_deferred_work; struct xarray implicit_children; union { struct rcu_head rcu; @@ -699,22 +721,34 @@ struct mlx5_cache_ent { u32 access_mode; u32 page; - u32 size; - u32 cur; + u8 disabled:1; + u8 fill_to_high_water:1; + + /* + * - available_mrs is the length of list head, ie the number of MRs + * available for immediate allocation. + * - total_mrs is available_mrs plus all in use MRs that could be + * returned to the cache. + * - limit is the low water mark for available_mrs, 2* limit is the + * upper water mark. + * - pending is the number of MRs currently being created + */ + u32 total_mrs; + u32 available_mrs; + u32 limit; + u32 pending; + + /* Statistics */ u32 miss; - u32 limit; struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; - int pending; - struct completion compl; }; struct mlx5_mr_cache { struct workqueue_struct *wq; struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES]; - int stopped; struct dentry *root; unsigned long last_add; }; @@ -792,6 +826,7 @@ enum mlx5_ib_dbg_cc_types { MLX5_IB_DBG_CC_RP_BYTE_RESET, MLX5_IB_DBG_CC_RP_THRESHOLD, MLX5_IB_DBG_CC_RP_AI_RATE, + MLX5_IB_DBG_CC_RP_MAX_RATE, MLX5_IB_DBG_CC_RP_HAI_RATE, MLX5_IB_DBG_CC_RP_MIN_DEC_FAC, MLX5_IB_DBG_CC_RP_MIN_RATE, @@ -801,6 +836,7 @@ enum mlx5_ib_dbg_cc_types { MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD, MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE, MLX5_IB_DBG_CC_RP_GD, + MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS, MLX5_IB_DBG_CC_NP_CNP_DSCP, MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE, MLX5_IB_DBG_CC_NP_CNP_PRIO, @@ -984,14 +1020,16 @@ struct mlx5_ib_dev { */ struct mutex cap_mask_mutex; u8 ib_active:1; - u8 fill_delay:1; u8 is_rep:1; u8 lag_active:1; u8 wc_support:1; + u8 fill_delay; struct umr_common umrc; /* sync used page count stats */ struct mlx5_ib_resources devr; + + atomic_t mkey_var; struct mlx5_mr_cache cache; struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ @@ -1261,7 +1299,8 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry); +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, + unsigned int entry); void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr); @@ -1381,6 +1420,7 @@ int mlx5_ib_fill_stat_entry(struct sk_buff *msg, extern const struct uapi_definition mlx5_ib_devx_defs[]; extern const struct uapi_definition mlx5_ib_flow_defs[]; +extern const struct uapi_definition mlx5_ib_qos_defs[]; #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); @@ -1470,12 +1510,11 @@ static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, { u8 cqe_version = ucontext->cqe_version; - if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) && - !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) + if ((offsetofend(typeof(*ucmd), uidx) <= inlen) && !cqe_version && + (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) return 0; - if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) != - !!cqe_version)) + if ((offsetofend(typeof(*ucmd), uidx) <= inlen) != !!cqe_version) return -EINVAL; return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); @@ -1488,12 +1527,11 @@ static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext, { u8 cqe_version = ucontext->cqe_version; - if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) && - !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) + if ((offsetofend(typeof(*ucmd), uidx) <= inlen) && !cqe_version && + (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) return 0; - if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) != - !!cqe_version)) + if ((offsetofend(typeof(*ucmd), uidx) <= inlen) != !!cqe_version) return -EINVAL; return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); @@ -1532,7 +1570,9 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) return false; - if (access_flags & IB_ACCESS_RELAXED_ORDERING) + if (access_flags & IB_ACCESS_RELAXED_ORDERING && + (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) || + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))) return false; return true; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6fa0a83c19de..a401931189b7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -47,9 +47,46 @@ enum { #define MLX5_UMR_ALIGN 2048 +static void +create_mkey_callback(int status, struct mlx5_async_work *context); + +static void +assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, + u32 *in) +{ + u8 key = atomic_inc_return(&dev->mkey_var); + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, mkey_7_0, key); + mkey->key = key; +} + +static int +mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, + u32 *in, int inlen) +{ + assign_mkey_variant(dev, mkey, in); + return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen); +} + +static int +mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, + struct mlx5_core_mkey *mkey, + struct mlx5_async_ctx *async_ctx, + u32 *in, int inlen, u32 *out, int outlen, + struct mlx5_async_work *context) +{ + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + assign_mkey_variant(dev, mkey, in); + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, + create_mkey_callback, context); +} + static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); +static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) { @@ -63,67 +100,73 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -static int order2idx(struct mlx5_ib_dev *dev, int order) -{ - struct mlx5_mr_cache *cache = &dev->cache; - - if (order < cache->ent[0].order) - return 0; - else - return order - cache->ent[0].order; -} - static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) { return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -static void reg_mr_callback(int status, struct mlx5_async_work *context) +static void create_mkey_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = container_of(context, struct mlx5_ib_mr, cb_work); struct mlx5_ib_dev *dev = mr->dev; - struct mlx5_mr_cache *cache = &dev->cache; - int c = order2idx(dev, mr->order); - struct mlx5_cache_ent *ent = &cache->ent[c]; - u8 key; + struct mlx5_cache_ent *ent = mr->cache_ent; unsigned long flags; - spin_lock_irqsave(&ent->lock, flags); - ent->pending--; - spin_unlock_irqrestore(&ent->lock, flags); if (status) { mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); kfree(mr); - dev->fill_delay = 1; + spin_lock_irqsave(&ent->lock, flags); + ent->pending--; + WRITE_ONCE(dev->fill_delay, 1); + spin_unlock_irqrestore(&ent->lock, flags); mod_timer(&dev->delay_timer, jiffies + HZ); return; } mr->mmkey.type = MLX5_MKEY_MR; - spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); - key = dev->mdev->priv.mkey_key++; - spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); - mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key; + mr->mmkey.key |= mlx5_idx_to_mkey( + MLX5_GET(create_mkey_out, mr->out, mkey_index)); - cache->last_add = jiffies; + WRITE_ONCE(dev->cache.last_add, jiffies); spin_lock_irqsave(&ent->lock, flags); list_add_tail(&mr->list, &ent->head); - ent->cur++; - ent->size++; + ent->available_mrs++; + ent->total_mrs++; + /* If we are doing fill_to_high_water then keep going. */ + queue_adjust_cache_locked(ent); + ent->pending--; spin_unlock_irqrestore(&ent->lock, flags); +} - if (!completion_done(&ent->compl)) - complete(&ent->compl); +static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) +{ + struct mlx5_ib_mr *mr; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return NULL; + mr->order = ent->order; + mr->cache_ent = ent; + mr->dev = ent->dev; + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); + + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); + MLX5_SET(mkc, mkc, log_page_size, ent->page); + return mr; } -static int add_keys(struct mlx5_ib_dev *dev, int c, int num) +/* Asynchronously schedule new MRs to be populated in the cache. */ +static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent = &cache->ent[c]; - int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -136,42 +179,29 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); for (i = 0; i < num; i++) { - if (ent->pending >= MAX_PENDING_REG_MR) { - err = -EAGAIN; - break; - } - - mr = kzalloc(sizeof(*mr), GFP_KERNEL); + mr = alloc_cache_mr(ent, mkc); if (!mr) { err = -ENOMEM; break; } - mr->order = ent->order; - mr->allocated_from_cache = true; - mr->dev = dev; - - MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, - (ent->access_mode >> 2) & 0x7); - - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); - MLX5_SET(mkc, mkc, log_page_size, ent->page); - spin_lock_irq(&ent->lock); + if (ent->pending >= MAX_PENDING_REG_MR) { + err = -EAGAIN; + spin_unlock_irq(&ent->lock); + kfree(mr); + break; + } ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, - &dev->async_ctx, in, inlen, - mr->out, sizeof(mr->out), - reg_mr_callback, &mr->cb_work); + err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey, + &ent->dev->async_ctx, in, inlen, + mr->out, sizeof(mr->out), + &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; spin_unlock_irq(&ent->lock); - mlx5_ib_warn(dev, "create mkey failed %d\n", err); + mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); kfree(mr); break; } @@ -181,70 +211,128 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) return err; } -static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) +/* Synchronously create a MR in the cache */ +static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent = &cache->ent[c]; - struct mlx5_ib_mr *tmp_mr; + size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; - LIST_HEAD(del_list); - int i; + void *mkc; + u32 *in; + int err; - for (i = 0; i < num; i++) { - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { - spin_unlock_irq(&ent->lock); - break; - } - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_move(&mr->list, &del_list); - ent->cur--; - ent->size--; - spin_unlock_irq(&ent->lock); - mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); - } + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { - list_del(&mr->list); - kfree(mr); + mr = alloc_cache_mr(ent, mkc); + if (!mr) { + err = -ENOMEM; + goto free_in; } + + err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen); + if (err) + goto free_mr; + + mr->mmkey.type = MLX5_MKEY_MR; + WRITE_ONCE(ent->dev->cache.last_add, jiffies); + spin_lock_irq(&ent->lock); + ent->total_mrs++; + spin_unlock_irq(&ent->lock); + kfree(in); + return mr; +free_mr: + kfree(mr); +free_in: + kfree(in); + return ERR_PTR(err); } -static ssize_t size_write(struct file *filp, const char __user *buf, - size_t count, loff_t *pos) +static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) +{ + struct mlx5_ib_mr *mr; + + lockdep_assert_held(&ent->lock); + if (list_empty(&ent->head)) + return; + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->available_mrs--; + ent->total_mrs--; + spin_unlock_irq(&ent->lock); + mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); + kfree(mr); + spin_lock_irq(&ent->lock); +} + +static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, + bool limit_fill) { - struct mlx5_cache_ent *ent = filp->private_data; - struct mlx5_ib_dev *dev = ent->dev; - char lbuf[20] = {0}; - u32 var; int err; - int c; - count = min(count, sizeof(lbuf) - 1); - if (copy_from_user(lbuf, buf, count)) - return -EFAULT; + lockdep_assert_held(&ent->lock); - c = order2idx(dev, ent->order); + while (true) { + if (limit_fill) + target = ent->limit * 2; + if (target == ent->available_mrs + ent->pending) + return 0; + if (target > ent->available_mrs + ent->pending) { + u32 todo = target - (ent->available_mrs + ent->pending); - if (sscanf(lbuf, "%u", &var) != 1) - return -EINVAL; + spin_unlock_irq(&ent->lock); + err = add_keys(ent, todo); + if (err == -EAGAIN) + usleep_range(3000, 5000); + spin_lock_irq(&ent->lock); + if (err) { + if (err != -EAGAIN) + return err; + } else + return 0; + } else { + remove_cache_mr_locked(ent); + } + } +} - if (var < ent->limit) - return -EINVAL; +static ssize_t size_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + u32 target; + int err; - if (var > ent->size) { - do { - err = add_keys(dev, c, var - ent->size); - if (err && err != -EAGAIN) - return err; + err = kstrtou32_from_user(buf, count, 0, &target); + if (err) + return err; - usleep_range(3000, 5000); - } while (err); - } else if (var < ent->size) { - remove_keys(dev, c, ent->size - var); + /* + * Target is the new value of total_mrs the user requests, however we + * cannot free MRs that are in use. Compute the target value for + * available_mrs. + */ + spin_lock_irq(&ent->lock); + if (target < ent->total_mrs - ent->available_mrs) { + err = -EINVAL; + goto err_unlock; } + target = target - (ent->total_mrs - ent->available_mrs); + if (target < ent->limit || target > ent->limit*2) { + err = -EINVAL; + goto err_unlock; + } + err = resize_available_mrs(ent, target, false); + if (err) + goto err_unlock; + spin_unlock_irq(&ent->lock); return count; + +err_unlock: + spin_unlock_irq(&ent->lock); + return err; } static ssize_t size_read(struct file *filp, char __user *buf, size_t count, @@ -254,7 +342,7 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); + err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs); if (err < 0) return err; @@ -272,32 +360,23 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct mlx5_cache_ent *ent = filp->private_data; - struct mlx5_ib_dev *dev = ent->dev; - char lbuf[20] = {0}; u32 var; int err; - int c; - - count = min(count, sizeof(lbuf) - 1); - if (copy_from_user(lbuf, buf, count)) - return -EFAULT; - c = order2idx(dev, ent->order); - - if (sscanf(lbuf, "%u", &var) != 1) - return -EINVAL; - - if (var > ent->size) - return -EINVAL; + err = kstrtou32_from_user(buf, count, 0, &var); + if (err) + return err; + /* + * Upon set we immediately fill the cache to high water mark implied by + * the limit. + */ + spin_lock_irq(&ent->lock); ent->limit = var; - - if (ent->cur < ent->limit) { - err = add_keys(dev, c, 2 * ent->limit - ent->cur); - if (err) - return err; - } - + err = resize_available_mrs(ent, 0, true); + spin_unlock_irq(&ent->lock); + if (err) + return err; return count; } @@ -322,68 +401,119 @@ static const struct file_operations limit_fops = { .read = limit_read, }; -static int someone_adding(struct mlx5_mr_cache *cache) +static bool someone_adding(struct mlx5_mr_cache *cache) { - int i; + unsigned int i; for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - if (cache->ent[i].cur < cache->ent[i].limit) - return 1; + struct mlx5_cache_ent *ent = &cache->ent[i]; + bool ret; + + spin_lock_irq(&ent->lock); + ret = ent->available_mrs < ent->limit; + spin_unlock_irq(&ent->lock); + if (ret) + return true; } + return false; +} - return 0; +/* + * Check if the bucket is outside the high/low water mark and schedule an async + * update. The cache refill has hysteresis, once the low water mark is hit it is + * refilled up to the high mark. + */ +static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) +{ + lockdep_assert_held(&ent->lock); + + if (ent->disabled || READ_ONCE(ent->dev->fill_delay)) + return; + if (ent->available_mrs < ent->limit) { + ent->fill_to_high_water = true; + queue_work(ent->dev->cache.wq, &ent->work); + } else if (ent->fill_to_high_water && + ent->available_mrs + ent->pending < 2 * ent->limit) { + /* + * Once we start populating due to hitting a low water mark + * continue until we pass the high water mark. + */ + queue_work(ent->dev->cache.wq, &ent->work); + } else if (ent->available_mrs == 2 * ent->limit) { + ent->fill_to_high_water = false; + } else if (ent->available_mrs > 2 * ent->limit) { + /* Queue deletion of excess entries */ + ent->fill_to_high_water = false; + if (ent->pending) + queue_delayed_work(ent->dev->cache.wq, &ent->dwork, + msecs_to_jiffies(1000)); + else + queue_work(ent->dev->cache.wq, &ent->work); + } } static void __cache_work_func(struct mlx5_cache_ent *ent) { struct mlx5_ib_dev *dev = ent->dev; struct mlx5_mr_cache *cache = &dev->cache; - int i = order2idx(dev, ent->order); int err; - if (cache->stopped) - return; + spin_lock_irq(&ent->lock); + if (ent->disabled) + goto out; - ent = &dev->cache.ent[i]; - if (ent->cur < 2 * ent->limit && !dev->fill_delay) { - err = add_keys(dev, i, 1); - if (ent->cur < 2 * ent->limit) { - if (err == -EAGAIN) { - mlx5_ib_dbg(dev, "returned eagain, order %d\n", - i + 2); - queue_delayed_work(cache->wq, &ent->dwork, - msecs_to_jiffies(3)); - } else if (err) { - mlx5_ib_warn(dev, "command failed order %d, err %d\n", - i + 2, err); + if (ent->fill_to_high_water && + ent->available_mrs + ent->pending < 2 * ent->limit && + !READ_ONCE(dev->fill_delay)) { + spin_unlock_irq(&ent->lock); + err = add_keys(ent, 1); + spin_lock_irq(&ent->lock); + if (ent->disabled) + goto out; + if (err) { + /* + * EAGAIN only happens if pending is positive, so we + * will be rescheduled from reg_mr_callback(). The only + * failure path here is ENOMEM. + */ + if (err != -EAGAIN) { + mlx5_ib_warn( + dev, + "command failed order %d, err %d\n", + ent->order, err); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(1000)); - } else { - queue_work(cache->wq, &ent->work); } } - } else if (ent->cur > 2 * ent->limit) { + } else if (ent->available_mrs > 2 * ent->limit) { + bool need_delay; + /* - * The remove_keys() logic is performed as garbage collection - * task. Such task is intended to be run when no other active - * processes are running. + * The remove_cache_mr() logic is performed as garbage + * collection task. Such task is intended to be run when no + * other active processes are running. * * The need_resched() will return TRUE if there are user tasks * to be activated in near future. * - * In such case, we don't execute remove_keys() and postpone - * the garbage collection work to try to run in next cycle, - * in order to free CPU resources to other tasks. + * In such case, we don't execute remove_cache_mr() and postpone + * the garbage collection work to try to run in next cycle, in + * order to free CPU resources to other tasks. */ - if (!need_resched() && !someone_adding(cache) && - time_after(jiffies, cache->last_add + 300 * HZ)) { - remove_keys(dev, i, 1); - if (ent->cur > ent->limit) - queue_work(cache->wq, &ent->work); - } else { + spin_unlock_irq(&ent->lock); + need_delay = need_resched() || someone_adding(cache) || + time_after(jiffies, + READ_ONCE(cache->last_add) + 300 * HZ); + spin_lock_irq(&ent->lock); + if (ent->disabled) + goto out; + if (need_delay) queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); - } + remove_cache_mr_locked(ent); + queue_adjust_cache_locked(ent); } +out: + spin_unlock_irq(&ent->lock); } static void delayed_cache_work_func(struct work_struct *work) @@ -402,117 +532,95 @@ static void cache_work_func(struct work_struct *work) __cache_work_func(ent); } -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) +/* Allocate a special entry from the cache */ +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, + unsigned int entry) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - int err; - if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { - mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); + if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY || + entry >= ARRAY_SIZE(cache->ent))) return ERR_PTR(-EINVAL); - } ent = &cache->ent[entry]; - while (1) { - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { - spin_unlock_irq(&ent->lock); - - err = add_keys(dev, entry, 1); - if (err && err != -EAGAIN) - return ERR_PTR(err); - - wait_for_completion(&ent->compl); - } else { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, - list); - list_del(&mr->list); - ent->cur--; - spin_unlock_irq(&ent->lock); - if (ent->cur < ent->limit) - queue_work(cache->wq, &ent->work); + spin_lock_irq(&ent->lock); + if (list_empty(&ent->head)) { + spin_unlock_irq(&ent->lock); + mr = create_cache_mr(ent); + if (IS_ERR(mr)) return mr; - } + } else { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->available_mrs--; + queue_adjust_cache_locked(ent); + spin_unlock_irq(&ent->lock); } + return mr; } -static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) +/* Return a MR already available in the cache */ +static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) { - struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_ib_dev *dev = req_ent->dev; struct mlx5_ib_mr *mr = NULL; - struct mlx5_cache_ent *ent; - int last_umr_cache_entry; - int c; - int i; - - c = order2idx(dev, order); - last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev)); - if (c < 0 || c > last_umr_cache_entry) { - mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); - return NULL; - } - - for (i = c; i <= last_umr_cache_entry; i++) { - ent = &cache->ent[i]; + struct mlx5_cache_ent *ent = req_ent; - mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); + /* Try larger MR pools from the cache to satisfy the allocation */ + for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) { + mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order, + ent - dev->cache.ent); spin_lock_irq(&ent->lock); if (!list_empty(&ent->head)) { mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); - ent->cur--; + ent->available_mrs--; + queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); - if (ent->cur < ent->limit) - queue_work(cache->wq, &ent->work); break; } + queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); - - queue_work(cache->wq, &ent->work); } if (!mr) - cache->ent[c].miss++; + req_ent->miss++; return mr; } +static void detach_mr_from_cache(struct mlx5_ib_mr *mr) +{ + struct mlx5_cache_ent *ent = mr->cache_ent; + + mr->cache_ent = NULL; + spin_lock_irq(&ent->lock); + ent->total_mrs--; + spin_unlock_irq(&ent->lock); +} + void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; - int shrink = 0; - int c; + struct mlx5_cache_ent *ent = mr->cache_ent; - if (!mr->allocated_from_cache) + if (!ent) return; - c = order2idx(dev, mr->order); - WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES); - if (mlx5_mr_cache_invalidate(mr)) { - mr->allocated_from_cache = false; + detach_mr_from_cache(mr); destroy_mkey(dev, mr); - ent = &cache->ent[c]; - if (ent->cur < ent->limit) - queue_work(cache->wq, &ent->work); return; } - ent = &cache->ent[c]; spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); - ent->cur++; - if (ent->cur > 2 * ent->limit) - shrink = 1; + ent->available_mrs++; + queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); - - if (shrink) - queue_work(cache->wq, &ent->work); } static void clean_keys(struct mlx5_ib_dev *dev, int c) @@ -532,8 +640,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_move(&mr->list, &del_list); - ent->cur--; - ent->size--; + ent->available_mrs--; + ent->total_mrs--; spin_unlock_irq(&ent->lock); mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } @@ -571,7 +679,7 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) dir = debugfs_create_dir(ent->name, cache->root); debugfs_create_file("size", 0600, dir, ent, &size_fops); debugfs_create_file("limit", 0600, dir, ent, &limit_fops); - debugfs_create_u32("cur", 0400, dir, &ent->cur); + debugfs_create_u32("cur", 0400, dir, &ent->available_mrs); debugfs_create_u32("miss", 0600, dir, &ent->miss); } } @@ -580,7 +688,7 @@ static void delay_time_func(struct timer_list *t) { struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer); - dev->fill_delay = 0; + WRITE_ONCE(dev->fill_delay, 0); } int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) @@ -606,7 +714,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->dev = dev; ent->limit = 0; - init_completion(&ent->compl); INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); @@ -628,7 +735,9 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->limit = dev->mdev->profile->mr_cache[i].limit; else ent->limit = 0; - queue_work(cache->wq, &ent->work); + spin_lock_irq(&ent->lock); + queue_adjust_cache_locked(ent); + spin_unlock_irq(&ent->lock); } mlx5_mr_cache_debugfs_init(dev); @@ -638,13 +747,20 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { - int i; + unsigned int i; if (!dev->cache.wq) return 0; - dev->cache.stopped = 1; - flush_workqueue(dev->cache.wq); + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + struct mlx5_cache_ent *ent = &dev->cache.ent[i]; + + spin_lock_irq(&ent->lock); + ent->disabled = true; + spin_unlock_irq(&ent->lock); + cancel_work_sync(&ent->work); + cancel_delayed_work_sync(&ent->dwork); + } mlx5_mr_cache_debugfs_cleanup(dev); mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); @@ -685,7 +801,6 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -707,7 +822,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) MLX5_SET(mkc, mkc, length64, 1); set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); - err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_in; @@ -840,31 +955,37 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, return err; } -static struct mlx5_ib_mr *alloc_mr_from_cache( - struct ib_pd *pd, struct ib_umem *umem, - u64 virt_addr, u64 len, int npages, - int page_shift, int order, int access_flags) +static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, + unsigned int order) +{ + struct mlx5_mr_cache *cache = &dev->cache; + + if (order < cache->ent[0].order) + return &cache->ent[0]; + order = order - cache->ent[0].order; + if (order > MR_CACHE_LAST_STD_ENTRY) + return NULL; + return &cache->ent[order]; +} + +static struct mlx5_ib_mr * +alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, + u64 len, int npages, int page_shift, unsigned int order, + int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); struct mlx5_ib_mr *mr; - int err = 0; - int i; - for (i = 0; i < 1; i++) { - mr = alloc_cached_mr(dev, order); - if (mr) - break; - - err = add_keys(dev, order2idx(dev, order), 1); - if (err && err != -EAGAIN) { - mlx5_ib_warn(dev, "add_keys failed, err %d\n", err); - break; - } + if (!ent) + return ERR_PTR(-E2BIG); + mr = get_cache_mr(ent); + if (!mr) { + mr = create_cache_mr(ent); + if (IS_ERR(mr)) + return mr; } - if (!mr) - return ERR_PTR(-EAGAIN); - mr->ibmr.pd = pd; mr->umem = umem; mr->access_flags = access_flags; @@ -1097,7 +1218,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, get_octo_len(virt_addr, length, page_shift)); } - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); goto err_2; @@ -1137,7 +1258,6 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -1160,7 +1280,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, MLX5_SET64(mkc, mkc, len, length); set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); - err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_in; @@ -1439,10 +1559,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, /* * UMR can't be used - MKey needs to be replaced. */ - if (mr->allocated_from_cache) - err = mlx5_mr_cache_invalidate(mr); - else - err = destroy_mkey(dev, mr); + if (mr->cache_ent) + detach_mr_from_cache(mr); + err = destroy_mkey(dev, mr); if (err) goto err; @@ -1454,8 +1573,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, mr = to_mmr(ib_mr); goto err; } - - mr->allocated_from_cache = false; } else { /* * Send a UMR WQE @@ -1542,8 +1659,6 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int allocated_from_cache = mr->allocated_from_cache; - if (mr->sig) { if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) @@ -1558,7 +1673,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mr->sig = NULL; } - if (!allocated_from_cache) { + if (!mr->cache_ent) { destroy_mkey(dev, mr); mlx5_free_priv_descs(mr); } @@ -1575,7 +1690,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) else clean_mr(dev, mr); - if (mr->allocated_from_cache) + if (mr->cache_ent) mlx5_mr_cache_free(dev, mr); else kfree(mr); @@ -1638,7 +1753,7 @@ static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_free_descs; @@ -1905,7 +2020,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); MLX5_SET(mkc, mkc, qpn, 0xffffff); - err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); if (err) goto free; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4216814ba871..3de7606d4a1a 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -197,7 +197,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) odp->private = NULL; mutex_unlock(&odp->umem_mutex); - if (!mr->allocated_from_cache) { + if (!mr->cache_ent) { mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); WARN_ON(mr->descs); } @@ -235,7 +235,8 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) mr->parent = NULL; mlx5_mr_cache_free(mr->dev, mr); ib_umem_odp_release(odp); - atomic_dec(&imr->num_deferred_work); + if (atomic_dec_and_test(&imr->num_deferred_work)) + wake_up(&imr->q_deferred_work); } static void free_implicit_child_mr_work(struct work_struct *work) @@ -554,6 +555,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; atomic_set(&imr->num_deferred_work, 0); + init_waitqueue_head(&imr->q_deferred_work); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -611,10 +613,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) * under xa_lock while the child is in the xarray. Thus at this point * it is only decreasing, and all work holding it is now on the wq. */ - if (atomic_read(&imr->num_deferred_work)) { - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&imr->num_deferred_work)); - } + wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); /* * Fence the imr before we destroy the children. This allows us to @@ -645,10 +644,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&mr->dev->odp_srcu); - if (atomic_read(&mr->num_deferred_work)) { - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&mr->num_deferred_work)); - } + wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); dma_fence_odp_mr(mr); } @@ -1720,7 +1716,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - atomic_dec(&work->frags[i].mr->num_deferred_work); + if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work)) + wake_up(&work->frags[i].mr->q_deferred_work); kvfree(work); } diff --git a/drivers/infiniband/hw/mlx5/qos.c b/drivers/infiniband/hw/mlx5/qos.c new file mode 100644 index 000000000000..cac878a70edb --- /dev/null +++ b/drivers/infiniband/hw/mlx5/qos.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/uverbs_ioctl.h> +#include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/mlx5_user_ioctl_verbs.h> +#include <linux/mlx5/driver.h> +#include "mlx5_ib.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + +static bool pp_is_supported(struct ib_device *device) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + + return (MLX5_CAP_GEN(dev->mdev, qos) && + MLX5_CAP_QOS(dev->mdev, packet_pacing) && + MLX5_CAP_QOS(dev->mdev, packet_pacing_uid)); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_PP_OBJ_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {}; + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE); + struct mlx5_ib_dev *dev; + struct mlx5_ib_ucontext *c; + struct mlx5_ib_pp *pp_entry; + void *in_ctx; + u16 uid; + int inlen; + u32 flags; + int err; + + c = to_mucontext(ib_uverbs_get_ucontext(attrs)); + if (IS_ERR(c)) + return PTR_ERR(c); + + /* The allocated entry can be used only by a DEVX context */ + if (!c->devx_uid) + return -EINVAL; + + dev = to_mdev(c->ibucontext.device); + pp_entry = kzalloc(sizeof(*pp_entry), GFP_KERNEL); + if (!pp_entry) + return -ENOMEM; + + in_ctx = uverbs_attr_get_alloced_ptr(attrs, + MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX); + inlen = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX); + memcpy(rl_raw, in_ctx, inlen); + err = uverbs_get_flags32(&flags, attrs, + MLX5_IB_ATTR_PP_OBJ_ALLOC_FLAGS, + MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX); + if (err) + goto err; + + uid = (flags & MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX) ? + c->devx_uid : MLX5_SHARED_RESOURCE_UID; + + err = mlx5_rl_add_rate_raw(dev->mdev, rl_raw, uid, + (flags & MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX), + &pp_entry->index); + if (err) + goto err; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX, + &pp_entry->index, sizeof(pp_entry->index)); + if (err) + goto clean; + + pp_entry->mdev = dev->mdev; + uobj->object = pp_entry; + return 0; + +clean: + mlx5_rl_remove_rate_raw(dev->mdev, pp_entry->index); +err: + kfree(pp_entry); + return err; +} + +static int pp_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_pp *pp_entry = uobject->object; + + mlx5_rl_remove_rate_raw(pp_entry->mdev, pp_entry->index); + kfree(pp_entry); + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_PP_OBJ_ALLOC, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE, + MLX5_IB_OBJECT_PP, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX, + UVERBS_ATTR_SIZE(1, + MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_PP_OBJ_ALLOC_FLAGS, + enum mlx5_ib_uapi_pp_alloc_flags, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_PP_OBJ_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_PP_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_PP, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_PP, + UVERBS_TYPE_ALLOC_IDR(pp_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_PP_OBJ_ALLOC), + &UVERBS_METHOD(MLX5_IB_METHOD_PP_OBJ_DESTROY)); + + +const struct uapi_definition mlx5_ib_qos_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_PP, + UAPI_DEF_IS_OBJ_SUPPORTED(pp_is_supported)), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 957f3a52589b..1456db4b6295 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -697,6 +697,9 @@ static int alloc_bfreg(struct mlx5_ib_dev *dev, { int bfregn = -ENOMEM; + if (bfregi->lib_uar_dyn) + return -EINVAL; + mutex_lock(&bfregi->lock); if (bfregi->ver >= 2) { bfregn = alloc_high_class_bfreg(dev, bfregi); @@ -768,6 +771,9 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, u32 index_of_sys_page; u32 offset; + if (bfregi->lib_uar_dyn) + return -EINVAL; + bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * MLX5_NON_FP_BFREGS_PER_UAR; index_of_sys_page = bfregn / bfregs_per_sys_page; @@ -919,6 +925,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, void *qpc; int err; u16 uid; + u32 uar_flags; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) { @@ -928,24 +935,29 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext, ibucontext); - if (ucmd.flags & MLX5_QP_FLAG_BFREG_INDEX) { + uar_flags = ucmd.flags & (MLX5_QP_FLAG_UAR_PAGE_INDEX | + MLX5_QP_FLAG_BFREG_INDEX); + switch (uar_flags) { + case MLX5_QP_FLAG_UAR_PAGE_INDEX: + uar_index = ucmd.bfreg_index; + bfregn = MLX5_IB_INVALID_BFREG; + break; + case MLX5_QP_FLAG_BFREG_INDEX: uar_index = bfregn_to_uar_index(dev, &context->bfregi, ucmd.bfreg_index, true); if (uar_index < 0) return uar_index; - bfregn = MLX5_IB_INVALID_BFREG; - } else if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) { - /* - * TBD: should come from the verbs when we have the API - */ - /* In CROSS_CHANNEL CQ and QP must use the same UAR */ - bfregn = MLX5_CROSS_CHANNEL_BFREG; - } - else { + break; + case 0: + if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + return -EINVAL; bfregn = alloc_bfreg(dev, &context->bfregi); if (bfregn < 0) return bfregn; + break; + default: + return -EINVAL; } mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index); @@ -2100,6 +2112,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC | MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | MLX5_QP_FLAG_TUNNEL_OFFLOADS | + MLX5_QP_FLAG_UAR_PAGE_INDEX | MLX5_QP_FLAG_TYPE_DCI | MLX5_QP_FLAG_TYPE_DCT)) return -EINVAL; @@ -2789,7 +2802,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, mlx5_ib_dbg(dev, "unsupported qp type %d\n", init_attr->qp_type); /* Don't support raw QPs */ - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } if (verbs_init_attr->qp_type == IB_QPT_DRIVER) @@ -3775,6 +3788,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->sq.cur_post = 0; if (qp->sq.wqe_cnt) qp->sq.cur_edge = get_sq_edge(&qp->sq, 0); + qp->sq.last_poll = 0; qp->db.db[MLX5_RCV_DBR] = 0; qp->db.db[MLX5_SND_DBR] = 0; } @@ -6204,6 +6218,10 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); + if (!capable(CAP_SYS_RAWIO) && + init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) + return ERR_PTR(-EPERM); + dev = to_mdev(pd->device); switch (init_attr->wq_type) { case IB_WQT_RQ: diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 78a48aea3faf..fa808582b08b 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -58,7 +58,7 @@ struct mthca_user_db_table { u64 uvirt; struct scatterlist mem; int refcount; - } page[0]; + } page[]; }; static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk) diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index da9b8f9b884f..f9a2e65e2ff5 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -68,7 +68,7 @@ struct mthca_icm_table { int lowmem; int coherent; struct mutex mutex; - struct mthca_icm *icm[0]; + struct mthca_icm *icm[]; }; struct mthca_icm_iter { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index ac19d57803b5..69a3e4f62fb1 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -561,7 +561,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, } default: /* Don't support raw QPs */ - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); } if (err) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index d47ea675734b..10e343894595 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1111,7 +1111,7 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, (attrs->qp_type != IB_QPT_UD)) { pr_err("%s(%d) unsupported qp type=0x%x requested\n", __func__, dev->id, attrs->qp_type); - return -EINVAL; + return -EOPNOTSUPP; } /* Skip the check for QP1 to support CM size of 128 */ if ((attrs->qp_type != IB_QPT_GSI) && diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 484b555150e0..a5bd3adaf90a 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1186,7 +1186,7 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, DP_DEBUG(dev, QEDR_MSG_QP, "create qp: unsupported qp type=0x%x requested\n", attrs->qp_type); - return -EINVAL; + return -EOPNOTSUPP; } if (attrs->cap.max_send_wr > qattr->max_sqe) { diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 33778d451b82..7508abb6a0fa 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -39,7 +39,6 @@ #include <linux/utsname.h> #include <linux/rculist.h> #include <linux/mm.h> -#include <linux/random.h> #include <linux/vmalloc.h> #include <rdma/rdma_vt.h> @@ -329,8 +328,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (mcast == NULL) goto drop; this_cpu_inc(ibp->pmastats->n_multicast_rcv); + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. @@ -1501,7 +1502,6 @@ int qib_register_ib_device(struct qib_devdata *dd) unsigned i, ctxt; int ret; - get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd)); for (i = 0; i < dd->num_pports; i++) init_ibport(ppd + i); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 8bf414b47b96..dc0e81f3b6f4 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -177,7 +177,6 @@ struct qib_ibdev { struct timer_list mem_timer; struct qib_pio_header *pio_hdrs; dma_addr_t pio_hdrs_phys; - u32 qp_rnd; /* random bytes for hash */ u32 n_piowait; u32 n_txwait; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 556b8e44a51c..71f82339446c 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -504,7 +504,7 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, if (init_attr->qp_type != IB_QPT_UD) { usnic_err("%s asked to make a non-UD QP: %d\n", dev_name(&us_ibdev->ib_dev.dev), init_attr->qp_type); - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } trans_spec = cmd.spec; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 70be49b1ca05..7ec8991ace67 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -77,7 +77,7 @@ struct usnic_uiom_reg { struct usnic_uiom_chunk { struct list_head list; int nents; - struct scatterlist page_list[0]; + struct scatterlist page_list[]; }; struct usnic_uiom_pd *usnic_uiom_alloc_pd(void); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 9de1281f9a3b..afcc2abcf55c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -217,7 +217,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, init_attr->qp_type != IB_QPT_GSI) { dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n", init_attr->qp_type); - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } if (is_srq && !dev->dsr->caps.max_srq) { diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 13d7f66eadab..5724cbbe38b1 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -327,7 +327,7 @@ void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) if (cq->ip) kref_put(&cq->ip->ref, rvt_release_mmap_info); else - vfree(cq->queue); + vfree(cq->kqueue); } /** diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7858d499db03..0e1b291d2cec 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1220,7 +1220,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, default: /* Don't support raw QPs */ - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } init_attr->cap.max_inline_data = 0; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 986265ad6e79..72b031ab7092 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -284,12 +284,6 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num, &gid->global.interface_id); } -static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext - *ibucontext) -{ - return container_of(ibucontext, struct rvt_ucontext, ibucontext); -} - /** * rvt_alloc_ucontext - Allocate a user context * @uctx: Verbs context diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 0946a301a5c5..4afdd2e20883 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -103,6 +103,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN; rxe->attr.max_pkeys = RXE_MAX_PKEYS; rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; + addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid, + rxe->ndev->dev_addr); rxe->max_ucontext = RXE_MAX_UCONTEXT; } diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index ec21f616ac98..6c11c3aeeca6 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -590,15 +590,16 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, int err; if (mask & IB_QP_MAX_QP_RD_ATOMIC) { - int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); + int max_rd_atomic = attr->max_rd_atomic ? + roundup_pow_of_two(attr->max_rd_atomic) : 0; qp->attr.max_rd_atomic = max_rd_atomic; atomic_set(&qp->req.rd_atomic, max_rd_atomic); } if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { - int max_dest_rd_atomic = - __roundup_pow_of_two(attr->max_dest_rd_atomic); + int max_dest_rd_atomic = attr->max_dest_rd_atomic ? + roundup_pow_of_two(attr->max_dest_rd_atomic) : 0; qp->attr.max_dest_rd_atomic = max_dest_rd_atomic; diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index acd0a925481c..8ef17d617022 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -63,7 +63,7 @@ struct rxe_queue_buf { __u32 pad_2[31]; __u32 consumer_index; __u32 pad_3[31]; - __u8 data[0]; + __u8 data[]; }; struct rxe_queue { diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index c5651a96b196..559e5fd3bad8 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1769,14 +1769,23 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) return 0; } -static int siw_listen_address(struct iw_cm_id *id, int backlog, - struct sockaddr *laddr, int addr_family) +/* + * siw_create_listen - Create resources for a listener's IWCM ID @id + * + * Starts listen on the socket address id->local_addr. + * + */ +int siw_create_listen(struct iw_cm_id *id, int backlog) { struct socket *s; struct siw_cep *cep = NULL; struct siw_device *sdev = to_siw_dev(id->device); + int addr_family = id->local_addr.ss_family; int rv = 0, s_val; + if (addr_family != AF_INET && addr_family != AF_INET6) + return -EAFNOSUPPORT; + rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s); if (rv < 0) return rv; @@ -1791,9 +1800,25 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, siw_dbg(id->device, "setsockopt error: %d\n", rv); goto error; } - rv = s->ops->bind(s, laddr, addr_family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)); + if (addr_family == AF_INET) { + struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); + + /* For wildcard addr, limit binding to current device only */ + if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) + s->sk->sk_bound_dev_if = sdev->netdev->ifindex; + + rv = s->ops->bind(s, (struct sockaddr *)laddr, + sizeof(struct sockaddr_in)); + } else { + struct sockaddr_in6 *laddr = &to_sockaddr_in6(id->local_addr); + + /* For wildcard addr, limit binding to current device only */ + if (ipv6_addr_any(&laddr->sin6_addr)) + s->sk->sk_bound_dev_if = sdev->netdev->ifindex; + + rv = s->ops->bind(s, (struct sockaddr *)laddr, + sizeof(struct sockaddr_in6)); + } if (rv) { siw_dbg(id->device, "socket bind error: %d\n", rv); goto error; @@ -1852,7 +1877,7 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); cep->state = SIW_EPSTATE_LISTENING; - siw_dbg(id->device, "Listen at laddr %pISp\n", laddr); + siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr); return 0; @@ -1910,106 +1935,6 @@ static void siw_drop_listeners(struct iw_cm_id *id) } } -/* - * siw_create_listen - Create resources for a listener's IWCM ID @id - * - * Listens on the socket address id->local_addr. - * - * If the listener's @id provides a specific local IP address, at most one - * listening socket is created and associated with @id. - * - * If the listener's @id provides the wildcard (zero) local IP address, - * a separate listen is performed for each local IP address of the device - * by creating a listening socket and binding to that local IP address. - * - */ -int siw_create_listen(struct iw_cm_id *id, int backlog) -{ - struct net_device *dev = to_siw_dev(id->device)->netdev; - int rv = 0, listeners = 0; - - siw_dbg(id->device, "backlog %d\n", backlog); - - /* - * For each attached address of the interface, create a - * listening socket, if id->local_addr is the wildcard - * IP address or matches the IP address. - */ - if (id->local_addr.ss_family == AF_INET) { - struct in_device *in_dev = in_dev_get(dev); - struct sockaddr_in s_laddr; - const struct in_ifaddr *ifa; - - if (!in_dev) { - rv = -ENODEV; - goto out; - } - memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr)); - - siw_dbg(id->device, "laddr %pISp\n", &s_laddr); - - rtnl_lock(); - in_dev_for_each_ifa_rtnl(ifa, in_dev) { - if (ipv4_is_zeronet(s_laddr.sin_addr.s_addr) || - s_laddr.sin_addr.s_addr == ifa->ifa_address) { - s_laddr.sin_addr.s_addr = ifa->ifa_address; - - rv = siw_listen_address(id, backlog, - (struct sockaddr *)&s_laddr, - AF_INET); - if (!rv) - listeners++; - } - } - rtnl_unlock(); - in_dev_put(in_dev); - } else if (id->local_addr.ss_family == AF_INET6) { - struct inet6_dev *in6_dev = in6_dev_get(dev); - struct inet6_ifaddr *ifp; - struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr); - - if (!in6_dev) { - rv = -ENODEV; - goto out; - } - siw_dbg(id->device, "laddr %pISp\n", &s_laddr); - - rtnl_lock(); - list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { - if (ifp->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED)) - continue; - if (ipv6_addr_any(&s_laddr->sin6_addr) || - ipv6_addr_equal(&s_laddr->sin6_addr, &ifp->addr)) { - struct sockaddr_in6 bind_addr = { - .sin6_family = AF_INET6, - .sin6_port = s_laddr->sin6_port, - .sin6_flowinfo = 0, - .sin6_addr = ifp->addr, - .sin6_scope_id = dev->ifindex }; - - rv = siw_listen_address(id, backlog, - (struct sockaddr *)&bind_addr, - AF_INET6); - if (!rv) - listeners++; - } - } - rtnl_unlock(); - in6_dev_put(in6_dev); - } else { - rv = -EAFNOSUPPORT; - } -out: - if (listeners) - rv = 0; - else if (!rv) - rv = -EINVAL; - - siw_dbg(id->device, "%s\n", rv ? "FAIL" : "OK"); - - return rv; -} - int siw_destroy_listen(struct iw_cm_id *id) { if (!id->provider_data) { diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 96ed349c0939..5cd40fb9e20c 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -388,6 +388,9 @@ static struct siw_device *siw_device_create(struct net_device *netdev) { .max_segment_size = SZ_2G }; base_dev->num_comp_vectors = num_possible_cpus(); + xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); + xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1); + ib_set_device_ops(base_dev, &siw_device_ops); rv = ib_device_set_netdev(base_dev, netdev, 1); if (rv) @@ -415,9 +418,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; sdev->attrs.max_srq_sge = SIW_MAX_SGE; - xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); - xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1); - INIT_LIST_HEAD(&sdev->cep_list); INIT_LIST_HEAD(&sdev->qp_list); diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 9ccce2909ac4..650520244ed0 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -332,7 +332,7 @@ static struct siw_wqe *siw_rqe_get(struct siw_qp *qp) struct siw_srq *srq; struct siw_wqe *wqe = NULL; bool srq_event = false; - unsigned long flags; + unsigned long uninitialized_var(flags); srq = qp->srq; if (srq) { diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 07e30138aaa1..aeb842bc7a1e 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -165,15 +165,16 @@ int siw_query_port(struct ib_device *base_dev, u8 port, struct ib_port_attr *attr) { struct siw_device *sdev = to_siw_dev(base_dev); + int rv; memset(attr, 0, sizeof(*attr)); - attr->active_mtu = attr->max_mtu; - attr->active_speed = 2; - attr->active_width = 2; + rv = ib_get_eth_speed(base_dev, port, &attr->active_speed, + &attr->active_width); attr->gid_tbl_len = 1; attr->max_msg_sz = -1; attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); + attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); attr->phys_state = sdev->state == IB_PORT_ACTIVE ? IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; attr->pkey_tbl_len = 1; @@ -192,7 +193,7 @@ int siw_query_port(struct ib_device *base_dev, u8 port, * attr->subnet_timeout = 0; * attr->init_type_repy = 0; */ - return 0; + return rv; } int siw_get_port_immutable(struct ib_device *base_dev, u8 port, @@ -322,7 +323,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, } if (attrs->qp_type != IB_QPT_RC) { siw_dbg(base_dev, "only RC QP's supported\n"); - rv = -EINVAL; + rv = -EOPNOTSUPP; goto err_out; } if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) || diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 2aa3457a30ce..e188a95984b5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -838,6 +838,4 @@ extern int ipoib_debug_level; #define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) -extern const char ipoib_driver_version[]; - #endif /* _IPOIB_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 63e4f9d15fd9..67a21fdf5367 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -68,9 +68,6 @@ static void ipoib_get_drvinfo(struct net_device *netdev, strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent), sizeof(drvinfo->bus_info)); - strlcpy(drvinfo->version, ipoib_driver_version, - sizeof(drvinfo->version)); - strlcpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver)); } @@ -213,6 +210,8 @@ static int ipoib_get_link_ksettings(struct net_device *netdev, } static const struct ethtool_ops ipoib_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | + ETHTOOL_COALESCE_RX_MAX_FRAMES, .get_link_ksettings = ipoib_get_link_ksettings, .get_drvinfo = ipoib_get_drvinfo, .get_coalesce = ipoib_get_coalesce, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 4a0d3a9e72e1..81b8227214f1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -52,10 +52,6 @@ #include <linux/inetdevice.h> #include <rdma/ib_cache.h> -#define DRV_VERSION "1.0.0" - -const char ipoib_driver_version[] = DRV_VERSION; - MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 7a8f24de3631..999ef7cdd05e 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -292,12 +292,27 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, { struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir]; + struct iser_fr_desc *desc; + struct ib_mr_status mr_status; - if (!reg->mem_h) + desc = reg->mem_h; + if (!desc) return; - device->reg_ops->reg_desc_put(&iser_task->iser_conn->ib_conn, - reg->mem_h); + /* + * The signature MR cannot be invalidated and reused without checking. + * libiscsi calls the check_protection transport handler only if + * SCSI-Response is received. And the signature MR is not checked if + * the task is completed for some other reason like a timeout or error + * handling. That's why we must check the signature MR here before + * putting it to the free pool. + */ + if (unlikely(desc->sig_protected)) { + desc->sig_protected = false; + ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS, + &mr_status); + } + device->reg_ops->reg_desc_put(&iser_task->iser_conn->ib_conn, desc); reg->mem_h = NULL; } diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h index 4480092c68e0..d324312a373c 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h @@ -239,7 +239,7 @@ struct opa_veswport_mactable_entry { * @offset: mac table starting offset * @num_entries: Number of entries to get or set * @mac_tbl_digest: mac table digest - * @tbl_entries[]: Array of table entries + * @tbl_entries: Array of table entries * * The EM sends down this structure in a MAD indicating * the starting offset in the forwarding table that this @@ -258,7 +258,7 @@ struct opa_veswport_mactable { __be16 offset; __be16 num_entries; __be32 mac_tbl_digest; - struct opa_veswport_mactable_entry tbl_entries[0]; + struct opa_veswport_mactable_entry tbl_entries[]; } __packed; /** @@ -440,7 +440,7 @@ struct opa_veswport_iface_macs { __be16 num_macs_in_msg; __be16 tot_macs_in_lst; __be16 gen_count; - struct opa_vnic_iface_mac_entry entry[0]; + struct opa_vnic_iface_mac_entry entry[]; } __packed; /** diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c index 8ad7da989a0e..42d557dff19d 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c @@ -125,8 +125,6 @@ static void vnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver)); - strlcpy(drvinfo->version, opa_vnic_driver_version, - sizeof(drvinfo->version)); strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent), sizeof(drvinfo->bus_info)); } diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h index 6dbc08e1a6a6..dd942dd642bd 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h @@ -292,7 +292,6 @@ struct opa_vnic_mac_tbl_node { hlist_for_each_entry(obj, &name[bkt], member) extern char opa_vnic_driver_name[]; -extern const char opa_vnic_driver_version[]; struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev, u8 port_num, u8 vport_num); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index be5befd92d16..6e8d650c17c7 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -59,9 +59,7 @@ #include "opa_vnic_internal.h" -#define DRV_VERSION "1.0" char opa_vnic_driver_name[] = "opa_vnic"; -const char opa_vnic_driver_version[] = DRV_VERSION; /* * The trap service level is kept in bits 3 to 7 in the trap_sl_rsvd @@ -1041,9 +1039,6 @@ static int __init opa_vnic_init(void) { int rc; - pr_info("OPA Virtual Network Driver - v%s\n", - opa_vnic_driver_version); - rc = ib_register_client(&opa_vnic_client); if (rc) pr_err("VNIC driver register failed %d\n", rc); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 5359ece561ca..6fabcc2faf1f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -309,7 +309,7 @@ struct srp_fr_pool { int max_page_list_len; spinlock_t lock; struct list_head free_list; - struct srp_fr_desc desc[0]; + struct srp_fr_desc desc[]; }; /** |