diff options
author | Borislav Petkov <bp@suse.de> | 2020-12-01 18:01:27 +0100 |
---|---|---|
committer | Borislav Petkov <bp@suse.de> | 2020-12-01 18:01:27 +0100 |
commit | 87314fb181f9042a226d721ab4a5579ddfca139c (patch) | |
tree | b663e58cfe19ecf5003d4ea6e8048f6ae1925e9f /drivers/nvme | |
parent | 2002d2951398317d0f46e64ae6d8dd58ed541c6d (diff) | |
parent | b65054597872ce3aefbc6a666385eabdf9e288da (diff) | |
download | linux-87314fb181f9042a226d721ab4a5579ddfca139c.tar.bz2 |
Merge tag 'v5.10-rc6' into x86/cache
Merge -rc6 tag to pick up dependent changes.
Signed-off-by: Borislav Petkov <bp@suse.de>
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/core.c | 37 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 270 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 7 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 38 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 26 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 16 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 4 | ||||
-rw-r--r-- | drivers/nvme/target/trace.h | 21 |
8 files changed, 195 insertions, 224 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 95ef4943d8bd..9a270e49df17 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2060,8 +2060,6 @@ static void nvme_update_disk_info(struct gendisk *disk, if (id->nsattr & NVME_NS_ATTR_RO) set_disk_ro(disk, true); - else - set_disk_ro(disk, false); } static inline bool nvme_first_scan(struct gendisk *disk) @@ -2125,7 +2123,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (blk_queue_is_zoned(ns->queue)) { ret = nvme_revalidate_zones(ns); - if (ret) + if (ret && !nvme_first_scan(ns->disk)) return ret; } @@ -2931,7 +2929,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, struct nvme_effects_log **log) { - struct nvme_cel *cel = xa_load(&ctrl->cels, csi); + struct nvme_effects_log *cel = xa_load(&ctrl->cels, csi); int ret; if (cel) @@ -2942,16 +2940,15 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, return -ENOMEM; ret = nvme_get_log(ctrl, 0x00, NVME_LOG_CMD_EFFECTS, 0, csi, - &cel->log, sizeof(cel->log), 0); + cel, sizeof(*cel), 0); if (ret) { kfree(cel); return ret; } - cel->csi = csi; - xa_store(&ctrl->cels, cel->csi, cel, GFP_KERNEL); + xa_store(&ctrl->cels, csi, cel, GFP_KERNEL); out: - *log = &cel->log; + *log = cel; return 0; } @@ -4376,6 +4373,19 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_uninit_ctrl); +static void nvme_free_cels(struct nvme_ctrl *ctrl) +{ + struct nvme_effects_log *cel; + unsigned long i; + + xa_for_each (&ctrl->cels, i, cel) { + xa_erase(&ctrl->cels, i); + kfree(cel); + } + + xa_destroy(&ctrl->cels); +} + static void nvme_free_ctrl(struct device *dev) { struct nvme_ctrl *ctrl = @@ -4385,8 +4395,7 @@ static void nvme_free_ctrl(struct device *dev) if (!subsys || ctrl->instance != subsys->instance) ida_simple_remove(&nvme_instance_ida, ctrl->instance); - xa_destroy(&ctrl->cels); - + nvme_free_cels(ctrl); nvme_mpath_uninit(ctrl); __free_page(ctrl->discard_page); @@ -4582,8 +4591,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_start_queues); - -void nvme_sync_queues(struct nvme_ctrl *ctrl) +void nvme_sync_io_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; @@ -4591,7 +4599,12 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl) list_for_each_entry(ns, &ctrl->namespaces, list) blk_sync_queue(ns->queue); up_read(&ctrl->namespaces_rwsem); +} +EXPORT_SYMBOL_GPL(nvme_sync_io_queues); +void nvme_sync_queues(struct nvme_ctrl *ctrl) +{ + nvme_sync_io_queues(ctrl); if (ctrl->admin_q) blk_sync_queue(ctrl->admin_q); } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 3c002bdcace3..f4c246462658 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -146,7 +146,8 @@ struct nvme_fc_rport { /* fc_ctrl flags values - specified as bit positions */ #define ASSOC_ACTIVE 0 -#define FCCTRL_TERMIO 1 +#define ASSOC_FAILED 1 +#define FCCTRL_TERMIO 2 struct nvme_fc_ctrl { spinlock_t lock; @@ -157,7 +158,6 @@ struct nvme_fc_ctrl { u32 cnum; bool ioq_live; - atomic_t err_work_active; u64 association_id; struct nvmefc_ls_rcv_op *rcv_disconn; @@ -167,7 +167,6 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set tag_set; struct delayed_work connect_work; - struct work_struct err_work; struct kref ref; unsigned long flags; @@ -2414,24 +2413,97 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) nvme_fc_ctrl_put(ctrl); } +/* + * This routine is used by the transport when it needs to find active + * io on a queue that is to be terminated. The transport uses + * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke + * this routine to kill them on a 1 by 1 basis. + * + * As FC allocates FC exchange for each io, the transport must contact + * the LLDD to terminate the exchange, thus releasing the FC exchange. + * After terminating the exchange the LLDD will call the transport's + * normal io done path for the request, but it will have an aborted + * status. The done path will return the io request back to the block + * layer with an error status. + */ +static bool +nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +{ + struct nvme_ctrl *nctrl = data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); + + __nvme_fc_abort_op(ctrl, op); + return true; +} + +/* + * This routine runs through all outstanding commands on the association + * and aborts them. This routine is typically be called by the + * delete_association routine. It is also called due to an error during + * reconnect. In that scenario, it is most likely a command that initializes + * the controller, including fabric Connect commands on io queues, that + * may have timed out or failed thus the io must be killed for the connect + * thread to see the error. + */ static void -nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) +__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) { - int active; + /* + * If io queues are present, stop them and terminate all outstanding + * ios on them. As FC allocates FC exchange for each io, the + * transport must contact the LLDD to terminate the exchange, + * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() + * to tell us what io's are busy and invoke a transport routine + * to kill them with the LLDD. After terminating the exchange + * the LLDD will call the transport's normal io done path, but it + * will have an aborted status. The done path will return the + * io requests back to the block layer as part of normal completions + * (but with error status). + */ + if (ctrl->ctrl.queue_count > 1) { + nvme_stop_queues(&ctrl->ctrl); + blk_mq_tagset_busy_iter(&ctrl->tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); + blk_mq_tagset_wait_completed_request(&ctrl->tag_set); + if (start_queues) + nvme_start_queues(&ctrl->ctrl); + } + + /* + * Other transports, which don't have link-level contexts bound + * to sqe's, would try to gracefully shutdown the controller by + * writing the registers for shutdown and polling (call + * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially + * just aborted and we will wait on those contexts, and given + * there was no indication of how live the controlelr is on the + * link, don't send more io to create more contexts for the + * shutdown. Let the controller fail via keepalive failure if + * its still present. + */ /* - * if an error (io timeout, etc) while (re)connecting, - * it's an error on creating the new association. - * Start the error recovery thread if it hasn't already - * been started. It is expected there could be multiple - * ios hitting this path before things are cleaned up. + * clean up the admin queue. Same thing as above. + */ + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); + blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); +} + +static void +nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) +{ + /* + * if an error (io timeout, etc) while (re)connecting, the remote + * port requested terminating of the association (disconnect_ls) + * or an error (timeout or abort) occurred on an io while creating + * the controller. Abort any ios on the association and let the + * create_association error path resolve things. */ if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { - active = atomic_xchg(&ctrl->err_work_active, 1); - if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) { - atomic_set(&ctrl->err_work_active, 0); - WARN_ON(1); - } + __nvme_fc_abort_outstanding_ios(ctrl, true); + set_bit(ASSOC_FAILED, &ctrl->flags); return; } @@ -2745,30 +2817,6 @@ nvme_fc_complete_rq(struct request *rq) nvme_fc_ctrl_put(ctrl); } -/* - * This routine is used by the transport when it needs to find active - * io on a queue that is to be terminated. The transport uses - * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke - * this routine to kill them on a 1 by 1 basis. - * - * As FC allocates FC exchange for each io, the transport must contact - * the LLDD to terminate the exchange, thus releasing the FC exchange. - * After terminating the exchange the LLDD will call the transport's - * normal io done path for the request, but it will have an aborted - * status. The done path will return the io request back to the block - * layer with an error status. - */ -static bool -nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) -{ - struct nvme_ctrl *nctrl = data; - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); - - __nvme_fc_abort_op(ctrl, op); - return true; -} - static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, @@ -2988,6 +3036,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ctrl->cnum, ctrl->lport->localport.port_name, ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn); + clear_bit(ASSOC_FAILED, &ctrl->flags); + /* * Create the admin queue */ @@ -3016,7 +3066,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) */ ret = nvme_enable_ctrl(&ctrl->ctrl); - if (ret) + if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) goto out_disconnect_admin_queue; ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments; @@ -3026,7 +3076,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); ret = nvme_init_identify(&ctrl->ctrl); - if (ret) + if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) goto out_disconnect_admin_queue; /* sanity checks */ @@ -3071,9 +3121,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ret = nvme_fc_create_io_queues(ctrl); else ret = nvme_fc_recreate_io_queues(ctrl); - if (ret) - goto out_term_aen_ops; } + if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) + goto out_term_aen_ops; changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -3108,60 +3158,6 @@ out_free_queue: /* - * This routine runs through all outstanding commands on the association - * and aborts them. This routine is typically be called by the - * delete_association routine. It is also called due to an error during - * reconnect. In that scenario, it is most likely a command that initializes - * the controller, including fabric Connect commands on io queues, that - * may have timed out or failed thus the io must be killed for the connect - * thread to see the error. - */ -static void -__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) -{ - /* - * If io queues are present, stop them and terminate all outstanding - * ios on them. As FC allocates FC exchange for each io, the - * transport must contact the LLDD to terminate the exchange, - * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() - * to tell us what io's are busy and invoke a transport routine - * to kill them with the LLDD. After terminating the exchange - * the LLDD will call the transport's normal io done path, but it - * will have an aborted status. The done path will return the - * io requests back to the block layer as part of normal completions - * (but with error status). - */ - if (ctrl->ctrl.queue_count > 1) { - nvme_stop_queues(&ctrl->ctrl); - blk_mq_tagset_busy_iter(&ctrl->tag_set, - nvme_fc_terminate_exchange, &ctrl->ctrl); - blk_mq_tagset_wait_completed_request(&ctrl->tag_set); - if (start_queues) - nvme_start_queues(&ctrl->ctrl); - } - - /* - * Other transports, which don't have link-level contexts bound - * to sqe's, would try to gracefully shutdown the controller by - * writing the registers for shutdown and polling (call - * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially - * just aborted and we will wait on those contexts, and given - * there was no indication of how live the controlelr is on the - * link, don't send more io to create more contexts for the - * shutdown. Let the controller fail via keepalive failure if - * its still present. - */ - - /* - * clean up the admin queue. Same thing as above. - */ - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); - blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, - nvme_fc_terminate_exchange, &ctrl->ctrl); - blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); -} - -/* * This routine stops operation of the controller on the host side. * On the host os stack side: Admin and IO queues are stopped, * outstanding ios on them terminated via FC ABTS. @@ -3237,7 +3233,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->connect_work); /* * kill the association on the link side. this will block @@ -3292,78 +3287,34 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) } static void -__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl) +nvme_fc_reset_ctrl_work(struct work_struct *work) { - /* - * if state is CONNECTING - the error occurred as part of a - * reconnect attempt. Abort any ios on the association and - * let the create_association error paths resolve things. - */ - if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { - __nvme_fc_abort_outstanding_ios(ctrl, true); - return; - } - - /* - * For any other state, kill the association. As this routine - * is a common io abort routine for resetting and such, after - * the association is terminated, ensure that the state is set - * to CONNECTING. - */ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); - nvme_stop_keep_alive(&ctrl->ctrl); + nvme_stop_ctrl(&ctrl->ctrl); /* will block will waiting for io to terminate */ nvme_fc_delete_association(ctrl); - if (ctrl->ctrl.state != NVME_CTRL_CONNECTING && - !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) dev_err(ctrl->ctrl.device, "NVME-FC{%d}: error_recovery: Couldn't change state " "to CONNECTING\n", ctrl->cnum); -} - -static void -nvme_fc_reset_ctrl_work(struct work_struct *work) -{ - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); - int ret; - __nvme_fc_terminate_io(ctrl); - - nvme_stop_ctrl(&ctrl->ctrl); - - if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) - ret = nvme_fc_create_association(ctrl); - else - ret = -ENOTCONN; - - if (ret) - nvme_fc_reconnect_or_delete(ctrl, ret); - else - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: controller reset complete\n", - ctrl->cnum); + if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { + if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to schedule connect " + "after reset\n", ctrl->cnum); + } else { + flush_delayed_work(&ctrl->connect_work); + } + } else { + nvme_fc_reconnect_or_delete(ctrl, -ENOTCONN); + } } -static void -nvme_fc_connect_err_work(struct work_struct *work) -{ - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, err_work); - - __nvme_fc_terminate_io(ctrl); - - atomic_set(&ctrl->err_work_active, 0); - - /* - * Rescheduling the connection after recovering - * from the io error is left to the reconnect work - * item, which is what should have stalled waiting on - * the io that had the error that scheduled this work. - */ -} static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .name = "fc", @@ -3491,7 +3442,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->dev = lport->dev; ctrl->cnum = idx; ctrl->ioq_live = false; - atomic_set(&ctrl->err_work_active, 0); init_waitqueue_head(&ctrl->ioabort_wait); get_device(ctrl->dev); @@ -3499,7 +3449,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); - INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work); spin_lock_init(&ctrl->lock); /* io queue count */ @@ -3592,7 +3541,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, fail_ctrl: nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); cancel_work_sync(&ctrl->ctrl.reset_work); - cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->connect_work); ctrl->ctrl.opts = NULL; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index cc111136a981..567f7ad18a91 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -226,12 +226,6 @@ struct nvme_fault_inject { #endif }; -struct nvme_cel { - struct list_head entry; - struct nvme_effects_log log; - u8 csi; -}; - struct nvme_ctrl { bool comp_seen; enum nvme_ctrl_state state; @@ -602,6 +596,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); void nvme_kill_queues(struct nvme_ctrl *ctrl); void nvme_sync_queues(struct nvme_ctrl *ctrl); +void nvme_sync_io_queues(struct nvme_ctrl *ctrl); void nvme_unfreeze(struct nvme_ctrl *ctrl); void nvme_wait_freeze(struct nvme_ctrl *ctrl); int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index df8f3612107f..3be352403839 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -198,6 +198,7 @@ struct nvme_queue { u32 q_depth; u16 cq_vector; u16 sq_tail; + u16 last_sq_tail; u16 cq_head; u16 qid; u8 cq_phase; @@ -291,9 +292,21 @@ static void nvme_dbbuf_init(struct nvme_dev *dev, nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)]; } +static void nvme_dbbuf_free(struct nvme_queue *nvmeq) +{ + if (!nvmeq->qid) + return; + + nvmeq->dbbuf_sq_db = NULL; + nvmeq->dbbuf_cq_db = NULL; + nvmeq->dbbuf_sq_ei = NULL; + nvmeq->dbbuf_cq_ei = NULL; +} + static void nvme_dbbuf_set(struct nvme_dev *dev) { struct nvme_command c; + unsigned int i; if (!dev->dbbuf_dbs) return; @@ -307,6 +320,9 @@ static void nvme_dbbuf_set(struct nvme_dev *dev) dev_warn(dev->ctrl.device, "unable to set dbbuf\n"); /* Free memory and continue on */ nvme_dbbuf_dma_free(dev); + + for (i = 1; i <= dev->online_queues; i++) + nvme_dbbuf_free(&dev->queues[i]); } } @@ -455,11 +471,24 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) return 0; } -static inline void nvme_write_sq_db(struct nvme_queue *nvmeq) +/* + * Write sq tail if we are asked to, or if the next command would wrap. + */ +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) { + if (!write_sq) { + u16 next_tail = nvmeq->sq_tail + 1; + + if (next_tail == nvmeq->q_depth) + next_tail = 0; + if (next_tail != nvmeq->last_sq_tail) + return; + } + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) writel(nvmeq->sq_tail, nvmeq->q_db); + nvmeq->last_sq_tail = nvmeq->sq_tail; } /** @@ -476,8 +505,7 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, cmd, sizeof(*cmd)); if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; - if (write_sq) - nvme_write_sq_db(nvmeq); + nvme_write_sq_db(nvmeq, write_sq); spin_unlock(&nvmeq->sq_lock); } @@ -486,7 +514,8 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) struct nvme_queue *nvmeq = hctx->driver_data; spin_lock(&nvmeq->sq_lock); - nvme_write_sq_db(nvmeq); + if (nvmeq->sq_tail != nvmeq->last_sq_tail) + nvme_write_sq_db(nvmeq, true); spin_unlock(&nvmeq->sq_lock); } @@ -1496,6 +1525,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) struct nvme_dev *dev = nvmeq->dev; nvmeq->sq_tail = 0; + nvmeq->last_sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index aad829a2b50d..65e3d0ef36e1 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -122,7 +122,6 @@ struct nvme_rdma_ctrl { struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; - struct mutex teardown_lock; bool use_inline_data; u32 io_queues[HCTX_MAX_TYPES]; }; @@ -1010,8 +1009,8 @@ out_free_io_queues: static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { - mutex_lock(&ctrl->teardown_lock); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); if (ctrl->ctrl.admin_tagset) { blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset, @@ -1021,16 +1020,15 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, if (remove) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_rdma_destroy_admin_queue(ctrl, remove); - mutex_unlock(&ctrl->teardown_lock); } static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, bool remove) { - mutex_lock(&ctrl->teardown_lock); if (ctrl->ctrl.queue_count > 1) { nvme_start_freeze(&ctrl->ctrl); nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); if (ctrl->ctrl.tagset) { blk_mq_tagset_busy_iter(ctrl->ctrl.tagset, @@ -1041,7 +1039,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, nvme_start_queues(&ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, remove); } - mutex_unlock(&ctrl->teardown_lock); } static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) @@ -1768,6 +1765,14 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) return; } + /* sanity checking for received data length */ + if (unlikely(wc->byte_len < len)) { + dev_err(queue->ctrl->ctrl.device, + "Unexpected nvme completion length(%d)\n", wc->byte_len); + nvme_rdma_error_recovery(queue->ctrl); + return; + } + ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE); /* * AEN requests are special as they don't time out and can @@ -1890,10 +1895,10 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize); } - ret = rdma_connect(queue->cm_id, ¶m); + ret = rdma_connect_locked(queue->cm_id, ¶m); if (ret) { dev_err(ctrl->ctrl.device, - "rdma_connect failed (%d).\n", ret); + "rdma_connect_locked failed (%d).\n", ret); goto out_destroy_queue_ib; } @@ -1968,16 +1973,12 @@ static void nvme_rdma_complete_timed_out(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; - struct nvme_rdma_ctrl *ctrl = queue->ctrl; - /* fence other contexts that may complete the command */ - mutex_lock(&ctrl->teardown_lock); nvme_rdma_stop_queue(queue); - if (!blk_mq_request_completed(rq)) { + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(rq); } - mutex_unlock(&ctrl->teardown_lock); } static enum blk_eh_timer_return @@ -2312,7 +2313,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, return ERR_PTR(-ENOMEM); ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - mutex_init(&ctrl->teardown_lock); if (!(opts->mask & NVMF_OPT_TRSVCID)) { opts->trsvcid = diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index d6a3e1487354..c0c33320fe65 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -124,7 +124,6 @@ struct nvme_tcp_ctrl { struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; - struct mutex teardown_lock; struct work_struct err_work; struct delayed_work connect_work; struct nvme_tcp_request async_req; @@ -1886,8 +1885,8 @@ out_free_queue: static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove) { - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); blk_mq_quiesce_queue(ctrl->admin_q); + blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); if (ctrl->admin_tagset) { blk_mq_tagset_busy_iter(ctrl->admin_tagset, @@ -1897,18 +1896,17 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, if (remove) blk_mq_unquiesce_queue(ctrl->admin_q); nvme_tcp_destroy_admin_queue(ctrl, remove); - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); } static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, bool remove) { - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); if (ctrl->queue_count <= 1) - goto out; + return; blk_mq_quiesce_queue(ctrl->admin_q); nvme_start_freeze(ctrl); nvme_stop_queues(ctrl); + nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); if (ctrl->tagset) { blk_mq_tagset_busy_iter(ctrl->tagset, @@ -1918,8 +1916,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, if (remove) nvme_start_queues(ctrl); nvme_tcp_destroy_io_queues(ctrl, remove); -out: - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); } static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) @@ -2171,14 +2167,11 @@ static void nvme_tcp_complete_timed_out(struct request *rq) struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; - /* fence other contexts that may complete the command */ - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); - if (!blk_mq_request_completed(rq)) { + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(rq); } - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); } static enum blk_eh_timer_return @@ -2455,7 +2448,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, nvme_tcp_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work); - mutex_init(&ctrl->teardown_lock); if (!(opts->mask & NVMF_OPT_TRSVCID)) { opts->trsvcid = diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index aafcbc424b7a..957b39a82431 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -907,8 +907,6 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->error_loc = NVMET_NO_ERROR_LOC; req->error_slba = 0; - trace_nvmet_req_init(req, req->cmd); - /* no support for fused commands yet */ if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) { req->error_loc = offsetof(struct nvme_common_command, flags); @@ -938,6 +936,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, if (status) goto fail; + trace_nvmet_req_init(req, req->cmd); + if (unlikely(!percpu_ref_tryget_live(&sq->ref))) { status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; goto fail; diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h index 0458046d6501..c14e3249a14d 100644 --- a/drivers/nvme/target/trace.h +++ b/drivers/nvme/target/trace.h @@ -46,19 +46,12 @@ static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req) return req->sq->ctrl; } -static inline void __assign_disk_name(char *name, struct nvmet_req *req, - bool init) +static inline void __assign_req_name(char *name, struct nvmet_req *req) { - struct nvmet_ctrl *ctrl = nvmet_req_to_ctrl(req); - struct nvmet_ns *ns; - - if ((init && req->sq->qid) || (!init && req->cq->qid)) { - ns = nvmet_find_namespace(ctrl, req->cmd->rw.nsid); - strncpy(name, ns->device_path, DISK_NAME_LEN); - return; - } - - memset(name, 0, DISK_NAME_LEN); + if (req->ns) + strncpy(name, req->ns->device_path, DISK_NAME_LEN); + else + memset(name, 0, DISK_NAME_LEN); } #endif @@ -81,7 +74,7 @@ TRACE_EVENT(nvmet_req_init, TP_fast_assign( __entry->cmd = cmd; __entry->ctrl = nvmet_req_to_ctrl(req); - __assign_disk_name(__entry->disk, req, true); + __assign_req_name(__entry->disk, req); __entry->qid = req->sq->qid; __entry->cid = cmd->common.command_id; __entry->opcode = cmd->common.opcode; @@ -121,7 +114,7 @@ TRACE_EVENT(nvmet_req_complete, __entry->cid = req->cqe->command_id; __entry->result = le64_to_cpu(req->cqe->result.u64); __entry->status = le16_to_cpu(req->cqe->status) >> 1; - __assign_disk_name(__entry->disk, req, false); + __assign_req_name(__entry->disk, req); ), TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x", __print_ctrl_name(__entry->ctrl), |