From 0544f5494a03b8846db74e02be5685d1f32b06c9 Mon Sep 17 00:00:00 2001 From: Marta Rybczynska Date: Mon, 10 Apr 2017 17:12:34 +0200 Subject: nvme-rdma: support devices with queue size < 32 In the case of small NVMe-oF queue size (<32) we may enter a deadlock caused by the fact that the IB completions aren't sent waiting for 32 and the send queue will fill up. The error is seen as (using mlx5): [ 2048.693355] mlx5_0:mlx5_ib_post_send:3765:(pid 7273): [ 2048.693360] nvme nvme1: nvme_rdma_post_send failed with error code -12 This patch changes the way the signaling is done so that it depends on the queue depth now. The magic define has been removed completely. Cc: stable@vger.kernel.org Signed-off-by: Marta Rybczynska Signed-off-by: Samuel Jones Acked-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index dd1c6deef82f..e2c18f3d9dcf 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1038,6 +1038,19 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) nvme_rdma_wr_error(cq, wc, "SEND"); } +static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) +{ + int sig_limit; + + /* + * We signal completion every queue depth/2 and also handle the + * degenerated case of a device with queue_depth=1, where we + * would need to signal every message. + */ + sig_limit = max(queue->queue_size / 2, 1); + return (++queue->sig_count % sig_limit) == 0; +} + static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, struct ib_send_wr *first, bool flush) @@ -1065,9 +1078,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, * Would have been way to obvious to handle this in hardware or * at least the RDMA stack.. * - * This messy and racy code sniplet is copy and pasted from the iSER - * initiator, and the magic '32' comes from there as well. - * * Always signal the flushes. The magic request used for the flush * sequencer is not allocated in our driver's tagset and it's * triggered to be freed by blk_cleanup_queue(). So we need to @@ -1075,7 +1085,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, * embedded in request's payload, is not freed when __ib_process_cq() * calls wr_cqe->done(). */ - if ((++queue->sig_count % 32) == 0 || flush) + if (nvme_rdma_queue_sig_limit(queue) || flush) wr.send_flags |= IB_SEND_SIGNALED; if (first) -- cgit v1.2.3 From 806f026f9b901eaf1a6baeb48b5da18d6a4f818e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 22 May 2017 23:05:03 +0800 Subject: nvme: use blk_mq_start_hw_queues() in nvme_kill_queues() Inside nvme_kill_queues(), we have to start hw queues for draining requests in sw queues, .dispatch list and requeue list, so use blk_mq_start_hw_queues() instead of blk_mq_start_stopped_hw_queues() which only run queues if queues are stopped, but the queues may have been started already, for example nvme_start_queues() is called in reset work function. blk_mq_start_hw_queues() run hw queues in current context, instead of running asynchronously like before. Given nvme_kill_queues() is run from either remove context or reset worker context, both are fine to run hw queue directly. And the mutex of namespaces_mutex isn't a problem too becasue nvme_start_freeze() runs hw queue in this way already. Cc: stable@vger.kernel.org Reported-by: Zhang Yi Reviewed-by: Keith Busch Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d5e0906262ea..40d5e4a9e8d7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2437,7 +2437,13 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); blk_mq_abort_requeue_list(ns->queue); - blk_mq_start_stopped_hw_queues(ns->queue, true); + + /* + * Forcibly start all queues to avoid having stuck requests. + * Note that we must ensure the queues are not stopped + * when the final removal happens. + */ + blk_mq_start_hw_queues(ns->queue); } mutex_unlock(&ctrl->namespaces_mutex); } -- cgit v1.2.3 From 986f75c876dbafed98eba7cb516c5118f155db23 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 22 May 2017 23:05:04 +0800 Subject: nvme: avoid to use blk_mq_abort_requeue_list() NVMe may add request into requeue list simply and not kick off the requeue if hw queues are stopped. Then blk_mq_abort_requeue_list() is called in both nvme_kill_queues() and nvme_ns_remove() for dealing with this issue. Unfortunately blk_mq_abort_requeue_list() is absolutely a race maker, for example, one request may be requeued during the aborting. So this patch just calls blk_mq_kick_requeue_list() in nvme_kill_queues() to handle this issue like what nvme_start_queues() does. Now all requests in requeue list when queues are stopped will be handled by blk_mq_kick_requeue_list() when queues are restarted, either in nvme_start_queues() or in nvme_kill_queues(). Cc: stable@vger.kernel.org Reported-by: Zhang Yi Reviewed-by: Keith Busch Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 40d5e4a9e8d7..04e115834702 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2098,7 +2098,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (ns->ndev) nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); - blk_mq_abort_requeue_list(ns->queue); blk_cleanup_queue(ns->queue); } @@ -2436,7 +2435,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) continue; revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); - blk_mq_abort_requeue_list(ns->queue); /* * Forcibly start all queues to avoid having stuck requests. @@ -2444,6 +2442,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) * when the final removal happens. */ blk_mq_start_hw_queues(ns->queue); + + /* draining requests in requeue list */ + blk_mq_kick_requeue_list(ns->queue); } mutex_unlock(&ctrl->namespaces_mutex); } -- cgit v1.2.3 From 7254a50a5db40ca6739ddf37e0a45e6912532b2c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 22 May 2017 23:05:05 +0800 Subject: blk-mq: remove blk_mq_abort_requeue_list() No one uses it any more, so remove it. Reviewed-by: Keith Busch Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig --- block/blk-mq.c | 19 ------------------- include/linux/blk-mq.h | 1 - 2 files changed, 20 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a69ad122ed66..f2224ffd225d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -628,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); -void blk_mq_abort_requeue_list(struct request_queue *q) -{ - unsigned long flags; - LIST_HEAD(rq_list); - - spin_lock_irqsave(&q->requeue_lock, flags); - list_splice_init(&q->requeue_list, &rq_list); - spin_unlock_irqrestore(&q->requeue_lock, flags); - - while (!list_empty(&rq_list)) { - struct request *rq; - - rq = list_first_entry(&rq_list, struct request, queuelist); - list_del_init(&rq->queuelist); - blk_mq_end_request(rq, -EIO); - } -} -EXPORT_SYMBOL(blk_mq_abort_requeue_list); - struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) { if (tag < tags->nr_tags) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c47aa248c640..fcd641032f8d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -238,7 +238,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); -void blk_mq_abort_requeue_list(struct request_queue *q); void blk_mq_complete_request(struct request *rq); bool blk_mq_queue_stopped(struct request_queue *q); -- cgit v1.2.3 From 0ce872bf8b5c4d425a41940a523ff1b8daa0b275 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:15 -0700 Subject: nvme_fc: get rid of local reconnect_delay Remove the local copy of reconnect_delay. Use the value in the controller options directly. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index dca7165fabcf..c3ab1043efbd 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -165,7 +165,6 @@ struct nvme_fc_ctrl { struct work_struct delete_work; struct work_struct reset_work; struct delayed_work connect_work; - int reconnect_delay; int connect_attempts; struct kref ref; @@ -2615,9 +2614,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->reconnect_delay); + ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, - ctrl->reconnect_delay * HZ); + ctrl->ctrl.opts->reconnect_delay * HZ); } else dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); @@ -2695,9 +2694,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->reconnect_delay); + ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, - ctrl->reconnect_delay * HZ); + ctrl->ctrl.opts->reconnect_delay * HZ); } else dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller reconnect complete\n", @@ -2755,7 +2754,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); - ctrl->reconnect_delay = opts->reconnect_delay; spin_lock_init(&ctrl->lock); /* io queue count */ -- cgit v1.2.3 From 5bbecdbc8e7ffaaf47ac1f02014bf3bedda3fd11 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:16 -0700 Subject: nvme_fc: Support ctrl_loss_tmo Sync with Sagi's recent addition of ctrl_loss_tmo in the core fabrics layer. Remove local connect limits and connect_attempts variable. Use fabrics new nr_connects variable and use of nvmf_should_reconnect() Refactor duplicate reconnect failure code. Addresses review comment by Sagi on controller reset support: http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 116 +++++++++++++++++++++---------------------------- 1 file changed, 49 insertions(+), 67 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index c3ab1043efbd..a0f05d5e966c 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -45,8 +45,6 @@ enum nvme_fc_queue_flags { #define NVMEFC_QUEUE_DELAY 3 /* ms units */ -#define NVME_FC_MAX_CONNECT_ATTEMPTS 1 - struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; struct device *dev; @@ -165,7 +163,6 @@ struct nvme_fc_ctrl { struct work_struct delete_work; struct work_struct reset_work; struct delayed_work connect_work; - int connect_attempts; struct kref ref; u32 flags; @@ -2305,7 +2302,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) int ret; bool changed; - ctrl->connect_attempts++; + ++ctrl->ctrl.opts->nr_reconnects; /* * Create the admin queue @@ -2402,7 +2399,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - ctrl->connect_attempts = 0; + ctrl->ctrl.opts->nr_reconnects = 0; kref_get(&ctrl->ctrl.kref); @@ -2545,16 +2542,22 @@ nvme_fc_delete_ctrl_work(struct work_struct *work) nvme_put_ctrl(&ctrl->ctrl); } -static int -__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) +static bool +__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl) { if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return -EBUSY; + return true; if (!queue_work(nvme_fc_wq, &ctrl->delete_work)) - return -EBUSY; + return true; - return 0; + return false; +} + +static int +__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) +{ + return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0; } /* @@ -2579,6 +2582,35 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) return ret; } +static void +nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) +{ + /* If we are resetting/deleting then do nothing */ + if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { + WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || + ctrl->ctrl.state == NVME_CTRL_LIVE); + return; + } + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", + ctrl->cnum, status); + + if (nvmf_should_reconnect(&ctrl->ctrl)) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->ctrl.opts->reconnect_delay * HZ); + } else { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. Removing controller\n", + ctrl->cnum, ctrl->ctrl.opts->nr_reconnects); + WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); + } +} + static void nvme_fc_reset_ctrl_work(struct work_struct *work) { @@ -2590,34 +2622,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) nvme_fc_delete_association(ctrl); ret = nvme_fc_create_association(ctrl); - if (ret) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", - ctrl->cnum, ret); - if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Max reconnect attempts (%d) " - "reached. Removing controller\n", - ctrl->cnum, ctrl->connect_attempts); - - if (!nvme_change_ctrl_state(&ctrl->ctrl, - NVME_CTRL_DELETING)) { - dev_err(ctrl->ctrl.device, - "NVME-FC{%d}: failed to change state " - "to DELETING\n", ctrl->cnum); - return; - } - - WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); - return; - } - - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); - queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, - ctrl->ctrl.opts->reconnect_delay * HZ); - } else + if (ret) + nvme_fc_reconnect_or_delete(ctrl, ret); + else dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); } @@ -2670,34 +2677,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work) struct nvme_fc_ctrl, connect_work); ret = nvme_fc_create_association(ctrl); - if (ret) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Reconnect attempt failed (%d)\n", - ctrl->cnum, ret); - if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Max reconnect attempts (%d) " - "reached. Removing controller\n", - ctrl->cnum, ctrl->connect_attempts); - - if (!nvme_change_ctrl_state(&ctrl->ctrl, - NVME_CTRL_DELETING)) { - dev_err(ctrl->ctrl.device, - "NVME-FC{%d}: failed to change state " - "to DELETING\n", ctrl->cnum); - return; - } - - WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); - return; - } - - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", - ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); - queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, - ctrl->ctrl.opts->reconnect_delay * HZ); - } else + if (ret) + nvme_fc_reconnect_or_delete(ctrl, ret); + else dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller reconnect complete\n", ctrl->cnum); @@ -2969,7 +2951,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) static struct nvmf_transport_ops nvme_fc_transport = { .name = "fc", .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, - .allowed_opts = NVMF_OPT_RECONNECT_DELAY, + .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, .create_ctrl = nvme_fc_create_ctrl, }; -- cgit v1.2.3 From a5321aa5efea05ae748dc5b3e8053584213325ca Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:18 -0700 Subject: nvme_fc: revise comment on teardown Per the recommendation by Sagi on: http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html An extra reference was pointed out. There's no issue with the references, but rather a literal interpretation of what the comment is saying. Reword the comment to avoid confusion. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a0f05d5e966c..0b7f7dd2779a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2532,10 +2532,10 @@ nvme_fc_delete_ctrl_work(struct work_struct *work) /* * tear down the controller - * This will result in the last reference on the nvme ctrl to - * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback. - * From there, the transport will tear down it's logical queues and - * association. + * After the last reference on the nvme ctrl is removed, + * the transport nvme_fc_nvme_ctrl_freed() callback will be + * invoked. From there, the transport will tear down it's + * logical queues and association. */ nvme_uninit_ctrl(&ctrl->ctrl); -- cgit v1.2.3 From 589ff7753bb54edd3ee4a9399ccc3ac48d9b22d7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:19 -0700 Subject: nvme_fc: set logging level on resets/deletes Per the review by Sagi on: http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html Looked at existing warn vs info vs err dev_xxx levels for the messages printed on reconnects and deletes: - Resets due to error and resets transitioned to deletes are dev_warn - Other reset/disconnect messages are dev_info - Removed chatty io queue related messages Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0b7f7dd2779a..e4817f9f4323 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1747,7 +1747,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport association error detected: %s\n", ctrl->cnum, errmsg); - dev_info(ctrl->ctrl.device, + dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: resetting controller\n", ctrl->cnum); /* stop the queues on error, cleanup is in reset thread */ @@ -2191,9 +2191,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) if (!opts->nr_io_queues) return 0; - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - nvme_fc_init_io_queues(ctrl); memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); @@ -2264,9 +2261,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) if (ctrl->queue_count == 1) return 0; - dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n", - opts->nr_io_queues); - nvme_fc_init_io_queues(ctrl); ret = blk_mq_reinit_tagset(&ctrl->tag_set); @@ -2592,7 +2586,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) return; } - dev_warn(ctrl->ctrl.device, + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", ctrl->cnum, status); @@ -2603,7 +2597,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, ctrl->ctrl.opts->reconnect_delay * HZ); } else { - dev_info(ctrl->ctrl.device, + dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: Max reconnect attempts (%d) " "reached. Removing controller\n", ctrl->cnum, ctrl->ctrl.opts->nr_reconnects); @@ -2638,7 +2632,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - dev_warn(ctrl->ctrl.device, + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) -- cgit v1.2.3 From e392e1f1f408fe8baf1046c970d05cbf1f0ec945 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:24 -0700 Subject: nvme_fc: correct nvme status set on abort correct nvme status set on abort. Patch that changed status to being actual nvme status crossed in the night with the patch that added abort values. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e4817f9f4323..775869c69df6 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1372,9 +1372,9 @@ done: complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); if (!complete_rq) { if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { - status = cpu_to_le16(NVME_SC_ABORT_REQ); + status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); if (blk_queue_dying(rq->q)) - status |= cpu_to_le16(NVME_SC_DNR); + status |= cpu_to_le16(NVME_SC_DNR << 1); } nvme_end_request(rq, status, result); } else -- cgit v1.2.3 From 2cb657bc0242dfdca20869685bf179774ef1a6fb Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 15 May 2017 17:10:22 -0700 Subject: nvme_fc: remove extra controller reference taken on reconnect fix extra controller reference taken on reconnect by moving reference to initial controller create Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 775869c69df6..14a009e43aa5 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2395,8 +2395,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ctrl->ctrl.opts->nr_reconnects = 0; - kref_get(&ctrl->ctrl.kref); - if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); nvme_queue_scan(&ctrl->ctrl); @@ -2793,7 +2791,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = NULL; /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); /* as we're past the point where we transition to the ref * counting teardown path, if we return a bad pointer here, @@ -2809,6 +2806,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, return ERR_PTR(ret); } + kref_get(&ctrl->ctrl.kref); + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", ctrl->cnum, ctrl->ctrl.opts->subsysnqn); -- cgit v1.2.3 From 9bdcfb10f221e796c9619fe48655e0f1272f1d92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 May 2017 15:14:43 +0200 Subject: nvme-pci: consistencly use ctrl->device for logging This is what most of the code already does and gives much more useful prefixes than the device embedded in the pci_dev. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/pci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4c2ff2bb26bc..bf8bec39c017 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -263,7 +263,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev) c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr); if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) { - dev_warn(dev->dev, "unable to set dbbuf\n"); + dev_warn(dev->ctrl.device, "unable to set dbbuf\n"); /* Free memory and continue on */ nvme_dbbuf_dma_free(dev); } @@ -1394,11 +1394,11 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS, &pci_status); if (result == PCIBIOS_SUCCESSFUL) - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n", csts, pci_status); else - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n", csts, result); } @@ -1740,8 +1740,8 @@ static int nvme_pci_enable(struct nvme_dev *dev) */ if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) { dev->q_depth = 2; - dev_warn(dev->dev, "detected Apple NVMe controller, set " - "queue depth=%u to work around controller resets\n", + dev_warn(dev->ctrl.device, "detected Apple NVMe controller, " + "set queue depth=%u to work around controller resets\n", dev->q_depth); } @@ -1759,7 +1759,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) if (dev->cmbsz) { if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, &dev_attr_cmb.attr, NULL)) - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "failed to add sysfs attribute for CMB\n"); } } -- cgit v1.2.3 From d3d5b87ddde09bade512526f6df90e8c06c28230 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 May 2017 15:14:44 +0200 Subject: nvme: replace is_flags field in nvme_ctrl_ops with a flags field So that we can have more flags for transport-specific behavior. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/nvme.h | 3 ++- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/target/loop.c | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 04e115834702..228f7c73e2f1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1605,7 +1605,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); - if (ctrl->ops->is_fabrics) { + if (ctrl->ops->flags & NVME_F_FABRICS) { ctrl->icdoff = le16_to_cpu(id->icdoff); ctrl->ioccsz = le32_to_cpu(id->ioccsz); ctrl->iorcsz = le32_to_cpu(id->iorcsz); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 14a009e43aa5..5b14cbefb724 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2647,7 +2647,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .name = "fc", .module = THIS_MODULE, - .is_fabrics = true, + .flags = NVME_F_FABRICS, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 29c708ca9621..7c4b0f6636c5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -208,7 +208,8 @@ struct nvme_ns { struct nvme_ctrl_ops { const char *name; struct module *module; - bool is_fabrics; + unsigned int flags; +#define NVME_F_FABRICS (1 << 0) int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index e2c18f3d9dcf..28bd255c144d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1792,7 +1792,7 @@ static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl) static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .name = "rdma", .module = THIS_MODULE, - .is_fabrics = true, + .flags = NVME_F_FABRICS, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index feb497134aee..e503cfff0337 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -558,7 +558,7 @@ static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl) static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { .name = "loop", .module = THIS_MODULE, - .is_fabrics = true, + .flags = NVME_F_FABRICS, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, -- cgit v1.2.3 From c81bfba9983fc44210d3eb5971e0faac597bf50d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 May 2017 15:14:45 +0200 Subject: nvme: only setup block integrity if supported by the driver Currently only the PCIe driver supports metadata, so we should not claim integrity support for the other drivers. This prevents nasty crashes with targets that advertise metadata support on fabrics. Also use the opportunity to factor out some code into a separate helper that isn't even compiled if CONFIG_BLK_DEV_INTEGRITY is disabled. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/core.c | 50 ++++++++++++++++++++++++++++++------------------ drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pci.c | 1 + 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 228f7c73e2f1..a60926410438 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -925,6 +925,29 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY +static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id, + u16 bs) +{ + struct nvme_ns *ns = disk->private_data; + u16 old_ms = ns->ms; + u8 pi_type = 0; + + ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); + ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); + + /* PI implementation requires metadata equal t10 pi tuple size */ + if (ns->ms == sizeof(struct t10_pi_tuple)) + pi_type = id->dps & NVME_NS_DPS_PI_MASK; + + if (blk_get_integrity(disk) && + (ns->pi_type != pi_type || ns->ms != old_ms || + bs != queue_logical_block_size(disk->queue) || + (ns->ms && ns->ext))) + blk_integrity_unregister(disk); + + ns->pi_type = pi_type; +} + static void nvme_init_integrity(struct nvme_ns *ns) { struct blk_integrity integrity; @@ -951,6 +974,10 @@ static void nvme_init_integrity(struct nvme_ns *ns) blk_queue_max_integrity_segments(ns->queue, 1); } #else +static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id, + u16 bs) +{ +} static void nvme_init_integrity(struct nvme_ns *ns) { } @@ -997,37 +1024,22 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; - u8 lbaf, pi_type; - u16 old_ms; - unsigned short bs; - - old_ms = ns->ms; - lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; - ns->lba_shift = id->lbaf[lbaf].ds; - ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); + u16 bs; /* * If identify namespace failed, use default 512 byte block size so * block layer can use before failing read/write for 0 capacity. */ + ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; if (ns->lba_shift == 0) ns->lba_shift = 9; bs = 1 << ns->lba_shift; - /* XXX: PI implementation requires metadata equal t10 pi tuple size */ - pi_type = ns->ms == sizeof(struct t10_pi_tuple) ? - id->dps & NVME_NS_DPS_PI_MASK : 0; blk_mq_freeze_queue(disk->queue); - if (blk_get_integrity(disk) && (ns->pi_type != pi_type || - ns->ms != old_ms || - bs != queue_logical_block_size(disk->queue) || - (ns->ms && ns->ext))) - blk_integrity_unregister(disk); - ns->pi_type = pi_type; + if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) + nvme_prep_integrity(disk, id, bs); blk_queue_logical_block_size(ns->queue, bs); - if (ns->ms && !blk_get_integrity(disk) && !ns->ext) nvme_init_integrity(ns); if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7c4b0f6636c5..9d6a070d4391 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -210,6 +210,7 @@ struct nvme_ctrl_ops { struct module *module; unsigned int flags; #define NVME_F_FABRICS (1 << 0) +#define NVME_F_METADATA_SUPPORTED (1 << 1) int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bf8bec39c017..6103b178e43a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2047,6 +2047,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .name = "pcie", .module = THIS_MODULE, + .flags = NVME_F_METADATA_SUPPORTED, .reg_read32 = nvme_pci_reg_read32, .reg_write32 = nvme_pci_reg_write32, .reg_read64 = nvme_pci_reg_read64, -- cgit v1.2.3 From 50af47d04ca530544b27affffb0722f158e2bb9c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 24 May 2017 15:06:31 -0700 Subject: nvme: Quirk APST on Intel 600P/P3100 devices They have known firmware bugs. A fix is apparently in the works -- once fixed firmware is available, someone from Intel (Hi, Keith!) can adjust the quirk accordingly. Cc: stable@vger.kernel.org # v4.11 Cc: Kai-Heng Feng Cc: Mario Limonciello Signed-off-by: Andy Lutomirski Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6103b178e43a..d52701df7245 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2294,6 +2294,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0a54), .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, + { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ -- cgit v1.2.3