diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-01-15 10:27:56 +1300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-01-15 10:27:56 +1300 |
commit | 31238e61b51f5a68bb8630ddb60ede475846d698 (patch) | |
tree | 64ed1e7160d79a345ee357d9a8445176e722e170 /drivers | |
parent | 188c901941efd43cbf21e8f4f9e9a276536b989c (diff) | |
parent | 7a509a6b07dd5a08d91f8a7e0cccb9a6438ce439 (diff) | |
download | linux-31238e61b51f5a68bb8630ddb60ede475846d698.tar.bz2 |
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block layer fixes from Jens Axboe:
"The major part is an update to the NVMe driver, fixing various issues
around surprise removal and hung controllers. Most of that is from
Keith, and parts are simple blk-mq fixes or exports/additions of minor
functions to aid this effort, and parts are changes directly to the
NVMe driver.
Apart from the above, this contains:
- Small blk-mq change from me, killing an unused member of the
hardware queue structure.
- Small fix from Ming Lei, fixing up a few drivers that didn't
properly check for ERR_PTR() returns from blk_mq_init_queue()"
* 'for-linus' of git://git.kernel.dk/linux-block:
NVMe: Fix locking on abort handling
NVMe: Start and stop h/w queues on reset
NVMe: Command abort handling fixes
NVMe: Admin queue removal handling
NVMe: Reference count admin queue usage
NVMe: Start all requests
blk-mq: End unstarted requests on a dying queue
blk-mq: Allow requests to never expire
blk-mq: Add helper to abort requeued requests
blk-mq: Let drivers cancel requeue_work
blk-mq: Export if requests were started
blk-mq: Wake tasks entering queue on dying
blk-mq: get rid of ->cmd_size in the hardware queue
block: fix checking return value of blk_mq_init_queue
block: wake up waiters when a queue is marked dying
NVMe: Fix double free irq
blk-mq: Export freeze/unfreeze functions
blk-mq: Exit queue on alloc failure
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/null_blk.c | 2 | ||||
-rw-r--r-- | drivers/block/nvme-core.c | 175 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 2 |
3 files changed, 128 insertions, 51 deletions
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index ae9f615382f6..aa2224aa7caa 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -530,7 +530,7 @@ static int null_add_dev(void) goto out_cleanup_queues; nullb->q = blk_mq_init_queue(&nullb->tag_set); - if (!nullb->q) { + if (IS_ERR(nullb->q)) { rv = -ENOMEM; goto out_cleanup_tags; } diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index b1d5d8797315..cb529e9a82dd 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -215,6 +215,7 @@ static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx, cmd->fn = handler; cmd->ctx = ctx; cmd->aborted = 0; + blk_mq_start_request(blk_mq_rq_from_pdu(cmd)); } /* Special values must be less than 0x1000 */ @@ -431,8 +432,13 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, if (unlikely(status)) { if (!(status & NVME_SC_DNR || blk_noretry_request(req)) && (jiffies - req->start_time) < req->timeout) { + unsigned long flags; + blk_mq_requeue_request(req); - blk_mq_kick_requeue_list(req->q); + spin_lock_irqsave(req->q->queue_lock, flags); + if (!blk_queue_stopped(req->q)) + blk_mq_kick_requeue_list(req->q); + spin_unlock_irqrestore(req->q->queue_lock, flags); return; } req->errors = nvme_error_status(status); @@ -664,8 +670,6 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } } - blk_mq_start_request(req); - nvme_set_info(cmd, iod, req_completion); spin_lock_irq(&nvmeq->q_lock); if (req->cmd_flags & REQ_DISCARD) @@ -835,6 +839,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev) if (IS_ERR(req)) return PTR_ERR(req); + req->cmd_flags |= REQ_NO_TIMEOUT; cmd_info = blk_mq_rq_to_pdu(req); nvme_set_info(cmd_info, req, async_req_completion); @@ -1016,14 +1021,19 @@ static void nvme_abort_req(struct request *req) struct nvme_command cmd; if (!nvmeq->qid || cmd_rq->aborted) { + unsigned long flags; + + spin_lock_irqsave(&dev_list_lock, flags); if (work_busy(&dev->reset_work)) - return; + goto out; list_del_init(&dev->node); dev_warn(&dev->pci_dev->dev, "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); + out: + spin_unlock_irqrestore(&dev_list_lock, flags); return; } @@ -1064,15 +1074,22 @@ static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx, void *ctx; nvme_completion_fn fn; struct nvme_cmd_info *cmd; - static struct nvme_completion cqe = { - .status = cpu_to_le16(NVME_SC_ABORT_REQ << 1), - }; + struct nvme_completion cqe; + + if (!blk_mq_request_started(req)) + return; cmd = blk_mq_rq_to_pdu(req); if (cmd->ctx == CMD_CTX_CANCELLED) return; + if (blk_queue_dying(req->q)) + cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); + else + cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); + + dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid); ctx = cancel_cmd_info(cmd, &fn); @@ -1084,17 +1101,29 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = cmd->nvmeq; - dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag, - nvmeq->qid); - if (nvmeq->dev->initialized) - nvme_abort_req(req); - /* * The aborted req will be completed on receiving the abort req. * We enable the timer again. If hit twice, it'll cause a device reset, * as the device then is in a faulty state. */ - return BLK_EH_RESET_TIMER; + int ret = BLK_EH_RESET_TIMER; + + dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag, + nvmeq->qid); + + spin_lock_irq(&nvmeq->q_lock); + if (!nvmeq->dev->initialized) { + /* + * Force cancelled command frees the request, which requires we + * return BLK_EH_NOT_HANDLED. + */ + nvme_cancel_queue_ios(nvmeq->hctx, req, nvmeq, reserved); + ret = BLK_EH_NOT_HANDLED; + } else + nvme_abort_req(req); + spin_unlock_irq(&nvmeq->q_lock); + + return ret; } static void nvme_free_queue(struct nvme_queue *nvmeq) @@ -1131,10 +1160,16 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) */ static int nvme_suspend_queue(struct nvme_queue *nvmeq) { - int vector = nvmeq->dev->entry[nvmeq->cq_vector].vector; + int vector; spin_lock_irq(&nvmeq->q_lock); + if (nvmeq->cq_vector == -1) { + spin_unlock_irq(&nvmeq->q_lock); + return 1; + } + vector = nvmeq->dev->entry[nvmeq->cq_vector].vector; nvmeq->dev->online_queues--; + nvmeq->cq_vector = -1; spin_unlock_irq(&nvmeq->q_lock); irq_set_affinity_hint(vector, NULL); @@ -1169,11 +1204,13 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) adapter_delete_sq(dev, qid); adapter_delete_cq(dev, qid); } + if (!qid && dev->admin_q) + blk_mq_freeze_queue_start(dev->admin_q); nvme_clear_queue(nvmeq); } static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, - int depth, int vector) + int depth) { struct device *dmadev = &dev->pci_dev->dev; struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL); @@ -1199,7 +1236,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; - nvmeq->cq_vector = vector; nvmeq->qid = qid; dev->queue_count++; dev->queues[qid] = nvmeq; @@ -1244,6 +1280,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) struct nvme_dev *dev = nvmeq->dev; int result; + nvmeq->cq_vector = qid - 1; result = adapter_alloc_cq(dev, qid, nvmeq); if (result < 0) return result; @@ -1355,6 +1392,14 @@ static struct blk_mq_ops nvme_mq_ops = { .timeout = nvme_timeout, }; +static void nvme_dev_remove_admin(struct nvme_dev *dev) +{ + if (dev->admin_q && !blk_queue_dying(dev->admin_q)) { + blk_cleanup_queue(dev->admin_q); + blk_mq_free_tag_set(&dev->admin_tagset); + } +} + static int nvme_alloc_admin_tags(struct nvme_dev *dev) { if (!dev->admin_q) { @@ -1370,21 +1415,20 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) return -ENOMEM; dev->admin_q = blk_mq_init_queue(&dev->admin_tagset); - if (!dev->admin_q) { + if (IS_ERR(dev->admin_q)) { blk_mq_free_tag_set(&dev->admin_tagset); return -ENOMEM; } - } + if (!blk_get_queue(dev->admin_q)) { + nvme_dev_remove_admin(dev); + return -ENODEV; + } + } else + blk_mq_unfreeze_queue(dev->admin_q); return 0; } -static void nvme_free_admin_tags(struct nvme_dev *dev) -{ - if (dev->admin_q) - blk_mq_free_tag_set(&dev->admin_tagset); -} - static int nvme_configure_admin_queue(struct nvme_dev *dev) { int result; @@ -1416,7 +1460,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) nvmeq = dev->queues[0]; if (!nvmeq) { - nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, 0); + nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); if (!nvmeq) return -ENOMEM; } @@ -1439,18 +1483,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) if (result) goto free_nvmeq; - result = nvme_alloc_admin_tags(dev); - if (result) - goto free_nvmeq; - + nvmeq->cq_vector = 0; result = queue_request_irq(dev, nvmeq, nvmeq->irqname); if (result) - goto free_tags; + goto free_nvmeq; return result; - free_tags: - nvme_free_admin_tags(dev); free_nvmeq: nvme_free_queues(dev, 0); return result; @@ -1944,7 +1983,7 @@ static void nvme_create_io_queues(struct nvme_dev *dev) unsigned i; for (i = dev->queue_count; i <= dev->max_qid; i++) - if (!nvme_alloc_queue(dev, i, dev->q_depth, i - 1)) + if (!nvme_alloc_queue(dev, i, dev->q_depth)) break; for (i = dev->online_queues; i <= dev->queue_count - 1; i++) @@ -2235,13 +2274,18 @@ static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev) break; if (!schedule_timeout(ADMIN_TIMEOUT) || fatal_signal_pending(current)) { + /* + * Disable the controller first since we can't trust it + * at this point, but leave the admin queue enabled + * until all queue deletion requests are flushed. + * FIXME: This may take a while if there are more h/w + * queues than admin tags. + */ set_current_state(TASK_RUNNING); - nvme_disable_ctrl(dev, readq(&dev->bar->cap)); - nvme_disable_queue(dev, 0); - - send_sig(SIGKILL, dq->worker->task, 1); + nvme_clear_queue(dev->queues[0]); flush_kthread_worker(dq->worker); + nvme_disable_queue(dev, 0); return; } } @@ -2318,7 +2362,6 @@ static void nvme_del_queue_start(struct kthread_work *work) { struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, cmdinfo.work); - allow_signal(SIGKILL); if (nvme_delete_sq(nvmeq)) nvme_del_queue_end(nvmeq); } @@ -2376,6 +2419,34 @@ static void nvme_dev_list_remove(struct nvme_dev *dev) kthread_stop(tmp); } +static void nvme_freeze_queues(struct nvme_dev *dev) +{ + struct nvme_ns *ns; + + list_for_each_entry(ns, &dev->namespaces, list) { + blk_mq_freeze_queue_start(ns->queue); + + spin_lock(ns->queue->queue_lock); + queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue); + spin_unlock(ns->queue->queue_lock); + + blk_mq_cancel_requeue_work(ns->queue); + blk_mq_stop_hw_queues(ns->queue); + } +} + +static void nvme_unfreeze_queues(struct nvme_dev *dev) +{ + struct nvme_ns *ns; + + list_for_each_entry(ns, &dev->namespaces, list) { + queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue); + blk_mq_unfreeze_queue(ns->queue); + blk_mq_start_stopped_hw_queues(ns->queue, true); + blk_mq_kick_requeue_list(ns->queue); + } +} + static void nvme_dev_shutdown(struct nvme_dev *dev) { int i; @@ -2384,8 +2455,10 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) dev->initialized = 0; nvme_dev_list_remove(dev); - if (dev->bar) + if (dev->bar) { + nvme_freeze_queues(dev); csts = readl(&dev->bar->csts); + } if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { for (i = dev->queue_count - 1; i >= 0; i--) { struct nvme_queue *nvmeq = dev->queues[i]; @@ -2400,12 +2473,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) nvme_dev_unmap(dev); } -static void nvme_dev_remove_admin(struct nvme_dev *dev) -{ - if (dev->admin_q && !blk_queue_dying(dev->admin_q)) - blk_cleanup_queue(dev->admin_q); -} - static void nvme_dev_remove(struct nvme_dev *dev) { struct nvme_ns *ns; @@ -2413,8 +2480,10 @@ static void nvme_dev_remove(struct nvme_dev *dev) list_for_each_entry(ns, &dev->namespaces, list) { if (ns->disk->flags & GENHD_FL_UP) del_gendisk(ns->disk); - if (!blk_queue_dying(ns->queue)) + if (!blk_queue_dying(ns->queue)) { + blk_mq_abort_requeue_list(ns->queue); blk_cleanup_queue(ns->queue); + } } } @@ -2495,6 +2564,7 @@ static void nvme_free_dev(struct kref *kref) nvme_free_namespaces(dev); nvme_release_instance(dev); blk_mq_free_tag_set(&dev->tagset); + blk_put_queue(dev->admin_q); kfree(dev->queues); kfree(dev->entry); kfree(dev); @@ -2591,15 +2661,20 @@ static int nvme_dev_start(struct nvme_dev *dev) } nvme_init_queue(dev->queues[0], 0); + result = nvme_alloc_admin_tags(dev); + if (result) + goto disable; result = nvme_setup_io_queues(dev); if (result) - goto disable; + goto free_tags; nvme_set_irq_hints(dev); return result; + free_tags: + nvme_dev_remove_admin(dev); disable: nvme_disable_queue(dev, 0); nvme_dev_list_remove(dev); @@ -2639,6 +2714,9 @@ static int nvme_dev_resume(struct nvme_dev *dev) dev->reset_workfn = nvme_remove_disks; queue_work(nvme_workq, &dev->reset_work); spin_unlock(&dev_list_lock); + } else { + nvme_unfreeze_queues(dev); + nvme_set_irq_hints(dev); } dev->initialized = 1; return 0; @@ -2776,11 +2854,10 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); flush_work(&dev->reset_work); misc_deregister(&dev->miscdev); - nvme_dev_remove(dev); nvme_dev_shutdown(dev); + nvme_dev_remove(dev); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); - nvme_free_admin_tags(dev); nvme_release_prp_pools(dev); kref_put(&dev->kref, nvme_free_dev); } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 7ef7c098708f..cdfbd21e3597 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -638,7 +638,7 @@ static int virtblk_probe(struct virtio_device *vdev) goto out_put_disk; q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set); - if (!q) { + if (IS_ERR(q)) { err = -ENOMEM; goto out_free_tags; } |