From cddcd72bcec3b1dc9cef7f17d724a7fe42d64cc1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 May 2015 09:38:14 +0200 Subject: nvme: disable irqs in nvme_freeze_queues The queue_lock needs to be taken with irqs disabled. This is mostly due to the old pre blk-mq usage pattern, but we've also picked it up in most of the few places where we use the queue_lock with blk-mq. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 85b8036deaa3..00e641937a8e 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2585,9 +2585,9 @@ static void nvme_freeze_queues(struct nvme_dev *dev) list_for_each_entry(ns, &dev->namespaces, list) { blk_mq_freeze_queue_start(ns->queue); - spin_lock(ns->queue->queue_lock); + spin_lock_irq(ns->queue->queue_lock); queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue); - spin_unlock(ns->queue->queue_lock); + spin_unlock_irq(ns->queue->queue_lock); blk_mq_cancel_requeue_work(ns->queue); blk_mq_stop_hw_queues(ns->queue); -- cgit v1.2.3 From f705f837c58ebe1ea69dfffff4dcc234e2fbc8dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:38 +0200 Subject: nvme: consolidate synchronous command submission helpers Note that we keep the unused timeout argument, but allow callers to pass 0 instead of a timeout if they want the default. This will allow adding a timeout to the pass through path later on. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 96 ++++++++++++++++------------------------------- drivers/block/nvme-scsi.c | 17 ++++----- include/linux/nvme.h | 6 +-- 3 files changed, 42 insertions(+), 77 deletions(-) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 00e641937a8e..e81b205ffd04 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -991,27 +991,40 @@ static void sync_completion(struct nvme_queue *nvmeq, void *ctx, * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code */ -static int nvme_submit_sync_cmd(struct request *req, struct nvme_command *cmd, - u32 *result, unsigned timeout) +static int __nvme_submit_sync_cmd(struct request_queue *q, + struct nvme_command *cmd, u32 *result, unsigned timeout) { struct sync_cmd_info cmdinfo; - struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); - struct nvme_queue *nvmeq = cmd_rq->nvmeq; + struct nvme_cmd_info *cmd_rq; + struct request *req; + int res; + + req = blk_mq_alloc_request(q, WRITE, GFP_KERNEL, false); + if (IS_ERR(req)) + return PTR_ERR(req); cmdinfo.task = current; cmdinfo.status = -EINTR; cmd->common.command_id = req->tag; + cmd_rq = blk_mq_rq_to_pdu(req); nvme_set_info(cmd_rq, &cmdinfo, sync_completion); set_current_state(TASK_UNINTERRUPTIBLE); - nvme_submit_cmd(nvmeq, cmd); + nvme_submit_cmd(cmd_rq->nvmeq, cmd); schedule(); if (result) *result = cmdinfo.result; - return cmdinfo.status; + res = cmdinfo.status; + blk_mq_free_request(req); + return res; +} + +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd) +{ + return __nvme_submit_sync_cmd(q, cmd, NULL, 0); } static int nvme_submit_async_admin_req(struct nvme_dev *dev) @@ -1060,41 +1073,6 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev, return nvme_submit_cmd(nvmeq, cmd); } -static int __nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, - u32 *result, unsigned timeout) -{ - int res; - struct request *req; - - req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false); - if (IS_ERR(req)) - return PTR_ERR(req); - res = nvme_submit_sync_cmd(req, cmd, result, timeout); - blk_mq_free_request(req); - return res; -} - -int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, - u32 *result) -{ - return __nvme_submit_admin_cmd(dev, cmd, result, ADMIN_TIMEOUT); -} - -int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_ns *ns, - struct nvme_command *cmd, u32 *result) -{ - int res; - struct request *req; - - req = blk_mq_alloc_request(ns->queue, WRITE, (GFP_KERNEL|__GFP_WAIT), - false); - if (IS_ERR(req)) - return PTR_ERR(req); - res = nvme_submit_sync_cmd(req, cmd, result, NVME_IO_TIMEOUT); - blk_mq_free_request(req); - return res; -} - static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) { struct nvme_command c; @@ -1103,7 +1081,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) c.delete_queue.opcode = opcode; c.delete_queue.qid = cpu_to_le16(id); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_sync_cmd(dev->admin_q, &c); } static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, @@ -1120,7 +1098,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cq_flags = cpu_to_le16(flags); c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_sync_cmd(dev->admin_q, &c); } static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, @@ -1137,7 +1115,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, c.create_sq.sq_flags = cpu_to_le16(flags); c.create_sq.cqid = cpu_to_le16(qid); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_sync_cmd(dev->admin_q, &c); } static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) @@ -1161,7 +1139,7 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, c.identify.prp1 = cpu_to_le64(dma_addr); c.identify.cns = cpu_to_le32(cns); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_sync_cmd(dev->admin_q, &c); } int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, @@ -1175,7 +1153,7 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - return nvme_submit_admin_cmd(dev, &c, result); + return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0); } int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, @@ -1189,7 +1167,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); - return nvme_submit_admin_cmd(dev, &c, result); + return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0); } /** @@ -1813,7 +1791,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); c.rw.prp2 = cpu_to_le64(iod->first_dma); c.rw.metadata = cpu_to_le64(meta_dma); - status = nvme_submit_io_cmd(dev, ns, &c, NULL); + status = nvme_submit_sync_cmd(ns->queue, &c); unmap: nvme_unmap_user_pages(dev, write, iod); nvme_free_iod(dev, iod); @@ -1869,23 +1847,15 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) : ADMIN_TIMEOUT; - if (length != cmd.data_len) + if (length != cmd.data_len) { status = -ENOMEM; - else if (ns) { - struct request *req; - - req = blk_mq_alloc_request(ns->queue, WRITE, - (GFP_KERNEL|__GFP_WAIT), false); - if (IS_ERR(req)) - status = PTR_ERR(req); - else { - status = nvme_submit_sync_cmd(req, &c, &cmd.result, - timeout); - blk_mq_free_request(req); - } - } else - status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout); + goto out; + } + + status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c, + &cmd.result, timeout); +out: if (cmd.data_len) { nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); nvme_free_iod(dev, iod); diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 6b736b00f63e..ba1809fbd49e 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -1053,7 +1053,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, c.common.prp1 = cpu_to_le64(dma_addr); c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); - res = nvme_submit_admin_cmd(dev, &c, NULL); + res = nvme_submit_sync_cmd(dev->admin_q, &c); if (res != NVME_SC_SUCCESS) { temp_c = LOG_TEMP_UNKNOWN; } else { @@ -1121,7 +1121,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.prp1 = cpu_to_le64(dma_addr); c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); - res = nvme_submit_admin_cmd(dev, &c, NULL); + res = nvme_submit_sync_cmd(dev->admin_q, &c); if (res != NVME_SC_SUCCESS) { temp_c_cur = LOG_TEMP_UNKNOWN; } else { @@ -1609,7 +1609,7 @@ static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.cdw10[0] = cpu_to_le32(cdw10); } - nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c); res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_unmap; @@ -1971,7 +1971,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.format.nsid = cpu_to_le32(ns->ns_id); c.format.cdw10 = cpu_to_le32(cdw10); - nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c); res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out_dma; @@ -2139,7 +2139,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, nvme_offset += unit_num_blocks; - nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); if (nvme_sc != NVME_SC_SUCCESS) { nvme_unmap_user_pages(dev, (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, @@ -2696,7 +2696,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.opcode = nvme_cmd_flush; c.common.nsid = cpu_to_le32(ns->ns_id); - nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out; @@ -2724,8 +2724,7 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, c.common.opcode = nvme_cmd_flush; c.common.nsid = cpu_to_le32(ns->ns_id); - nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL); - + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out; @@ -2932,7 +2931,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.dsm.nr = cpu_to_le32(ndesc - 1); c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); res = nvme_trans_status_code(hdr, nvme_sc); dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8dbd05e70f09..61488b2ae291 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -158,11 +158,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, unsigned long addr, unsigned length); void nvme_unmap_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *iod); -int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_ns *, - struct nvme_command *, u32 *); -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); -int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, - u32 *result); +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd); int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, dma_addr_t dma_addr); int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, -- cgit v1.2.3 From e75ec752d725b7b612c0b2db1bca50a9e53c0879 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:39 +0200 Subject: nvme: store a struct device pointer in struct nvme_dev Most users want the generic device, so store that in struct nvme_dev instead of the pci_dev. This also happens to be a nice step towards making some code reusable for non-PCI transports. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 110 ++++++++++++++++++++++------------------------ drivers/block/nvme-scsi.c | 63 +++++++++++--------------- include/linux/nvme.h | 2 +- 3 files changed, 79 insertions(+), 96 deletions(-) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e81b205ffd04..870a926e1ddc 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -610,17 +610,17 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, req->errors = 0; if (cmd_rq->aborted) - dev_warn(&nvmeq->dev->pci_dev->dev, + dev_warn(nvmeq->dev->dev, "completing aborted command with status:%04x\n", status); if (iod->nents) { - dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents, + dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents, rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (blk_integrity_rq(req)) { if (!rq_data_dir(req)) nvme_dif_remap(req, nvme_dif_complete); - dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->meta_sg, 1, + dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1, rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); } } @@ -861,7 +861,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (blk_rq_bytes(req) != nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) { - dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, + dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents, dma_dir); goto retry_cmd; } @@ -1192,8 +1192,7 @@ static void nvme_abort_req(struct request *req) if (work_busy(&dev->reset_work)) goto out; list_del_init(&dev->node); - dev_warn(&dev->pci_dev->dev, - "I/O %d QID %d timeout, reset controller\n", + dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); @@ -1362,22 +1361,21 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) { - struct device *dmadev = &dev->pci_dev->dev; struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL); if (!nvmeq) return NULL; - nvmeq->cqes = dma_zalloc_coherent(dmadev, CQ_SIZE(depth), + nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth), &nvmeq->cq_dma_addr, GFP_KERNEL); if (!nvmeq->cqes) goto free_nvmeq; - nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth), + nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), &nvmeq->sq_dma_addr, GFP_KERNEL); if (!nvmeq->sq_cmds) goto free_cqdma; - nvmeq->q_dmadev = dmadev; + nvmeq->q_dmadev = dev->dev; nvmeq->dev = dev; snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d", dev->instance, qid); @@ -1393,7 +1391,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, return nvmeq; free_cqdma: - dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes, + dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); free_nvmeq: kfree(nvmeq); @@ -1465,7 +1463,7 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) if (fatal_signal_pending(current)) return -EINTR; if (time_after(jiffies, timeout)) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Device not ready; aborting %s\n", enabled ? "initialisation" : "reset"); return -ENODEV; @@ -1515,7 +1513,7 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev) if (fatal_signal_pending(current)) return -EINTR; if (time_after(jiffies, timeout)) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Device shutdown incomplete; abort shutdown\n"); return -ENODEV; } @@ -1558,7 +1556,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1; dev->admin_tagset.reserved_tags = 1; dev->admin_tagset.timeout = ADMIN_TIMEOUT; - dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev); + dev->admin_tagset.numa_node = dev_to_node(dev->dev); dev->admin_tagset.cmd_size = nvme_cmd_size(dev); dev->admin_tagset.driver_data = dev; @@ -1591,14 +1589,14 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12; if (page_shift < dev_page_min) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Minimum device page size (%u) too large for " "host (%u)\n", 1 << dev_page_min, 1 << page_shift); return -ENODEV; } if (page_shift > dev_page_max) { - dev_info(&dev->pci_dev->dev, + dev_info(dev->dev, "Device maximum page size (%u) smaller than " "host (%u); enabling work-around\n", 1 << dev_page_max, 1 << page_shift); @@ -1689,7 +1687,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, sg_mark_end(&sg[i - 1]); iod->nents = count; - nents = dma_map_sg(&dev->pci_dev->dev, sg, count, + nents = dma_map_sg(dev->dev, sg, count, write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (!nents) goto free_iod; @@ -1711,7 +1709,7 @@ void nvme_unmap_user_pages(struct nvme_dev *dev, int write, { int i; - dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, + dma_unmap_sg(dev->dev, iod->sg, iod->nents, write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); for (i = 0; i < iod->nents; i++) @@ -1762,7 +1760,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) goto unmap; } if (meta_len) { - meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len, + meta = dma_alloc_coherent(dev->dev, meta_len, &meta_dma, GFP_KERNEL); if (!meta) { status = -ENOMEM; @@ -1801,7 +1799,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) meta_len)) status = -EFAULT; } - dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma); + dma_free_coherent(dev->dev, meta_len, meta, meta_dma); } return status; } @@ -1961,15 +1959,13 @@ static int nvme_revalidate_disk(struct gendisk *disk) u16 old_ms; unsigned short bs; - id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr, - GFP_KERNEL); + id = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL); if (!id) { - dev_warn(&dev->pci_dev->dev, "%s: Memory alocation failure\n", - __func__); + dev_warn(dev->dev, "%s: Memory alocation failure\n", __func__); return 0; } if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) { - dev_warn(&dev->pci_dev->dev, + dev_warn(dev->dev, "identify failed ns:%d, setting capacity to 0\n", ns->ns_id); memset(id, 0, sizeof(*id)); @@ -2014,7 +2010,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) if (dev->oncs & NVME_CTRL_ONCS_DSM) nvme_config_discard(ns); - dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr); + dma_free_coherent(dev->dev, 4096, id, dma_addr); return 0; } @@ -2041,7 +2037,7 @@ static int nvme_kthread(void *data) if (work_busy(&dev->reset_work)) continue; list_del_init(&dev->node); - dev_warn(&dev->pci_dev->dev, + dev_warn(dev->dev, "Failed status: %x, reset controller\n", readl(&dev->bar->csts)); dev->reset_workfn = nvme_reset_failed_dev; @@ -2073,7 +2069,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) { struct nvme_ns *ns; struct gendisk *disk; - int node = dev_to_node(&dev->pci_dev->dev); + int node = dev_to_node(dev->dev); ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) @@ -2156,8 +2152,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) if (status < 0) return status; if (status > 0) { - dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n", - status); + dev_err(dev->dev, "Could not set queue count (%d)\n", status); return 0; } return min(result & 0xffff, result >> 16) + 1; @@ -2171,7 +2166,7 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = dev->queues[0]; - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); int result, i, vecs, nr_io_queues, size; nr_io_queues = num_possible_cpus(); @@ -2251,7 +2246,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) */ static int nvme_dev_add(struct nvme_dev *dev) { - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); int res; unsigned nn, i; struct nvme_id_ctrl *ctrl; @@ -2259,14 +2254,14 @@ static int nvme_dev_add(struct nvme_dev *dev) dma_addr_t dma_addr; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; - mem = dma_alloc_coherent(&pdev->dev, 4096, &dma_addr, GFP_KERNEL); + mem = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL); if (!mem) return -ENOMEM; res = nvme_identify(dev, 0, 1, dma_addr); if (res) { - dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res); - dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr); + dev_err(dev->dev, "Identify Controller failed (%d)\n", res); + dma_free_coherent(dev->dev, 4096, mem, dma_addr); return -EIO; } @@ -2292,12 +2287,12 @@ static int nvme_dev_add(struct nvme_dev *dev) } else dev->max_hw_sectors = max_hw_sectors; } - dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr); + dma_free_coherent(dev->dev, 4096, mem, dma_addr); dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; dev->tagset.timeout = NVME_IO_TIMEOUT; - dev->tagset.numa_node = dev_to_node(&dev->pci_dev->dev); + dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.queue_depth = min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; dev->tagset.cmd_size = nvme_cmd_size(dev); @@ -2317,7 +2312,7 @@ static int nvme_dev_map(struct nvme_dev *dev) { u64 cap; int bars, result = -ENOMEM; - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); if (pci_enable_device_mem(pdev)) return result; @@ -2331,8 +2326,8 @@ static int nvme_dev_map(struct nvme_dev *dev) if (pci_request_selected_regions(pdev, bars, "nvme")) goto disable_pci; - if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) && - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) + if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && + dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) goto disable; dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); @@ -2373,19 +2368,21 @@ static int nvme_dev_map(struct nvme_dev *dev) static void nvme_dev_unmap(struct nvme_dev *dev) { - if (dev->pci_dev->msi_enabled) - pci_disable_msi(dev->pci_dev); - else if (dev->pci_dev->msix_enabled) - pci_disable_msix(dev->pci_dev); + struct pci_dev *pdev = to_pci_dev(dev->dev); + + if (pdev->msi_enabled) + pci_disable_msi(pdev); + else if (pdev->msix_enabled) + pci_disable_msix(pdev); if (dev->bar) { iounmap(dev->bar); dev->bar = NULL; - pci_release_regions(dev->pci_dev); + pci_release_regions(pdev); } - if (pci_is_enabled(dev->pci_dev)) - pci_disable_device(dev->pci_dev); + if (pci_is_enabled(pdev)) + pci_disable_device(pdev); } struct nvme_delq_ctx { @@ -2504,7 +2501,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) &worker, "nvme%d", dev->instance); if (IS_ERR(kworker_task)) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Failed to create queue del task\n"); for (i = dev->queue_count - 1; i > 0; i--) nvme_disable_queue(dev, i); @@ -2622,14 +2619,13 @@ static void nvme_dev_remove(struct nvme_dev *dev) static int nvme_setup_prp_pools(struct nvme_dev *dev) { - struct device *dmadev = &dev->pci_dev->dev; - dev->prp_page_pool = dma_pool_create("prp list page", dmadev, + dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, PAGE_SIZE, PAGE_SIZE, 0); if (!dev->prp_page_pool) return -ENOMEM; /* Optimisation for I/Os between 4k and 128k */ - dev->prp_small_pool = dma_pool_create("prp list 256", dmadev, + dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev, 256, 256, 0); if (!dev->prp_small_pool) { dma_pool_destroy(dev->prp_page_pool); @@ -2693,7 +2689,7 @@ static void nvme_free_dev(struct kref *kref) { struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); - pci_dev_put(dev->pci_dev); + put_device(dev->dev); put_device(dev->device); nvme_free_namespaces(dev); nvme_release_instance(dev); @@ -2837,7 +2833,7 @@ static int nvme_dev_start(struct nvme_dev *dev) static int nvme_remove_dead_ctrl(void *arg) { struct nvme_dev *dev = (struct nvme_dev *)arg; - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); if (pci_get_drvdata(pdev)) pci_stop_and_remove_bus_device_locked(pdev); @@ -2876,11 +2872,11 @@ static void nvme_dev_reset(struct nvme_dev *dev) { nvme_dev_shutdown(dev); if (nvme_dev_resume(dev)) { - dev_warn(&dev->pci_dev->dev, "Device failed to resume\n"); + dev_warn(dev->dev, "Device failed to resume\n"); kref_get(&dev->kref); if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", dev->instance))) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Failed to start controller remove task\n"); kref_put(&dev->kref, nvme_free_dev); } @@ -2924,7 +2920,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&dev->namespaces); dev->reset_workfn = nvme_reset_failed_dev; INIT_WORK(&dev->reset_work, nvme_reset_workfn); - dev->pci_dev = pci_dev_get(pdev); + dev->dev = get_device(&pdev->dev); pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); if (result) @@ -2954,7 +2950,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) release: nvme_release_instance(dev); put_pci: - pci_dev_put(dev->pci_dev); + put_device(dev->dev); free: kfree(dev->queues); kfree(dev->entry); diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index ba1809fbd49e..f1c90f273132 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -684,7 +684,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, u8 cmdque = 0x01 << 1; u8 fw_offset = sizeof(dev->firmware_rev); - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -728,8 +728,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); out_free: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out_dma: return res; } @@ -787,7 +786,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, int xfer_len; __be32 tmp_id = cpu_to_be32(ns->ns_id); - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -842,7 +841,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, inq_response[6] = 0x00; /* Rsvd */ inq_response[7] = 0x44; /* Designator Length */ - sprintf(&inq_response[8], "%04x", dev->pci_dev->vendor); + sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor); memcpy(&inq_response[12], dev->model, sizeof(dev->model)); sprintf(&inq_response[52], "%04x", tmp_id); memcpy(&inq_response[56], dev->serial, sizeof(dev->serial)); @@ -851,8 +850,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); out_free: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out_dma: return res; } @@ -883,7 +881,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out_mem; } - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -933,8 +931,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); out_free: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out_dma: kfree(inq_response); out_mem: @@ -1038,8 +1035,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, goto out_mem; } - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_smart_log), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1077,7 +1073,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), + dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log), mem, dma_addr); out_dma: kfree(log_response); @@ -1106,8 +1102,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out_mem; } - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_smart_log), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1158,7 +1153,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), + dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log), mem, dma_addr); out_dma: kfree(log_response); @@ -1209,7 +1204,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN) return SNTI_INTERNAL_ERROR; - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1246,8 +1241,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, } out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out: return res; } @@ -1494,8 +1488,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, unsigned ps_desired = 0; /* NVMe Controller Identify */ - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_id_ctrl), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1556,8 +1549,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, if (nvme_sc) res = nvme_sc; out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr); out: return res; } @@ -1820,7 +1812,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, */ if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) { - mem = dma_alloc_coherent(&dev->pci_dev->dev, + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1845,7 +1837,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, (1 << (id_ns->lbaf[flbas].ds)); } out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); } out: @@ -1928,7 +1920,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_command c; /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */ - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -1979,8 +1971,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_sc; out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out: return res; } @@ -2485,7 +2476,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, resp_size = READ_CAP_16_RESP_SIZE; } - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -2514,8 +2505,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, kfree(response); out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); out: return res; } @@ -2548,8 +2538,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out; } else { /* NVMe Controller Identify */ - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_id_ctrl), + mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl), &dma_addr, GFP_KERNEL); if (mem == NULL) { res = -ENOMEM; @@ -2600,8 +2589,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, kfree(response); out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem, - dma_addr); + dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr); out: return res; } @@ -2913,7 +2901,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out; } - range = dma_alloc_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), + range = dma_alloc_coherent(dev->dev, ndesc * sizeof(*range), &dma_addr, GFP_KERNEL); if (!range) goto out; @@ -2934,8 +2922,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); res = nvme_trans_status_code(hdr, nvme_sc); - dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), - range, dma_addr); + dma_free_coherent(dev->dev, ndesc * sizeof(*range), range, dma_addr); out: kfree(plist); return res; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 61488b2ae291..de0e49a716b8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -74,7 +74,7 @@ struct nvme_dev { struct blk_mq_tag_set tagset; struct blk_mq_tag_set admin_tagset; u32 __iomem *dbs; - struct pci_dev *pci_dev; + struct device *dev; struct dma_pool *prp_page_pool; struct dma_pool *prp_small_pool; int instance; -- cgit v1.2.3 From d29ec8241c10eacf59c23b3828a88dbae06e7e3f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:46 +0200 Subject: nvme: submit internal commands through the block layer Use block layer queues with an internal cmd_type to submit internally generated NVMe commands. This both simplifies the code a lot and allow for a better structure. For example now the LighNVM code can construct commands without knowing the details of the underlying I/O descriptors. Or a future NVMe over network target could inject commands, as well as could the SCSI translation and ioctl code be reused for such a beast. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 399 +++++++++++++++++++------------------------ drivers/block/nvme-scsi.c | 422 +++++++++++++--------------------------------- include/linux/nvme.h | 24 +-- 3 files changed, 300 insertions(+), 545 deletions(-) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 870a926e1ddc..03bd638e76dd 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -445,7 +445,7 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev, (unsigned long) rq, gfp); } -void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) +static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) { const int last_prp = dev->page_size / 8 - 1; int i; @@ -605,7 +605,12 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, spin_unlock_irqrestore(req->q->queue_lock, flags); return; } - req->errors = nvme_error_status(status); + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { + req->sense_len = le32_to_cpup(&cqe->result); + req->errors = status; + } else { + req->errors = nvme_error_status(status); + } } else req->errors = 0; @@ -630,8 +635,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } /* length is in bytes. gfp flags indicates whether we may sleep. */ -int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, - gfp_t gfp) +static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, + int total_len, gfp_t gfp) { struct dma_pool *pool; int length = total_len; @@ -709,6 +714,23 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, return total_len; } +static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req, + struct nvme_iod *iod) +{ + struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; + + memcpy(cmnd, req->cmd, sizeof(struct nvme_command)); + cmnd->rw.command_id = req->tag; + if (req->nr_phys_segments) { + cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + cmnd->rw.prp2 = cpu_to_le64(iod->first_dma); + } + + if (++nvmeq->sq_tail == nvmeq->q_depth) + nvmeq->sq_tail = 0; + writel(nvmeq->sq_tail, nvmeq->q_db); +} + /* * We reuse the small pool to allocate the 16-byte range here as it is not * worth having a special pool for these or additional cases to handle freeing @@ -807,11 +829,15 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod, return 0; } +/* + * NOTE: ns is NULL when called on the admin queue. + */ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nvme_ns *ns = hctx->queue->queuedata; struct nvme_queue *nvmeq = hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; struct request *req = bd->rq; struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); struct nvme_iod *iod; @@ -822,7 +848,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, * unless this namespace is formated such that the metadata can be * stripped/generated by the controller with PRACT=1. */ - if (ns->ms && !blk_integrity_rq(req)) { + if (ns && ns->ms && !blk_integrity_rq(req)) { if (!(ns->pi_type && ns->ms == 8)) { req->errors = -EFAULT; blk_mq_complete_request(req); @@ -830,7 +856,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } } - iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC); + iod = nvme_alloc_iod(req, dev, GFP_ATOMIC); if (!iod) return BLK_MQ_RQ_QUEUE_BUSY; @@ -841,8 +867,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, * as it is not worth having a special pool for these or * additional cases to handle freeing the iod. */ - range = dma_pool_alloc(nvmeq->dev->prp_small_pool, - GFP_ATOMIC, + range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC, &iod->first_dma); if (!range) goto retry_cmd; @@ -860,9 +885,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, goto retry_cmd; if (blk_rq_bytes(req) != - nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) { - dma_unmap_sg(nvmeq->dev->dev, iod->sg, - iod->nents, dma_dir); + nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) { + dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); goto retry_cmd; } if (blk_integrity_rq(req)) { @@ -884,7 +908,9 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, nvme_set_info(cmd, iod, req_completion); spin_lock_irq(&nvmeq->q_lock); - if (req->cmd_flags & REQ_DISCARD) + if (req->cmd_type == REQ_TYPE_DRV_PRIV) + nvme_submit_priv(nvmeq, req, iod); + else if (req->cmd_flags & REQ_DISCARD) nvme_submit_discard(nvmeq, ns, req, iod); else if (req->cmd_flags & REQ_FLUSH) nvme_submit_flush(nvmeq, ns, req->tag); @@ -896,10 +922,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_OK; error_cmd: - nvme_free_iod(nvmeq->dev, iod); + nvme_free_iod(dev, iod); return BLK_MQ_RQ_QUEUE_ERROR; retry_cmd: - nvme_free_iod(nvmeq->dev, iod); + nvme_free_iod(dev, iod); return BLK_MQ_RQ_QUEUE_BUSY; } @@ -942,15 +968,6 @@ static int nvme_process_cq(struct nvme_queue *nvmeq) return 1; } -/* Admin queue isn't initialized as a request queue. If at some point this - * happens anyway, make sure to notify the user */ -static int nvme_admin_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) -{ - WARN_ON_ONCE(1); - return BLK_MQ_RQ_QUEUE_ERROR; -} - static irqreturn_t nvme_irq(int irq, void *data) { irqreturn_t result; @@ -972,59 +989,61 @@ static irqreturn_t nvme_irq_check(int irq, void *data) return IRQ_WAKE_THREAD; } -struct sync_cmd_info { - struct task_struct *task; - u32 result; - int status; -}; - -static void sync_completion(struct nvme_queue *nvmeq, void *ctx, - struct nvme_completion *cqe) -{ - struct sync_cmd_info *cmdinfo = ctx; - cmdinfo->result = le32_to_cpup(&cqe->result); - cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; - wake_up_process(cmdinfo->task); -} - /* * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code */ -static int __nvme_submit_sync_cmd(struct request_queue *q, - struct nvme_command *cmd, u32 *result, unsigned timeout) +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, void __user *ubuffer, unsigned bufflen, + u32 *result, unsigned timeout) { - struct sync_cmd_info cmdinfo; - struct nvme_cmd_info *cmd_rq; + bool write = cmd->common.opcode & 1; + struct bio *bio = NULL; struct request *req; - int res; + int ret; - req = blk_mq_alloc_request(q, WRITE, GFP_KERNEL, false); + req = blk_mq_alloc_request(q, write, GFP_KERNEL, false); if (IS_ERR(req)) return PTR_ERR(req); - cmdinfo.task = current; - cmdinfo.status = -EINTR; + req->cmd_type = REQ_TYPE_DRV_PRIV; + req->__data_len = 0; + req->__sector = (sector_t) -1; + req->bio = req->biotail = NULL; - cmd->common.command_id = req->tag; + req->timeout = ADMIN_TIMEOUT; - cmd_rq = blk_mq_rq_to_pdu(req); - nvme_set_info(cmd_rq, &cmdinfo, sync_completion); + req->cmd = (unsigned char *)cmd; + req->cmd_len = sizeof(struct nvme_command); + req->sense = NULL; + req->sense_len = 0; - set_current_state(TASK_UNINTERRUPTIBLE); - nvme_submit_cmd(cmd_rq->nvmeq, cmd); - schedule(); + if (buffer && bufflen) { + ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT); + if (ret) + goto out; + } else if (ubuffer && bufflen) { + ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT); + if (ret) + goto out; + bio = req->bio; + } + blk_execute_rq(req->q, NULL, req, 0); + if (bio) + blk_rq_unmap_user(bio); if (result) - *result = cmdinfo.result; - res = cmdinfo.status; + *result = req->sense_len; + ret = req->errors; + out: blk_mq_free_request(req); - return res; + return ret; } -int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd) +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, unsigned bufflen) { - return __nvme_submit_sync_cmd(q, cmd, NULL, 0); + return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0); } static int nvme_submit_async_admin_req(struct nvme_dev *dev) @@ -1081,7 +1100,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) c.delete_queue.opcode = opcode; c.delete_queue.qid = cpu_to_le16(id); - return nvme_submit_sync_cmd(dev->admin_q, &c); + return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); } static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, @@ -1090,6 +1109,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; + /* + * Note: we (ab)use the fact the the prp fields survive if no data + * is attached to the request. + */ memset(&c, 0, sizeof(c)); c.create_cq.opcode = nvme_admin_create_cq; c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr); @@ -1098,7 +1121,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cq_flags = cpu_to_le16(flags); c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); - return nvme_submit_sync_cmd(dev->admin_q, &c); + return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); } static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, @@ -1107,6 +1130,10 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; + /* + * Note: we (ab)use the fact the the prp fields survive if no data + * is attached to the request. + */ memset(&c, 0, sizeof(c)); c.create_sq.opcode = nvme_admin_create_sq; c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr); @@ -1115,7 +1142,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, c.create_sq.sq_flags = cpu_to_le16(flags); c.create_sq.cqid = cpu_to_le16(qid); - return nvme_submit_sync_cmd(dev->admin_q, &c); + return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); } static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) @@ -1128,18 +1155,43 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); } -int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, - dma_addr_t dma_addr) +int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id) { - struct nvme_command c; + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.cns = cpu_to_le32(1), + }; + int error; - memset(&c, 0, sizeof(c)); - c.identify.opcode = nvme_admin_identify; - c.identify.nsid = cpu_to_le32(nsid); - c.identify.prp1 = cpu_to_le64(dma_addr); - c.identify.cns = cpu_to_le32(cns); + *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); + if (!*id) + return -ENOMEM; - return nvme_submit_sync_cmd(dev->admin_q, &c); + error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, + sizeof(struct nvme_id_ctrl)); + if (error) + kfree(*id); + return error; +} + +int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, + struct nvme_id_ns **id) +{ + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.nsid = cpu_to_le32(nsid), + }; + int error; + + *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); + if (!*id) + return -ENOMEM; + + error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, + sizeof(struct nvme_id_ns)); + if (error) + kfree(*id); + return error; } int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, @@ -1153,7 +1205,8 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0); + return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0, + result, 0); } int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, @@ -1167,7 +1220,30 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); - return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0); + return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0, + result, 0); +} + +int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log) +{ + struct nvme_command c = { + .common.opcode = nvme_admin_get_log_page, + .common.nsid = cpu_to_le32(0xFFFFFFFF), + .common.cdw10[0] = cpu_to_le32( + (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | + NVME_LOG_SMART), + }; + int error; + + *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); + if (!*log) + return -ENOMEM; + + error = nvme_submit_sync_cmd(dev->admin_q, &c, *log, + sizeof(struct nvme_smart_log)); + if (error) + kfree(*log); + return error; } /** @@ -1523,7 +1599,7 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev) } static struct blk_mq_ops nvme_mq_admin_ops = { - .queue_rq = nvme_admin_queue_rq, + .queue_rq = nvme_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = nvme_admin_init_hctx, .exit_hctx = nvme_exit_hctx, @@ -1644,122 +1720,41 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) return result; } -struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, - unsigned long addr, unsigned length) -{ - int i, err, count, nents, offset; - struct scatterlist *sg; - struct page **pages; - struct nvme_iod *iod; - - if (addr & 3) - return ERR_PTR(-EINVAL); - if (!length || length > INT_MAX - PAGE_SIZE) - return ERR_PTR(-EINVAL); - - offset = offset_in_page(addr); - count = DIV_ROUND_UP(offset + length, PAGE_SIZE); - pages = kcalloc(count, sizeof(*pages), GFP_KERNEL); - if (!pages) - return ERR_PTR(-ENOMEM); - - err = get_user_pages_fast(addr, count, 1, pages); - if (err < count) { - count = err; - err = -EFAULT; - goto put_pages; - } - - err = -ENOMEM; - iod = __nvme_alloc_iod(count, length, dev, 0, GFP_KERNEL); - if (!iod) - goto put_pages; - - sg = iod->sg; - sg_init_table(sg, count); - for (i = 0; i < count; i++) { - sg_set_page(&sg[i], pages[i], - min_t(unsigned, length, PAGE_SIZE - offset), - offset); - length -= (PAGE_SIZE - offset); - offset = 0; - } - sg_mark_end(&sg[i - 1]); - iod->nents = count; - - nents = dma_map_sg(dev->dev, sg, count, - write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (!nents) - goto free_iod; - - kfree(pages); - return iod; - - free_iod: - kfree(iod); - put_pages: - for (i = 0; i < count; i++) - put_page(pages[i]); - kfree(pages); - return ERR_PTR(err); -} - -void nvme_unmap_user_pages(struct nvme_dev *dev, int write, - struct nvme_iod *iod) -{ - int i; - - dma_unmap_sg(dev->dev, iod->sg, iod->nents, - write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - - for (i = 0; i < iod->nents; i++) - put_page(sg_page(&iod->sg[i])); -} - static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { struct nvme_dev *dev = ns->dev; struct nvme_user_io io; struct nvme_command c; - unsigned length, meta_len, prp_len; + unsigned length, meta_len; int status, write; - struct nvme_iod *iod; dma_addr_t meta_dma = 0; void *meta = NULL; if (copy_from_user(&io, uio, sizeof(io))) return -EFAULT; - length = (io.nblocks + 1) << ns->lba_shift; - meta_len = (io.nblocks + 1) * ns->ms; - - if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext) - return -EINVAL; - else if (meta_len && ns->ext) { - length += meta_len; - meta_len = 0; - } - - write = io.opcode & 1; switch (io.opcode) { case nvme_cmd_write: case nvme_cmd_read: case nvme_cmd_compare: - iod = nvme_map_user_pages(dev, write, io.addr, length); break; default: return -EINVAL; } - if (IS_ERR(iod)) - return PTR_ERR(iod); + length = (io.nblocks + 1) << ns->lba_shift; + meta_len = (io.nblocks + 1) * ns->ms; + write = io.opcode & 1; - prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL); - if (length != prp_len) { - status = -ENOMEM; - goto unmap; - } if (meta_len) { + if (((io.metadata & 3) || !io.metadata) && !ns->ext) + return -EINVAL; + + if (ns->ext) { + length += meta_len; + meta_len = 0; + } + meta = dma_alloc_coherent(dev->dev, meta_len, &meta_dma, GFP_KERNEL); if (!meta) { @@ -1786,13 +1781,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.reftag = cpu_to_le32(io.reftag); c.rw.apptag = cpu_to_le16(io.apptag); c.rw.appmask = cpu_to_le16(io.appmask); - c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.rw.prp2 = cpu_to_le64(iod->first_dma); c.rw.metadata = cpu_to_le64(meta_dma); - status = nvme_submit_sync_cmd(ns->queue, &c); + + status = __nvme_submit_sync_cmd(ns->queue, &c, NULL, + (void __user *)io.addr, length, NULL, 0); unmap: - nvme_unmap_user_pages(dev, write, iod); - nvme_free_iod(dev, iod); if (meta) { if (status == NVME_SC_SUCCESS && !write) { if (copy_to_user((void __user *)io.metadata, meta, @@ -1809,9 +1802,8 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, { struct nvme_passthru_cmd cmd; struct nvme_command c; - int status, length; - struct nvme_iod *uninitialized_var(iod); - unsigned timeout; + unsigned timeout = 0; + int status; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -1831,38 +1823,17 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); - length = cmd.data_len; - if (cmd.data_len) { - iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr, - length); - if (IS_ERR(iod)) - return PTR_ERR(iod); - length = nvme_setup_prps(dev, iod, length, GFP_KERNEL); - c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.common.prp2 = cpu_to_le64(iod->first_dma); - } - - timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) : - ADMIN_TIMEOUT; - - if (length != cmd.data_len) { - status = -ENOMEM; - goto out; - } + if (cmd.timeout_ms) + timeout = msecs_to_jiffies(cmd.timeout_ms); status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c, - &cmd.result, timeout); - -out: - if (cmd.data_len) { - nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); - nvme_free_iod(dev, iod); + NULL, (void __user *)cmd.addr, cmd.data_len, + &cmd.result, timeout); + if (status >= 0) { + if (put_user(cmd.result, &ucmd->result)) + return -EFAULT; } - if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result, - sizeof(cmd.result))) - status = -EFAULT; - return status; } @@ -1954,22 +1925,14 @@ static int nvme_revalidate_disk(struct gendisk *disk) struct nvme_ns *ns = disk->private_data; struct nvme_dev *dev = ns->dev; struct nvme_id_ns *id; - dma_addr_t dma_addr; u8 lbaf, pi_type; u16 old_ms; unsigned short bs; - id = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL); - if (!id) { - dev_warn(dev->dev, "%s: Memory alocation failure\n", __func__); + if (nvme_identify_ns(dev, ns->ns_id, &id)) { + dev_warn(dev->dev, "%s: Identify failure\n", __func__); return 0; } - if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) { - dev_warn(dev->dev, - "identify failed ns:%d, setting capacity to 0\n", - ns->ns_id); - memset(id, 0, sizeof(*id)); - } old_ms = ns->ms; lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; @@ -2010,7 +1973,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) if (dev->oncs & NVME_CTRL_ONCS_DSM) nvme_config_discard(ns); - dma_free_coherent(dev->dev, 4096, id, dma_addr); + kfree(id); return 0; } @@ -2250,22 +2213,14 @@ static int nvme_dev_add(struct nvme_dev *dev) int res; unsigned nn, i; struct nvme_id_ctrl *ctrl; - void *mem; - dma_addr_t dma_addr; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; - mem = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL); - if (!mem) - return -ENOMEM; - - res = nvme_identify(dev, 0, 1, dma_addr); + res = nvme_identify_ctrl(dev, &ctrl); if (res) { dev_err(dev->dev, "Identify Controller failed (%d)\n", res); - dma_free_coherent(dev->dev, 4096, mem, dma_addr); return -EIO; } - ctrl = mem; nn = le32_to_cpup(&ctrl->nn); dev->oncs = le16_to_cpup(&ctrl->oncs); dev->abort_limit = ctrl->acl + 1; @@ -2287,7 +2242,7 @@ static int nvme_dev_add(struct nvme_dev *dev) } else dev->max_hw_sectors = max_hw_sectors; } - dma_free_coherent(dev->dev, 4096, mem, dma_addr); + kfree(ctrl); dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 342f5b7f840d..8e6223e5b670 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -525,8 +525,6 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, int alloc_len) { struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; int res; int nvme_sc; @@ -536,21 +534,17 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, u8 cmdque = 0x01 << 1; u8 fw_offset = sizeof(dev->firmware_rev); - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } - /* nvme ns identify - use DPS value for PROTECT field */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_free; + return res; - id_ns = mem; - (id_ns->dps) ? (protect = 0x01) : (protect = 0); + if (id_ns->dps) + protect = 0x01; + else + protect = 0; + kfree(id_ns); memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); inq_response[2] = VERSION_SPC_4; @@ -567,12 +561,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4); xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); - res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - - out_free: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out_dma: - return res; + return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); } static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns, @@ -615,40 +604,35 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len) { struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; int res; int nvme_sc; int xfer_len; __be32 tmp_id = cpu_to_be32(ns->ns_id); - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } - memset(inq_response, 0, alloc_len); inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; /* Page Code */ if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) { - struct nvme_id_ns *id_ns = mem; - void *eui = id_ns->eui64; - int len = sizeof(id_ns->eui64); + struct nvme_id_ns *id_ns; + void *eui; + int len; - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_free; + return res; + eui = id_ns->eui64; + len = sizeof(id_ns->eui64); if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) { if (bitmap_empty(eui, len * 8)) { eui = id_ns->nguid; len = sizeof(id_ns->nguid); } } - if (bitmap_empty(eui, len * 8)) + if (bitmap_empty(eui, len * 8)) { + kfree(id_ns); goto scsi_string; + } inq_response[3] = 4 + len; /* Page Length */ /* Designation Descriptor start */ @@ -657,14 +641,14 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, inq_response[6] = 0x00; /* Rsvd */ inq_response[7] = len; /* Designator Length */ memcpy(&inq_response[8], eui, len); + kfree(id_ns); } else { scsi_string: if (alloc_len < 72) { - res = nvme_trans_completion(hdr, + return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - goto out_free; } inq_response[3] = 0x48; /* Page Length */ /* Designation Descriptor start */ @@ -679,12 +663,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, memcpy(&inq_response[56], dev->serial, sizeof(dev->serial)); } xfer_len = alloc_len; - res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - - out_free: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out_dma: - return res; + return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); } static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, @@ -694,8 +673,6 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ctrl *id_ctrl; struct nvme_id_ns *id_ns; int xfer_len; @@ -708,39 +685,32 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 luiclr = 0x01; inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); - if (inq_response == NULL) { - res = -ENOMEM; - goto out_mem; - } - - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } + if (inq_response == NULL) + return -ENOMEM; - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_free; + goto out_free_inq; + + spt = spt_lut[id_ns->dpc & 0x07] << 3; + if (id_ns->dps) + protect = 0x01; + else + protect = 0; + kfree(id_ns); - id_ns = mem; - spt = spt_lut[(id_ns->dpc) & 0x07] << 3; - (id_ns->dps) ? (protect = 0x01) : (protect = 0); grd_chk = protect << 2; app_chk = protect << 1; ref_chk = protect; - /* nvme controller identify */ - nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_free; + goto out_free_inq; - id_ctrl = mem; v_sup = id_ctrl->vwc; + kfree(id_ctrl); memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */ @@ -756,11 +726,8 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - out_free: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out_dma: + out_free_inq: kfree(inq_response); - out_mem: return res; } @@ -847,43 +814,27 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, int res; int xfer_len; u8 *log_response; - struct nvme_command c; struct nvme_dev *dev = ns->dev; struct nvme_smart_log *smart_log; - dma_addr_t dma_addr; - void *mem; u8 temp_c; u16 temp_k; log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL); - if (log_response == NULL) { - res = -ENOMEM; - goto out_mem; - } + if (log_response == NULL) + return -ENOMEM; - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } + res = nvme_get_log_page(dev, &smart_log); + if (res < 0) + goto out_free_response; - /* Get SMART Log Page */ - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_admin_get_log_page; - c.common.nsid = cpu_to_le32(0xFFFFFFFF); - c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); - res = nvme_submit_sync_cmd(dev->admin_q, &c); if (res != NVME_SC_SUCCESS) { temp_c = LOG_TEMP_UNKNOWN; } else { - smart_log = mem; temp_k = (smart_log->temperature[1] << 8) + (smart_log->temperature[0]); temp_c = temp_k - KELVIN_TEMP_FACTOR; } + kfree(smart_log); log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE; /* Subpage=0x00, Page Length MSB=0 */ @@ -899,11 +850,8 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); - dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log), - mem, dma_addr); - out_dma: + out_free_response: kfree(log_response); - out_mem: return res; } @@ -913,44 +861,28 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res; int xfer_len; u8 *log_response; - struct nvme_command c; struct nvme_dev *dev = ns->dev; struct nvme_smart_log *smart_log; - dma_addr_t dma_addr; - void *mem; u32 feature_resp; u8 temp_c_cur, temp_c_thresh; u16 temp_k; log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL); - if (log_response == NULL) { - res = -ENOMEM; - goto out_mem; - } + if (log_response == NULL) + return -ENOMEM; - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } + res = nvme_get_log_page(dev, &smart_log); + if (res < 0) + goto out_free_response; - /* Get SMART Log Page */ - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_admin_get_log_page; - c.common.nsid = cpu_to_le32(0xFFFFFFFF); - c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); - res = nvme_submit_sync_cmd(dev->admin_q, &c); if (res != NVME_SC_SUCCESS) { temp_c_cur = LOG_TEMP_UNKNOWN; } else { - smart_log = mem; temp_k = (smart_log->temperature[1] << 8) + (smart_log->temperature[0]); temp_c_cur = temp_k - KELVIN_TEMP_FACTOR; } + kfree(smart_log); /* Get Features for Temp Threshold */ res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0, @@ -979,11 +911,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); - dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log), - mem, dma_addr); - out_dma: + out_free_response: kfree(log_response); - out_mem: return res; } @@ -1019,8 +948,6 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; u8 flbas; u32 lba_length; @@ -1030,20 +957,11 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN) return -EINVAL; - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; + return res; - id_ns = mem; flbas = (id_ns->flbas) & 0x0F; lba_length = (1 << (id_ns->lbaf[flbas].ds)); @@ -1063,9 +981,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, memcpy(&resp[12], &tmp_len, sizeof(u32)); } - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out: + kfree(id_ns); return res; } @@ -1291,26 +1207,17 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ctrl *id_ctrl; int lowest_pow_st; /* max npss = lowest power consumption */ unsigned ps_desired = 0; - /* NVMe Controller Identify */ - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; + return res; - id_ctrl = mem; lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1)); + kfree(id_ctrl); switch (pc) { case NVME_POWER_STATE_START_VALID: @@ -1350,12 +1257,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, } nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0, NULL); - res = nvme_trans_status_code(hdr, nvme_sc); - - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr); - out: - return res; + return nvme_trans_status_code(hdr, nvme_sc); } static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, @@ -1368,7 +1270,7 @@ static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr c.common.opcode = nvme_admin_activate_fw; c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV); - nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0); return nvme_trans_status_code(hdr, nvme_sc); } @@ -1376,15 +1278,9 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr u8 opcode, u32 tot_len, u32 offset, u8 buffer_id) { - int res; int nvme_sc; struct nvme_dev *dev = ns->dev; struct nvme_command c; - struct nvme_iod *iod = NULL; - unsigned length; - - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_admin_download_fw; if (hdr->iovec_count > 0) { /* Assuming SGL is not allowed for this command */ @@ -1394,28 +1290,15 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); } - iod = nvme_map_user_pages(dev, DMA_TO_DEVICE, - (unsigned long)hdr->dxferp, tot_len); - if (IS_ERR(iod)) - return PTR_ERR(iod); - length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL); - if (length != tot_len) { - res = -ENOMEM; - goto out_unmap; - } - c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.dlfw.prp2 = cpu_to_le64(iod->first_dma); + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_admin_download_fw; c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1); c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS); - nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c); - res = nvme_trans_status_code(hdr, nvme_sc); - - out_unmap: - nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod); - nvme_free_iod(dev, iod); - return res; + nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, + hdr->dxferp, tot_len, NULL, 0); + return nvme_trans_status_code(hdr, nvme_sc); } /* Mode Select Helper Functions */ @@ -1590,9 +1473,6 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, int res = 0; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; - struct nvme_id_ns *id_ns; u8 flbas; /* @@ -1603,19 +1483,12 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, */ if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) { - mem = dma_alloc_coherent(dev->dev, - sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + struct nvme_id_ns *id_ns; + + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; - - id_ns = mem; + return res; if (ns->mode_select_num_blocks == 0) ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap); @@ -1624,12 +1497,11 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, ns->mode_select_block_len = (1 << (id_ns->lbaf[flbas].ds)); } - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), - mem, dma_addr); + + kfree(id_ns); } - out: - return res; + + return 0; } static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len, @@ -1698,8 +1570,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; u8 i; u8 flbas, nlbaf; @@ -1708,19 +1578,11 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_command c; /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */ - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; + return res; - id_ns = mem; flbas = (id_ns->flbas) & 0x0F; nlbaf = id_ns->nlbaf; @@ -1748,12 +1610,10 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.format.nsid = cpu_to_le32(ns->ns_id); c.format.cdw10 = cpu_to_le32(cdw10); - nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c); + nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); res = nvme_trans_status_code(hdr, nvme_sc); - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out: + kfree(id_ns); return res; } @@ -1787,9 +1647,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_trans_io_cdb *cdb_info, u8 is_write) { int nvme_sc = NVME_SC_SUCCESS; - struct nvme_dev *dev = ns->dev; u32 num_cmds; - struct nvme_iod *iod; u64 unit_len; u64 unit_num_blocks; /* Number of blocks to xfer in each nvme cmd */ u32 retcode; @@ -1840,35 +1698,17 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, control = nvme_trans_io_get_control(ns, cdb_info); c.rw.control = cpu_to_le16(control); - iod = nvme_map_user_pages(dev, - (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - (unsigned long)next_mapping_addr, unit_len); - if (IS_ERR(iod)) - return PTR_ERR(iod); - - retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL); - if (retcode != unit_len) { - nvme_unmap_user_pages(dev, - (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - iod); - nvme_free_iod(dev, iod); - return -ENOMEM; + if (get_capacity(ns->disk) - unit_num_blocks < + cdb_info->lba + nvme_offset) { + nvme_sc = NVME_SC_LBA_RANGE; + break; } - c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.rw.prp2 = cpu_to_le64(iod->first_dma); + nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL, + next_mapping_addr, unit_len, NULL, 0); + if (nvme_sc) + break; nvme_offset += unit_num_blocks; - - nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); - - nvme_unmap_user_pages(dev, - (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - iod); - nvme_free_iod(dev, iod); - - - if (nvme_sc != NVME_SC_SUCCESS) - break; } return nvme_trans_status_code(hdr, nvme_sc); @@ -2199,8 +2039,6 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, u32 resp_size; u32 xfer_len; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; u8 *response; @@ -2212,24 +2050,15 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, resp_size = READ_CAP_10_RESP_SIZE; } - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; - - id_ns = mem; + return res; response = kzalloc(resp_size, GFP_KERNEL); if (response == NULL) { res = -ENOMEM; - goto out_dma; + goto out_free_id; } nvme_trans_fill_read_cap(response, id_ns, cdb16); @@ -2237,9 +2066,8 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_copy_to_user(hdr, response, xfer_len); kfree(response); - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr); - out: + out_free_id: + kfree(id_ns); return res; } @@ -2251,8 +2079,6 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, u32 alloc_len, xfer_len, resp_size; u8 *response; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ctrl *id_ctrl; u32 ll_length, lun_id; u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET; @@ -2266,19 +2092,11 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, case ALL_LUNS_RETURNED: case ALL_WELL_KNOWN_LUNS_RETURNED: case RESTRICTED_LUNS_RETURNED: - /* NVMe Controller Identify */ - mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; + return res; - id_ctrl = mem; ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE; resp_size = ll_length + LUN_DATA_HEADER_SIZE; @@ -2288,13 +2106,13 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - goto out_dma; + goto out_free_id; } response = kzalloc(resp_size, GFP_KERNEL); if (response == NULL) { res = -ENOMEM; - goto out_dma; + goto out_free_id; } /* The first LUN ID will always be 0 per the SAM spec */ @@ -2315,9 +2133,8 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_copy_to_user(hdr, response, xfer_len); kfree(response); - out_dma: - dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr); - out: + out_free_id: + kfree(id_ctrl); return res; } @@ -2379,12 +2196,23 @@ static int nvme_trans_security_protocol(struct nvme_ns *ns, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); } -static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, - u8 *cmd) +static int nvme_trans_synchronize_cache(struct nvme_ns *ns, + struct sg_io_hdr *hdr) { - int res; int nvme_sc; struct nvme_command c; + + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_cmd_flush; + c.common.nsid = cpu_to_le32(ns->ns_id); + + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0); + return nvme_trans_status_code(hdr, nvme_sc); +} + +static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ u8 immed, pcmod, pc, no_flush, start; immed = cmd[1] & 0x01; @@ -2400,12 +2228,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, } else { if (no_flush == 0) { /* Issue NVME FLUSH command prior to START STOP UNIT */ - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_cmd_flush; - c.common.nsid = cpu_to_le32(ns->ns_id); - - nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); - res = nvme_trans_status_code(hdr, nvme_sc); + int res = nvme_trans_synchronize_cache(ns, hdr); if (res) return res; } @@ -2414,20 +2237,6 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, } } -static int nvme_trans_synchronize_cache(struct nvme_ns *ns, - struct sg_io_hdr *hdr, u8 *cmd) -{ - int nvme_sc; - struct nvme_command c; - - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_cmd_flush; - c.common.nsid = cpu_to_le32(ns->ns_id); - - nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); - return nvme_trans_status_code(hdr, nvme_sc); -} - static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { @@ -2563,13 +2372,11 @@ struct scsi_unmap_parm_list { static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - struct nvme_dev *dev = ns->dev; struct scsi_unmap_parm_list *plist; struct nvme_dsm_range *range; struct nvme_command c; int i, nvme_sc, res = -ENOMEM; u16 ndesc, list_len; - dma_addr_t dma_addr; list_len = get_unaligned_be16(&cmd[7]); if (!list_len) @@ -2589,8 +2396,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out; } - range = dma_alloc_coherent(dev->dev, ndesc * sizeof(*range), - &dma_addr, GFP_KERNEL); + range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL); if (!range) goto out; @@ -2603,14 +2409,14 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, memset(&c, 0, sizeof(c)); c.dsm.opcode = nvme_cmd_dsm; c.dsm.nsid = cpu_to_le32(ns->ns_id); - c.dsm.prp1 = cpu_to_le64(dma_addr); c.dsm.nr = cpu_to_le32(ndesc - 1); c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - nvme_sc = nvme_submit_sync_cmd(ns->queue, &c); + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range, + ndesc * sizeof(*range)); res = nvme_trans_status_code(hdr, nvme_sc); - dma_free_coherent(dev->dev, ndesc * sizeof(*range), range, dma_addr); + kfree(range); out: kfree(plist); return res; @@ -2690,7 +2496,7 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) retcode = nvme_trans_start_stop(ns, hdr, cmd); break; case SYNCHRONIZE_CACHE: - retcode = nvme_trans_synchronize_cache(ns, hdr, cmd); + retcode = nvme_trans_synchronize_cache(ns, hdr); break; case FORMAT_UNIT: retcode = nvme_trans_format_unit(ns, hdr, cmd); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index de0e49a716b8..986bf8ad8e93 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -146,21 +146,15 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) return (sector >> (ns->lba_shift - 9)); } -/** - * nvme_free_iod - frees an nvme_iod - * @dev: The device that the I/O was submitted to - * @iod: The memory to free - */ -void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod); - -int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int, gfp_t); -struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, - unsigned long addr, unsigned length); -void nvme_unmap_user_pages(struct nvme_dev *dev, int write, - struct nvme_iod *iod); -int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd); -int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, - dma_addr_t dma_addr); +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buf, unsigned bufflen); +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, void __user *ubuffer, unsigned bufflen, + u32 *result, unsigned timeout); +int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); +int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, + struct nvme_id_ns **id); +int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, dma_addr_t dma_addr, u32 *result); int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, -- cgit v1.2.3 From a0a931d6a2c1fbc5d5966ebf0e7a043748692c22 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 22 May 2015 12:28:31 -0600 Subject: NVMe: Fix obtaining command result Replaces req->sense_len usage, which is not owned by the LLD, to req->special to contain the command result for driver created commands, and sets the result unconditionally on completion. Signed-off-by: Keith Busch Cc: Christoph Hellwig Cc: Jens Axboe Fixes: d29ec8241c10 ("nvme: submit internal commands through the block layer") Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 03bd638e76dd..c42bc53f3765 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -606,13 +606,16 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, return; } if (req->cmd_type == REQ_TYPE_DRV_PRIV) { - req->sense_len = le32_to_cpup(&cqe->result); req->errors = status; } else { req->errors = nvme_error_status(status); } } else req->errors = 0; + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { + u32 result = le32_to_cpup(&cqe->result); + req->special = (void *)(uintptr_t)result; + } if (cmd_rq->aborted) dev_warn(nvmeq->dev->dev, @@ -1015,8 +1018,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, req->cmd = (unsigned char *)cmd; req->cmd_len = sizeof(struct nvme_command); - req->sense = NULL; - req->sense_len = 0; + req->special = (void *)0; if (buffer && bufflen) { ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT); @@ -1033,7 +1035,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, if (bio) blk_rq_unmap_user(bio); if (result) - *result = req->sense_len; + *result = (u32)(uintptr_t)req->special; ret = req->errors; out: blk_mq_free_request(req); -- cgit v1.2.3 From f4ff414aeb472397d3b4fc15c22ca65bab219ec8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 28 May 2015 09:48:54 -0600 Subject: NVMe: Use requested sync command timeout Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index c42bc53f3765..4eb9917b2a7a 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1014,7 +1014,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, req->__sector = (sector_t) -1; req->bio = req->biotail = NULL; - req->timeout = ADMIN_TIMEOUT; + req->timeout = timeout ? timeout : ADMIN_TIMEOUT; req->cmd = (unsigned char *)cmd; req->cmd_len = sizeof(struct nvme_command); -- cgit v1.2.3 From 75619bfa904d0f2840b4274eb92ce47b2e1c472e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 28 May 2015 09:48:55 -0600 Subject: NVMe: End sync requests immediately on failure Do not retry failed sync commands so the original status may be seen without issuing unnecessary retries. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 4eb9917b2a7a..6ed1356e9eb5 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1010,6 +1010,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, return PTR_ERR(req); req->cmd_type = REQ_TYPE_DRV_PRIV; + req->cmd_flags = REQ_FAILFAST_DRIVER; req->__data_len = 0; req->__sector = (sector_t) -1; req->bio = req->biotail = NULL; -- cgit v1.2.3 From 42483228d4c019ffc86b8dbea7dfbc3f9566fe7e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 1 Jun 2015 09:29:54 -0600 Subject: NVMe: Remove hctx reliance for multi-namespace The driver needs to track shared tags to support multiple namespaces that may be dynamically allocated or deleted. Relying on the first request_queue's hctx's is not appropriate as we cannot clear outstanding tags for all namespaces using this handle, nor can the driver easily track all request_queue's hctx as namespaces are attached/detached. Instead, this patch uses the nvme_dev's tagset to get the shared tag resources instead of through a request_queue hctx. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 52 +++++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 33 deletions(-) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 6ed1356e9eb5..513908ff46c4 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -102,6 +102,7 @@ struct nvme_queue { spinlock_t q_lock; struct nvme_command *sq_cmds; volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; u32 __iomem *q_db; @@ -114,7 +115,6 @@ struct nvme_queue { u8 cq_phase; u8 cqe_seen; struct async_cmd_info cmdinfo; - struct blk_mq_hw_ctx *hctx; }; /* @@ -182,9 +182,12 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, struct nvme_dev *dev = data; struct nvme_queue *nvmeq = dev->queues[0]; - WARN_ON(nvmeq->hctx); - nvmeq->hctx = hctx; + WARN_ON(hctx_idx != 0); + WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); + WARN_ON(nvmeq->tags); + hctx->driver_data = nvmeq; + nvmeq->tags = &dev->admin_tagset.tags[0]; return 0; } @@ -201,27 +204,16 @@ static int nvme_admin_init_request(void *data, struct request *req, return 0; } -static void nvme_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) -{ - struct nvme_queue *nvmeq = hctx->driver_data; - - nvmeq->hctx = NULL; -} - static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct nvme_dev *dev = data; - struct nvme_queue *nvmeq = dev->queues[ - (hctx_idx % dev->queue_count) + 1]; - - if (!nvmeq->hctx) - nvmeq->hctx = hctx; + struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; - /* nvmeq queues are shared between namespaces. We assume here that - * blk-mq map the tags so they match up with the nvme queue tags. */ - WARN_ON(nvmeq->hctx->tags != hctx->tags); + if (!nvmeq->tags) + nvmeq->tags = &dev->tagset.tags[hctx_idx]; + WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); hctx->driver_data = nvmeq; return 0; } @@ -320,7 +312,7 @@ static void abort_completion(struct nvme_queue *nvmeq, void *ctx, u16 status = le16_to_cpup(&cqe->status) >> 1; u32 result = le32_to_cpup(&cqe->result); - blk_mq_free_hctx_request(nvmeq->hctx, req); + blk_mq_free_request(req); dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result); ++nvmeq->dev->abort_limit; @@ -333,14 +325,13 @@ static void async_completion(struct nvme_queue *nvmeq, void *ctx, cmdinfo->result = le32_to_cpup(&cqe->result); cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; queue_kthread_work(cmdinfo->worker, &cmdinfo->work); - blk_mq_free_hctx_request(nvmeq->hctx, cmdinfo->req); + blk_mq_free_request(cmdinfo->req); } static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq, unsigned int tag) { - struct blk_mq_hw_ctx *hctx = nvmeq->hctx; - struct request *req = blk_mq_tag_to_rq(hctx->tags, tag); + struct request *req = blk_mq_tag_to_rq(*nvmeq->tags, tag); return blk_mq_rq_to_pdu(req); } @@ -1068,7 +1059,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev) c.common.opcode = nvme_admin_async_event; c.common.command_id = req->tag; - blk_mq_free_hctx_request(nvmeq->hctx, req); + blk_mq_free_request(req); return __nvme_submit_cmd(nvmeq, &c); } @@ -1310,8 +1301,7 @@ static void nvme_abort_req(struct request *req) } } -static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx, - struct request *req, void *data, bool reserved) +static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved) { struct nvme_queue *nvmeq = data; void *ctx; @@ -1408,11 +1398,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) static void nvme_clear_queue(struct nvme_queue *nvmeq) { - struct blk_mq_hw_ctx *hctx = nvmeq->hctx; - spin_lock_irq(&nvmeq->q_lock); - if (hctx && hctx->tags) - blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq); + if (nvmeq->tags && *nvmeq->tags) + blk_mq_all_tag_busy_iter(*nvmeq->tags, nvme_cancel_queue_ios, nvmeq); spin_unlock_irq(&nvmeq->q_lock); } @@ -1605,7 +1593,6 @@ static struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = nvme_admin_init_hctx, - .exit_hctx = nvme_exit_hctx, .init_request = nvme_admin_init_request, .timeout = nvme_timeout, }; @@ -1614,7 +1601,6 @@ static struct blk_mq_ops nvme_mq_ops = { .queue_rq = nvme_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = nvme_init_hctx, - .exit_hctx = nvme_exit_hctx, .init_request = nvme_init_request, .timeout = nvme_timeout, }; @@ -2724,11 +2710,11 @@ static void nvme_set_irq_hints(struct nvme_dev *dev) for (i = 0; i < dev->online_queues; i++) { nvmeq = dev->queues[i]; - if (!nvmeq->hctx) + if (!nvmeq->tags || !(*nvmeq->tags)) continue; irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, - nvmeq->hctx->cpumask); + blk_mq_tags_cpumask(*nvmeq->tags)); } } -- cgit v1.2.3 From 4cc06521ee1f153e0d292413a5bff7bbbdee92d0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 5 Jun 2015 10:30:08 -0600 Subject: NVMe: add sysfs and ioctl controller reset We need the ability to perform an nvme controller reset as discussed on the mailing list thread: http://lists.infradead.org/pipermail/linux-nvme/2015-March/001585.html This adds a sysfs entry that when written to will reset perform an NVMe controller reset if the controller was successfully initialized in the first place. This also adds locking around resetting the device in the async probe method so the driver can't schedule two resets. Signed-off-by: Keith Busch Cc: Brandon Schultz Cc: David Sariel Updated by Jens to: 1) Merge this with the ioctl reset patch from David Sariel. The ioctl path now shares the reset code from the sysfs path. 2) Don't flush work if we fail issuing the reset. Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/nvme.h | 1 + 2 files changed, 54 insertions(+) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 513908ff46c4..9682e29b4171 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -80,6 +80,7 @@ static wait_queue_head_t nvme_kthread_wait; static struct class *nvme_class; static void nvme_reset_failed_dev(struct work_struct *ws); +static int nvme_reset(struct nvme_dev *dev); static int nvme_process_cq(struct nvme_queue *nvmeq); struct async_cmd_info { @@ -2689,6 +2690,9 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -ENOTTY; ns = list_first_entry(&dev->namespaces, struct nvme_ns, list); return nvme_user_cmd(dev, ns, (void __user *)arg); + case NVME_IOCTL_RESET: + dev_warn(dev->dev, "resetting controller\n"); + return nvme_reset(dev); default: return -ENOTTY; } @@ -2839,6 +2843,44 @@ static void nvme_reset_workfn(struct work_struct *work) dev->reset_workfn(work); } +static int nvme_reset(struct nvme_dev *dev) +{ + int ret = -EBUSY; + + if (!dev->admin_q || blk_queue_dying(dev->admin_q)) + return -ENODEV; + + spin_lock(&dev_list_lock); + if (!work_pending(&dev->reset_work)) { + dev->reset_workfn = nvme_reset_failed_dev; + queue_work(nvme_workq, &dev->reset_work); + ret = 0; + } + spin_unlock(&dev_list_lock); + + if (!ret) { + flush_work(&dev->reset_work); + return 0; + } + + return ret; +} + +static ssize_t nvme_sysfs_reset(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct nvme_dev *ndev = dev_get_drvdata(dev); + int ret; + + ret = nvme_reset(ndev); + if (ret < 0) + return ret; + + return count; +} +static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); + static void nvme_async_probe(struct work_struct *work); static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -2883,12 +2925,20 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto release_pools; } get_device(dev->device); + dev_set_drvdata(dev->device, dev); + + result = device_create_file(dev->device, &dev_attr_reset_controller); + if (result) + goto put_dev; INIT_LIST_HEAD(&dev->node); INIT_WORK(&dev->probe_work, nvme_async_probe); schedule_work(&dev->probe_work); return 0; + put_dev: + device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance)); + put_device(dev->device); release_pools: nvme_release_prp_pools(dev); release: @@ -2919,10 +2969,12 @@ static void nvme_async_probe(struct work_struct *work) nvme_set_irq_hints(dev); return; reset: + spin_lock(&dev_list_lock); if (!work_busy(&dev->reset_work)) { dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); } + spin_unlock(&dev_list_lock); } static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) @@ -2952,6 +3004,7 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); flush_work(&dev->probe_work); flush_work(&dev->reset_work); + device_remove_file(dev->device, &dev_attr_reset_controller); nvme_dev_shutdown(dev); nvme_dev_remove(dev); nvme_dev_remove_admin(dev); diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index aef9a81b2d75..b660dc2fadfb 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -579,5 +579,6 @@ struct nvme_passthru_cmd { #define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) #define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) +#define NVME_IOCTL_RESET _IO('N', 0x44) #endif /* _UAPI_LINUX_NVME_H */ -- cgit v1.2.3 From 36a7e993eedb2c3f11de3b686b351f75e1edbbb5 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Wed, 27 May 2015 12:26:23 -0600 Subject: NVMe: Memory barrier before queue_count is incremented Protects against reordering and/or preempting which would allow the kthread to access the queue descriptor before it is set up Signed-off-by: Jon Derrick Acked-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 9682e29b4171..cae7cac6cc43 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1453,9 +1453,12 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; nvmeq->qid = qid; - dev->queue_count++; dev->queues[qid] = nvmeq; + /* make sure queue descriptor is set before queue count, for kthread */ + mb(); + dev->queue_count++; + return nvmeq; free_cqdma: -- cgit v1.2.3 From a5768aa887fb636f0cc4c83a2f1242506aaf50f6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 1 Jun 2015 14:28:14 -0600 Subject: NVMe: Automatic namespace rescan Namespaces may be dynamically allocated and deleted or attached and detached. This has the driver rescan the device for namespace changes after each device reset or namespace change asynchronous event. There could potentially be many detached namespaces that we don't want polluting /dev/ with unusable block handles, so this will delete disks if the namespace is not active as indicated by the response from identify namespace. This also skips adding the disk if no capacity is provisioned to the namespace in the first place. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 159 ++++++++++++++++++++++++++++++++++++---------- include/linux/nvme.h | 1 + include/uapi/linux/nvme.h | 4 ++ 3 files changed, 132 insertions(+), 32 deletions(-) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index cae7cac6cc43..2072ae81c13a 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -300,9 +301,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx, if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) ++nvmeq->dev->event_limit; - if (status == NVME_SC_SUCCESS) - dev_warn(nvmeq->q_dmadev, - "async event result %08x\n", result); + if (status != NVME_SC_SUCCESS) + return; + + switch (result & 0xff07) { + case NVME_AER_NOTICE_NS_CHANGED: + dev_info(nvmeq->q_dmadev, "rescanning\n"); + schedule_work(&nvmeq->dev->scan_work); + default: + dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result); + } } static void abort_completion(struct nvme_queue *nvmeq, void *ctx, @@ -1923,8 +1931,13 @@ static int nvme_revalidate_disk(struct gendisk *disk) unsigned short bs; if (nvme_identify_ns(dev, ns->ns_id, &id)) { - dev_warn(dev->dev, "%s: Identify failure\n", __func__); - return 0; + dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__, + dev->instance, ns->ns_id); + return -ENODEV; + } + if (id->ncap == 0) { + kfree(id); + return -ENODEV; } old_ms = ns->ms; @@ -1958,7 +1971,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) !ns->ext) nvme_init_integrity(ns); - if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk))) + if (ns->ms && !blk_get_integrity(disk)) set_capacity(disk, 0); else set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); @@ -2073,11 +2086,16 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) * requires it. */ set_capacity(disk, 0); - nvme_revalidate_disk(ns->disk); + if (nvme_revalidate_disk(ns->disk)) + goto out_free_disk; + add_disk(ns->disk); if (ns->ms) revalidate_disk(ns->disk); return; + out_free_disk: + kfree(disk); + list_del(&ns->list); out_free_queue: blk_cleanup_queue(ns->queue); out_free_ns: @@ -2194,6 +2212,99 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return result; } +static void nvme_free_namespace(struct nvme_ns *ns) +{ + list_del(&ns->list); + + spin_lock(&dev_list_lock); + ns->disk->private_data = NULL; + spin_unlock(&dev_list_lock); + + put_disk(ns->disk); + kfree(ns); +} + +static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); + struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); + + return nsa->ns_id - nsb->ns_id; +} + +static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid) +{ + struct nvme_ns *ns; + + list_for_each_entry(ns, &dev->namespaces, list) { + if (ns->ns_id == nsid) + return ns; + if (ns->ns_id > nsid) + break; + } + return NULL; +} + +static inline bool nvme_io_incapable(struct nvme_dev *dev) +{ + return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS || + dev->online_queues < 2); +} + +static void nvme_ns_remove(struct nvme_ns *ns) +{ + bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue); + + if (kill) + blk_set_queue_dying(ns->queue); + if (ns->disk->flags & GENHD_FL_UP) { + if (blk_get_integrity(ns->disk)) + blk_integrity_unregister(ns->disk); + del_gendisk(ns->disk); + } + if (kill || !blk_queue_dying(ns->queue)) { + blk_mq_abort_requeue_list(ns->queue); + blk_cleanup_queue(ns->queue); + } +} + +static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) +{ + struct nvme_ns *ns, *next; + unsigned i; + + for (i = 1; i <= nn; i++) { + ns = nvme_find_ns(dev, i); + if (ns) { + if (revalidate_disk(ns->disk)) { + nvme_ns_remove(ns); + nvme_free_namespace(ns); + } + } else + nvme_alloc_ns(dev, i); + } + list_for_each_entry_safe(ns, next, &dev->namespaces, list) { + if (ns->ns_id > nn) { + nvme_ns_remove(ns); + nvme_free_namespace(ns); + } + } + list_sort(NULL, &dev->namespaces, ns_cmp); +} + +static void nvme_dev_scan(struct work_struct *work) +{ + struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work); + struct nvme_id_ctrl *ctrl; + + if (!dev->tagset.tags) + return; + if (nvme_identify_ctrl(dev, &ctrl)) + return; + nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn)); + kfree(ctrl); +} + /* * Return: error value if an error occurred setting up the queues or calling * Identify Device. 0 if these succeeded, even if adding some of the @@ -2204,7 +2315,7 @@ static int nvme_dev_add(struct nvme_dev *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); int res; - unsigned nn, i; + unsigned nn; struct nvme_id_ctrl *ctrl; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; @@ -2250,9 +2361,7 @@ static int nvme_dev_add(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->tagset)) return 0; - for (i = 1; i <= nn; i++) - nvme_alloc_ns(dev, i); - + schedule_work(&dev->scan_work); return 0; } @@ -2552,17 +2661,8 @@ static void nvme_dev_remove(struct nvme_dev *dev) { struct nvme_ns *ns; - list_for_each_entry(ns, &dev->namespaces, list) { - if (ns->disk->flags & GENHD_FL_UP) { - if (blk_get_integrity(ns->disk)) - blk_integrity_unregister(ns->disk); - del_gendisk(ns->disk); - } - if (!blk_queue_dying(ns->queue)) { - blk_mq_abort_requeue_list(ns->queue); - blk_cleanup_queue(ns->queue); - } - } + list_for_each_entry(ns, &dev->namespaces, list) + nvme_ns_remove(ns); } static int nvme_setup_prp_pools(struct nvme_dev *dev) @@ -2621,16 +2721,8 @@ static void nvme_free_namespaces(struct nvme_dev *dev) { struct nvme_ns *ns, *next; - list_for_each_entry_safe(ns, next, &dev->namespaces, list) { - list_del(&ns->list); - - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); - - put_disk(ns->disk); - kfree(ns); - } + list_for_each_entry_safe(ns, next, &dev->namespaces, list) + nvme_free_namespace(ns); } static void nvme_free_dev(struct kref *kref) @@ -2814,6 +2906,7 @@ static int nvme_dev_resume(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } else { nvme_unfreeze_queues(dev); + schedule_work(&dev->scan_work); nvme_set_irq_hints(dev); } return 0; @@ -2935,6 +3028,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto put_dev; INIT_LIST_HEAD(&dev->node); + INIT_WORK(&dev->scan_work, nvme_dev_scan); INIT_WORK(&dev->probe_work, nvme_async_probe); schedule_work(&dev->probe_work); return 0; @@ -3007,6 +3101,7 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); flush_work(&dev->probe_work); flush_work(&dev->reset_work); + flush_work(&dev->scan_work); device_remove_file(dev->device, &dev_attr_reset_controller); nvme_dev_shutdown(dev); nvme_dev_remove(dev); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 986bf8ad8e93..c0d94ed8ce9a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -92,6 +92,7 @@ struct nvme_dev { work_func_t reset_workfn; struct work_struct reset_work; struct work_struct probe_work; + struct work_struct scan_work; char name[12]; char serial[20]; char model[40]; diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index b660dc2fadfb..732b32e92b02 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -179,6 +179,10 @@ enum { NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, }; +enum { + NVME_AER_NOTICE_NS_CHANGED = 0x0002, +}; + struct nvme_lba_range_type { __u8 type; __u8 attributes; -- cgit v1.2.3 From e112af0dc9f55099b948e55077504a44b4162c79 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Fri, 5 Jun 2015 14:54:24 +0200 Subject: nvme: don't overwrite req->cmd_flags on sync cmd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In __nvme_submit_sync_cmd, the request direction is overwritten when the REQ_FAILFAST_DRIVER flag is set. Signed-off-by: Matias Bjørling Reviewed-by: Christoph Hellwig Fixes: 75619bfa904d0 ("NVMe: End sync requests immediately on failure") Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 2072ae81c13a..12d5b7b03f9b 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1010,7 +1010,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, return PTR_ERR(req); req->cmd_type = REQ_TYPE_DRV_PRIV; - req->cmd_flags = REQ_FAILFAST_DRIVER; + req->cmd_flags |= REQ_FAILFAST_DRIVER; req->__data_len = 0; req->__sector = (sector_t) -1; req->bio = req->biotail = NULL; -- cgit v1.2.3 From 71feb364e7faadc681e714f7fdc2bede208ba26c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 19 Jun 2015 11:07:30 -0600 Subject: NVMe: Fix IO for extended metadata formats This fixes io submit ioctl handling when using extended metadata formats. When these formats are used, the user provides a single virtually contiguous buffer containing both the block and metadata interleaved, so the metadata size needs to be added to the total length and not mapped as a separate transfer. The command is also driver generated, so this patch does not enforce blk-integrity extensions provide the metadata buffer. Reported-by: Marcin Dziegielewski Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/block/nvme-core.c') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 12d5b7b03f9b..a501d3e6ac92 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -852,7 +852,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, * stripped/generated by the controller with PRACT=1. */ if (ns && ns->ms && !blk_integrity_rq(req)) { - if (!(ns->pi_type && ns->ms == 8)) { + if (!(ns->pi_type && ns->ms == 8) && + req->cmd_type != REQ_TYPE_DRV_PRIV) { req->errors = -EFAULT; blk_mq_complete_request(req); return BLK_MQ_RQ_QUEUE_OK; @@ -1747,15 +1748,14 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) meta_len = (io.nblocks + 1) * ns->ms; write = io.opcode & 1; + if (ns->ext) { + length += meta_len; + meta_len = 0; + } if (meta_len) { if (((io.metadata & 3) || !io.metadata) && !ns->ext) return -EINVAL; - if (ns->ext) { - length += meta_len; - meta_len = 0; - } - meta = dma_alloc_coherent(dev->dev, meta_len, &meta_dma, GFP_KERNEL); if (!meta) { -- cgit v1.2.3