From 978628ec795b18179e74ef12cfcd35bae569d13e Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 17 May 2018 13:52:50 +0200 Subject: nvme: fix lockdep warning in nvme_mpath_clear_current_path When running blktests' nvme/005 with a lockdep-enabled kernel the test case fails due to the following lockdep splat in dmesg: ============================= WARNING: suspicious RCU usage 4.17.0-rc5 #881 Not tainted ----------------------------- drivers/nvme/host/nvme.h:457 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by kworker/u32:5/1102: #0: (ptrval) ((wq_completion)"nvme-wq"){+.+.}, at: process_one_work+0x152/0x5c0 #1: (ptrval) ((work_completion)(&ctrl->scan_work)){+.+.}, at: process_one_work+0x152/0x5c0 #2: (ptrval) (&subsys->lock#2){+.+.}, at: nvme_ns_remove+0x43/0x1c0 [nvme_core] The only caller of nvme_mpath_clear_current_path() is nvme_ns_remove(), which holds the subsys lock, so this is likely a false positive; but by using rcu_access_pointer() we tell RCU and lockdep that we are only after the pointer value. Fixes: 32acab3181c7 ("nvme: implement multipath access to nvme subsystems") Signed-off-by: Johannes Thumshirn Suggested-by: Paul E. McKenney Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 35d9915829e2..ec6e4acc4d48 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -22,6 +22,7 @@ #include #include #include +#include extern unsigned int nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -449,7 +450,7 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) { struct nvme_ns_head *head = ns->head; - if (head && ns == srcu_dereference(head->current_path, &head->srcu)) + if (head && ns == rcu_access_pointer(head->current_path)) rcu_assign_pointer(head->current_path, NULL); } struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); -- cgit v1.2.3 From e9a9853c23c13a37546397b61b270999fd0fb759 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Wed, 23 May 2018 17:56:11 +0300 Subject: nvme: host: core: fix precedence of ternary operator The ternary operator has lower precedence than bitwise OR, so 'cdw10' was calculated incorrectly. Signed-off-by: Ivan Bornyakov Reviewed-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5e1d6d69b307..dc8aa2c1c22a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1578,7 +1578,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, enum pr_type type, bool abort) { - u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; + u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); } @@ -1590,7 +1590,7 @@ static int nvme_pr_clear(struct block_device *bdev, u64 key) static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { - u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; + u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 
1 << 3 : 0); return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } -- cgit v1.2.3 From a8e3e0bb749c0893a2cfd24652a77f45eea338d3 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Thu, 24 May 2018 17:51:33 +0800 Subject: nvme-pci: set nvmeq->cq_vector after alloc cq/sq Set cq_vector after allocating the CQ/SQ; otherwise nvme_suspend_queue will invoke free_irq for it and cause a 'Trying to free already-free IRQ xxx' warning if the create CQ/SQ command times out. Signed-off-by: Jianchao Wang Reviewed-by: Keith Busch [hch: fixed to pass a s16 and clean up the comment] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 917e1714f7d9..41880cb9e502 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1076,7 +1076,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) } static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, - struct nvme_queue *nvmeq) + struct nvme_queue *nvmeq, s16 vector) { struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; @@ -1091,7 +1091,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cqid = cpu_to_le16(qid); c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_cq.cq_flags = cpu_to_le16(flags); - c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); + c.create_cq.irq_vector = cpu_to_le16(vector); return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); } @@ -1466,6 +1466,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) { struct nvme_dev *dev = nvmeq->dev; int result; + s16 vector; if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth), @@ -1478,15 +1479,21 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) * A queue's vector matches the queue identifier unless the controller * has only one vector available. */ - nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid; - result = adapter_alloc_cq(dev, qid, nvmeq); + vector = dev->num_vecs == 1 ? 0 : qid; + result = adapter_alloc_cq(dev, qid, nvmeq, vector); if (result < 0) - goto release_vector; + goto out; result = adapter_alloc_sq(dev, qid, nvmeq); if (result < 0) goto release_cq; + /* + * Set cq_vector after alloc cq/sq, otherwise nvme_suspend_queue will + * invoke free_irq for it and cause a 'Trying to free already-free IRQ + * xxx' warning if the create CQ/SQ command times out. + */ + nvmeq->cq_vector = vector; nvme_init_queue(nvmeq, qid); result = queue_request_irq(nvmeq); if (result < 0) @@ -1494,13 +1501,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) return result; - release_sq: +release_sq: + nvmeq->cq_vector = -1; dev->online_queues--; adapter_delete_sq(dev, qid); - release_cq: +release_cq: adapter_delete_cq(dev, qid); - release_vector: - nvmeq->cq_vector = -1; +out: return result; } -- cgit v1.2.3 From 72cd4cc28e234ed7189ee508ed65ab60c80a97c8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 24 May 2018 16:16:04 -0600 Subject: nvme-pci: Fix AER reset handling The nvme timeout handling doesn't do anything if the pci channel is offline, which is the case when recovering from a PCI error event, so it was a bad idea to sync the controller reset in this state. This patch flushes the reset work in the error_resume callback instead, once the channel is back online. 
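(For reference, the AER recovery sequence invokes the driver's error handler callbacks in a fixed order, roughly

	->error_detected()  =>  ->slot_reset()  =>  ->resume()

a paraphrase of the PCI error recovery documentation, not a quote. Scheduling the reset from slot_reset and flushing it from the resume callback keeps the whole recovery inside that sequence.)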
This keeps AER handling serialized and can recover from timeouts. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199757 Fixes: cc1d5e749a2e ("nvme/pci: Sync controller reset for AER slot_reset") Reported-by: Alex Gagniuc Tested-by: Alex Gagniuc Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 41880cb9e502..79b48052ea11 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2706,19 +2706,15 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - nvme_reset_ctrl_sync(&dev->ctrl); - - switch (dev->ctrl.state) { - case NVME_CTRL_LIVE: - case NVME_CTRL_ADMIN_ONLY: - return PCI_ERS_RESULT_RECOVERED; - default: - return PCI_ERS_RESULT_DISCONNECT; - } + nvme_reset_ctrl(&dev->ctrl); + return PCI_ERS_RESULT_RECOVERED; } static void nvme_error_resume(struct pci_dev *pdev) { + struct nvme_dev *dev = pci_get_drvdata(pdev); + + flush_work(&dev->ctrl.reset_work); pci_cleanup_aer_uncorrect_error_status(pdev); } -- cgit v1.2.3 From 2e050f00a0f0e07467050cb4afae0234941e5bf3 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Thu, 24 May 2018 09:27:38 +0800 Subject: nvme-rdma: stop admin queue before freeing it For any failure after nvme_rdma_start_queue in nvme_rdma_configure_admin_queue, the admin queue will be freed with the NVME_RDMA_Q_LIVE flag still set. Once nvme_rdma_stop_queue is invoked, that will cause a use-after-free. BUG: KASAN: use-after-free in rdma_disconnect+0x1f/0xe0 [rdma_cm] To fix it, call nvme_rdma_stop_queue for all the failed cases after nvme_rdma_start_queue. Signed-off-by: Jianchao Wang Suggested-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1eb4438a8763..2181299ce8f5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -778,7 +778,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); - goto out_cleanup_queue; + goto out_stop_queue; } ctrl->ctrl.sqsize = @@ -786,23 +786,25 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (error) - goto out_cleanup_queue; + goto out_stop_queue; ctrl->ctrl.max_hw_sectors = (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); error = nvme_init_identify(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_stop_queue; error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); if (error) - goto out_cleanup_queue; + goto out_stop_queue; return 0; +out_stop_queue: + nvme_rdma_stop_queue(&ctrl->queues[0]); out_cleanup_queue: if (new) blk_cleanup_queue(ctrl->ctrl.admin_q); -- cgit v1.2.3 From 90fcaf5d54c57037e9f879f17b58497db7156c3e Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 11 May 2018 17:50:24 -0700 Subject: nvme-fc: remove setting DNR on exception conditions Current code will set DNR if the controller is deleting or there is an error during controller init. None of this is necessary. 
Remove the code that sets DNR. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 8b66879b4ebf..ac35a80f5532 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1686,16 +1686,6 @@ done: goto check_error; } - /* - * Force failures of commands if we're killing the controller - * or have an error on a command used to create an new association - */ - if (status && - (blk_queue_dying(rq->q) || - ctrl->ctrl.state == NVME_CTRL_NEW || - ctrl->ctrl.state == NVME_CTRL_CONNECTING)) - status |= cpu_to_le16(NVME_SC_DNR << 1); - __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); nvme_end_request(rq, status, result); -- cgit v1.2.3 From ffecb0b452d5cbdd2ac3614bdf6cd39ba47b0cb8 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 11 May 2018 17:50:22 -0700 Subject: nvme-fabrics: remove unnecessary controller subnqn validation After creating the nvme controller, nvmf_create_ctrl() validates the newly created subsysnqn vs the one specified by the options. In general, this is an unnecessary check as the Connect message should implicitly ensure this value matches. With the change to the FC transport to do an asynchronous connect for the first association create, the transport will return to nvmf_create_ctrl() before that first association has been established, thus the subnqn will not yet be set. Remove the unnecessary validation. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 7ae732a77fe8..6fdf499d16b4 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -983,16 +983,6 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) goto out_module_put; } - if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) { - dev_warn(ctrl->device, - "controller returned incorrect NQN: \"%s\".\n", - ctrl->subsys->subnqn); - module_put(ops->module); - up_read(&nvmf_transports_rwsem); - nvme_delete_ctrl_sync(ctrl); - return ERR_PTR(-EINVAL); - } - module_put(ops->module); up_read(&nvmf_transports_rwsem); return ctrl; -- cgit v1.2.3 From 461fbc8f0ed1ac18ce6b7bf7ab0b7444f7c7ff57 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 May 2018 16:18:15 +0200 Subject: nvme-fabrics: centralize discovery controller defaults When connecting to the discovery controller we have certain defaults to observe, so centralize them to avoid inconsistencies due to argument ordering. 
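For illustration, with the defaults applied inside the NVMF_OPT_NQN case the result depended on where the discovery NQN appeared in the option string (hypothetical option strings, not taken from a real session):

	nqn=nqn.2014-08.org.nvmexpress.discovery,nr_io_queues=4
		=> defaults applied first, then overridden: nr_io_queues == 4
	nr_io_queues=4,nqn=nqn.2014-08.org.nvmexpress.discovery
		=> defaults applied last: nr_io_queues == 0

Applying the defaults once, after the option parsing loop, makes the result independent of the ordering.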
Signed-off-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 6fdf499d16b4..5ad6104577f0 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -689,10 +689,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->discovery_nqn = !(strcmp(opts->subsysnqn, NVME_DISC_SUBSYS_NAME)); - if (opts->discovery_nqn) { - opts->kato = 0; - opts->nr_io_queues = 0; - } break; case NVMF_OPT_TRADDR: p = match_strdup(args); @@ -851,6 +847,10 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } } + if (opts->discovery_nqn) { + opts->kato = 0; + opts->nr_io_queues = 0; + } if (ctrl_loss_tmo < 0) opts->max_reconnects = -1; else -- cgit v1.2.3 From 181303d03525ea52d2d002fb8ee04e769aaa4ce4 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 May 2018 16:18:17 +0200 Subject: nvme-fabrics: allow duplicate connections to the discovery controller The whole point of the discovery controller is that it can accept multiple connections. Additionally, the cmic field is not even defined for the discovery controller identify page. Signed-off-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 ++- drivers/nvme/host/fabrics.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dc8aa2c1c22a..fd206a6adad5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2184,7 +2184,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) * Verify that the subsystem actually supports multiple * controllers, else bail out. */ - if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { + if (!ctrl->opts->discovery_nqn && + nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { dev_err(ctrl->device, "ignoring ctrl due to duplicate subnqn (%s).\n", found->subnqn); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5ad6104577f0..8cb3d73e957d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -850,6 +850,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, if (opts->discovery_nqn) { opts->kato = 0; opts->nr_io_queues = 0; + opts->duplicate_connect = true; } if (ctrl_loss_tmo < 0) opts->max_reconnects = -1; -- cgit v1.2.3 From b40b83e365d6ccc2d900e7f481bab91146a68407 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 8 May 2018 02:02:33 -0400 Subject: nvmet: make a few error messages more generic "nvmet_check_ctrl_status()" is called from admin-cmd.c as well as from io-cmd.c, so make the error messages more generic. 
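With this change a command that arrives before CC.EN is set is logged along the lines of

	nvmet: got cmd 6 while CC.EN == 0 on qid = 0

(illustrative values only; the opcode and qid are taken from the offending command) instead of being misreported as an "io cmd" even when it was received on the admin queue.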
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index e95424f172fd..4a9908bb7de0 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -710,13 +710,13 @@ out: u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) { if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { - pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n", + pr_err("got cmd %d while CC.EN == 0 on qid = %d\n", cmd->common.opcode, req->sq->qid); return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; } if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n", + pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n", cmd->common.opcode, req->sq->qid); req->ns = NULL; return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; -- cgit v1.2.3 From 618cff4285dc0ef6ebb99f715116e7af62565293 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 10 May 2018 02:46:30 -0400 Subject: nvmet: remove duplicate NULL initialization for req->ns Remove the duplicate NULL initialization for req->ns. req->ns is always initialized to NULL in nvmet_req_init(), so there is no need to reset it later on failures unless we have previously assigned a value to it. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 -- drivers/nvme/target/core.c | 1 - drivers/nvme/target/discovery.c | 2 -- drivers/nvme/target/fabrics-cmd.c | 4 ---- drivers/nvme/target/io-cmd.c | 4 +--- 5 files changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 5e0e9fcc0d4d..d1afcfd89aa3 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -548,8 +548,6 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) struct nvme_command *cmd = req->cmd; u16 ret; - req->ns = NULL; - ret = nvmet_check_ctrl_status(req, cmd); if (unlikely(ret)) return ret; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 4a9908bb7de0..6d8eaf3f89c5 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -718,7 +718,6 @@ u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n", cmd->common.opcode, req->sq->qid); - req->ns = NULL; return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; } return 0; diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 231e04e0a496..08656b849bd6 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -187,8 +187,6 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - req->ns = NULL; - if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { pr_err("got cmd %d while not ready\n", cmd->common.opcode); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 19e9e42ae943..d84ae004cb85 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -77,8 +77,6 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - req->ns = NULL; - switch (cmd->fabrics.fctype) { case nvme_fabrics_type_property_set: req->data_len = 0; @@ -242,8 +240,6 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; 
- req->ns = NULL; - if (cmd->common.opcode != nvme_fabrics_command) { pr_err("invalid command 0x%x on unconnected queue.\n", cmd->fabrics.opcode); diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index cd2344179673..e5eb2db4d20f 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -195,10 +195,8 @@ u16 nvmet_parse_io_cmd(struct nvmet_req *req) u16 ret; ret = nvmet_check_ctrl_status(req, cmd); - if (unlikely(ret)) { - req->ns = NULL; + if (unlikely(ret)) return ret; - } req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); if (unlikely(!req->ns)) -- cgit v1.2.3 From d5eff33ee6f80864d889d64fd3f6ea7b78dd1b24 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 23 May 2018 00:34:39 -0400 Subject: nvmet: add simple file backed ns support This patch adds simple file backed namespace support for the NVMeOF target. The new file io-cmd-file.c is responsible for handling the code for I/O commands when the ns is file backed. Also, we introduce a mempool-based slow path using sync I/O for file backed namespaces to ensure forward progress under reclaim. The old block device based implementation is moved to io-cmd-bdev.c and uses a "nvmet_bdev_" symbol prefix. The enable/disable calls are also moved into the respective files. Signed-off-by: Chaitanya Kulkarni [hch: updated changelog, fixed double req->ns lookup in bdev case] Signed-off-by: Christoph Hellwig --- drivers/nvme/target/Makefile | 4 +- drivers/nvme/target/admin-cmd.c | 8 + drivers/nvme/target/core.c | 50 ++++--- drivers/nvme/target/io-cmd-bdev.c | 241 ++++++++++++++++++++++++++++++ drivers/nvme/target/io-cmd-file.c | 300 ++++++++++++++++++++++++++++++++++++++ drivers/nvme/target/io-cmd.c | 228 ----------------------------- drivers/nvme/target/nvmet.h | 28 +++- 7 files changed, 610 insertions(+), 249 deletions(-) create mode 100644 drivers/nvme/target/io-cmd-bdev.c create mode 100644 drivers/nvme/target/io-cmd-file.c delete mode 100644 drivers/nvme/target/io-cmd.c diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile index 488250189c99..8118c93391c6 100644 --- a/drivers/nvme/target/Makefile +++ b/drivers/nvme/target/Makefile @@ -6,8 +6,8 @@ obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o obj-$(CONFIG_NVME_TARGET_FC) += nvmet-fc.o obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o -nvmet-y += core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \ - discovery.o +nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \ + discovery.o io-cmd-file.o io-cmd-bdev.o nvme-loop-y += loop.o nvmet-rdma-y += rdma.o nvmet-fc-y += fc.o diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 5e0e9fcc0d4d..b2ba95b2eef7 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -45,6 +45,10 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, return NVME_SC_INVALID_NS; } + /* we don't have the right data for file backed ns */ + if (!ns->bdev) + goto out; + host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]); data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]); host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]); @@ -54,6 +58,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, put_unaligned_le64(data_units_read, &slog->data_units_read[0]); put_unaligned_le64(host_writes, &slog->host_writes[0]); put_unaligned_le64(data_units_written, &slog->data_units_written[0]); +out: nvmet_put_namespace(ns); return NVME_SC_SUCCESS; @@ -71,6 +76,9 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req, 
rcu_read_lock(); list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) { + /* we don't have the right data for file backed ns */ + if (!ns->bdev) + continue; host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]); data_units_read += part_stat_read(ns->bdev->bd_part, sectors[READ]); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6d8eaf3f89c5..800aaf96ddcd 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -271,6 +271,12 @@ void nvmet_put_namespace(struct nvmet_ns *ns) percpu_ref_put(&ns->ref); } +static void nvmet_ns_dev_disable(struct nvmet_ns *ns) +{ + nvmet_bdev_ns_disable(ns); + nvmet_file_ns_disable(ns); +} + int nvmet_ns_enable(struct nvmet_ns *ns) { struct nvmet_subsys *subsys = ns->subsys; @@ -281,23 +287,16 @@ int nvmet_ns_enable(struct nvmet_ns *ns) if (ns->enabled) goto out_unlock; - ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, - NULL); - if (IS_ERR(ns->bdev)) { - pr_err("failed to open block device %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->bdev)); - ret = PTR_ERR(ns->bdev); - ns->bdev = NULL; + ret = nvmet_bdev_ns_enable(ns); + if (ret) + ret = nvmet_file_ns_enable(ns); + if (ret) goto out_unlock; - } - - ns->size = i_size_read(ns->bdev->bd_inode); - ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL); if (ret) - goto out_blkdev_put; + goto out_dev_put; if (ns->nsid > subsys->max_nsid) subsys->max_nsid = ns->nsid; @@ -328,9 +327,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns) out_unlock: mutex_unlock(&subsys->lock); return ret; -out_blkdev_put: - blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ); - ns->bdev = NULL; +out_dev_put: + nvmet_ns_dev_disable(ns); goto out_unlock; } @@ -366,8 +364,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0); - if (ns->bdev) - blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ); + nvmet_ns_dev_disable(ns); out_unlock: mutex_unlock(&subsys->lock); } @@ -499,6 +496,25 @@ int nvmet_sq_init(struct nvmet_sq *sq) } EXPORT_SYMBOL_GPL(nvmet_sq_init); +static u16 nvmet_parse_io_cmd(struct nvmet_req *req) +{ + struct nvme_command *cmd = req->cmd; + u16 ret; + + ret = nvmet_check_ctrl_status(req, cmd); + if (unlikely(ret)) + return ret; + + req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); + if (unlikely(!req->ns)) + return NVME_SC_INVALID_NS | NVME_SC_DNR; + + if (req->ns->file) + return nvmet_file_parse_io_cmd(req); + else + return nvmet_bdev_parse_io_cmd(req); +} + bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops) { diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c new file mode 100644 index 000000000000..e0b0f7df70c2 --- /dev/null +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -0,0 +1,241 @@ +/* + * NVMe I/O command implementation. + * Copyright (c) 2015-2016 HGST, a Western Digital Company. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include "nvmet.h" + +int nvmet_bdev_ns_enable(struct nvmet_ns *ns) +{ + int ret; + + ns->bdev = blkdev_get_by_path(ns->device_path, + FMODE_READ | FMODE_WRITE, NULL); + if (IS_ERR(ns->bdev)) { + ret = PTR_ERR(ns->bdev); + if (ret != -ENOTBLK) { + pr_err("failed to open block device %s: (%ld)\n", + ns->device_path, PTR_ERR(ns->bdev)); + } + ns->bdev = NULL; + return ret; + } + ns->size = i_size_read(ns->bdev->bd_inode); + ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); + return 0; +} + +void nvmet_bdev_ns_disable(struct nvmet_ns *ns) +{ + if (ns->bdev) { + blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ); + ns->bdev = NULL; + } +} + +static void nvmet_bio_done(struct bio *bio) +{ + struct nvmet_req *req = bio->bi_private; + + nvmet_req_complete(req, + bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); + + if (bio != &req->b.inline_bio) + bio_put(bio); +} + +static void nvmet_bdev_execute_rw(struct nvmet_req *req) +{ + int sg_cnt = req->sg_cnt; + struct bio *bio = &req->b.inline_bio; + struct scatterlist *sg; + sector_t sector; + blk_qc_t cookie; + int op, op_flags = 0, i; + + if (!req->sg_cnt) { + nvmet_req_complete(req, 0); + return; + } + + if (req->cmd->rw.opcode == nvme_cmd_write) { + op = REQ_OP_WRITE; + op_flags = REQ_SYNC | REQ_IDLE; + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) + op_flags |= REQ_FUA; + } else { + op = REQ_OP_READ; + } + + sector = le64_to_cpu(req->cmd->rw.slba); + sector <<= (req->ns->blksize_shift - 9); + + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + bio_set_dev(bio, req->ns->bdev); + bio->bi_iter.bi_sector = sector; + bio->bi_private = req; + bio->bi_end_io = nvmet_bio_done; + bio_set_op_attrs(bio, op, op_flags); + + for_each_sg(req->sg, sg, req->sg_cnt, i) { + while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) + != sg->length) { + struct bio *prev = bio; + + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + bio_set_dev(bio, req->ns->bdev); + bio->bi_iter.bi_sector = sector; + bio_set_op_attrs(bio, op, op_flags); + + bio_chain(bio, prev); + submit_bio(prev); + } + + sector += sg->length >> 9; + sg_cnt--; + } + + cookie = submit_bio(bio); + + blk_poll(bdev_get_queue(req->ns->bdev), cookie); +} + +static void nvmet_bdev_execute_flush(struct nvmet_req *req) +{ + struct bio *bio = &req->b.inline_bio; + + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + bio_set_dev(bio, req->ns->bdev); + bio->bi_private = req; + bio->bi_end_io = nvmet_bio_done; + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + + submit_bio(bio); +} + +static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns, + struct nvme_dsm_range *range, struct bio **bio) +{ + int ret; + + ret = __blkdev_issue_discard(ns->bdev, + le64_to_cpu(range->slba) << (ns->blksize_shift - 9), + le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), + GFP_KERNEL, 0, bio); + if (ret && ret != -EOPNOTSUPP) + return NVME_SC_INTERNAL | NVME_SC_DNR; + return 0; +} + +static void nvmet_bdev_execute_discard(struct nvmet_req *req) +{ + struct nvme_dsm_range range; + struct bio *bio = NULL; + int i; + u16 status; + + for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) { + status = nvmet_copy_from_sgl(req, i * sizeof(range), &range, + sizeof(range)); + if (status) + break; + + status = nvmet_bdev_discard_range(req->ns, &range, &bio); + if (status) + break; + } + + if (bio) { + bio->bi_private = req; + bio->bi_end_io = nvmet_bio_done; + if (status) { + bio->bi_status = BLK_STS_IOERR; + 
bio_endio(bio); + } else { + submit_bio(bio); + } + } else { + nvmet_req_complete(req, status); + } +} + +static void nvmet_bdev_execute_dsm(struct nvmet_req *req) +{ + switch (le32_to_cpu(req->cmd->dsm.attributes)) { + case NVME_DSMGMT_AD: + nvmet_bdev_execute_discard(req); + return; + case NVME_DSMGMT_IDR: + case NVME_DSMGMT_IDW: + default: + /* Not supported yet */ + nvmet_req_complete(req, 0); + return; + } +} + +static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) +{ + struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; + struct bio *bio = NULL; + u16 status = NVME_SC_SUCCESS; + sector_t sector; + sector_t nr_sector; + + sector = le64_to_cpu(write_zeroes->slba) << + (req->ns->blksize_shift - 9); + nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) << + (req->ns->blksize_shift - 9)); + + if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, + GFP_KERNEL, &bio, 0)) + status = NVME_SC_INTERNAL | NVME_SC_DNR; + + if (bio) { + bio->bi_private = req; + bio->bi_end_io = nvmet_bio_done; + submit_bio(bio); + } else { + nvmet_req_complete(req, status); + } +} + +u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) +{ + struct nvme_command *cmd = req->cmd; + + switch (cmd->common.opcode) { + case nvme_cmd_read: + case nvme_cmd_write: + req->execute = nvmet_bdev_execute_rw; + req->data_len = nvmet_rw_len(req); + return 0; + case nvme_cmd_flush: + req->execute = nvmet_bdev_execute_flush; + req->data_len = 0; + return 0; + case nvme_cmd_dsm: + req->execute = nvmet_bdev_execute_dsm; + req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * + sizeof(struct nvme_dsm_range); + return 0; + case nvme_cmd_write_zeroes: + req->execute = nvmet_bdev_execute_write_zeroes; + return 0; + default: + pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, + req->sq->qid); + return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + } +} diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c new file mode 100644 index 000000000000..ca1ccf867856 --- /dev/null +++ b/drivers/nvme/target/io-cmd-file.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVMe Over Fabrics Target File I/O commands implementation. + * Copyright (c) 2017-2018 Western Digital Corporation or its + * affiliates. 
+ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include "nvmet.h" + +#define NVMET_MAX_MPOOL_BVEC 16 +#define NVMET_MIN_MPOOL_OBJ 16 + +void nvmet_file_ns_disable(struct nvmet_ns *ns) +{ + if (ns->file) { + mempool_destroy(ns->bvec_pool); + ns->bvec_pool = NULL; + kmem_cache_destroy(ns->bvec_cache); + ns->bvec_cache = NULL; + fput(ns->file); + ns->file = NULL; + } +} + +int nvmet_file_ns_enable(struct nvmet_ns *ns) +{ + int ret; + struct kstat stat; + + ns->file = filp_open(ns->device_path, + O_RDWR | O_LARGEFILE | O_DIRECT, 0); + if (IS_ERR(ns->file)) { + pr_err("failed to open file %s: (%ld)\n", + ns->device_path, PTR_ERR(ns->bdev)); + return PTR_ERR(ns->file); + } + + ret = vfs_getattr(&ns->file->f_path, + &stat, STATX_SIZE, AT_STATX_FORCE_SYNC); + if (ret) + goto err; + + ns->size = stat.size; + ns->blksize_shift = file_inode(ns->file)->i_blkbits; + + ns->bvec_cache = kmem_cache_create("nvmet-bvec", + NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ns->bvec_cache) + goto err; + + ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab, + mempool_free_slab, ns->bvec_cache); + + if (!ns->bvec_pool) + goto err; + + return ret; +err: + ns->size = 0; + ns->blksize_shift = 0; + nvmet_file_ns_disable(ns); + return ret; +} + +static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter) +{ + bv->bv_page = sg_page_iter_page(iter); + bv->bv_offset = iter->sg->offset; + bv->bv_len = PAGE_SIZE - iter->sg->offset; +} + +static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, + unsigned long nr_segs, size_t count) +{ + struct kiocb *iocb = &req->f.iocb; + ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter); + struct iov_iter iter; + int ki_flags = 0, rw; + ssize_t ret; + + if (req->cmd->rw.opcode == nvme_cmd_write) { + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) + ki_flags = IOCB_DSYNC; + call_iter = req->ns->file->f_op->write_iter; + rw = WRITE; + } else { + call_iter = req->ns->file->f_op->read_iter; + rw = READ; + } + + iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count); + + iocb->ki_pos = pos; + iocb->ki_filp = req->ns->file; + iocb->ki_flags = IOCB_DIRECT | ki_flags; + + ret = call_iter(iocb, &iter); + + if (ret != -EIOCBQUEUED && iocb->ki_complete) + iocb->ki_complete(iocb, ret, 0); + + return ret; +} + +static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) +{ + struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb); + + if (req->f.bvec != req->inline_bvec) { + if (likely(req->f.mpool_alloc == false)) + kfree(req->f.bvec); + else + mempool_free(req->f.bvec, req->ns->bvec_pool); + } + + nvmet_req_complete(req, ret != req->data_len ? 
+ NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_execute_rw(struct nvmet_req *req) +{ + ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); + struct sg_page_iter sg_pg_iter; + unsigned long bv_cnt = 0; + bool is_sync = false; + size_t len = 0, total_len = 0; + ssize_t ret = 0; + loff_t pos; + + if (!req->sg_cnt || !nr_bvec) { + nvmet_req_complete(req, 0); + return; + } + + if (nr_bvec > NVMET_MAX_INLINE_BIOVEC) + req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), + GFP_KERNEL); + else + req->f.bvec = req->inline_bvec; + + req->f.mpool_alloc = false; + if (unlikely(!req->f.bvec)) { + /* fallback under memory pressure */ + req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL); + req->f.mpool_alloc = true; + if (nr_bvec > NVMET_MAX_MPOOL_BVEC) + is_sync = true; + } + + pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift; + + memset(&req->f.iocb, 0, sizeof(struct kiocb)); + for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) { + nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter); + len += req->f.bvec[bv_cnt].bv_len; + total_len += req->f.bvec[bv_cnt].bv_len; + bv_cnt++; + + WARN_ON_ONCE((nr_bvec - 1) < 0); + + if (unlikely(is_sync) && + (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) { + ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len); + if (ret < 0) + goto out; + pos += len; + bv_cnt = 0; + len = 0; + } + nr_bvec--; + } + + if (WARN_ON_ONCE(total_len != req->data_len)) + ret = -EIO; +out: + if (unlikely(is_sync || ret)) { + nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0); + return; + } + req->f.iocb.ki_complete = nvmet_file_io_done; + nvmet_file_submit_bvec(req, pos, bv_cnt, total_len); +} + +static void nvmet_file_flush_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); + int ret; + + ret = vfs_fsync(req->ns->file, 1); + + nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_execute_flush(struct nvmet_req *req) +{ + INIT_WORK(&req->f.work, nvmet_file_flush_work); + schedule_work(&req->f.work); +} + +static void nvmet_file_execute_discard(struct nvmet_req *req) +{ + int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; + struct nvme_dsm_range range; + loff_t offset; + loff_t len; + int i, ret; + + for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) { + if (nvmet_copy_from_sgl(req, i * sizeof(range), &range, + sizeof(range))) + break; + offset = le64_to_cpu(range.slba) << req->ns->blksize_shift; + len = le32_to_cpu(range.nlb) << req->ns->blksize_shift; + ret = vfs_fallocate(req->ns->file, mode, offset, len); + if (ret) + break; + } + + nvmet_req_complete(req, ret < 0 ? 
NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_dsm_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); + + switch (le32_to_cpu(req->cmd->dsm.attributes)) { + case NVME_DSMGMT_AD: + nvmet_file_execute_discard(req); + return; + case NVME_DSMGMT_IDR: + case NVME_DSMGMT_IDW: + default: + /* Not supported yet */ + nvmet_req_complete(req, 0); + return; + } +} + +static void nvmet_file_execute_dsm(struct nvmet_req *req) +{ + INIT_WORK(&req->f.work, nvmet_file_dsm_work); + schedule_work(&req->f.work); +} + +static void nvmet_file_write_zeroes_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); + struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; + int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE; + loff_t offset; + loff_t len; + int ret; + + offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift; + len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) << + req->ns->blksize_shift); + + ret = vfs_fallocate(req->ns->file, mode, offset, len); + nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); +} + +static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) +{ + INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); + schedule_work(&req->f.work); +} + +u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) +{ + struct nvme_command *cmd = req->cmd; + + switch (cmd->common.opcode) { + case nvme_cmd_read: + case nvme_cmd_write: + req->execute = nvmet_file_execute_rw; + req->data_len = nvmet_rw_len(req); + return 0; + case nvme_cmd_flush: + req->execute = nvmet_file_execute_flush; + req->data_len = 0; + return 0; + case nvme_cmd_dsm: + req->execute = nvmet_file_execute_dsm; + req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * + sizeof(struct nvme_dsm_range); + return 0; + case nvme_cmd_write_zeroes: + req->execute = nvmet_file_execute_write_zeroes; + req->data_len = 0; + return 0; + default: + pr_err("unhandled cmd for file ns %d on qid %d\n", + cmd->common.opcode, req->sq->qid); + return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + } +} diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c deleted file mode 100644 index e5eb2db4d20f..000000000000 --- a/drivers/nvme/target/io-cmd.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * NVMe I/O command implementation. - * Copyright (c) 2015-2016 HGST, a Western Digital Company. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include "nvmet.h" - -static void nvmet_bio_done(struct bio *bio) -{ - struct nvmet_req *req = bio->bi_private; - - nvmet_req_complete(req, - bio->bi_status ? 
NVME_SC_INTERNAL | NVME_SC_DNR : 0); - - if (bio != &req->inline_bio) - bio_put(bio); -} - -static inline u32 nvmet_rw_len(struct nvmet_req *req) -{ - return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << - req->ns->blksize_shift; -} - -static void nvmet_execute_rw(struct nvmet_req *req) -{ - int sg_cnt = req->sg_cnt; - struct bio *bio = &req->inline_bio; - struct scatterlist *sg; - sector_t sector; - blk_qc_t cookie; - int op, op_flags = 0, i; - - if (!req->sg_cnt) { - nvmet_req_complete(req, 0); - return; - } - - if (req->cmd->rw.opcode == nvme_cmd_write) { - op = REQ_OP_WRITE; - op_flags = REQ_SYNC | REQ_IDLE; - if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) - op_flags |= REQ_FUA; - } else { - op = REQ_OP_READ; - } - - sector = le64_to_cpu(req->cmd->rw.slba); - sector <<= (req->ns->blksize_shift - 9); - - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); - bio_set_dev(bio, req->ns->bdev); - bio->bi_iter.bi_sector = sector; - bio->bi_private = req; - bio->bi_end_io = nvmet_bio_done; - bio_set_op_attrs(bio, op, op_flags); - - for_each_sg(req->sg, sg, req->sg_cnt, i) { - while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) - != sg->length) { - struct bio *prev = bio; - - bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); - bio_set_dev(bio, req->ns->bdev); - bio->bi_iter.bi_sector = sector; - bio_set_op_attrs(bio, op, op_flags); - - bio_chain(bio, prev); - submit_bio(prev); - } - - sector += sg->length >> 9; - sg_cnt--; - } - - cookie = submit_bio(bio); - - blk_poll(bdev_get_queue(req->ns->bdev), cookie); -} - -static void nvmet_execute_flush(struct nvmet_req *req) -{ - struct bio *bio = &req->inline_bio; - - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); - bio_set_dev(bio, req->ns->bdev); - bio->bi_private = req; - bio->bi_end_io = nvmet_bio_done; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - - submit_bio(bio); -} - -static u16 nvmet_discard_range(struct nvmet_ns *ns, - struct nvme_dsm_range *range, struct bio **bio) -{ - int ret; - - ret = __blkdev_issue_discard(ns->bdev, - le64_to_cpu(range->slba) << (ns->blksize_shift - 9), - le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), - GFP_KERNEL, 0, bio); - if (ret && ret != -EOPNOTSUPP) - return NVME_SC_INTERNAL | NVME_SC_DNR; - return 0; -} - -static void nvmet_execute_discard(struct nvmet_req *req) -{ - struct nvme_dsm_range range; - struct bio *bio = NULL; - int i; - u16 status; - - for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) { - status = nvmet_copy_from_sgl(req, i * sizeof(range), &range, - sizeof(range)); - if (status) - break; - - status = nvmet_discard_range(req->ns, &range, &bio); - if (status) - break; - } - - if (bio) { - bio->bi_private = req; - bio->bi_end_io = nvmet_bio_done; - if (status) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); - } else { - submit_bio(bio); - } - } else { - nvmet_req_complete(req, status); - } -} - -static void nvmet_execute_dsm(struct nvmet_req *req) -{ - switch (le32_to_cpu(req->cmd->dsm.attributes)) { - case NVME_DSMGMT_AD: - nvmet_execute_discard(req); - return; - case NVME_DSMGMT_IDR: - case NVME_DSMGMT_IDW: - default: - /* Not supported yet */ - nvmet_req_complete(req, 0); - return; - } -} - -static void nvmet_execute_write_zeroes(struct nvmet_req *req) -{ - struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; - struct bio *bio = NULL; - u16 status = NVME_SC_SUCCESS; - sector_t sector; - sector_t nr_sector; - - sector = le64_to_cpu(write_zeroes->slba) << - (req->ns->blksize_shift - 9); - nr_sector = 
(((sector_t)le16_to_cpu(write_zeroes->length) + 1) << - (req->ns->blksize_shift - 9)); - - if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, - GFP_KERNEL, &bio, 0)) - status = NVME_SC_INTERNAL | NVME_SC_DNR; - - if (bio) { - bio->bi_private = req; - bio->bi_end_io = nvmet_bio_done; - submit_bio(bio); - } else { - nvmet_req_complete(req, status); - } -} - -u16 nvmet_parse_io_cmd(struct nvmet_req *req) -{ - struct nvme_command *cmd = req->cmd; - u16 ret; - - ret = nvmet_check_ctrl_status(req, cmd); - if (unlikely(ret)) - return ret; - - req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); - if (unlikely(!req->ns)) - return NVME_SC_INVALID_NS | NVME_SC_DNR; - - switch (cmd->common.opcode) { - case nvme_cmd_read: - case nvme_cmd_write: - req->execute = nvmet_execute_rw; - req->data_len = nvmet_rw_len(req); - return 0; - case nvme_cmd_flush: - req->execute = nvmet_execute_flush; - req->data_len = 0; - return 0; - case nvme_cmd_dsm: - req->execute = nvmet_execute_dsm; - req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * - sizeof(struct nvme_dsm_range); - return 0; - case nvme_cmd_write_zeroes: - req->execute = nvmet_execute_write_zeroes; - return 0; - default: - pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, - req->sq->qid); - return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; - } -} diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 15fd84ab21f8..2d09afcfe505 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -43,6 +43,7 @@ struct nvmet_ns { struct list_head dev_link; struct percpu_ref ref; struct block_device *bdev; + struct file *file; u32 nsid; u32 blksize_shift; loff_t size; @@ -57,6 +58,8 @@ struct nvmet_ns { struct config_group group; struct completion disable_done; + mempool_t *bvec_pool; + struct kmem_cache *bvec_cache; }; static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) @@ -222,8 +225,18 @@ struct nvmet_req { struct nvmet_cq *cq; struct nvmet_ns *ns; struct scatterlist *sg; - struct bio inline_bio; struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC]; + union { + struct { + struct bio inline_bio; + } b; + struct { + bool mpool_alloc; + struct kiocb iocb; + struct bio_vec *bvec; + struct work_struct work; + } f; + }; int sg_cnt; /* data length as parsed from the command: */ size_t data_len; @@ -263,7 +276,8 @@ struct nvmet_async_event { }; u16 nvmet_parse_connect_cmd(struct nvmet_req *req); -u16 nvmet_parse_io_cmd(struct nvmet_req *req); +u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req); +u16 nvmet_file_parse_io_cmd(struct nvmet_req *req); u16 nvmet_parse_admin_cmd(struct nvmet_req *req); u16 nvmet_parse_discovery_cmd(struct nvmet_req *req); u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); @@ -338,4 +352,14 @@ extern struct rw_semaphore nvmet_config_sem; bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys, const char *hostnqn); +int nvmet_bdev_ns_enable(struct nvmet_ns *ns); +int nvmet_file_ns_enable(struct nvmet_ns *ns); +void nvmet_bdev_ns_disable(struct nvmet_ns *ns); +void nvmet_file_ns_disable(struct nvmet_ns *ns); + +static inline u32 nvmet_rw_len(struct nvmet_req *req) +{ + return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << + req->ns->blksize_shift; +} #endif /* _NVMET_H */ -- cgit v1.2.3 From 17d78252ee568ecf94df6829c81578c0602991eb Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 21 May 2018 16:27:42 -0700 Subject: nvmet-fc: increase LS buffer count per fc port Today's limit on concurrent LS's is very small - 4 buffers. 
With large subsystem counts or large numbers of initiators connecting, the limit may be exceeded. Raise the LS buffer count to 256. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 33ee8d3145f8..408279cb6f2c 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -31,7 +31,7 @@ /* *************************** Data Structures/Defines ****************** */ -#define NVMET_LS_CTX_COUNT 4 +#define NVMET_LS_CTX_COUNT 256 /* for this implementation, assume small single frame rqst/rsp */ #define NVME_FC_MAX_LS_BUFFER_SIZE 2048 -- cgit v1.2.3 From eb464833a2e787996474ad33dafa2c5336d4c477 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Fri, 11 May 2018 02:38:15 -0400 Subject: nvmet-loop: use nr_phys_segments when map rq to sgl Use blk_rq_nr_phys_segments() instead of blk_rq_payload_bytes() to check if a command contains data to be mapped. This fixes the case where a struct request contains LBAs, but no data will actually be sent, e.g. the pending Write Zeroes support. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 27a8561c0cb9..4284cefe6d32 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -174,7 +174,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, &queue->nvme_sq, &nvme_loop_ops)) return BLK_STS_OK; - if (blk_rq_payload_bytes(req)) { + if (blk_rq_nr_phys_segments(req)) { iod->sg_table.sgl = iod->first_sgl; if (sg_alloc_table_chained(&iod->sg_table, blk_rq_nr_phys_segments(req), -- cgit v1.2.3 From 1e5f44616287b24a2c032274d6a5e3690dfd279b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 25 May 2018 11:04:03 +0200 Subject: nvme: fix KASAN warning when parsing host nqn The host NQN is actually smaller than the space reserved for it, so we should be using strlcpy to keep KASAN happy. Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 8cb3d73e957d..aa318136460e 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -57,7 +57,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn) goto out_unlock; kref_init(&host->ref); - memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); + strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); list_add_tail(&host->list, &nvmf_hosts); out_unlock: -- cgit v1.2.3 From 75c8b19a23100455339af947e032f1a221344c68 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 25 May 2018 11:06:27 +0200 Subject: nvme: fixup memory leak in nvme_init_identify() If nvme_get_effects_log() fails, the 'id' buffer from the previous nvme_identify_ctrl() call will never be freed. 
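A minimal sketch of the resulting error path, paraphrased from nvme_init_identify() for context (the out_free label matches the goto added in the hunk below; the surrounding lines are reconstructed, not quoted):

	ret = nvme_identify_ctrl(ctrl, &id);
	...
	if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
		ret = nvme_get_effects_log(ctrl);
		if (ret < 0)
			goto out_free;	/* free 'id' instead of leaking it */
	}
	...
out_free:
	kfree(id);
	return ret;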
Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fd206a6adad5..2c4cf65641a6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2316,7 +2316,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) { ret = nvme_get_effects_log(ctrl); if (ret < 0) - return ret; + goto out_free; } if (!ctrl->identified) { -- cgit v1.2.3