From 18479ddb7fd5fd0994bd10a95618bf866713a11b Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Tue, 9 Mar 2021 00:48:04 +0530 Subject: nvme: reduce checks for zero command effects For passthrough I/O commands, effects are usually zero. nvme_passthru_end() performs three futile checks in this case. Skip the function call and its checks when effects are zero. Signed-off-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a5653892d773..3bbaf48833a8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1137,7 +1137,8 @@ void nvme_execute_passthru_rq(struct request *rq) effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); blk_execute_rq(disk, rq, 0); - nvme_passthru_end(ctrl, effects); + if (effects) /* nothing to be done for zero cmd effects */ + nvme_passthru_end(ctrl, effects); } EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); -- cgit v1.2.3 From f21c4769d0de00f4873792f8e6f2d1c04c8cd898 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 28 Feb 2021 18:06:04 -0800 Subject: nvme: rename nvme_init_identify() This is a prep patch so that we can move the identify data structure related code initialization from nvme_init_identify() into a helper. Rename the function nvme_init_identify() to nvme_init_ctrl_finish(). The next patch will move the nvme_id_ctrl related initialization from the newly renamed function nvme_init_ctrl_finish() into the nvme_init_identify() helper.
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 ++++---- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/host/pci.c | 2 +- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/host/tcp.c | 2 +- drivers/nvme/target/loop.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3bbaf48833a8..703f6ce6620d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1120,7 +1120,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) mutex_unlock(&ctrl->scan_lock); } if (effects & NVME_CMD_EFFECTS_CCC) - nvme_init_identify(ctrl); + nvme_init_ctrl_finish(ctrl); if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { nvme_queue_scan(ctrl); flush_work(&ctrl->scan_work); @@ -1980,7 +1980,7 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) * In order to be more cautious use controller's max_hw_sectors value * to configure the maximum sectors for the write-zeroes which is * configured based on the controller's MDTS field in the - * nvme_init_identify() if available. + * nvme_init_ctrl_finish() if available. */ if (ns->ctrl->max_hw_sectors == UINT_MAX) max_blocks = (u64)USHRT_MAX + 1; @@ -3066,7 +3066,7 @@ out: * register in our nvme_ctrl structure. This should be called as soon as * the admin queue is fully up and running. 
*/ -int nvme_init_identify(struct nvme_ctrl *ctrl) +int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) { struct nvme_id_ctrl *id; int ret, page_shift; @@ -3253,7 +3253,7 @@ out_free: kfree(id); return ret; } -EXPORT_SYMBOL_GPL(nvme_init_identify); +EXPORT_SYMBOL_GPL(nvme_init_ctrl_finish); static int nvme_dev_open(struct inode *inode, struct file *file) { diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 73d073748389..cb5cdef000bd 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3086,7 +3086,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - ret = nvme_init_identify(&ctrl->ctrl); + ret = nvme_init_ctrl_finish(&ctrl->ctrl); if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) goto out_disconnect_admin_queue; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index e82407d1ec23..76de7ed55d90 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -599,7 +599,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl); void nvme_stop_ctrl(struct nvme_ctrl *ctrl); -int nvme_init_identify(struct nvme_ctrl *ctrl); +int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f03177589c02..ecd11b1febf8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2645,7 +2645,7 @@ static void nvme_reset_work(struct work_struct *work) */ dev->ctrl.max_integrity_segments = 1; - result = nvme_init_identify(&dev->ctrl); + result = nvme_init_ctrl_finish(&dev->ctrl); if (result) goto out; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 53ac4d7442ba..9c710839b03a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -917,7 +917,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, 
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - error = nvme_init_identify(&ctrl->ctrl); + error = nvme_init_ctrl_finish(&ctrl->ctrl); if (error) goto out_quiesce_queue; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 69f59d2c5799..735e768f9f43 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1875,7 +1875,7 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) blk_mq_unquiesce_queue(ctrl->admin_q); - error = nvme_init_identify(ctrl); + error = nvme_init_ctrl_finish(ctrl); if (error) goto out_quiesce_queue; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index cb6f86572b24..a7f97c8b2f77 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -396,7 +396,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - error = nvme_init_identify(&ctrl->ctrl); + error = nvme_init_ctrl_finish(&ctrl->ctrl); if (error) goto out_cleanup_queue; -- cgit v1.2.3 From 44ef5611c2a56538c60211672f73e4ff7df913c7 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 28 Feb 2021 18:06:05 -0800 Subject: nvme: split init identify into helper The function nvme_init_ctrl_finish() (formerly nvme_init_identify()) has grown over time to about 200 lines, given the size of the nvme_id_ctrl data structure. Move the nvme_id_ctrl data structure related initialization into the helper nvme_init_identify() and call it from nvme_init_ctrl_finish(). When we move the code into nvme_init_identify(), change the local variable i from int to unsigned int, remove the duplicate kfree() after nvme_mpath_init(), and jump to the label out_free if nvme_mpath_init() fails.
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 55 ++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 23 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 703f6ce6620d..ce16d24ffdce 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3061,28 +3061,14 @@ out: return 0; } -/* - * Initialize the cached copies of the Identify data and various controller - * register in our nvme_ctrl structure. This should be called as soon as - * the admin queue is fully up and running. - */ -int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) +static int nvme_init_identify(struct nvme_ctrl *ctrl) { struct nvme_id_ctrl *id; int ret, page_shift; u32 max_hw_sectors; bool prev_apst_enabled; - ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); - if (ret) { - dev_err(ctrl->device, "Reading VS failed (%d)\n", ret); - return ret; - } page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; - ctrl->sqsize = min_t(u16, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize); - - if (ctrl->vs >= NVME_VS(1, 1, 0)) - ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap); ret = nvme_identify_ctrl(ctrl, &id); if (ret) { @@ -3100,7 +3086,7 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) ctrl->cntlid = le16_to_cpu(id->cntlid); if (!ctrl->identified) { - int i; + unsigned int i; ret = nvme_init_subsystem(ctrl, id); if (ret) @@ -3213,16 +3199,43 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) } ret = nvme_mpath_init(ctrl, id); - kfree(id); - if (ret < 0) - return ret; + goto out_free; if (ctrl->apst_enabled && !prev_apst_enabled) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); +out_free: + kfree(id); + return ret; +} + +/* + * Initialize the cached copies of the Identify data and various controller + * register in our nvme_ctrl structure. 
This should be called as soon as + * the admin queue is fully up and running. + */ +int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) +{ + int ret; + + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); + if (ret) { + dev_err(ctrl->device, "Reading VS failed (%d)\n", ret); + return ret; + } + + ctrl->sqsize = min_t(u16, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize); + + if (ctrl->vs >= NVME_VS(1, 1, 0)) + ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap); + + ret = nvme_init_identify(ctrl); + if (ret) + return ret; + ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; @@ -3248,10 +3261,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) ctrl->identified = true; return 0; - -out_free: - kfree(id); - return ret; } EXPORT_SYMBOL_GPL(nvme_init_ctrl_finish); -- cgit v1.2.3 From 7a36604668b9b1f84126ef0342144ba5b07e518f Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 28 Feb 2021 18:06:06 -0800 Subject: nvme: mark nvme_setup_passthrough() inline Since the nvme_setup_passthrough() function is in the fast path when called from the NVMeOF passthru backend, make it inline.
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ce16d24ffdce..aa7b03290cef 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -726,7 +726,7 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9; } -static void nvme_setup_passthrough(struct request *req, +static inline void nvme_setup_passthrough(struct request *req, struct nvme_command *cmd) { memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd)); -- cgit v1.2.3 From c03fd85de293a4f65fcb94a795bf4c12a432bb6c Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 28 Feb 2021 18:06:08 -0800 Subject: nvme: don't check nvme_req flags for new req MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nvme_clear_nvme_request() has a check for the flag RQF_DONTPREP and it is called from nvme_init_request() and nvme_setup_cmd(). The function nvme_init_request() is called from nvme_alloc_request() and nvme_alloc_request_qid(). These two callers allocate a new request every time. For a newly allocated request RQF_DONTPREP is never set, since after getting a tag the block layer sets req->rq_flags to 0 and never sets RQF_DONTPREP when returning the request :- nvme_alloc_request() blk_mq_alloc_request() blk_mq_rq_ctx_init() rq->rq_flags = 0 <---- nvme_alloc_request_qid() blk_mq_alloc_request_hctx() blk_mq_rq_ctx_init() rq->rq_flags = 0 <---- The block layer does set some req->rq_flags, but RQF_DONTPREP is not one of them; that flag is set by the driver. That means we can unconditionally set RQF_DONTPREP in rq->rq_flags when nvme_init_request()->nvme_clear_nvme_request() is called from the above two callers. Move the check for RQF_DONTPREP from nvme_clear_nvme_request() into nvme_setup_cmd().
This avoids unnecessary checks in the fast path, since nvme_alloc_request() is now called from the fast path when the NVMeOF target is configured with the passthru backend. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index aa7b03290cef..d6ecef28b851 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -575,11 +575,9 @@ EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU); static inline void nvme_clear_nvme_request(struct request *req) { - if (!(req->rq_flags & RQF_DONTPREP)) { - nvme_req(req)->retries = 0; - nvme_req(req)->flags = 0; - req->rq_flags |= RQF_DONTPREP; - } + nvme_req(req)->retries = 0; + nvme_req(req)->flags = 0; + req->rq_flags |= RQF_DONTPREP; } static inline unsigned int nvme_req_op(struct nvme_command *cmd) @@ -893,7 +891,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, { blk_status_t ret = BLK_STS_OK; - nvme_clear_nvme_request(req); + if (!(req->rq_flags & RQF_DONTPREP)) + nvme_clear_nvme_request(req); memset(cmd, 0, sizeof(*cmd)); switch (req_op(req)) { -- cgit v1.2.3 From f1c772d581843e3a14bbd62ef7e40b56fc307f27 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 28 Feb 2021 18:06:11 -0800 Subject: nvme: add new line after variable declaration Add a new line after the variable declaration in the functions nvme_pr_preempt(), nvme_pr_clear(), and nvme_pr_release(), which matches the rest of the code in nvme/host/core.c. No functional change(s) in this patch.
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d6ecef28b851..17c4ca591817 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2325,18 +2325,21 @@ static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, enum pr_type type, bool abort) { u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); } static int nvme_pr_clear(struct block_device *bdev, u64 key) { u32 cdw10 = 1 | (key ? 1 << 3 : 0); + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register); } static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0); + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } -- cgit v1.2.3 From f4b9e6c90c572519041f4c5d9c4c3dd50aff42d4 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 17 Mar 2021 13:37:03 -0700 Subject: nvme: use driver pdu command for passthrough All nvme transport drivers preallocate an nvme command for each request. Assume to use that command for nvme_setup_cmd() instead of requiring drivers pass a pointer to it. All nvme drivers must initialize the generic nvme_request 'cmd' to point to the transport's preallocated nvme_command. The generic nvme_request cmd pointer had previously been used only as a temporary copy for passthrough commands. Since it now points to the command that gets dispatched, passthrough commands must directly set it up prior to executing the request. 
Signed-off-by: Keith Busch Reviewed-by: Jens Axboe Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 23 ++++++++++------------- drivers/nvme/host/fc.c | 5 ++--- drivers/nvme/host/nvme.h | 3 +-- drivers/nvme/host/pci.c | 3 ++- drivers/nvme/host/rdma.c | 5 +++-- drivers/nvme/host/tcp.c | 5 ++++- drivers/nvme/target/loop.c | 4 +++- 7 files changed, 25 insertions(+), 23 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 17c4ca591817..c3f94eb90669 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -575,6 +575,9 @@ EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU); static inline void nvme_clear_nvme_request(struct request *req) { + struct nvme_command *cmd = nvme_req(req)->cmd; + + memset(cmd, 0, sizeof(*cmd)); nvme_req(req)->retries = 0; nvme_req(req)->flags = 0; req->rq_flags |= RQF_DONTPREP; @@ -593,9 +596,12 @@ static inline void nvme_init_request(struct request *req, else /* no queuedata implies admin queue */ req->timeout = NVME_ADMIN_TIMEOUT; + /* passthru commands should let the driver set the SGL flags */ + cmd->common.flags &= ~NVME_CMD_SGL_ALL; + req->cmd_flags |= REQ_FAILFAST_DRIVER; nvme_clear_nvme_request(req); - nvme_req(req)->cmd = cmd; + memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); } struct request *nvme_alloc_request(struct request_queue *q, @@ -724,14 +730,6 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9; } -static inline void nvme_setup_passthrough(struct request *req, - struct nvme_command *cmd) -{ - memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd)); - /* passthru commands should let the driver set the SGL flags */ - cmd->common.flags &= ~NVME_CMD_SGL_ALL; -} - static inline void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd) { @@ -886,19 +884,18 @@ void nvme_cleanup_cmd(struct request *req) } 
EXPORT_SYMBOL_GPL(nvme_cleanup_cmd); -blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, - struct nvme_command *cmd) +blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) { + struct nvme_command *cmd = nvme_req(req)->cmd; blk_status_t ret = BLK_STS_OK; if (!(req->rq_flags & RQF_DONTPREP)) nvme_clear_nvme_request(req); - memset(cmd, 0, sizeof(*cmd)); switch (req_op(req)) { case REQ_OP_DRV_IN: case REQ_OP_DRV_OUT: - nvme_setup_passthrough(req, cmd); + /* these are setup prior to execution in nvme_init_request() */ break; case REQ_OP_FLUSH: nvme_setup_flush(ns, cmd); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index fcf6fd83d08d..f54ffb792acc 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2128,6 +2128,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, op->op.fcp_req.first_sgl = op->sgl; op->op.fcp_req.private = &op->priv[0]; nvme_req(rq)->ctrl = &ctrl->ctrl; + nvme_req(rq)->cmd = &op->op.cmd_iu.sqe; return res; } @@ -2759,8 +2760,6 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl = queue->ctrl; struct request *rq = bd->rq; struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; - struct nvme_command *sqe = &cmdiu->sqe; enum nvmefc_fcp_datadir io_dir; bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags); u32 data_len; @@ -2770,7 +2769,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); - ret = nvme_setup_cmd(ns, rq, sqe); + ret = nvme_setup_cmd(ns, rq); if (ret) return ret; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 76de7ed55d90..b0863c59fac4 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -623,8 +623,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, 
blk_mq_req_flags_t flags); void nvme_cleanup_cmd(struct request *req); -blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, - struct nvme_command *cmd); +blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req); int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1a0912146c74..d47bb18b976a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -430,6 +430,7 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, iod->nvmeq = nvmeq; nvme_req(req)->ctrl = &dev->ctrl; + nvme_req(req)->cmd = &iod->cmd; return 0; } @@ -932,7 +933,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags))) return BLK_STS_IOERR; - ret = nvme_setup_cmd(ns, req, cmnd); + ret = nvme_setup_cmd(ns, req); if (ret) return ret; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 9c710839b03a..d6bc43e6c8a6 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -314,6 +314,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, NVME_RDMA_DATA_SGL_SIZE; req->queue = queue; + nvme_req(rq)->cmd = req->sqe.data; return 0; } @@ -2038,7 +2039,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq = bd->rq; struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_qe *sqe = &req->sqe; - struct nvme_command *c = sqe->data; + struct nvme_command *c = nvme_req(rq)->cmd; struct ib_device *dev; bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags); blk_status_t ret; @@ -2061,7 +2062,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(struct nvme_command), DMA_TO_DEVICE); - ret = nvme_setup_cmd(ns, rq, c); + ret = 
nvme_setup_cmd(ns, rq); if (ret) goto unmap_qe; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 735e768f9f43..7de9bee1e5e9 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -417,6 +417,7 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set, { struct nvme_tcp_ctrl *ctrl = set->driver_data; struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); + struct nvme_tcp_cmd_pdu *pdu; int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx]; u8 hdgst = nvme_tcp_hdgst_len(queue); @@ -427,8 +428,10 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set, if (!req->pdu) return -ENOMEM; + pdu = req->pdu; req->queue = queue; nvme_req(rq)->ctrl = &ctrl->ctrl; + nvme_req(rq)->cmd = &pdu->cmd; return 0; } @@ -2259,7 +2262,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0; blk_status_t ret; - ret = nvme_setup_cmd(ns, rq, &pdu->cmd); + ret = nvme_setup_cmd(ns, rq); if (ret) return ret; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index a7f97c8b2f77..b741854fc957 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -141,7 +141,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready)) return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req); - ret = nvme_setup_cmd(ns, req, &iod->cmd); + ret = nvme_setup_cmd(ns, req); if (ret) return ret; @@ -205,8 +205,10 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set, unsigned int numa_node) { struct nvme_loop_ctrl *ctrl = set->driver_data; + struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); nvme_req(req)->ctrl = &ctrl->ctrl; + nvme_req(req)->cmd = &iod->cmd; return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req), (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0); } -- cgit v1.2.3 From ed4a854b062b841ebc1aa576f27daf72d07150a5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 17 Mar 2021 13:33:41 -0700 Subject: nvme: warn of unhandled effects only once We don't need to repeatedly spam the kernel logs with the same warning about unhandled passthrough IO effects. Just one warning is sufficient to observe this condition occurs. Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c3f94eb90669..40215a0246e4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1072,9 +1072,9 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) if (ns->head->effects) effects = le32_to_cpu(ns->head->effects->iocs[opcode]); if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) - dev_warn(ctrl->device, - "IO command:%02x has unhandled effects:%08x\n", - opcode, effects); + dev_warn_once(ctrl->device, + "IO command:%02x has unhandled effects:%08x\n", + opcode, effects); return 0; } -- cgit v1.2.3 From bff4bcf3cfc1595e0ef2aeb774b2403c88de1486 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 1 Apr 2021 11:54:10 +0200 Subject: nvme: use sysfs_emit instead of sprintf sysfs_emit is the recommended API to use for formatting strings to be returned to user space. It is equivalent to scnprintf and aware of the PAGE_SIZE buffer size. 
Suggested-by: Chaitanya Kulkarni Signed-off-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 40 ++++++++++++++++++++-------------------- drivers/nvme/host/multipath.c | 8 ++++---- 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 40215a0246e4..b94a30e7298d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2876,8 +2876,8 @@ static ssize_t subsys_##field##_show(struct device *dev, \ { \ struct nvme_subsystem *subsys = \ container_of(dev, struct nvme_subsystem, dev); \ - return sprintf(buf, "%.*s\n", \ - (int)sizeof(subsys->field), subsys->field); \ + return sysfs_emit(buf, "%.*s\n", \ + (int)sizeof(subsys->field), subsys->field); \ } \ static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show); @@ -3407,13 +3407,13 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, int model_len = sizeof(subsys->model); if (!uuid_is_null(&ids->uuid)) - return sprintf(buf, "uuid.%pU\n", &ids->uuid); + return sysfs_emit(buf, "uuid.%pU\n", &ids->uuid); if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) - return sprintf(buf, "eui.%16phN\n", ids->nguid); + return sysfs_emit(buf, "eui.%16phN\n", ids->nguid); if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) - return sprintf(buf, "eui.%8phN\n", ids->eui64); + return sysfs_emit(buf, "eui.%8phN\n", ids->eui64); while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' || subsys->serial[serial_len - 1] == '\0')) @@ -3422,7 +3422,7 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, subsys->model[model_len - 1] == '\0')) model_len--; - return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id, + return sysfs_emit(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id, serial_len, subsys->serial, model_len, subsys->model, head->ns_id); } @@ -3431,7 +3431,7 @@ static DEVICE_ATTR_RO(wwid); static ssize_t 
nguid_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid); + return sysfs_emit(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid); } static DEVICE_ATTR_RO(nguid); @@ -3446,23 +3446,23 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, if (uuid_is_null(&ids->uuid)) { printk_ratelimited(KERN_WARNING "No UUID available providing old NGUID\n"); - return sprintf(buf, "%pU\n", ids->nguid); + return sysfs_emit(buf, "%pU\n", ids->nguid); } - return sprintf(buf, "%pU\n", &ids->uuid); + return sysfs_emit(buf, "%pU\n", &ids->uuid); } static DEVICE_ATTR_RO(uuid); static ssize_t eui_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64); + return sysfs_emit(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64); } static DEVICE_ATTR_RO(eui); static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", dev_to_ns_head(dev)->ns_id); + return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ns_id); } static DEVICE_ATTR_RO(nsid); @@ -3527,7 +3527,7 @@ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ - return sprintf(buf, "%.*s\n", \ + return sysfs_emit(buf, "%.*s\n", \ (int)sizeof(ctrl->subsys->field), ctrl->subsys->field); \ } \ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); @@ -3541,7 +3541,7 @@ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ - return sprintf(buf, "%d\n", ctrl->field); \ + return sysfs_emit(buf, "%d\n", ctrl->field); \ } \ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); @@ -3589,9 +3589,9 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) && state_name[ctrl->state]) - 
return sprintf(buf, "%s\n", state_name[ctrl->state]); + return sysfs_emit(buf, "%s\n", state_name[ctrl->state]); - return sprintf(buf, "unknown state\n"); + return sysfs_emit(buf, "unknown state\n"); } static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL); @@ -3643,9 +3643,9 @@ static ssize_t nvme_ctrl_loss_tmo_show(struct device *dev, struct nvmf_ctrl_options *opts = ctrl->opts; if (ctrl->opts->max_reconnects == -1) - return sprintf(buf, "off\n"); - return sprintf(buf, "%d\n", - opts->max_reconnects * opts->reconnect_delay); + return sysfs_emit(buf, "off\n"); + return sysfs_emit(buf, "%d\n", + opts->max_reconnects * opts->reconnect_delay); } static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev, @@ -3675,8 +3675,8 @@ static ssize_t nvme_ctrl_reconnect_delay_show(struct device *dev, struct nvme_ctrl *ctrl = dev_get_drvdata(dev); if (ctrl->opts->reconnect_delay == -1) - return sprintf(buf, "off\n"); - return sprintf(buf, "%d\n", ctrl->opts->reconnect_delay); + return sysfs_emit(buf, "off\n"); + return sysfs_emit(buf, "%d\n", ctrl->opts->reconnect_delay); } static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev, diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a1d476e1ac02..e62369d3eae3 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -602,8 +602,8 @@ static ssize_t nvme_subsys_iopolicy_show(struct device *dev, struct nvme_subsystem *subsys = container_of(dev, struct nvme_subsystem, dev); - return sprintf(buf, "%s\n", - nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]); + return sysfs_emit(buf, "%s\n", + nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]); } static ssize_t nvme_subsys_iopolicy_store(struct device *dev, @@ -628,7 +628,7 @@ SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR, static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid); + return sysfs_emit(buf, "%d\n", 
nvme_get_ns_from_dev(dev)->ana_grpid); } DEVICE_ATTR_RO(ana_grpid); @@ -637,7 +637,7 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%s\n", nvme_ana_state_names[ns->ana_state]); + return sysfs_emit(buf, "%s\n", nvme_ana_state_names[ns->ana_state]); } DEVICE_ATTR_RO(ana_state); -- cgit v1.2.3 From 25a64e4e7ef6da605a86ec1bff18d2c3c6ed5329 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 1 Apr 2021 11:54:11 +0200 Subject: nvme: remove superfluous else in nvme_ctrl_loss_tmo_store If there is an error we will leave the function early. So there is no need for an else. Remove it. Signed-off-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b94a30e7298d..d2b4c5567209 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3659,7 +3659,7 @@ static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev, if (err) return -EINVAL; - else if (ctrl_loss_tmo < 0) + if (ctrl_loss_tmo < 0) opts->max_reconnects = -1; else opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, -- cgit v1.2.3 From 09fbed636382867733c1713c9fe2fa2926dac537 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 1 Apr 2021 11:54:12 +0200 Subject: nvme: export fast_io_fail_tmo to sysfs Commit 8c4dfea97f15 ("nvme-fabrics: reject I/O to offline device") introduced fast_io_fail_tmo but didn't export the value to sysfs. The value can be set during the 'nvme connect'. Export the timeout value to user space via sysfs to allow runtime configuration. Cc: Victor Gladkov Signed-off-by: Daniel Wagner Reviewed-by: Ewan D. 
Milne Reviewed-by: Sagi Grimberg Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d2b4c5567209..11fca6459812 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3696,6 +3696,36 @@ static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev, static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR, nvme_ctrl_reconnect_delay_show, nvme_ctrl_reconnect_delay_store); +static ssize_t nvme_ctrl_fast_io_fail_tmo_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (ctrl->opts->fast_io_fail_tmo == -1) + return sysfs_emit(buf, "off\n"); + return sysfs_emit(buf, "%d\n", ctrl->opts->fast_io_fail_tmo); +} + +static ssize_t nvme_ctrl_fast_io_fail_tmo_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + struct nvmf_ctrl_options *opts = ctrl->opts; + int fast_io_fail_tmo, err; + + err = kstrtoint(buf, 10, &fast_io_fail_tmo); + if (err) + return -EINVAL; + + if (fast_io_fail_tmo < 0) + opts->fast_io_fail_tmo = -1; + else + opts->fast_io_fail_tmo = fast_io_fail_tmo; + return count; +} +static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR, + nvme_ctrl_fast_io_fail_tmo_show, nvme_ctrl_fast_io_fail_tmo_store); + static struct attribute *nvme_dev_attrs[] = { &dev_attr_reset_controller.attr, &dev_attr_rescan_controller.attr, @@ -3715,6 +3745,7 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_hostid.attr, &dev_attr_ctrl_loss_tmo.attr, &dev_attr_reconnect_delay.attr, + &dev_attr_fast_io_fail_tmo.attr, NULL }; -- cgit v1.2.3 From c881a23fb6f7eb901155d25ba8dd1af0b8c7923b Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 26 Mar 2021 19:48:00 +0000 Subject: nvme: 
disallow passthru cmd from targeting a nsid != nsid of the block dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a passthru command targets a specific namespace, the ns parameter to nvme_user_cmd()/nvme_user_cmd64() is set. However, there is currently no validation that the nsid specified in the passthru command targets the namespace/nsid represented by the block device that the ioctl was performed on. Add a check that validates that the nsid in the passthru command matches that of the supplied namespace. Signed-off-by: Niklas Cassel Reviewed-by: Javier González Reviewed-by: Sagi Grimberg Reviewed-by: Kanchan Joshi Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 11fca6459812..3f3b985c9fa6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1632,6 +1632,12 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return -EFAULT; if (cmd.flags) return -EINVAL; + if (ns && cmd.nsid != ns->head->ns_id) { + dev_err(ctrl->device, + "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", + current->comm, cmd.nsid, ns->head->ns_id); + return -EINVAL; + } memset(&c, 0, sizeof(c)); c.common.opcode = cmd.opcode; @@ -1676,6 +1682,12 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return -EFAULT; if (cmd.flags) return -EINVAL; + if (ns && cmd.nsid != ns->head->ns_id) { + dev_err(ctrl->device, + "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", + current->comm, cmd.nsid, ns->head->ns_id); + return -EINVAL; + } memset(&c, 0, sizeof(c)); c.common.opcode = cmd.opcode; -- cgit v1.2.3 From 5befc7c26e5a98cd49789fb1beb52c62bd472dba Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 24 Mar 2021 16:18:05 -0700 Subject: nvme: implement non-mdts command limits Commands that access 
LBA contents without a data transfer between the host historically have not had a spec defined upper limit. The driver set the queue constraints for such commands to the max data transfer size just to be safe, but this artificial constraint frequently limits devices below their capabilities. The NVMe Workgroup ratified TP4040 defines how a controller may advertise their non-MDTS limits. Use these if provided and default to the current constraints if not. Since the Dataset Management command limits are defined in logical blocks, but without a namespace to tell us the logical block size, the code defaults to the safe 512b size. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 106 ++++++++++++++++++++++++++++++++--------------- drivers/nvme/host/nvme.h | 3 ++ include/linux/nvme.h | 10 +++++ 3 files changed, 85 insertions(+), 34 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3f3b985c9fa6..e37e2ecd574c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1948,7 +1948,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) struct request_queue *queue = disk->queue; u32 size = queue_logical_block_size(queue); - if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) { + if (ctrl->max_discard_sectors == 0) { blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue); return; } @@ -1966,39 +1966,13 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue)) return; - blk_queue_max_discard_sectors(queue, UINT_MAX); - blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); + blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors); + blk_queue_max_discard_segments(queue, ctrl->max_discard_segments); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } -static void nvme_config_write_zeroes(struct gendisk 
*disk, struct nvme_ns *ns) -{ - u64 max_blocks; - - if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || - (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) - return; - /* - * Even though NVMe spec explicitly states that MDTS is not - * applicable to the write-zeroes:- "The restriction does not apply to - * commands that do not transfer data between the host and the - * controller (e.g., Write Uncorrectable ro Write Zeroes command).". - * In order to be more cautious use controller's max_hw_sectors value - * to configure the maximum sectors for the write-zeroes which is - * configured based on the controller's MDTS field in the - * nvme_init_ctrl_finish() if available. - */ - if (ns->ctrl->max_hw_sectors == UINT_MAX) - max_blocks = (u64)USHRT_MAX + 1; - else - max_blocks = ns->ctrl->max_hw_sectors + 1; - - blk_queue_max_write_zeroes_sectors(disk->queue, - nvme_lba_to_sect(ns, max_blocks)); -} - static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) { return !uuid_is_null(&ids->uuid) || @@ -2168,7 +2142,8 @@ static void nvme_update_disk_info(struct gendisk *disk, set_capacity_and_notify(disk, capacity); nvme_config_discard(disk, ns); - nvme_config_write_zeroes(disk, ns); + blk_queue_max_write_zeroes_sectors(disk->queue, + ns->ctrl->max_zeroes_sectors); set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) || test_bit(NVME_NS_FORCE_RO, &ns->flags)); @@ -3072,14 +3047,72 @@ out: return 0; } +static inline u32 nvme_mps_to_sectors(struct nvme_ctrl *ctrl, u32 units) +{ + u32 page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; + + return 1 << (units + page_shift - 9); +} + +static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) +{ + struct nvme_command c = { }; + struct nvme_id_ctrl_nvm *id; + int ret; + + if (ctrl->oncs & NVME_CTRL_ONCS_DSM) { + ctrl->max_discard_sectors = UINT_MAX; + ctrl->max_discard_segments = NVME_DSM_MAX_RANGES; + } else { + ctrl->max_discard_sectors = 0; + ctrl->max_discard_segments = 0; + } + + /* + * Even though NVMe spec explicitly states that 
MDTS is not applicable + * to the write-zeroes, we are cautious and limit the size to the + * controllers max_hw_sectors value, which is based on the MDTS field + * and possibly other limiting factors. + */ + if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) && + !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) + ctrl->max_zeroes_sectors = ctrl->max_hw_sectors; + else + ctrl->max_zeroes_sectors = 0; + + if (nvme_ctrl_limited_cns(ctrl)) + return 0; + + id = kzalloc(sizeof(*id), GFP_KERNEL); + if (!id) + return 0; + + c.identify.opcode = nvme_admin_identify; + c.identify.cns = NVME_ID_CNS_CS_CTRL; + c.identify.csi = NVME_CSI_NVM; + + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id)); + if (ret) + goto free_data; + + if (id->dmrl) + ctrl->max_discard_segments = id->dmrl; + if (id->dmrsl) + ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl); + if (id->wzsl) + ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl); + +free_data: + kfree(id); + return ret; +} + static int nvme_init_identify(struct nvme_ctrl *ctrl) { struct nvme_id_ctrl *id; - int ret, page_shift; u32 max_hw_sectors; bool prev_apst_enabled; - - page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; + int ret; ret = nvme_identify_ctrl(ctrl, &id); if (ret) { @@ -3136,7 +3169,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; if (id->mdts) - max_hw_sectors = 1 << (id->mdts + page_shift - 9); + max_hw_sectors = nvme_mps_to_sectors(ctrl, id->mdts); else max_hw_sectors = UINT_MAX; ctrl->max_hw_sectors = @@ -3247,6 +3280,10 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) if (ret) return ret; + ret = nvme_init_non_mdts_limits(ctrl); + if (ret < 0) + return ret; + ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; @@ -4808,6 +4845,7 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE); 
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b0863c59fac4..815c032a190e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -276,6 +276,9 @@ struct nvme_ctrl { u32 max_hw_sectors; u32 max_segments; u32 max_integrity_segments; + u32 max_discard_sectors; + u32 max_discard_segments; + u32 max_zeroes_sectors; #ifdef CONFIG_BLK_DEV_ZONED u32 max_zone_append; #endif diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b08787cd0881..edcbd60b88b9 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -405,6 +405,16 @@ struct nvme_id_ctrl_zns { __u8 rsvd1[4095]; }; +struct nvme_id_ctrl_nvm { + __u8 vsl; + __u8 wzsl; + __u8 wusl; + __u8 dmrl; + __le32 dmrsl; + __le64 dmsl; + __u8 rsvd16[4080]; +}; + enum { NVME_ID_CNS_NS = 0x00, NVME_ID_CNS_CTRL = 0x01, -- cgit v1.2.3 From 8609c63fce58e94d82f6b6bf29c7806062e2e867 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 2 Apr 2021 18:58:20 +0200 Subject: nvme: fix handling of large MDTS values Instead of triggering an integer overflow and undefined behavior if MDTS is large, set max_hw_sectors to UINT_MAX. 
Signed-off-by: Bart Van Assche Reviewed-by: Keith Busch [hch: rebased to account for the new nvme_mps_to_sectors helper] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e37e2ecd574c..314705da2c10 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3049,9 +3049,11 @@ out: static inline u32 nvme_mps_to_sectors(struct nvme_ctrl *ctrl, u32 units) { - u32 page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; + u32 page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12, val; - return 1 << (units + page_shift - 9); + if (check_shl_overflow(1U, units + page_shift - 9, &val)) + return UINT_MAX; + return val; } static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) -- cgit v1.2.3 From 95d54bd1a4c1873aa0e2d4c09966f37954c32b80 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sat, 10 Apr 2021 20:16:21 +0000 Subject: nvme: remove single trailing whitespace There is a single trailing whitespace in core.c. Since this is just a single whitespace, the chances of this affecting backports to stable should be quite low, so let's just remove it. 
Signed-off-by: Niklas Cassel Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 314705da2c10..d27edc37885d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3289,7 +3289,7 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; - + ret = nvme_configure_timestamp(ctrl); if (ret < 0) return ret; -- cgit v1.2.3 From 3089738868b665ecc25e7f36e004c073883c16a0 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 7 Apr 2021 17:49:29 +0200 Subject: nvme: add a nvme_ns_head_multipath helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the multipath gendisk out of #ifdef CONFIG_NVME_MULTIPATH and add a new nvme_ns_head_multipath that uses it to check if a ns_head has a multipath device associated with it. 
Signed-off-by: Minwoo Im [hch: added the IS_ENABLED, converted a few existing users] Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 8 ++------ drivers/nvme/host/nvme.h | 7 ++++++- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d27edc37885d..f9234b68087a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1873,11 +1873,9 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) { struct nvme_ns *ns = bdev->bd_disk->private_data; -#ifdef CONFIG_NVME_MULTIPATH /* should never be called due to GENHD_FL_HIDDEN */ - if (WARN_ON_ONCE(ns->head->disk)) + if (WARN_ON_ONCE(nvme_ns_head_multipath(ns->head))) goto fail; -#endif if (!kref_get_unless_zero(&ns->kref)) goto fail; if (!try_module_get(ns->ctrl->ops->module)) @@ -2215,8 +2213,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) return ret; } -#ifdef CONFIG_NVME_MULTIPATH - if (ns->head->disk) { + if (nvme_ns_head_multipath(ns->head)) { blk_mq_freeze_queue(ns->head->disk->queue); nvme_update_disk_info(ns->head->disk, ns, id); blk_stack_limits(&ns->head->disk->queue->limits, @@ -2224,7 +2221,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) blk_queue_update_readahead(ns->head->disk->queue); blk_mq_unfreeze_queue(ns->head->disk->queue); } -#endif return 0; out_unfreeze: diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 815c032a190e..67ff5d41e7d0 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -413,8 +413,8 @@ struct nvme_ns_head { bool shared; int instance; struct nvme_effects_log *effects; -#ifdef CONFIG_NVME_MULTIPATH struct gendisk *disk; +#ifdef CONFIG_NVME_MULTIPATH struct bio_list requeue_list; spinlock_t requeue_lock; struct work_struct requeue_work; @@ -425,6 +425,11 @@ 
struct nvme_ns_head { #endif }; +static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head) +{ + return IS_ENABLED(CONFIG_NVME_MULTIPATH) && head->disk; +} + enum nvme_ns_features { NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */ NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */ -- cgit v1.2.3 From 9953ab0c5ae722dabbfa89a82c0d30a261125da0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 Apr 2021 12:46:46 +0200 Subject: nvme: cleanup setting the disk name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return false from nvme_set_disk_name and let the caller set the non-multipath name instead of duplicating the naming information in two places. Also remove the pointless local variables for the disk name and flags and the not needed ctrl argument. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González --- drivers/nvme/host/core.c | 17 +++++++++++------ drivers/nvme/host/multipath.c | 24 ++++++++++++------------ drivers/nvme/host/nvme.h | 14 ++++---------- 3 files changed, 27 insertions(+), 28 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f9234b68087a..d9a33a4f7cc6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3998,8 +3998,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_ns *ns; struct gendisk *disk; struct nvme_id_ns *id; - char disk_name[DISK_NAME_LEN]; - int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT; + int node = ctrl->numa_node; if (nvme_identify_ns(ctrl, nsid, ids, &id)) return; @@ -4025,7 +4024,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, if (nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED)) goto out_free_queue; - nvme_set_disk_name(disk_name, ns, ctrl, &flags); disk = alloc_disk_node(0, node); if (!disk) @@ -4034,15 +4032,22 @@ static void 
nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, disk->fops = &nvme_bdev_ops; disk->private_data = ns; disk->queue = ns->queue; - disk->flags = flags; - memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); + disk->flags = GENHD_FL_EXT_DEVT; + /* + * Without the multipath code enabled, multiple controller per + * subsystems are visible as devices and thus we cannot use the + * subsystem instance. + */ + if (!nvme_mpath_set_disk_name(ns, disk->disk_name, &disk->flags)) + sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, + ns->head->instance); ns->disk = disk; if (nvme_update_ns_info(ns, id)) goto out_put_disk; if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { - if (nvme_nvm_register(ns, disk_name, node)) { + if (nvme_nvm_register(ns, disk->disk_name, node)) { dev_warn(ctrl->device, "LightNVM init failure\n"); goto out_put_disk; } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 987920e17d01..5ebf9ccb38f4 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -50,19 +50,19 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) * and those that have a single controller and use the controller node * directly. 
*/ -void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, - struct nvme_ctrl *ctrl, int *flags) -{ - if (!multipath) { - sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); - } else if (ns->head->disk) { - sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, - ctrl->instance, ns->head->instance); - *flags = GENHD_FL_HIDDEN; - } else { - sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, - ns->head->instance); +bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags) +{ + if (!multipath) + return false; + if (!ns->head->disk) { + sprintf(disk_name, "nvme%dn%d", ns->ctrl->subsys->instance, + ns->head->instance); + return true; } + sprintf(disk_name, "nvme%dc%dn%d", ns->ctrl->subsys->instance, + ns->ctrl->instance, ns->head->instance); + *flags = GENHD_FL_HIDDEN; + return true; } void nvme_failover_req(struct request *req) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 67ff5d41e7d0..2ef0a355fbb4 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -668,8 +668,7 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) void nvme_mpath_unfreeze(struct nvme_subsystem *subsys); void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); -void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, - struct nvme_ctrl *ctrl, int *flags); +bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags); void nvme_failover_req(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); @@ -708,16 +707,11 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) { return false; } -/* - * Without the multipath code enabled, multiple controller per subsystems are - * visible as devices and thus we cannot use the subsystem instance. 
- */ -static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, - struct nvme_ctrl *ctrl, int *flags) +static inline bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, + int *flags) { - sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); + return false; } - static inline void nvme_failover_req(struct request *req) { } -- cgit v1.2.3 From d7790d3739cfd5051ba8990732a2dce795d4fae2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Aug 2020 10:33:14 +0200 Subject: nvme: pass a user pointer to nvme_nvm_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the proper user pointer instead of the not all that useful integer representation. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/lightnvm.c | 8 ++++---- drivers/nvme/host/nvme.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d9a33a4f7cc6..2e932935e05e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1819,7 +1819,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, break; default: if (ns->ndev) - ret = nvme_nvm_ioctl(ns, cmd, arg); + ret = nvme_nvm_ioctl(ns, cmd, argp); else ret = -ENOTTY; } diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index e3240d189093..848e55bbb64f 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -930,15 +930,15 @@ static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, return ret; } -int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) +int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp) { switch (cmd) { case NVME_NVM_IOCTL_ADMIN_VIO: - return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg); + return nvme_nvm_user_vcmd(ns, 1, argp); 
case NVME_NVM_IOCTL_IO_VIO: - return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg); + return nvme_nvm_user_vcmd(ns, 0, argp); case NVME_NVM_IOCTL_SUBMIT_VIO: - return nvme_nvm_submit_vio(ns, (void __user *)arg); + return nvme_nvm_submit_vio(ns, argp); default: return -ENOTTY; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2ef0a355fbb4..70018ae2cb18 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -799,7 +799,7 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); extern const struct attribute_group nvme_nvm_attr_group; -int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); +int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp); #else static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) @@ -809,7 +809,7 @@ static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}; static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, - unsigned long arg) + void __user *argp) { return -ENOTTY; } -- cgit v1.2.3 From a5d737f10022bd5d2acf5127935ff813fc832f6e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Aug 2020 10:30:50 +0200 Subject: nvme: factor out a nvme_ns_ioctl helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out a helper for the namespace based ioctls. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2e932935e05e..85acb9f608cd 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1783,6 +1783,26 @@ static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, return ret; } +static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, + void __user *argp) +{ + switch (cmd) { + case NVME_IOCTL_ID: + force_successful_syscall_return(); + return ns->head->ns_id; + case NVME_IOCTL_IO_CMD: + return nvme_user_cmd(ns->ctrl, ns, argp); + case NVME_IOCTL_SUBMIT_IO: + return nvme_submit_io(ns, argp); + case NVME_IOCTL_IO64_CMD: + return nvme_user_cmd64(ns->ctrl, ns, argp); + default: + if (!ns->ndev) + return -ENOTTY; + return nvme_nvm_ioctl(ns, cmd, argp); + } +} + static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1803,27 +1823,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, if (is_ctrl_ioctl(cmd)) return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); - switch (cmd) { - case NVME_IOCTL_ID: - force_successful_syscall_return(); - ret = ns->head->ns_id; - break; - case NVME_IOCTL_IO_CMD: - ret = nvme_user_cmd(ns->ctrl, ns, argp); - break; - case NVME_IOCTL_SUBMIT_IO: - ret = nvme_submit_io(ns, argp); - break; - case NVME_IOCTL_IO64_CMD: - ret = nvme_user_cmd64(ns->ctrl, ns, argp); - break; - default: - if (ns->ndev) - ret = nvme_nvm_ioctl(ns, cmd, argp); - else - ret = -ENOTTY; - } - + ret = nvme_ns_ioctl(ns, cmd, argp); nvme_put_ns_from_disk(head, srcu_idx); return ret; } -- cgit v1.2.3 From 89b3d6e60550ded5a88ae69cd04d17558e948878 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Apr 2021 14:04:42 
+0200 Subject: nvme: simplify the compat ioctl handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't bother defining a separate compat_ioctl handler, and just handle the NVME_IOCTL_SUBMIT_IO32 case inline. Also only define it for those ABIs (currently just i386 vs x86_64) that are affected. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González --- drivers/nvme/host/core.c | 69 ++++++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 43 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 85acb9f608cd..cb20c5c8bbc4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1783,6 +1783,24 @@ static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, return ret; } +#ifdef COMPAT_FOR_U64_ALIGNMENT +struct nvme_user_io32 { + __u8 opcode; + __u8 flags; + __u16 control; + __u16 nblocks; + __u16 rsvd; + __u64 metadata; + __u64 addr; + __u64 slba; + __u32 dsmgmt; + __u32 reftag; + __u16 apptag; + __u16 appmask; +} __attribute__((__packed__)); +#define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) +#endif /* COMPAT_FOR_U64_ALIGNMENT */ + static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp) { @@ -1792,6 +1810,14 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, return ns->head->ns_id; case NVME_IOCTL_IO_CMD: return nvme_user_cmd(ns->ctrl, ns, argp); + /* + * struct nvme_user_io can have different padding on some 32-bit ABIs. + * Just accept the compat version as all fields that are used are the + * same size and at the same offset.
+ */ +#ifdef COMPAT_FOR_U64_ALIGNMENT + case NVME_IOCTL_SUBMIT_IO32: +#endif case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, argp); case NVME_IOCTL_IO64_CMD: @@ -1828,47 +1854,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, return ret; } -#ifdef CONFIG_COMPAT -struct nvme_user_io32 { - __u8 opcode; - __u8 flags; - __u16 control; - __u16 nblocks; - __u16 rsvd; - __u64 metadata; - __u64 addr; - __u64 slba; - __u32 dsmgmt; - __u32 reftag; - __u16 apptag; - __u16 appmask; -} __attribute__((__packed__)); - -#define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) - -static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - /* - * Corresponds to the difference of NVME_IOCTL_SUBMIT_IO - * between 32 bit programs and 64 bit kernel. - * The cause is that the results of sizeof(struct nvme_user_io), - * which is used to define NVME_IOCTL_SUBMIT_IO, - * are not same between 32 bit compiler and 64 bit compiler. - * NVME_IOCTL_SUBMIT_IO32 is for 64 bit kernel handling - * NVME_IOCTL_SUBMIT_IO issued from 32 bit programs. - * Other IOCTL numbers are same between 32 bit and 64 bit. - * So there is nothing to do regarding to other IOCTL numbers. 
- */ - if (cmd == NVME_IOCTL_SUBMIT_IO32) - return nvme_ioctl(bdev, mode, NVME_IOCTL_SUBMIT_IO, arg); - - return nvme_ioctl(bdev, mode, cmd, arg); -} -#else -#define nvme_compat_ioctl NULL -#endif /* CONFIG_COMPAT */ - static int nvme_open(struct block_device *bdev, fmode_t mode) { struct nvme_ns *ns = bdev->bd_disk->private_data; @@ -2356,7 +2341,6 @@ EXPORT_SYMBOL_GPL(nvme_sec_submit); static const struct block_device_operations nvme_bdev_ops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, - .compat_ioctl = nvme_compat_ioctl, .open = nvme_open, .release = nvme_release, .getgeo = nvme_getgeo, @@ -2385,7 +2369,6 @@ const struct block_device_operations nvme_ns_head_ops = { .open = nvme_ns_head_open, .release = nvme_ns_head_release, .ioctl = nvme_ioctl, - .compat_ioctl = nvme_compat_ioctl, .getgeo = nvme_getgeo, .report_zones = nvme_report_zones, .pr_ops = &nvme_pr_ops, -- cgit v1.2.3 From 2f907f7f96d96c518652410b90ad2edb50305a4c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Aug 2020 10:55:32 +0200 Subject: nvme: simplify block device ioctl handling for the !multipath case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only use the existing ioctl handler for the multipath case, and add a simpler one that reverts to the pre-multipath case for not shared use case. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González --- drivers/nvme/host/core.c | 83 +++++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 36 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cb20c5c8bbc4..a89de83e12be 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1757,30 +1757,17 @@ static bool is_ctrl_ioctl(unsigned int cmd) return false; } -static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp, - struct nvme_ns_head *head, - int srcu_idx) +static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, + void __user *argp) { - struct nvme_ctrl *ctrl = ns->ctrl; - int ret; - - nvme_get_ctrl(ns->ctrl); - nvme_put_ns_from_disk(head, srcu_idx); - switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - ret = nvme_user_cmd(ctrl, NULL, argp); - break; + return nvme_user_cmd(ctrl, NULL, argp); case NVME_IOCTL_ADMIN64_CMD: - ret = nvme_user_cmd64(ctrl, NULL, argp); - break; + return nvme_user_cmd64(ctrl, NULL, argp); default: - ret = sed_ioctl(ctrl->opal_dev, cmd, argp); - break; + return sed_ioctl(ctrl->opal_dev, cmd, argp); } - nvme_put_ctrl(ctrl); - return ret; } #ifdef COMPAT_FOR_U64_ALIGNMENT @@ -1832,26 +1819,12 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - struct nvme_ns_head *head = NULL; + struct nvme_ns *ns = bdev->bd_disk->private_data; void __user *argp = (void __user *)arg; - struct nvme_ns *ns; - int srcu_idx, ret; - - ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); - if (unlikely(!ns)) - return -EWOULDBLOCK; - /* - * Handle ioctls that apply to the controller instead of the namespace - * seperately and drop the ns SRCU reference early. This avoids a - * deadlock when deleting namespaces using the passthrough interface. 
- */ if (is_ctrl_ioctl(cmd)) - return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); - - ret = nvme_ns_ioctl(ns, cmd, argp); - nvme_put_ns_from_disk(head, srcu_idx); - return ret; + return nvme_ctrl_ioctl(ns->ctrl, cmd, argp); + return nvme_ns_ioctl(ns, cmd, argp); } static int nvme_open(struct block_device *bdev, fmode_t mode) @@ -2363,12 +2336,50 @@ static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode) nvme_put_ns_head(disk->private_data); } +static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, + void __user *argp, struct nvme_ns_head *head, int srcu_idx) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + int ret; + + nvme_get_ctrl(ns->ctrl); + nvme_put_ns_from_disk(head, srcu_idx); + ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp); + nvme_put_ctrl(ctrl); + return ret; +} + +static int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct nvme_ns_head *head = NULL; + void __user *argp = (void __user *)arg; + struct nvme_ns *ns; + int srcu_idx, ret; + + ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); + if (unlikely(!ns)) + return -EWOULDBLOCK; + + /* + * Handle ioctls that apply to the controller instead of the namespace + * seperately and drop the ns SRCU reference early. This avoids a + * deadlock when deleting namespaces using the passthrough interface. 
+ */ + if (is_ctrl_ioctl(cmd)) + return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); + + ret = nvme_ns_ioctl(ns, cmd, argp); + nvme_put_ns_from_disk(head, srcu_idx); + return ret; +} + const struct block_device_operations nvme_ns_head_ops = { .owner = THIS_MODULE, .submit_bio = nvme_ns_head_submit_bio, .open = nvme_ns_head_open, .release = nvme_ns_head_release, - .ioctl = nvme_ioctl, + .ioctl = nvme_ns_head_ioctl, .getgeo = nvme_getgeo, .report_zones = nvme_report_zones, .pr_ops = &nvme_pr_ops, -- cgit v1.2.3 From 3557a4409701a132e8f86ad234ac8cf6e97b052e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Aug 2020 11:11:49 +0200 Subject: nvme: don't bother to look up a namespace for controller ioctls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't bother to look up a namespace just to drop it after retrieving the controller for the multipath case. Just look up a live controller for the subsystem directly. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González --- drivers/nvme/host/core.c | 66 ++++++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 24 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a89de83e12be..7710cf59afa9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2336,42 +2336,60 @@ static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode) nvme_put_ns_head(disk->private_data); } -static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp, struct nvme_ns_head *head, int srcu_idx) +static struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys) { - struct nvme_ctrl *ctrl = ns->ctrl; + struct nvme_ctrl *ctrl; int ret; - nvme_get_ctrl(ns->ctrl); - nvme_put_ns_from_disk(head, srcu_idx); - ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp); + ret = 
mutex_lock_killable(&nvme_subsystems_lock); + if (ret) + return ERR_PTR(ret); + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { + if (ctrl->state == NVME_CTRL_LIVE) + goto found; + } + mutex_unlock(&nvme_subsystems_lock); + return ERR_PTR(-EWOULDBLOCK); +found: + nvme_get_ctrl(ctrl); + mutex_unlock(&nvme_subsystems_lock); + return ctrl; +} + +static int nvme_ns_head_ctrl_ioctl(struct nvme_ns_head *head, + unsigned int cmd, void __user *argp) +{ + struct nvme_ctrl *ctrl = nvme_find_get_live_ctrl(head->subsys); + int ret; + + if (IS_ERR(ctrl)) + return PTR_ERR(ctrl); + ret = nvme_ctrl_ioctl(ctrl, cmd, argp); nvme_put_ctrl(ctrl); return ret; } +static int nvme_ns_head_ns_ioctl(struct nvme_ns_head *head, + unsigned int cmd, void __user *argp) +{ + int srcu_idx = srcu_read_lock(&head->srcu); + struct nvme_ns *ns = nvme_find_path(head); + int ret = -EWOULDBLOCK; + + if (ns) + ret = nvme_ns_ioctl(ns, cmd, argp); + srcu_read_unlock(&head->srcu, srcu_idx); + return ret; +} + static int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - struct nvme_ns_head *head = NULL; - void __user *argp = (void __user *)arg; - struct nvme_ns *ns; - int srcu_idx, ret; - - ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); - if (unlikely(!ns)) - return -EWOULDBLOCK; + struct nvme_ns_head *head = bdev->bd_disk->private_data; - /* - * Handle ioctls that apply to the controller instead of the namespace - * seperately and drop the ns SRCU reference early. This avoids a - * deadlock when deleting namespaces using the passthrough interface. 
- */ if (is_ctrl_ioctl(cmd)) - return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); - - ret = nvme_ns_ioctl(ns, cmd, argp); - nvme_put_ns_from_disk(head, srcu_idx); - return ret; + return nvme_ns_head_ctrl_ioctl(head, cmd, (void __user *)arg); + return nvme_ns_head_ns_ioctl(head, cmd, (void __user *)arg); } const struct block_device_operations nvme_ns_head_ops = { -- cgit v1.2.3 From 2405252a680e2151046f4f256d706c3ca92fedef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 10 Apr 2021 08:42:03 +0200 Subject: nvme: move the ioctl code to a separate file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split out the ioctl code from core.c into a new file. Also update copyrights while we're at it. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/Makefile | 2 +- drivers/nvme/host/core.c | 450 +------------------------------------------- drivers/nvme/host/ioctl.c | 455 +++++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 10 +- 4 files changed, 468 insertions(+), 449 deletions(-) create mode 100644 drivers/nvme/host/ioctl.c (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index d7f6a87687b8..cbc509784b2e 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o obj-$(CONFIG_NVME_FC) += nvme-fc.o obj-$(CONFIG_NVME_TCP) += nvme-tcp.o -nvme-core-y := core.o +nvme-core-y := core.o ioctl.o nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_NVM) += lightnvm.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7710cf59afa9..b20ffa25f0f0 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -112,7 +112,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns) 
set_capacity_and_notify(ns->disk, 0); } -static void nvme_queue_scan(struct nvme_ctrl *ctrl) +void nvme_queue_scan(struct nvme_ctrl *ctrl) { /* * Only new queue scan work when admin and IO queues are both alive @@ -179,7 +179,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_reset_ctrl); -static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) +int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) { int ret; @@ -1016,40 +1016,6 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); -static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf, - unsigned len, u32 seed, bool write) -{ - struct bio_integrity_payload *bip; - int ret = -ENOMEM; - void *buf; - - buf = kmalloc(len, GFP_KERNEL); - if (!buf) - goto out; - - ret = -EFAULT; - if (write && copy_from_user(buf, ubuf, len)) - goto out_free_meta; - - bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); - if (IS_ERR(bip)) { - ret = PTR_ERR(bip); - goto out_free_meta; - } - - bip->bip_iter.bi_size = len; - bip->bip_iter.bi_sector = seed; - ret = bio_integrity_add_page(bio, virt_to_page(buf), len, - offset_in_page(buf)); - if (ret == len) - return buf; - ret = -ENOMEM; -out_free_meta: - kfree(buf); -out: - return ERR_PTR(ret); -} - static u32 nvme_known_admin_effects(u8 opcode) { switch (opcode) { @@ -1138,66 +1104,6 @@ void nvme_execute_passthru_rq(struct request *rq) } EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); -static int nvme_submit_user_cmd(struct request_queue *q, - struct nvme_command *cmd, void __user *ubuffer, - unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, u64 *result, unsigned timeout) -{ - bool write = nvme_is_write(cmd); - struct nvme_ns *ns = q->queuedata; - struct block_device *bdev = ns ? 
ns->disk->part0 : NULL; - struct request *req; - struct bio *bio = NULL; - void *meta = NULL; - int ret; - - req = nvme_alloc_request(q, cmd, 0); - if (IS_ERR(req)) - return PTR_ERR(req); - - if (timeout) - req->timeout = timeout; - nvme_req(req)->flags |= NVME_REQ_USERCMD; - - if (ubuffer && bufflen) { - ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, - GFP_KERNEL); - if (ret) - goto out; - bio = req->bio; - if (bdev) - bio_set_dev(bio, bdev); - if (bdev && meta_buffer && meta_len) { - meta = nvme_add_user_metadata(bio, meta_buffer, meta_len, - meta_seed, write); - if (IS_ERR(meta)) { - ret = PTR_ERR(meta); - goto out_unmap; - } - req->cmd_flags |= REQ_INTEGRITY; - } - } - - nvme_execute_passthru_rq(req); - if (nvme_req(req)->flags & NVME_REQ_CANCELLED) - ret = -EINTR; - else - ret = nvme_req(req)->status; - if (result) - *result = le64_to_cpu(nvme_req(req)->result.u64); - if (meta && !ret && !write) { - if (copy_to_user(meta_buffer, meta, meta_len)) - ret = -EFAULT; - } - kfree(meta); - out_unmap: - if (bio) - blk_rq_unmap_user(bio); - out: - blk_mq_free_request(req); - return ret; -} - static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) { struct nvme_ctrl *ctrl = rq->end_io_data; @@ -1542,182 +1448,6 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl) queue_work(nvme_wq, &ctrl->async_event_work); } -/* - * Convert integer values from ioctl structures to user pointers, silently - * ignoring the upper bits in the compat case to match behaviour of 32-bit - * kernels. 
- */ -static void __user *nvme_to_user_ptr(uintptr_t ptrval) -{ - if (in_compat_syscall()) - ptrval = (compat_uptr_t)ptrval; - return (void __user *)ptrval; -} - -static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) -{ - struct nvme_user_io io; - struct nvme_command c; - unsigned length, meta_len; - void __user *metadata; - - if (copy_from_user(&io, uio, sizeof(io))) - return -EFAULT; - if (io.flags) - return -EINVAL; - - switch (io.opcode) { - case nvme_cmd_write: - case nvme_cmd_read: - case nvme_cmd_compare: - break; - default: - return -EINVAL; - } - - length = (io.nblocks + 1) << ns->lba_shift; - - if ((io.control & NVME_RW_PRINFO_PRACT) && - ns->ms == sizeof(struct t10_pi_tuple)) { - /* - * Protection information is stripped/inserted by the - * controller. - */ - if (nvme_to_user_ptr(io.metadata)) - return -EINVAL; - meta_len = 0; - metadata = NULL; - } else { - meta_len = (io.nblocks + 1) * ns->ms; - metadata = nvme_to_user_ptr(io.metadata); - } - - if (ns->features & NVME_NS_EXT_LBAS) { - length += meta_len; - meta_len = 0; - } else if (meta_len) { - if ((io.metadata & 3) || !io.metadata) - return -EINVAL; - } - - memset(&c, 0, sizeof(c)); - c.rw.opcode = io.opcode; - c.rw.flags = io.flags; - c.rw.nsid = cpu_to_le32(ns->head->ns_id); - c.rw.slba = cpu_to_le64(io.slba); - c.rw.length = cpu_to_le16(io.nblocks); - c.rw.control = cpu_to_le16(io.control); - c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); - c.rw.reftag = cpu_to_le32(io.reftag); - c.rw.apptag = cpu_to_le16(io.apptag); - c.rw.appmask = cpu_to_le16(io.appmask); - - return nvme_submit_user_cmd(ns->queue, &c, - nvme_to_user_ptr(io.addr), length, - metadata, meta_len, lower_32_bits(io.slba), NULL, 0); -} - -static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd __user *ucmd) -{ - struct nvme_passthru_cmd cmd; - struct nvme_command c; - unsigned timeout = 0; - u64 result; - int status; - - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if 
(copy_from_user(&cmd, ucmd, sizeof(cmd))) - return -EFAULT; - if (cmd.flags) - return -EINVAL; - if (ns && cmd.nsid != ns->head->ns_id) { - dev_err(ctrl->device, - "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", - current->comm, cmd.nsid, ns->head->ns_id); - return -EINVAL; - } - - memset(&c, 0, sizeof(c)); - c.common.opcode = cmd.opcode; - c.common.flags = cmd.flags; - c.common.nsid = cpu_to_le32(cmd.nsid); - c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); - c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); - c.common.cdw10 = cpu_to_le32(cmd.cdw10); - c.common.cdw11 = cpu_to_le32(cmd.cdw11); - c.common.cdw12 = cpu_to_le32(cmd.cdw12); - c.common.cdw13 = cpu_to_le32(cmd.cdw13); - c.common.cdw14 = cpu_to_le32(cmd.cdw14); - c.common.cdw15 = cpu_to_le32(cmd.cdw15); - - if (cmd.timeout_ms) - timeout = msecs_to_jiffies(cmd.timeout_ms); - - status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, - nvme_to_user_ptr(cmd.addr), cmd.data_len, - nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, - 0, &result, timeout); - - if (status >= 0) { - if (put_user(result, &ucmd->result)) - return -EFAULT; - } - - return status; -} - -static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd64 __user *ucmd) -{ - struct nvme_passthru_cmd64 cmd; - struct nvme_command c; - unsigned timeout = 0; - int status; - - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (copy_from_user(&cmd, ucmd, sizeof(cmd))) - return -EFAULT; - if (cmd.flags) - return -EINVAL; - if (ns && cmd.nsid != ns->head->ns_id) { - dev_err(ctrl->device, - "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", - current->comm, cmd.nsid, ns->head->ns_id); - return -EINVAL; - } - - memset(&c, 0, sizeof(c)); - c.common.opcode = cmd.opcode; - c.common.flags = cmd.flags; - c.common.nsid = cpu_to_le32(cmd.nsid); - c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); - c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); - c.common.cdw10 = cpu_to_le32(cmd.cdw10); - c.common.cdw11 = 
cpu_to_le32(cmd.cdw11); - c.common.cdw12 = cpu_to_le32(cmd.cdw12); - c.common.cdw13 = cpu_to_le32(cmd.cdw13); - c.common.cdw14 = cpu_to_le32(cmd.cdw14); - c.common.cdw15 = cpu_to_le32(cmd.cdw15); - - if (cmd.timeout_ms) - timeout = msecs_to_jiffies(cmd.timeout_ms); - - status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, - nvme_to_user_ptr(cmd.addr), cmd.data_len, - nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, - 0, &cmd.result, timeout); - - if (status >= 0) { - if (put_user(cmd.result, &ucmd->result)) - return -EFAULT; - } - - return status; -} - /* * Issue ioctl requests on the first available path. Note that unlike normal * block layer requests we will not retry failed request on another controller. @@ -1748,85 +1478,6 @@ void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx) srcu_read_unlock(&head->srcu, idx); } -static bool is_ctrl_ioctl(unsigned int cmd) -{ - if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) - return true; - if (is_sed_ioctl(cmd)) - return true; - return false; -} - -static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, - void __user *argp) -{ - switch (cmd) { - case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp); - case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp); - default: - return sed_ioctl(ctrl->opal_dev, cmd, argp); - } -} - -#ifdef COMPAT_FOR_U64_ALIGNMENT -struct nvme_user_io32 { - __u8 opcode; - __u8 flags; - __u16 control; - __u16 nblocks; - __u16 rsvd; - __u64 metadata; - __u64 addr; - __u64 slba; - __u32 dsmgmt; - __u32 reftag; - __u16 apptag; - __u16 appmask; -} __attribute__((__packed__)); -#define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) -#endif /* COMPAT_FOR_U64_ALIGNMENT */ - -static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp) -{ - switch (cmd) { - case NVME_IOCTL_ID: - force_successful_syscall_return(); - return ns->head->ns_id; - case NVME_IOCTL_IO_CMD: - return 
nvme_user_cmd(ns->ctrl, ns, argp); - /* - * struct nvme_user_io can have different padding on some 32-bit ABIs. - * Just accept the compat version as all fields that are used are the - * same size and at the same offset. - */ -#ifdef COMPAT_FOR_U64_ALIGNMENT - case NVME_IOCTL_SUBMIT_IO32: -#endif - case NVME_IOCTL_SUBMIT_IO: - return nvme_submit_io(ns, argp); - case NVME_IOCTL_IO64_CMD: - return nvme_user_cmd64(ns->ctrl, ns, argp); - default: - if (!ns->ndev) - return -ENOTTY; - return nvme_nvm_ioctl(ns, cmd, argp); - } -} - -static int nvme_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct nvme_ns *ns = bdev->bd_disk->private_data; - void __user *argp = (void __user *)arg; - - if (is_ctrl_ioctl(cmd)) - return nvme_ctrl_ioctl(ns->ctrl, cmd, argp); - return nvme_ns_ioctl(ns, cmd, argp); -} - static int nvme_open(struct block_device *bdev, fmode_t mode) { struct nvme_ns *ns = bdev->bd_disk->private_data; @@ -2336,7 +1987,7 @@ static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode) nvme_put_ns_head(disk->private_data); } -static struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys) +struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys) { struct nvme_ctrl *ctrl; int ret; @@ -2356,42 +2007,6 @@ found: return ctrl; } -static int nvme_ns_head_ctrl_ioctl(struct nvme_ns_head *head, - unsigned int cmd, void __user *argp) -{ - struct nvme_ctrl *ctrl = nvme_find_get_live_ctrl(head->subsys); - int ret; - - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); - ret = nvme_ctrl_ioctl(ctrl, cmd, argp); - nvme_put_ctrl(ctrl); - return ret; -} - -static int nvme_ns_head_ns_ioctl(struct nvme_ns_head *head, - unsigned int cmd, void __user *argp) -{ - int srcu_idx = srcu_read_lock(&head->srcu); - struct nvme_ns *ns = nvme_find_path(head); - int ret = -EWOULDBLOCK; - - if (ns) - ret = nvme_ns_ioctl(ns, cmd, argp); - srcu_read_unlock(&head->srcu, srcu_idx); - return ret; -} - -static int 
nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct nvme_ns_head *head = bdev->bd_disk->private_data; - - if (is_ctrl_ioctl(cmd)) - return nvme_ns_head_ctrl_ioctl(head, cmd, (void __user *)arg); - return nvme_ns_head_ns_ioctl(head, cmd, (void __user *)arg); -} - const struct block_device_operations nvme_ns_head_ops = { .owner = THIS_MODULE, .submit_bio = nvme_ns_head_submit_bio, @@ -3354,65 +2969,6 @@ static int nvme_dev_release(struct inode *inode, struct file *file) return 0; } -static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) -{ - struct nvme_ns *ns; - int ret; - - down_read(&ctrl->namespaces_rwsem); - if (list_empty(&ctrl->namespaces)) { - ret = -ENOTTY; - goto out_unlock; - } - - ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); - if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { - dev_warn(ctrl->device, - "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); - ret = -EINVAL; - goto out_unlock; - } - - dev_warn(ctrl->device, - "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); - kref_get(&ns->kref); - up_read(&ctrl->namespaces_rwsem); - - ret = nvme_user_cmd(ctrl, ns, argp); - nvme_put_ns(ns); - return ret; - -out_unlock: - up_read(&ctrl->namespaces_rwsem); - return ret; -} - -static long nvme_dev_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct nvme_ctrl *ctrl = file->private_data; - void __user *argp = (void __user *)arg; - - switch (cmd) { - case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp); - case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp); - case NVME_IOCTL_IO_CMD: - return nvme_dev_user_cmd(ctrl, argp); - case NVME_IOCTL_RESET: - dev_warn(ctrl->device, "resetting controller\n"); - return nvme_reset_ctrl_sync(ctrl); - case NVME_IOCTL_SUBSYS_RESET: - return nvme_reset_subsystem(ctrl); - case NVME_IOCTL_RESCAN: - nvme_queue_scan(ctrl); - 
return 0; - default: - return -ENOTTY; - } -} - static const struct file_operations nvme_dev_fops = { .owner = THIS_MODULE, .open = nvme_dev_open, diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c new file mode 100644 index 000000000000..8e05d65c9e93 --- /dev/null +++ b/drivers/nvme/host/ioctl.c @@ -0,0 +1,455 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2011-2014, Intel Corporation. + * Copyright (c) 2017-2021 Christoph Hellwig. + */ +#include /* for force_successful_syscall_return */ +#include +#include "nvme.h" + +/* + * Convert integer values from ioctl structures to user pointers, silently + * ignoring the upper bits in the compat case to match behaviour of 32-bit + * kernels. + */ +static void __user *nvme_to_user_ptr(uintptr_t ptrval) +{ + if (in_compat_syscall()) + ptrval = (compat_uptr_t)ptrval; + return (void __user *)ptrval; +} + +static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf, + unsigned len, u32 seed, bool write) +{ + struct bio_integrity_payload *bip; + int ret = -ENOMEM; + void *buf; + + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + goto out; + + ret = -EFAULT; + if (write && copy_from_user(buf, ubuf, len)) + goto out_free_meta; + + bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); + if (IS_ERR(bip)) { + ret = PTR_ERR(bip); + goto out_free_meta; + } + + bip->bip_iter.bi_size = len; + bip->bip_iter.bi_sector = seed; + ret = bio_integrity_add_page(bio, virt_to_page(buf), len, + offset_in_page(buf)); + if (ret == len) + return buf; + ret = -ENOMEM; +out_free_meta: + kfree(buf); +out: + return ERR_PTR(ret); +} + +static int nvme_submit_user_cmd(struct request_queue *q, + struct nvme_command *cmd, void __user *ubuffer, + unsigned bufflen, void __user *meta_buffer, unsigned meta_len, + u32 meta_seed, u64 *result, unsigned timeout) +{ + bool write = nvme_is_write(cmd); + struct nvme_ns *ns = q->queuedata; + struct block_device *bdev = ns ? 
ns->disk->part0 : NULL; + struct request *req; + struct bio *bio = NULL; + void *meta = NULL; + int ret; + + req = nvme_alloc_request(q, cmd, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + + if (timeout) + req->timeout = timeout; + nvme_req(req)->flags |= NVME_REQ_USERCMD; + + if (ubuffer && bufflen) { + ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, + GFP_KERNEL); + if (ret) + goto out; + bio = req->bio; + if (bdev) + bio_set_dev(bio, bdev); + if (bdev && meta_buffer && meta_len) { + meta = nvme_add_user_metadata(bio, meta_buffer, meta_len, + meta_seed, write); + if (IS_ERR(meta)) { + ret = PTR_ERR(meta); + goto out_unmap; + } + req->cmd_flags |= REQ_INTEGRITY; + } + } + + nvme_execute_passthru_rq(req); + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else + ret = nvme_req(req)->status; + if (result) + *result = le64_to_cpu(nvme_req(req)->result.u64); + if (meta && !ret && !write) { + if (copy_to_user(meta_buffer, meta, meta_len)) + ret = -EFAULT; + } + kfree(meta); + out_unmap: + if (bio) + blk_rq_unmap_user(bio); + out: + blk_mq_free_request(req); + return ret; +} + + +static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) +{ + struct nvme_user_io io; + struct nvme_command c; + unsigned length, meta_len; + void __user *metadata; + + if (copy_from_user(&io, uio, sizeof(io))) + return -EFAULT; + if (io.flags) + return -EINVAL; + + switch (io.opcode) { + case nvme_cmd_write: + case nvme_cmd_read: + case nvme_cmd_compare: + break; + default: + return -EINVAL; + } + + length = (io.nblocks + 1) << ns->lba_shift; + + if ((io.control & NVME_RW_PRINFO_PRACT) && + ns->ms == sizeof(struct t10_pi_tuple)) { + /* + * Protection information is stripped/inserted by the + * controller. 
+ */ + if (nvme_to_user_ptr(io.metadata)) + return -EINVAL; + meta_len = 0; + metadata = NULL; + } else { + meta_len = (io.nblocks + 1) * ns->ms; + metadata = nvme_to_user_ptr(io.metadata); + } + + if (ns->features & NVME_NS_EXT_LBAS) { + length += meta_len; + meta_len = 0; + } else if (meta_len) { + if ((io.metadata & 3) || !io.metadata) + return -EINVAL; + } + + memset(&c, 0, sizeof(c)); + c.rw.opcode = io.opcode; + c.rw.flags = io.flags; + c.rw.nsid = cpu_to_le32(ns->head->ns_id); + c.rw.slba = cpu_to_le64(io.slba); + c.rw.length = cpu_to_le16(io.nblocks); + c.rw.control = cpu_to_le16(io.control); + c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); + c.rw.reftag = cpu_to_le32(io.reftag); + c.rw.apptag = cpu_to_le16(io.apptag); + c.rw.appmask = cpu_to_le16(io.appmask); + + return nvme_submit_user_cmd(ns->queue, &c, + nvme_to_user_ptr(io.addr), length, + metadata, meta_len, lower_32_bits(io.slba), NULL, 0); +} + +static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct nvme_passthru_cmd __user *ucmd) +{ + struct nvme_passthru_cmd cmd; + struct nvme_command c; + unsigned timeout = 0; + u64 result; + int status; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (copy_from_user(&cmd, ucmd, sizeof(cmd))) + return -EFAULT; + if (cmd.flags) + return -EINVAL; + if (ns && cmd.nsid != ns->head->ns_id) { + dev_err(ctrl->device, + "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", + current->comm, cmd.nsid, ns->head->ns_id); + return -EINVAL; + } + + memset(&c, 0, sizeof(c)); + c.common.opcode = cmd.opcode; + c.common.flags = cmd.flags; + c.common.nsid = cpu_to_le32(cmd.nsid); + c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); + c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); + c.common.cdw10 = cpu_to_le32(cmd.cdw10); + c.common.cdw11 = cpu_to_le32(cmd.cdw11); + c.common.cdw12 = cpu_to_le32(cmd.cdw12); + c.common.cdw13 = cpu_to_le32(cmd.cdw13); + c.common.cdw14 = cpu_to_le32(cmd.cdw14); + c.common.cdw15 = cpu_to_le32(cmd.cdw15); + + if (cmd.timeout_ms) + 
timeout = msecs_to_jiffies(cmd.timeout_ms); + + status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, + nvme_to_user_ptr(cmd.addr), cmd.data_len, + nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, + 0, &result, timeout); + + if (status >= 0) { + if (put_user(result, &ucmd->result)) + return -EFAULT; + } + + return status; +} + +static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct nvme_passthru_cmd64 __user *ucmd) +{ + struct nvme_passthru_cmd64 cmd; + struct nvme_command c; + unsigned timeout = 0; + int status; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (copy_from_user(&cmd, ucmd, sizeof(cmd))) + return -EFAULT; + if (cmd.flags) + return -EINVAL; + if (ns && cmd.nsid != ns->head->ns_id) { + dev_err(ctrl->device, + "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", + current->comm, cmd.nsid, ns->head->ns_id); + return -EINVAL; + } + + memset(&c, 0, sizeof(c)); + c.common.opcode = cmd.opcode; + c.common.flags = cmd.flags; + c.common.nsid = cpu_to_le32(cmd.nsid); + c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); + c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); + c.common.cdw10 = cpu_to_le32(cmd.cdw10); + c.common.cdw11 = cpu_to_le32(cmd.cdw11); + c.common.cdw12 = cpu_to_le32(cmd.cdw12); + c.common.cdw13 = cpu_to_le32(cmd.cdw13); + c.common.cdw14 = cpu_to_le32(cmd.cdw14); + c.common.cdw15 = cpu_to_le32(cmd.cdw15); + + if (cmd.timeout_ms) + timeout = msecs_to_jiffies(cmd.timeout_ms); + + status = nvme_submit_user_cmd(ns ? 
ns->queue : ctrl->admin_q, &c, + nvme_to_user_ptr(cmd.addr), cmd.data_len, + nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, + 0, &cmd.result, timeout); + + if (status >= 0) { + if (put_user(cmd.result, &ucmd->result)) + return -EFAULT; + } + + return status; +} + +static bool is_ctrl_ioctl(unsigned int cmd) +{ + if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) + return true; + if (is_sed_ioctl(cmd)) + return true; + return false; +} + +static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, + void __user *argp) +{ + switch (cmd) { + case NVME_IOCTL_ADMIN_CMD: + return nvme_user_cmd(ctrl, NULL, argp); + case NVME_IOCTL_ADMIN64_CMD: + return nvme_user_cmd64(ctrl, NULL, argp); + default: + return sed_ioctl(ctrl->opal_dev, cmd, argp); + } +} + +#ifdef COMPAT_FOR_U64_ALIGNMENT +struct nvme_user_io32 { + __u8 opcode; + __u8 flags; + __u16 control; + __u16 nblocks; + __u16 rsvd; + __u64 metadata; + __u64 addr; + __u64 slba; + __u32 dsmgmt; + __u32 reftag; + __u16 apptag; + __u16 appmask; +} __attribute__((__packed__)); +#define NVME_IOCTL_SUBMIT_IO32 _IOW('N', 0x42, struct nvme_user_io32) +#endif /* COMPAT_FOR_U64_ALIGNMENT */ + +static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, + void __user *argp) +{ + switch (cmd) { + case NVME_IOCTL_ID: + force_successful_syscall_return(); + return ns->head->ns_id; + case NVME_IOCTL_IO_CMD: + return nvme_user_cmd(ns->ctrl, ns, argp); + /* + * struct nvme_user_io can have different padding on some 32-bit ABIs. + * Just accept the compat version as all fields that are used are the + * same size and at the same offset. 
+ */ +#ifdef COMPAT_FOR_U64_ALIGNMENT + case NVME_IOCTL_SUBMIT_IO32: +#endif + case NVME_IOCTL_SUBMIT_IO: + return nvme_submit_io(ns, argp); + case NVME_IOCTL_IO64_CMD: + return nvme_user_cmd64(ns->ctrl, ns, argp); + default: + if (!ns->ndev) + return -ENOTTY; + return nvme_nvm_ioctl(ns, cmd, argp); + } +} + +int nvme_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct nvme_ns *ns = bdev->bd_disk->private_data; + void __user *argp = (void __user *)arg; + + if (is_ctrl_ioctl(cmd)) + return nvme_ctrl_ioctl(ns->ctrl, cmd, argp); + return nvme_ns_ioctl(ns, cmd, argp); +} + +#ifdef CONFIG_NVME_MULTIPATH +static int nvme_ns_head_ctrl_ioctl(struct nvme_ns_head *head, + unsigned int cmd, void __user *argp) +{ + struct nvme_ctrl *ctrl = nvme_find_get_live_ctrl(head->subsys); + int ret; + + if (IS_ERR(ctrl)) + return PTR_ERR(ctrl); + ret = nvme_ctrl_ioctl(ctrl, cmd, argp); + nvme_put_ctrl(ctrl); + return ret; +} + +static int nvme_ns_head_ns_ioctl(struct nvme_ns_head *head, + unsigned int cmd, void __user *argp) +{ + int srcu_idx = srcu_read_lock(&head->srcu); + struct nvme_ns *ns = nvme_find_path(head); + int ret = -EWOULDBLOCK; + + if (ns) + ret = nvme_ns_ioctl(ns, cmd, argp); + srcu_read_unlock(&head->srcu, srcu_idx); + return ret; +} + +int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct nvme_ns_head *head = bdev->bd_disk->private_data; + + if (is_ctrl_ioctl(cmd)) + return nvme_ns_head_ctrl_ioctl(head, cmd, (void __user *)arg); + return nvme_ns_head_ns_ioctl(head, cmd, (void __user *)arg); +} +#endif /* CONFIG_NVME_MULTIPATH */ + +static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) +{ + struct nvme_ns *ns; + int ret; + + down_read(&ctrl->namespaces_rwsem); + if (list_empty(&ctrl->namespaces)) { + ret = -ENOTTY; + goto out_unlock; + } + + ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); + if (ns != 
list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { + dev_warn(ctrl->device, + "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); + ret = -EINVAL; + goto out_unlock; + } + + dev_warn(ctrl->device, + "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); + kref_get(&ns->kref); + up_read(&ctrl->namespaces_rwsem); + + ret = nvme_user_cmd(ctrl, ns, argp); + nvme_put_ns(ns); + return ret; + +out_unlock: + up_read(&ctrl->namespaces_rwsem); + return ret; +} + +long nvme_dev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct nvme_ctrl *ctrl = file->private_data; + void __user *argp = (void __user *)arg; + + switch (cmd) { + case NVME_IOCTL_ADMIN_CMD: + return nvme_user_cmd(ctrl, NULL, argp); + case NVME_IOCTL_ADMIN64_CMD: + return nvme_user_cmd64(ctrl, NULL, argp); + case NVME_IOCTL_IO_CMD: + return nvme_dev_user_cmd(ctrl, argp); + case NVME_IOCTL_RESET: + dev_warn(ctrl->device, "resetting controller\n"); + return nvme_reset_ctrl_sync(ctrl); + case NVME_IOCTL_SUBSYS_RESET: + return nvme_reset_subsystem(ctrl); + case NVME_IOCTL_RESCAN: + nvme_queue_scan(ctrl); + return 0; + default: + return -ENOTTY; + } +} diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 70018ae2cb18..d41c9ceeafa1 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -647,14 +647,22 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); +int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); - +void nvme_queue_scan(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, void *log, size_t size, u64 offset); struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk, struct nvme_ns_head **head, int 
*srcu_idx); void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx); +struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys); +int nvme_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg); +int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg); +long nvme_dev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct block_device_operations nvme_ns_head_ops; -- cgit v1.2.3 From 871ca3ef132650b9b7777c2f2fd15b72c282d792 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 Apr 2021 14:20:40 +0200 Subject: nvme: factor out a nvme_tryget_ns_head helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper to avoid opencoding ns_head->ref manipulations. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b20ffa25f0f0..88ae847f99fa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -549,6 +549,11 @@ static void nvme_free_ns_head(struct kref *ref) kfree(head); } +static bool nvme_tryget_ns_head(struct nvme_ns_head *head) +{ + return kref_get_unless_zero(&head->ref); +} + static void nvme_put_ns_head(struct nvme_ns_head *head) { kref_put(&head->ref, nvme_free_ns_head); @@ -1975,9 +1980,7 @@ static const struct block_device_operations nvme_bdev_ops = { #ifdef CONFIG_NVME_MULTIPATH static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) { - struct nvme_ns_head *head = bdev->bd_disk->private_data; - - if (!kref_get_unless_zero(&head->ref)) + if (!nvme_tryget_ns_head(bdev->bd_disk->private_data)) 
return -ENXIO; return 0; } @@ -3404,7 +3407,7 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) { - if (h->ns_id == nsid && kref_get_unless_zero(&h->ref)) + if (h->ns_id == nsid && nvme_tryget_ns_head(h)) return h; } -- cgit v1.2.3 From 1496bd4936d215fed40b008e9486c38e6acf01db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 Apr 2021 14:22:12 +0200 Subject: nvme: move nvme_ns_head_ops to multipath.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the multipath block_device_operations to multipath.c, where they belong. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 31 ++++--------------------------- drivers/nvme/host/multipath.c | 25 ++++++++++++++++++++++++- drivers/nvme/host/nvme.h | 5 ++++- 3 files changed, 32 insertions(+), 29 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 88ae847f99fa..b1f8d94bbe09 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -549,12 +549,12 @@ static void nvme_free_ns_head(struct kref *ref) kfree(head); } -static bool nvme_tryget_ns_head(struct nvme_ns_head *head) +bool nvme_tryget_ns_head(struct nvme_ns_head *head) { return kref_get_unless_zero(&head->ref); } -static void nvme_put_ns_head(struct nvme_ns_head *head) +void nvme_put_ns_head(struct nvme_ns_head *head) { kref_put(&head->ref, nvme_free_ns_head); } @@ -1511,7 +1511,7 @@ static void nvme_release(struct gendisk *disk, fmode_t mode) nvme_put_ns(ns); } -static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) +int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) { /* some standard values */ geo->heads = 1 << 6; @@ -1937,7 +1937,7 @@ static int nvme_pr_release(struct block_device 
*bdev, u64 key, enum pr_type type return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } -static const struct pr_ops nvme_pr_ops = { +const struct pr_ops nvme_pr_ops = { .pr_register = nvme_pr_register, .pr_reserve = nvme_pr_reserve, .pr_release = nvme_pr_release, @@ -1978,18 +1978,6 @@ static const struct block_device_operations nvme_bdev_ops = { }; #ifdef CONFIG_NVME_MULTIPATH -static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) -{ - if (!nvme_tryget_ns_head(bdev->bd_disk->private_data)) - return -ENXIO; - return 0; -} - -static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode) -{ - nvme_put_ns_head(disk->private_data); -} - struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys) { struct nvme_ctrl *ctrl; @@ -2009,17 +1997,6 @@ found: mutex_unlock(&nvme_subsystems_lock); return ctrl; } - -const struct block_device_operations nvme_ns_head_ops = { - .owner = THIS_MODULE, - .submit_bio = nvme_ns_head_submit_bio, - .open = nvme_ns_head_open, - .release = nvme_ns_head_release, - .ioctl = nvme_ns_head_ioctl, - .getgeo = nvme_getgeo, - .report_zones = nvme_report_zones, - .pr_ops = &nvme_pr_ops, -}; #endif /* CONFIG_NVME_MULTIPATH */ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5ebf9ccb38f4..68918ea1d3d0 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -294,7 +294,7 @@ static bool nvme_available_path(struct nvme_ns_head *head) return false; } -blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) +static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) { struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data; struct device *dev = disk_to_dev(head->disk); @@ -334,6 +334,29 @@ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) return ret; } +static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) +{ + if 
(!nvme_tryget_ns_head(bdev->bd_disk->private_data)) + return -ENXIO; + return 0; +} + +static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode) +{ + nvme_put_ns_head(disk->private_data); +} + +const struct block_device_operations nvme_ns_head_ops = { + .owner = THIS_MODULE, + .submit_bio = nvme_ns_head_submit_bio, + .open = nvme_ns_head_open, + .release = nvme_ns_head_release, + .ioctl = nvme_ns_head_ioctl, + .getgeo = nvme_getgeo, + .report_zones = nvme_report_zones, + .pr_ops = &nvme_pr_ops, +}; + static void nvme_requeue_work(struct work_struct *work) { struct nvme_ns_head *head = diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index d41c9ceeafa1..c6102ce83bb4 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -656,6 +656,8 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk, struct nvme_ns_head **head, int *srcu_idx); void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx); +bool nvme_tryget_ns_head(struct nvme_ns_head *head); +void nvme_put_ns_head(struct nvme_ns_head *head); struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys); int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); @@ -663,8 +665,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); long nvme_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo); extern const struct attribute_group *nvme_ns_id_attr_groups[]; +extern const struct pr_ops nvme_pr_ops; extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH @@ -688,7 +692,6 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); struct nvme_ns *nvme_find_path(struct 
nvme_ns_head *head); -blk_qc_t nvme_ns_head_submit_bio(struct bio *bio); static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) { -- cgit v1.2.3 From f5b9a51db29c31f4e486b08d1d823d6f75f2c2c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 Apr 2021 14:36:47 +0200 Subject: nvme: factor out nvme_ns_open and nvme_ns_release helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These will be reused for the per-namespace character devices. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b1f8d94bbe09..ded60d50fc56 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1483,9 +1483,8 @@ void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx) srcu_read_unlock(&head->srcu, idx); } -static int nvme_open(struct block_device *bdev, fmode_t mode) +static int nvme_ns_open(struct nvme_ns *ns) { - struct nvme_ns *ns = bdev->bd_disk->private_data; /* should never be called due to GENHD_FL_HIDDEN */ if (WARN_ON_ONCE(nvme_ns_head_multipath(ns->head))) @@ -1503,14 +1502,23 @@ fail: return -ENXIO; } -static void nvme_release(struct gendisk *disk, fmode_t mode) +static void nvme_ns_release(struct nvme_ns *ns) { - struct nvme_ns *ns = disk->private_data; module_put(ns->ctrl->ops->module); nvme_put_ns(ns); } +static int nvme_open(struct block_device *bdev, fmode_t mode) +{ + return nvme_ns_open(bdev->bd_disk->private_data); +} + +static void nvme_release(struct gendisk *disk, fmode_t mode) +{ + nvme_ns_release(disk->private_data); +} + int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) { /* some standard values */ -- cgit v1.2.3 From a9e0e6bc728ebcfe9f6acdca84e5c6cafee895cf Mon Sep 17 00:00:00 2001 From: 
Christoph Hellwig Date: Wed, 7 Apr 2021 15:03:16 +0200 Subject: nvme: let namespace probing continue for unsupported features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of failing to scan the namespace entirely when unsupported features are detected, just mark the gendisk hidden but allow other access like the upcoming per-namespace character device. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González --- drivers/nvme/host/core.c | 11 ++++++++++- drivers/nvme/host/zns.c | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ded60d50fc56..288ac47ff5b4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1832,7 +1832,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (blk_queue_is_zoned(ns->queue)) { ret = nvme_revalidate_zones(ns); if (ret && !nvme_first_scan(ns->disk)) - return ret; + goto out; } if (nvme_ns_head_multipath(ns->head)) { @@ -1847,6 +1847,15 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) out_unfreeze: blk_mq_unfreeze_queue(ns->disk->queue); +out: + /* + * If probing fails due an unsupported feature, hide the block device, + * but still allow other access. 
+ */ + if (ret == -ENODEV) { + ns->disk->flags |= GENHD_FL_HIDDEN; + ret = 0; + } return ret; } diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index bc2f344f0ae0..475dd45c3db4 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -96,7 +96,7 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) dev_warn(ns->ctrl->device, "zone operations:%x not supported for namespace:%u\n", le16_to_cpu(id->zoc), ns->head->ns_id); - status = -EINVAL; + status = -ENODEV; goto free_data; } @@ -105,7 +105,7 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) dev_warn(ns->ctrl->device, "invalid zone size:%llu for namespace:%u\n", ns->zsze, ns->head->ns_id); - status = -EINVAL; + status = -ENODEV; goto free_data; } -- cgit v1.2.3 From d6609084b0b81abc74dc9db0281cdd0e074df5d4 Mon Sep 17 00:00:00 2001 From: Gopal Tiwari Date: Wed, 14 Apr 2021 14:16:45 +0530 Subject: nvme: fix NULL dereference in nvme_ctrl_fast_io_fail_tmo_show/store Add an entry for dev_attr_fast_io_fail_tmo to avoid the kernel crash while reading and writing the fast_io_fail_tmo.
Fixes: 09fbed636382 (nvme: export fast_io_fail_tmo to sysfs) Signed-off-by: Gopal Tiwari Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 288ac47ff5b4..40f08e6325ef 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3379,6 +3379,8 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, return 0; if (a == &dev_attr_reconnect_delay.attr && !ctrl->opts) return 0; + if (a == &dev_attr_fast_io_fail_tmo.attr && !ctrl->opts) + return 0; return a->mode; } -- cgit v1.2.3 From a70b81bd4d9d2d6c05cfe6ef2a10bccc2e04357a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 16 Apr 2021 13:46:20 +0200 Subject: nvme: sanitize KATO setting According to the NVMe base spec the KATO commands should be sent at half of the KATO interval, to properly account for round-trip times. As we now will only ever send one KATO command per connection we can easily use the recommended values. This also fixes a potential issue where the request timeout for the KATO command does not match the value in the connect command, which might be causing spurious connection drops from the target. 
Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 17 ++++++++++++++--- drivers/nvme/host/fabrics.c | 4 +--- drivers/nvme/host/nvme.h | 1 - 3 files changed, 15 insertions(+), 7 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 40f08e6325ef..0cb097cd6a8e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1109,6 +1109,17 @@ void nvme_execute_passthru_rq(struct request *rq) } EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); +/* + * Recommended frequency for KATO commands per NVMe 1.4 section 7.12.1: + * + * The host should send Keep Alive commands at half of the Keep Alive Timeout + * accounting for transport roundtrip times [..]. + */ +static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) +{ + queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2); +} + static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) { struct nvme_ctrl *ctrl = rq->end_io_data; @@ -1131,7 +1142,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) startka = true; spin_unlock_irqrestore(&ctrl->lock, flags); if (startka) - queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); + nvme_queue_keep_alive_work(ctrl); } static int nvme_keep_alive(struct nvme_ctrl *ctrl) @@ -1161,7 +1172,7 @@ static void nvme_keep_alive_work(struct work_struct *work) dev_dbg(ctrl->device, "reschedule traffic based keep-alive timer\n"); ctrl->comp_seen = false; - queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); + nvme_queue_keep_alive_work(ctrl); return; } @@ -1178,7 +1189,7 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) if (unlikely(ctrl->kato == 0)) return; - queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); + nvme_queue_keep_alive_work(ctrl); } void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/fabrics.c 
b/drivers/nvme/host/fabrics.c index 604ab0e5a2ad..13c2747e3d00 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -379,10 +379,8 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) /* * Set keep-alive timeout in seconds granularity (ms * 1000) - * and add a grace period for controller kato enforcement */ - cmd.connect.kato = ctrl->kato ? - cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000) : 0; + cmd.connect.kato = cpu_to_le32(ctrl->kato * 1000); if (ctrl->opts->disable_sqflow) cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index c6102ce83bb4..49276186d5bd 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -27,7 +27,6 @@ extern unsigned int admin_timeout; #define NVME_ADMIN_TIMEOUT (admin_timeout * HZ) #define NVME_DEFAULT_KATO 5 -#define NVME_KATO_GRACE 10 #ifdef CONFIG_ARCH_NO_SG_CHAIN #define NVME_INLINE_SG_CNT 0 -- cgit v1.2.3 From 74c22990f08c9f922f775939a4ebc814ca2c49eb Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 16 Apr 2021 13:46:21 +0200 Subject: nvme: add 'kato' sysfs attribute Add a 'kato' controller sysfs attribute to display the current keep-alive timeout value (if any). This allows userspace to identify persistent discovery controllers, as these will have a non-zero KATO value. 
Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0cb097cd6a8e..d6fd44774e9f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3172,6 +3172,7 @@ nvme_show_int_function(cntlid); nvme_show_int_function(numa_node); nvme_show_int_function(queue_count); nvme_show_int_function(sqsize); +nvme_show_int_function(kato); static ssize_t nvme_sysfs_delete(struct device *dev, struct device_attribute *attr, const char *buf, @@ -3369,6 +3370,7 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_ctrl_loss_tmo.attr, &dev_attr_reconnect_delay.attr, &dev_attr_fast_io_fail_tmo.attr, + &dev_attr_kato.attr, NULL }; -- cgit v1.2.3 From 53fe2a30bc168db9700e00206d991ff934973cf1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Apr 2021 11:46:12 +0200 Subject: nvme: do not try to reconfigure APST when the controller is not live Do not call nvme_configure_apst when the controller is not live, given that nvme_configure_apst will fail due to the lack of an admin queue when the controller is being torn down and nvme_set_latency_tolerance is called from dev_pm_qos_hide_latency_tolerance.
Fixes: 510a405d945b("nvme: fix memory leak for power latency tolerance") Reported-by: Peng Liu Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d6fd44774e9f..11d343c420b6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2321,7 +2321,8 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val) if (ctrl->ps_max_latency_us != latency) { ctrl->ps_max_latency_us = latency; - nvme_configure_apst(ctrl); + if (ctrl->state == NVME_CTRL_LIVE) + nvme_configure_apst(ctrl); } } -- cgit v1.2.3 From 60df5de9b0532aff59a00475b57c265b4a3620e1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Apr 2021 08:47:44 +0200 Subject: nvme: cleanup nvme_configure_apst Remove a level of indentation from the main code implementing the table search by using a goto for the APST not supported case. Also move the main comment above the function. Signed-off-by: Christoph Hellwig Reviewed-by: Niklas Cassel --- drivers/nvme/host/core.c | 149 ++++++++++++++++++++++------------------------- 1 file changed, 69 insertions(+), 80 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 11d343c420b6..b905f91f14eb 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2181,28 +2181,28 @@ static int nvme_configure_acre(struct nvme_ctrl *ctrl) return ret; } +/* + * APST (Autonomous Power State Transition) lets us program a table of power + * state transitions that the controller will perform automatically. + * We configure it with a simple heuristic: we are willing to spend at most 2% + * of the time transitioning between power states.
Therefore, when running in + * any given state, we will enter the next lower-power non-operational state + * after waiting 50 * (enlat + exlat) microseconds, as long as that state's exit + * latency is under the requested maximum latency. + * + * We will not autonomously enter any non-operational state for which the total + * latency exceeds ps_max_latency_us. + * + * Users can set ps_max_latency_us to zero to turn off APST. + */ static int nvme_configure_apst(struct nvme_ctrl *ctrl) { - /* - * APST (Autonomous Power State Transition) lets us program a - * table of power state transitions that the controller will - * perform automatically. We configure it with a simple - * heuristic: we are willing to spend at most 2% of the time - * transitioning between power states. Therefore, when running - * in any given state, we will enter the next lower-power - * non-operational state after waiting 50 * (enlat + exlat) - * microseconds, as long as that state's exit latency is under - * the requested maximum latency. - * - * We will not autonomously enter any non-operational state for - * which the total latency exceeds ps_max_latency_us. Users - * can set ps_max_latency_us to zero to turn off APST. - */ - - unsigned apste; struct nvme_feat_auto_pst *table; + unsigned apste = 0; u64 max_lat_us = 0; + __le64 target = 0; int max_ps = -1; + int state; int ret; /* @@ -2223,83 +2223,72 @@ static int nvme_configure_apst(struct nvme_ctrl *ctrl) if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) { /* Turn off APST. */ - apste = 0; dev_dbg(ctrl->device, "APST disabled\n"); - } else { - __le64 target = cpu_to_le64(0); - int state; - - /* - * Walk through all states from lowest- to highest-power. - * According to the spec, lower-numbered states use more - * power. NPSS, despite the name, is the index of the - * lowest-power state, not the number of states. 
- */ - for (state = (int)ctrl->npss; state >= 0; state--) { - u64 total_latency_us, exit_latency_us, transition_ms; - - if (target) - table->entries[state] = target; - - /* - * Don't allow transitions to the deepest state - * if it's quirked off. - */ - if (state == ctrl->npss && - (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) - continue; - - /* - * Is this state a useful non-operational state for - * higher-power states to autonomously transition to? - */ - if (!(ctrl->psd[state].flags & - NVME_PS_FLAGS_NON_OP_STATE)) - continue; - - exit_latency_us = - (u64)le32_to_cpu(ctrl->psd[state].exit_lat); - if (exit_latency_us > ctrl->ps_max_latency_us) - continue; + goto done; + } - total_latency_us = - exit_latency_us + - le32_to_cpu(ctrl->psd[state].entry_lat); + /* + * Walk through all states from lowest- to highest-power. + * According to the spec, lower-numbered states use more power. NPSS, + * despite the name, is the index of the lowest-power state, not the + * number of states. + */ + for (state = (int)ctrl->npss; state >= 0; state--) { + u64 total_latency_us, exit_latency_us, transition_ms; - /* - * This state is good. Use it as the APST idle - * target for higher power states. - */ - transition_ms = total_latency_us + 19; - do_div(transition_ms, 20); - if (transition_ms > (1 << 24) - 1) - transition_ms = (1 << 24) - 1; + if (target) + table->entries[state] = target; - target = cpu_to_le64((state << 3) | - (transition_ms << 8)); + /* + * Don't allow transitions to the deepest state if it's quirked + * off. + */ + if (state == ctrl->npss && + (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) + continue; - if (max_ps == -1) - max_ps = state; + /* + * Is this state a useful non-operational state for higher-power + * states to autonomously transition to? 
+ */ + if (!(ctrl->psd[state].flags & NVME_PS_FLAGS_NON_OP_STATE)) + continue; - if (total_latency_us > max_lat_us) - max_lat_us = total_latency_us; - } + exit_latency_us = (u64)le32_to_cpu(ctrl->psd[state].exit_lat); + if (exit_latency_us > ctrl->ps_max_latency_us) + continue; - apste = 1; + total_latency_us = exit_latency_us + + le32_to_cpu(ctrl->psd[state].entry_lat); - if (max_ps == -1) { - dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n"); - } else { - dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n", - max_ps, max_lat_us, (int)sizeof(*table), table); - } + /* + * This state is good. Use it as the APST idle target for + * higher power states. + */ + transition_ms = total_latency_us + 19; + do_div(transition_ms, 20); + if (transition_ms > (1 << 24) - 1) + transition_ms = (1 << 24) - 1; + + target = cpu_to_le64((state << 3) | (transition_ms << 8)); + if (max_ps == -1) + max_ps = state; + if (total_latency_us > max_lat_us) + max_lat_us = total_latency_us; } + if (max_ps == -1) + dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n"); + else + dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n", + max_ps, max_lat_us, (int)sizeof(*table), table); + apste = 1; + +done: ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste, table, sizeof(*table), NULL); if (ret) dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret); - kfree(table); return ret; } -- cgit v1.2.3 From 2637baed78010eeaae274feb5b99ce90933fadfb Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 21 Apr 2021 16:45:04 +0900 Subject: nvme: introduce generic per-namespace chardev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace has not been allowed to I/O to device that's failed to be initialized. 
This patch introduces generic per-namespace character device to allow userspace to I/O regardless the block device is there or not. The chardev naming convention will similar to the existing blkdev naming, using a ng prefix instead of nvme, i.e. - /dev/ngXnY It also supports multipath which means it will not expose chardev for the hidden namespace blkdevs (e.g., nvmeXcYnZ). If /dev/ngXnY is created for a ns_head, then I/O request will be routed to a specific controller selected by the iopolicy of the subsystem. Signed-off-by: Minwoo Im Signed-off-by: Javier González Reviewed-by: Keith Busch Tested-by: Kanchan Joshi Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 87 +++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/ioctl.c | 38 ++++++++++++++++--- drivers/nvme/host/multipath.c | 51 +++++++++++++++++++++++-- drivers/nvme/host/nvme.h | 13 +++++++ 4 files changed, 180 insertions(+), 9 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b905f91f14eb..2f45e8fcdd7c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -89,6 +89,10 @@ static dev_t nvme_ctrl_base_chr_devt; static struct class *nvme_class; static struct class *nvme_subsys_class; +static DEFINE_IDA(nvme_ns_chr_minor_ida); +static dev_t nvme_ns_chr_devt; +static struct class *nvme_ns_chr_class; + static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid); @@ -3429,6 +3433,66 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, return 0; } +void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device) +{ + cdev_device_del(cdev, cdev_device); + ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(cdev_device->devt)); +} + +int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, + const struct file_operations *fops, struct module *owner) +{ + int minor, ret; + + minor = 
ida_simple_get(&nvme_ns_chr_minor_ida, 0, 0, GFP_KERNEL); + if (minor < 0) + return minor; + cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor); + cdev_device->class = nvme_ns_chr_class; + device_initialize(cdev_device); + cdev_init(cdev, fops); + cdev->owner = owner; + ret = cdev_device_add(cdev, cdev_device); + if (ret) + ida_simple_remove(&nvme_ns_chr_minor_ida, minor); + return ret; +} + +static int nvme_ns_chr_open(struct inode *inode, struct file *file) +{ + return nvme_ns_open(container_of(inode->i_cdev, struct nvme_ns, cdev)); +} + +static int nvme_ns_chr_release(struct inode *inode, struct file *file) +{ + nvme_ns_release(container_of(inode->i_cdev, struct nvme_ns, cdev)); + return 0; +} + +static const struct file_operations nvme_ns_chr_fops = { + .owner = THIS_MODULE, + .open = nvme_ns_chr_open, + .release = nvme_ns_chr_release, + .unlocked_ioctl = nvme_ns_chr_ioctl, + .compat_ioctl = compat_ptr_ioctl, +}; + +static int nvme_add_ns_cdev(struct nvme_ns *ns) +{ + int ret; + + ns->cdev_device.parent = ns->ctrl->device; + ret = dev_set_name(&ns->cdev_device, "ng%dn%d", + ns->ctrl->instance, ns->head->instance); + if (ret) + return ret; + ret = nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops, + ns->ctrl->ops->module); + if (ret) + kfree_const(ns->cdev_device.kobj.name); + return ret; +} + static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_ns_ids *ids) { @@ -3630,6 +3694,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, nvme_get_ctrl(ctrl); device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups); + if (!nvme_ns_head_multipath(ns->head)) + nvme_add_ns_cdev(ns); nvme_mpath_add_disk(ns, id); nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); @@ -3674,6 +3740,8 @@ static void nvme_ns_remove(struct nvme_ns *ns) synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */ if (ns->disk->flags & GENHD_FL_UP) { + if (!nvme_ns_head_multipath(ns->head)) 
+ nvme_cdev_del(&ns->cdev, &ns->cdev_device); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); if (blk_get_integrity(ns->disk)) @@ -4464,8 +4532,24 @@ static int __init nvme_core_init(void) result = PTR_ERR(nvme_subsys_class); goto destroy_class; } + + result = alloc_chrdev_region(&nvme_ns_chr_devt, 0, NVME_MINORS, + "nvme-generic"); + if (result < 0) + goto destroy_subsys_class; + + nvme_ns_chr_class = class_create(THIS_MODULE, "nvme-generic"); + if (IS_ERR(nvme_ns_chr_class)) { + result = PTR_ERR(nvme_ns_chr_class); + goto unregister_generic_ns; + } + return 0; +unregister_generic_ns: + unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS); +destroy_subsys_class: + class_destroy(nvme_subsys_class); destroy_class: class_destroy(nvme_class); unregister_chrdev: @@ -4482,12 +4566,15 @@ out: static void __exit nvme_core_exit(void) { + class_destroy(nvme_ns_chr_class); class_destroy(nvme_subsys_class); class_destroy(nvme_class); + unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS); unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); destroy_workqueue(nvme_delete_wq); destroy_workqueue(nvme_reset_wq); destroy_workqueue(nvme_wq); + ida_destroy(&nvme_ns_chr_minor_ida); ida_destroy(&nvme_instance_ida); } diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 8e05d65c9e93..502f8e4a2a1f 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -346,15 +346,27 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, } } +static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg) +{ + if (is_ctrl_ioctl(cmd)) + return nvme_ctrl_ioctl(ns->ctrl, cmd, arg); + return nvme_ns_ioctl(ns, cmd, arg); +} + int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct nvme_ns *ns = bdev->bd_disk->private_data; - void __user *argp = (void __user *)arg; - if (is_ctrl_ioctl(cmd)) - return nvme_ctrl_ioctl(ns->ctrl, cmd, argp); - return nvme_ns_ioctl(ns, cmd, argp); + 
return __nvme_ioctl(ns, cmd, (void __user *)arg); +} + +long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct nvme_ns *ns = + container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); + + return __nvme_ioctl(ns, cmd, (void __user *)arg); } #ifdef CONFIG_NVME_MULTIPATH @@ -388,10 +400,24 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct nvme_ns_head *head = bdev->bd_disk->private_data; + void __user *argp = (void __user *)arg; + + if (is_ctrl_ioctl(cmd)) + return nvme_ns_head_ctrl_ioctl(head, cmd, argp); + return nvme_ns_head_ns_ioctl(head, cmd, argp); +} + +long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct cdev *cdev = file_inode(file)->i_cdev; + struct nvme_ns_head *head = + container_of(cdev, struct nvme_ns_head, cdev); + void __user *argp = (void __user *)arg; if (is_ctrl_ioctl(cmd)) - return nvme_ns_head_ctrl_ioctl(head, cmd, (void __user *)arg); - return nvme_ns_head_ns_ioctl(head, cmd, (void __user *)arg); + return nvme_ns_head_ctrl_ioctl(head, cmd, argp); + return nvme_ns_head_ns_ioctl(head, cmd, argp); } #endif /* CONFIG_NVME_MULTIPATH */ diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 68918ea1d3d0..0d0de3433f37 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -357,6 +357,48 @@ const struct block_device_operations nvme_ns_head_ops = { .pr_ops = &nvme_pr_ops, }; +static inline struct nvme_ns_head *cdev_to_ns_head(struct cdev *cdev) +{ + return container_of(cdev, struct nvme_ns_head, cdev); +} + +static int nvme_ns_head_chr_open(struct inode *inode, struct file *file) +{ + if (!nvme_tryget_ns_head(cdev_to_ns_head(inode->i_cdev))) + return -ENXIO; + return 0; +} + +static int nvme_ns_head_chr_release(struct inode *inode, struct file *file) +{ + nvme_put_ns_head(cdev_to_ns_head(inode->i_cdev)); + return 0; +} + +static const struct file_operations 
nvme_ns_head_chr_fops = { + .owner = THIS_MODULE, + .open = nvme_ns_head_chr_open, + .release = nvme_ns_head_chr_release, + .unlocked_ioctl = nvme_ns_head_chr_ioctl, + .compat_ioctl = compat_ptr_ioctl, +}; + +static int nvme_add_ns_head_cdev(struct nvme_ns_head *head) +{ + int ret; + + head->cdev_device.parent = &head->subsys->dev; + ret = dev_set_name(&head->cdev_device, "ng%dn%d", + head->subsys->instance, head->instance); + if (ret) + return ret; + ret = nvme_cdev_add(&head->cdev, &head->cdev_device, + &nvme_ns_head_chr_fops, THIS_MODULE); + if (ret) + kfree_const(head->cdev_device.kobj.name); + return ret; +} + static void nvme_requeue_work(struct work_struct *work) { struct nvme_ns_head *head = @@ -435,9 +477,11 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) if (!head->disk) return; - if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) + if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { device_add_disk(&head->subsys->dev, head->disk, nvme_ns_id_attr_groups); + nvme_add_ns_head_cdev(head); + } mutex_lock(&head->lock); if (nvme_path_is_optimized(ns)) { @@ -714,8 +758,10 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) return; - if (head->disk->flags & GENHD_FL_UP) + if (head->disk->flags & GENHD_FL_UP) { + nvme_cdev_del(&head->cdev, &head->cdev_device); del_gendisk(head->disk); + } blk_set_queue_dying(head->disk->queue); /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); @@ -785,4 +831,3 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl) kfree(ctrl->ana_log_buf); ctrl->ana_log_buf = NULL; } - diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 49276186d5bd..773dde5b231d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -412,6 +412,10 @@ struct nvme_ns_head { bool shared; int instance; struct nvme_effects_log *effects; + + struct cdev cdev; + struct device cdev_device; + struct gendisk *disk; #ifdef CONFIG_NVME_MULTIPATH struct 
bio_list requeue_list; @@ -464,6 +468,9 @@ struct nvme_ns { #define NVME_NS_ANA_PENDING 2 #define NVME_NS_FORCE_RO 3 + struct cdev cdev; + struct device cdev_device; + struct nvme_fault_inject fault_inject; }; @@ -658,10 +665,16 @@ void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx); bool nvme_tryget_ns_head(struct nvme_ns_head *head); void nvme_put_ns_head(struct nvme_ns_head *head); struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys); +int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, + const struct file_operations *fops, struct module *owner); +void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device); int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); +long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); +long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); long nvme_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo); -- cgit v1.2.3