diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-13 15:15:15 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-13 15:15:15 -0700 |
commit | edda415314804c29fa07e538938fa07947012d8f (patch) | |
tree | 0428db94253f73bb0744f52d26645c33830756f3 /drivers/nvme/host | |
parent | 3e565a351ed3e94352bfbe0be06c659fc8fafb19 (diff) | |
parent | bb06ec31452fb2da1594f88035c2ecea4e0652f4 (diff) | |
download | linux-edda415314804c29fa07e538938fa07947012d8f.tar.bz2 |
Merge tag 'for-linus-20180413' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"Followup fixes for this merge window. This contains:
- Series from Ming, fixing corner cases in our CPU <-> queue mapping.
This triggered repeated warnings, especially on s390, but I also hit
it in cpu hot plug/unplug testing while doing IO on NVMe on x86-64.
- Another fix from Ming, ensuring that we always order budget and
driver tag identically, avoiding a deadlock on QD=1 devices.
- Loop locking regression fix from this merge window, from Omar.
- Another loop locking fix, this time missing an unlock, from Tetsuo
Handa.
- Fix for racing IO submission with device removal from Bart.
- sr reference fix from me, fixing a case where disk change or
getevents can race with device removal.
- Set of nvme fixes by way of Keith, from various contributors"
* tag 'for-linus-20180413' of git://git.kernel.dk/linux-block: (28 commits)
nvme: expand nvmf_check_if_ready checks
nvme: Use admin command effects for admin commands
nvmet: fix space padding in serial number
nvme: check return value of init_srcu_struct function
nvmet: Fix nvmet_execute_write_zeroes sector count
nvme-pci: Separate IO and admin queue IRQ vectors
nvme-pci: Remove unused queue parameter
nvme-pci: Skip queue deletion if there are no queues
nvme: target: fix buffer overflow
nvme: don't send keep-alives to the discovery controller
nvme: unexport nvme_start_keep_alive
nvme-loop: fix kernel oops in case of unhandled command
nvme: enforce 64bit offset for nvme_get_log_ext fn
sr: get/drop reference to device in revalidate and check_events
blk-mq: Revert "blk-mq: reimplement blk_mq_hw_queue_mapped"
blk-mq: Avoid that submitting a bio concurrently with device removal triggers a crash
backing: silence compiler warning using __printf
blk-mq: remove code for dealing with remapping queue
blk-mq: reimplement blk_mq_hw_queue_mapped
blk-mq: don't check queue mapped in __blk_mq_delay_run_hw_queue()
...
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- | drivers/nvme/host/core.c | 33 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 83 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 33 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 12 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 4 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 35 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 14 |
7 files changed, 133 insertions, 81 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 197a6ba9700f..9df4f71e58ca 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -376,6 +376,15 @@ static void nvme_put_ns(struct nvme_ns *ns) kref_put(&ns->kref, nvme_free_ns); } +static inline void nvme_clear_nvme_request(struct request *req) +{ + if (!(req->rq_flags & RQF_DONTPREP)) { + nvme_req(req)->retries = 0; + nvme_req(req)->flags = 0; + req->rq_flags |= RQF_DONTPREP; + } +} + struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) { @@ -392,6 +401,7 @@ struct request *nvme_alloc_request(struct request_queue *q, return req; req->cmd_flags |= REQ_FAILFAST_DRIVER; + nvme_clear_nvme_request(req); nvme_req(req)->cmd = cmd; return req; @@ -608,11 +618,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, { blk_status_t ret = BLK_STS_OK; - if (!(req->rq_flags & RQF_DONTPREP)) { - nvme_req(req)->retries = 0; - nvme_req(req)->flags = 0; - req->rq_flags |= RQF_DONTPREP; - } + nvme_clear_nvme_request(req); switch (req_op(req)) { case REQ_OP_DRV_IN: @@ -742,6 +748,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, return PTR_ERR(req); req->timeout = timeout ? 
timeout : ADMIN_TIMEOUT; + nvme_req(req)->flags |= NVME_REQ_USERCMD; if (ubuffer && bufflen) { ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, @@ -826,7 +833,7 @@ static void nvme_keep_alive_work(struct work_struct *work) } } -void nvme_start_keep_alive(struct nvme_ctrl *ctrl) +static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) { if (unlikely(ctrl->kato == 0)) return; @@ -836,7 +843,6 @@ void nvme_start_keep_alive(struct nvme_ctrl *ctrl) ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive; schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); } -EXPORT_SYMBOL_GPL(nvme_start_keep_alive); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) { @@ -1103,7 +1109,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, } if (ctrl->effects) - effects = le32_to_cpu(ctrl->effects->iocs[opcode]); + effects = le32_to_cpu(ctrl->effects->acs[opcode]); else effects = nvme_known_admin_effects(opcode); @@ -2220,7 +2226,7 @@ out_unlock: int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 log_page, void *log, - size_t size, size_t offset) + size_t size, u64 offset) { struct nvme_command c = { }; unsigned long dwlen = size / 4 - 1; @@ -2235,8 +2241,8 @@ int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.get_log_page.lid = log_page; c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1)); c.get_log_page.numdu = cpu_to_le16(dwlen >> 16); - c.get_log_page.lpol = cpu_to_le32(offset & ((1ULL << 32) - 1)); - c.get_log_page.lpou = cpu_to_le32(offset >> 32ULL); + c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset)); + c.get_log_page.lpou = cpu_to_le32(upper_32_bits(offset)); return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size); } @@ -2833,7 +2839,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, goto out_free_head; head->instance = ret; INIT_LIST_HEAD(&head->list); - init_srcu_struct(&head->srcu); + ret = init_srcu_struct(&head->srcu); + if (ret) + goto out_ida_remove; head->subsys = 
ctrl->subsys; head->ns_id = nsid; kref_init(&head->ref); @@ -2855,6 +2863,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, return head; out_cleanup_srcu: cleanup_srcu_struct(&head->srcu); +out_ida_remove: ida_simple_remove(&ctrl->subsys->ns_ida, head->instance); out_free_head: kfree(head); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 8f0f34d06d46..124c458806df 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -536,6 +536,85 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( return NULL; } +blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq, + bool queue_live, bool is_connected) +{ + struct nvme_command *cmd = nvme_req(rq)->cmd; + + if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected)) + return BLK_STS_OK; + + switch (ctrl->state) { + case NVME_CTRL_DELETING: + goto reject_io; + + case NVME_CTRL_NEW: + case NVME_CTRL_CONNECTING: + if (!is_connected) + /* + * This is the case of starting a new + * association but connectivity was lost + * before it was fully created. We need to + * error the commands used to initialize the + * controller so the reconnect can go into a + * retry attempt. The commands should all be + * marked REQ_FAILFAST_DRIVER, which will hit + * the reject path below. Anything else will + * be queued while the state settles. + */ + goto reject_or_queue_io; + + if ((queue_live && + !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) || + (!queue_live && blk_rq_is_passthrough(rq) && + cmd->common.opcode == nvme_fabrics_command && + cmd->fabrics.fctype == nvme_fabrics_type_connect)) + /* + * If queue is live, allow only commands that + * are internally generated pass through. These + * are commands on the admin queue to initialize + * the controller. This will reject any ioctl + * admin cmds received while initializing. + * + * If the queue is not live, allow only a + * connect command. 
This will reject any ioctl + * admin cmd as well as initialization commands + * if the controller reverted the queue to non-live. + */ + return BLK_STS_OK; + + /* + * fall-thru to the reject_or_queue_io clause + */ + break; + + /* these cases fall-thru + * case NVME_CTRL_LIVE: + * case NVME_CTRL_RESETTING: + */ + default: + break; + } + +reject_or_queue_io: + /* + * Any other new io is something we're not in a state to send + * to the device. Default action is to busy it and retry it + * after the controller state is recovered. However, anything + * marked for failfast or nvme multipath is immediately failed. + * Note: commands used to initialize the controller will be + * marked for failfast. + * Note: nvme cli/ioctl commands are marked for failfast. + */ + if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) + return BLK_STS_RESOURCE; + +reject_io: + nvme_req(rq)->status = NVME_SC_ABORT_REQ; + return BLK_STS_IOERR; +} +EXPORT_SYMBOL_GPL(nvmf_check_if_ready); + static const match_table_t opt_tokens = { { NVMF_OPT_TRANSPORT, "transport=%s" }, { NVMF_OPT_TRADDR, "traddr=%s" }, @@ -608,8 +687,10 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->discovery_nqn = !(strcmp(opts->subsysnqn, NVME_DISC_SUBSYS_NAME)); - if (opts->discovery_nqn) + if (opts->discovery_nqn) { + opts->kato = 0; opts->nr_io_queues = 0; + } break; case NVMF_OPT_TRADDR: p = match_strdup(args); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index a3145d90c1d2..ef46c915b7b5 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -157,36 +157,7 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); - -static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl, - struct request *rq) -{ - struct nvme_command *cmd = 
nvme_req(rq)->cmd; - - /* - * We cannot accept any other command until the connect command has - * completed, so only allow connect to pass. - */ - if (!blk_rq_is_passthrough(rq) || - cmd->common.opcode != nvme_fabrics_command || - cmd->fabrics.fctype != nvme_fabrics_type_connect) { - /* - * Connecting state means transport disruption or initial - * establishment, which can take a long time and even might - * fail permanently, fail fast to give upper layers a chance - * to failover. - * Deleting state means that the ctrl will never accept commands - * again, fail it permanently. - */ - if (ctrl->state == NVME_CTRL_CONNECTING || - ctrl->state == NVME_CTRL_DELETING) { - nvme_req(rq)->status = NVME_SC_ABORT_REQ; - return BLK_STS_IOERR; - } - return BLK_STS_RESOURCE; /* try again later */ - } - - return BLK_STS_OK; -} +blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, + struct request *rq, bool queue_live, bool is_connected); #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index c6e719b2f3ca..6cb26bcf6ec0 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2277,14 +2277,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, return BLK_STS_OK; } -static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue, - struct request *rq) -{ - if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags))) - return nvmf_check_init_req(&queue->ctrl->ctrl, rq); - return BLK_STS_OK; -} - static blk_status_t nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) @@ -2300,7 +2292,9 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, u32 data_len; blk_status_t ret; - ret = nvme_fc_is_ready(queue, rq); + ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, + test_bit(NVME_FC_Q_LIVE, &queue->flags), + ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE); if (unlikely(ret)) return ret; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 
cf93690b3ffc..061fecfd44f5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -105,6 +105,7 @@ struct nvme_request { enum { NVME_REQ_CANCELLED = (1 << 0), + NVME_REQ_USERCMD = (1 << 1), }; static inline struct nvme_request *nvme_req(struct request *req) @@ -422,7 +423,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, unsigned timeout, int qid, int at_head, blk_mq_req_flags_t flags); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); -void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); @@ -430,7 +430,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - u8 log_page, void *log, size_t size, size_t offset); + u8 log_page, void *log, size_t size, u64 offset); extern const struct attribute_group nvme_ns_id_attr_group; extern const struct block_device_operations nvme_ns_head_ops; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 295fbec1e5f2..fbc71fac6f1e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -84,6 +84,7 @@ struct nvme_dev { struct dma_pool *prp_small_pool; unsigned online_queues; unsigned max_qid; + unsigned int num_vecs; int q_depth; u32 db_stride; void __iomem *bar; @@ -414,7 +415,8 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) { struct nvme_dev *dev = set->driver_data; - return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), 0); + return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), + dev->num_vecs > 1 ? 
1 /* admin queue */ : 0); } /** @@ -1380,8 +1382,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, return 0; } -static int nvme_alloc_queue(struct nvme_dev *dev, int qid, - int depth, int node) +static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) { struct nvme_queue *nvmeq = &dev->queues[qid]; @@ -1457,7 +1458,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) nvmeq->sq_cmds_io = dev->cmb + offset; } - nvmeq->cq_vector = qid - 1; + /* + * A queue's vector matches the queue identifier unless the controller + * has only one vector available. + */ + nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid; result = adapter_alloc_cq(dev, qid, nvmeq); if (result < 0) goto release_vector; @@ -1596,8 +1601,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) if (result < 0) return result; - result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, - dev_to_node(dev->dev)); + result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); if (result) return result; @@ -1630,9 +1634,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev) int ret = 0; for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { - /* vector == qid - 1, match nvme_create_queue */ - if (nvme_alloc_queue(dev, i, dev->q_depth, - pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) { + if (nvme_alloc_queue(dev, i, dev->q_depth)) { ret = -ENOMEM; break; } @@ -1914,6 +1916,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) int result, nr_io_queues; unsigned long size; + struct irq_affinity affd = { + .pre_vectors = 1 + }; + nr_io_queues = num_possible_cpus(); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) @@ -1949,11 +1955,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * setting up the full range we need. 
*/ pci_free_irq_vectors(pdev); - nr_io_queues = pci_alloc_irq_vectors(pdev, 1, nr_io_queues, - PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY); - if (nr_io_queues <= 0) + result = pci_alloc_irq_vectors_affinity(pdev, 1, nr_io_queues + 1, + PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); + if (result <= 0) return -EIO; - dev->max_qid = nr_io_queues; + dev->num_vecs = result; + dev->max_qid = max(result - 1, 1); /* * Should investigate if there's a performance win from allocating @@ -2201,7 +2208,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); - if (!dead) { + if (!dead && dev->ctrl.queue_count > 0) { /* * If the controller is still alive tell it to stop using the * host memory buffer. In theory the shutdown / reset should diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 758537e9ba07..1eb4438a8763 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1601,17 +1601,6 @@ nvme_rdma_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } -/* - * We cannot accept any other command until the Connect command has completed. - */ -static inline blk_status_t -nvme_rdma_is_ready(struct nvme_rdma_queue *queue, struct request *rq) -{ - if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) - return nvmf_check_init_req(&queue->ctrl->ctrl, rq); - return BLK_STS_OK; -} - static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1627,7 +1616,8 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); - ret = nvme_rdma_is_ready(queue, rq); + ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, + test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true); if (unlikely(ret)) return ret; |