From 726be2c72efc0a64c206e854b8996ad3ab9c7507 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Tue, 22 Mar 2022 10:20:48 +0100 Subject: nvme: fix the read-only state for zoned namespaces with unsupposed features commit 2f4c9ba23b88 ("nvme: export zoned namespaces without Zone Append support read-only") marks zoned namespaces without append support read-only. It does iso by setting NVME_NS_FORCE_RO in ns->flags in nvme_update_zone_info and checking for that flag later in nvme_update_disk_info to mark the disk as read-only. But commit 73d90386b559 ("nvme: cleanup zone information initialization") rearranged nvme_update_disk_info to be called before nvme_update_zone_info and thus not marking the disk as read-only. The call order cannot be just reverted because nvme_update_zone_info sets certain queue parameters such as zone_write_granularity that depend on the prior call to nvme_update_disk_info. Remove the call to set_disk_ro in nvme_update_disk_info. and call set_disk_ro after nvme_update_zone_info and nvme_update_disk_info to set the permission for ZNS drives correctly. The same applies to the multipath disk path. Fixes: 73d90386b559 ("nvme: cleanup zone information initialization") Signed-off-by: Pankaj Raghav Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3c0461129bca..ccc5877d514b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1867,9 +1867,6 @@ static void nvme_update_disk_info(struct gendisk *disk, nvme_config_discard(disk, ns); blk_queue_max_write_zeroes_sectors(disk->queue, ns->ctrl->max_zeroes_sectors); - - set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) || - test_bit(NVME_NS_FORCE_RO, &ns->flags)); } static inline bool nvme_first_scan(struct gendisk *disk) @@ -1930,6 +1927,8 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) goto out_unfreeze; } + set_disk_ro(ns->disk, (id->nsattr & NVME_NS_ATTR_RO) || + test_bit(NVME_NS_FORCE_RO, &ns->flags)); set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); @@ -1942,6 +1941,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (nvme_ns_head_multipath(ns->head)) { blk_mq_freeze_queue(ns->head->disk->queue); nvme_update_disk_info(ns->head->disk, ns, id); + set_disk_ro(ns->head->disk, + (id->nsattr & NVME_NS_ATTR_RO) || + test_bit(NVME_NS_FORCE_RO, &ns->flags)); nvme_mpath_revalidate_paths(ns); blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); -- cgit v1.2.3 From 2e21e4454bd3435ef6e3b84492dcbfaaf9d8769c Mon Sep 17 00:00:00 2001 From: Xin Hao Date: Tue, 22 Mar 2022 10:35:12 +0800 Subject: nvme-pci: expose use_threaded_interrupts read-only in sysfs Allow reading /sys/module/nvme/parameters/use_threaded_interrupts to see if the use_threaded_interrupts module parameter is in use. Signed-off-by: Xin Hao Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9f4f3884fefe..9f3c392fe7a1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -44,7 +44,7 @@ #define NVME_MAX_SEGS 127 static int use_threaded_interrupts; -module_param(use_threaded_interrupts, int, 0); +module_param(use_threaded_interrupts, int, 0444); static bool use_cmb_sqes = true; module_param(use_cmb_sqes, bool, 0444); -- cgit v1.2.3 From bc360b0b1611566e1bd47384daf49af6a1c51837 Mon Sep 17 00:00:00 2001 From: Monish Kumar R Date: Wed, 16 Mar 2022 13:24:49 +0530 Subject: nvme-pci: add quirks for Samsung X5 SSDs Add quirks to not fail the initialization and to have quick resume latency after cold/warm reboot. Signed-off-by: Monish Kumar R Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9f3c392fe7a1..66f1eee2509a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3466,7 +3466,10 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_128_BYTES_SQES | NVME_QUIRK_SHARED_TAGS | NVME_QUIRK_SKIP_CID_GEN }, - + { PCI_DEVICE(0x144d, 0xa808), /* Samsung X5 */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY| + NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } }; -- cgit v1.2.3 From 8832cf922151e9dfa2821736beb0ae2dd3968b6e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 21 Mar 2022 13:57:27 +0200 Subject: nvmet: use a private workqueue instead of the system workqueue Any attempt to flush kernel-global WQs has possibility of deadlock so we should simply stop using them, instead introduce nvmet_wq which is the generic nvmet workqueue for work elements that don't explicitly require a dedicated workqueue (by the mere fact that they are using the system_wq). Changes were done using the following replaces: - s/schedule_work(/queue_work(nvmet_wq, /g - s/schedule_delayed_work(/queue_delayed_work(nvmet_wq, /g - s/flush_scheduled_work()/flush_workqueue(nvmet_wq)/g Reported-by: Tetsuo Handa Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/configfs.c | 2 +- drivers/nvme/target/core.c | 24 ++++++++++++++++++------ drivers/nvme/target/fc.c | 8 ++++---- drivers/nvme/target/fcloop.c | 16 ++++++++-------- drivers/nvme/target/io-cmd-file.c | 6 +++--- drivers/nvme/target/loop.c | 4 ++-- drivers/nvme/target/nvmet.h | 1 + drivers/nvme/target/passthru.c | 2 +- drivers/nvme/target/rdma.c | 12 ++++++------ drivers/nvme/target/tcp.c | 10 +++++----- 11 files changed, 50 insertions(+), 37 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 46d0dab686dd..397daaf51f1b 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -988,7 +988,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req; mutex_unlock(&ctrl->lock); - schedule_work(&ctrl->async_event_work); + queue_work(nvmet_wq, &ctrl->async_event_work); } void nvmet_execute_keep_alive(struct nvmet_req *req) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index b650312dd4ae..c542f8457b1e 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1593,7 +1593,7 @@ static void nvmet_port_release(struct config_item *item) struct nvmet_port *port = to_nvmet_port(item); /* Let inflight controllers teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_wq); list_del(&port->global_entry); kfree(port->ana_state); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 92af37d33ec3..cd833b1dd47c 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -20,6 +20,9 @@ struct workqueue_struct *zbd_wq; static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static DEFINE_IDA(cntlid_ida); +struct workqueue_struct *nvmet_wq; +EXPORT_SYMBOL_GPL(nvmet_wq); + /* * This read/write semaphore is used to synchronize access to configuration * information on a target system that will result in discovery log page @@ -205,7 +208,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, list_add_tail(&aen->entry, &ctrl->async_events); mutex_unlock(&ctrl->lock); - schedule_work(&ctrl->async_event_work); + queue_work(nvmet_wq, &ctrl->async_event_work); } static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) @@ -385,7 +388,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work) if (reset_tbkas) { pr_debug("ctrl %d reschedule traffic based keep-alive timer\n", ctrl->cntlid); - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); return; } @@ -403,7 +406,7 @@ void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) pr_debug("ctrl %d start keep-alive timer for %d secs\n", ctrl->cntlid, ctrl->kato); - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); } void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) @@ -1478,7 +1481,7 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) mutex_lock(&ctrl->lock); if (!(ctrl->csts & NVME_CSTS_CFS)) { ctrl->csts |= NVME_CSTS_CFS; - schedule_work(&ctrl->fatal_err_work); + queue_work(nvmet_wq, &ctrl->fatal_err_work); } mutex_unlock(&ctrl->lock); } @@ -1620,9 +1623,15 @@ static int __init nvmet_init(void) goto out_free_zbd_work_queue; } + nvmet_wq = alloc_workqueue("nvmet-wq", WQ_MEM_RECLAIM, 0); + if (!nvmet_wq) { + error = -ENOMEM; + goto out_free_buffered_work_queue; + } + error = nvmet_init_discovery(); if (error) - goto out_free_work_queue; + goto out_free_nvmet_work_queue; error = nvmet_init_configfs(); if (error) @@ -1631,7 +1640,9 @@ static int __init nvmet_init(void) out_exit_discovery: nvmet_exit_discovery(); -out_free_work_queue: +out_free_nvmet_work_queue: + destroy_workqueue(nvmet_wq); +out_free_buffered_work_queue: destroy_workqueue(buffered_io_wq); out_free_zbd_work_queue: destroy_workqueue(zbd_wq); @@ -1643,6 +1654,7 @@ static void __exit nvmet_exit(void) nvmet_exit_configfs(); nvmet_exit_discovery(); ida_destroy(&cntlid_ida); + destroy_workqueue(nvmet_wq); destroy_workqueue(buffered_io_wq); destroy_workqueue(zbd_wq); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index de90001fc5c4..ab2627e17bb9 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1491,7 +1491,7 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!schedule_work(&assoc->del_work)) + if (!queue_work(nvmet_wq, &assoc->del_work)) /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); } @@ -1546,7 +1546,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - if (!schedule_work(&assoc->del_work)) + if (!queue_work(nvmet_wq, &assoc->del_work)) /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); } @@ -1592,7 +1592,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!schedule_work(&assoc->del_work)) + if (!queue_work(nvmet_wq, &assoc->del_work)) /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); return; @@ -2060,7 +2060,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, iod->rqstdatalen = lsreqbuf_len; iod->hosthandle = hosthandle; - schedule_work(&iod->work); + queue_work(nvmet_wq, &iod->work); return 0; } diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 54606f1872b4..5c16372f3b53 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -360,7 +360,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, spin_lock(&rport->lock); list_add_tail(&rport->ls_list, &tls_req->ls_list); spin_unlock(&rport->lock); - schedule_work(&rport->ls_work); + queue_work(nvmet_wq, &rport->ls_work); return ret; } @@ -393,7 +393,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, spin_lock(&rport->lock); list_add_tail(&rport->ls_list, &tls_req->ls_list); spin_unlock(&rport->lock); - schedule_work(&rport->ls_work); + queue_work(nvmet_wq, &rport->ls_work); } return 0; @@ -448,7 +448,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, spin_lock(&tport->lock); list_add_tail(&tport->ls_list, &tls_req->ls_list); spin_unlock(&tport->lock); - schedule_work(&tport->ls_work); + queue_work(nvmet_wq, &tport->ls_work); return ret; } @@ -480,7 +480,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, spin_lock(&tport->lock); list_add_tail(&tport->ls_list, &tls_req->ls_list); spin_unlock(&tport->lock); - schedule_work(&tport->ls_work); + queue_work(nvmet_wq, &tport->ls_work); } return 0; @@ -520,7 +520,7 @@ fcloop_tgt_discovery_evt(struct nvmet_fc_target_port *tgtport) tgt_rscn->tport = tgtport->private; INIT_WORK(&tgt_rscn->work, fcloop_tgt_rscn_work); - schedule_work(&tgt_rscn->work); + queue_work(nvmet_wq, &tgt_rscn->work); } static void @@ -739,7 +739,7 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work); kref_init(&tfcp_req->ref); - schedule_work(&tfcp_req->fcp_rcv_work); + queue_work(nvmet_wq, &tfcp_req->fcp_rcv_work); return 0; } @@ -921,7 +921,7 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, { struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); - schedule_work(&tfcp_req->tio_done_work); + queue_work(nvmet_wq, &tfcp_req->tio_done_work); } static void @@ -976,7 +976,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, if (abortio) /* leave the reference while the work item is scheduled */ - WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work)); + WARN_ON(!queue_work(nvmet_wq, &tfcp_req->abort_rcv_work)); else { /* * as the io has already had the done callback made, diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 6485dc8eb974..f3d58abf11e0 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -283,7 +283,7 @@ static void nvmet_file_execute_flush(struct nvmet_req *req) if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_flush_work); - schedule_work(&req->f.work); + queue_work(nvmet_wq, &req->f.work); } static void nvmet_file_execute_discard(struct nvmet_req *req) @@ -343,7 +343,7 @@ static void nvmet_file_execute_dsm(struct nvmet_req *req) if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) return; INIT_WORK(&req->f.work, nvmet_file_dsm_work); - schedule_work(&req->f.work); + queue_work(nvmet_wq, &req->f.work); } static void nvmet_file_write_zeroes_work(struct work_struct *w) @@ -373,7 +373,7 @@ static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); - schedule_work(&req->f.work); + queue_work(nvmet_wq, &req->f.work); } u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 23f9d6f88804..59024af2da2e 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -166,7 +166,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, iod->req.transfer_len = blk_rq_payload_bytes(req); } - schedule_work(&iod->work); + queue_work(nvmet_wq, &iod->work); return BLK_STS_OK; } @@ -187,7 +187,7 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg) return; } - schedule_work(&iod->work); + queue_work(nvmet_wq, &iod->work); } static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index d910c6aad4b6..69818752a33a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -366,6 +366,7 @@ struct nvmet_req { extern struct workqueue_struct *buffered_io_wq; extern struct workqueue_struct *zbd_wq; +extern struct workqueue_struct *nvmet_wq; static inline void nvmet_set_result(struct nvmet_req *req, u32 result) { diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index a4de1e0d518b..5247c24538eb 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -283,7 +283,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) if (req->p.use_workqueue || effects) { INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work); req->p.rq = rq; - schedule_work(&req->p.work); + queue_work(nvmet_wq, &req->p.work); } else { rq->end_io_data = req; blk_execute_rq_nowait(rq, false, nvmet_passthru_req_done); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 2446d0918a41..2fab0b219b25 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1584,7 +1584,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_wq); } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); @@ -1669,7 +1669,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) if (disconnect) { rdma_disconnect(queue->cm_id); - schedule_work(&queue->release_work); + queue_work(nvmet_wq, &queue->release_work); } } @@ -1699,7 +1699,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, mutex_unlock(&nvmet_rdma_queue_mutex); pr_err("failed to connect queue %d\n", queue->idx); - schedule_work(&queue->release_work); + queue_work(nvmet_wq, &queue->release_work); } /** @@ -1773,7 +1773,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, if (!queue) { struct nvmet_rdma_port *port = cm_id->context; - schedule_delayed_work(&port->repair_work, 0); + queue_delayed_work(nvmet_wq, &port->repair_work, 0); break; } fallthrough; @@ -1903,7 +1903,7 @@ static void nvmet_rdma_repair_port_work(struct work_struct *w) nvmet_rdma_disable_port(port); ret = nvmet_rdma_enable_port(port); if (ret) - schedule_delayed_work(&port->repair_work, 5 * HZ); + queue_delayed_work(nvmet_wq, &port->repair_work, 5 * HZ); } static int nvmet_rdma_add_port(struct nvmet_port *nport) @@ -2053,7 +2053,7 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data } mutex_unlock(&nvmet_rdma_queue_mutex); - flush_scheduled_work(); + flush_workqueue(nvmet_wq); } static struct ib_client nvmet_rdma_ib_client = { diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 83ca577f72be..2793554e622e 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1269,7 +1269,7 @@ static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue) spin_lock(&queue->state_lock); if (queue->state != NVMET_TCP_Q_DISCONNECTING) { queue->state = NVMET_TCP_Q_DISCONNECTING; - schedule_work(&queue->release_work); + queue_work(nvmet_wq, &queue->release_work); } spin_unlock(&queue->state_lock); } @@ -1684,7 +1684,7 @@ static void nvmet_tcp_listen_data_ready(struct sock *sk) goto out; if (sk->sk_state == TCP_LISTEN) - schedule_work(&port->accept_work); + queue_work(nvmet_wq, &port->accept_work); out: read_unlock_bh(&sk->sk_callback_lock); } @@ -1815,7 +1815,7 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq) if (sq->qid == 0) { /* Let inflight controller teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_wq); } queue->nr_cmds = sq->size * 2; @@ -1876,12 +1876,12 @@ static void __exit nvmet_tcp_exit(void) nvmet_unregister_transport(&nvmet_tcp_ops); - flush_scheduled_work(); + flush_workqueue(nvmet_wq); mutex_lock(&nvmet_tcp_queue_mutex); list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list) kernel_sock_shutdown(queue->sock, SHUT_RDWR); mutex_unlock(&nvmet_tcp_queue_mutex); - flush_scheduled_work(); + flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); } -- cgit v1.2.3 From 63bc732c3aefaa0ad61e290fded9de0f5c8ccd0e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Mar 2022 01:30:14 +0000 Subject: nvmet: remove redundant assignment after left shift The left shift is followed by a re-assignment back to cc_css, the assignment is redundant. Fix this by replacing the "<<=" operator with "<<" instead. This cleans up the clang scan build warning: drivers/nvme/target/core.c:1124:10: warning: Although the value stored to 'cc_css' is used in the enclosing expression, the value is never actually read from 'cc_css' [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cd833b1dd47c..7b2b72cf4423 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1123,7 +1123,7 @@ static inline u8 nvmet_cc_iocqes(u32 cc) static inline bool nvmet_css_supported(u8 cc_css) { - switch (cc_css <<= NVME_CC_CSS_SHIFT) { + switch (cc_css << NVME_CC_CSS_SHIFT) { case NVME_CC_CSS_NVM: case NVME_CC_CSS_CSI: return true; -- cgit v1.2.3 From 5974ea7ce0f9a5987fc8cf5e08ad6e3e70bb542e Mon Sep 17 00:00:00 2001 From: Sungup Moon Date: Mon, 14 Mar 2022 20:05:45 +0900 Subject: nvme: allow duplicate NSIDs for private namespaces A NVMe subsystem with multiple controller can have private namespaces that use the same NSID under some conditions: "If Namespace Management, ANA Reporting, or NVM Sets are supported, the NSIDs shall be unique within the NVM subsystem. If the Namespace Management, ANA Reporting, and NVM Sets are not supported, then NSIDs: a) for shared namespace shall be unique; and b) for private namespace are not required to be unique." Reference: Section 6.1.6 NSID and Namespace Usage; NVM Express 1.4c spec. Make sure this specific setup is supported in Linux. Fixes: 9ad1927a3bc2 ("nvme: always search for namespace head") Signed-off-by: Sungup Moon [hch: refactored and fixed the controller vs subsystem based naming conflict] Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/host/core.c | 15 ++++++++++----- drivers/nvme/host/multipath.c | 7 ++++--- drivers/nvme/host/nvme.h | 19 +++++++++++++++++++ include/linux/nvme.h | 1 + 4 files changed, 34 insertions(+), 8 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ccc5877d514b..6dc05c1fa7db 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3627,15 +3627,20 @@ static const struct attribute_group *nvme_dev_attr_groups[] = { NULL, }; -static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, +static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_head *h; - lockdep_assert_held(&subsys->lock); + lockdep_assert_held(&ctrl->subsys->lock); - list_for_each_entry(h, &subsys->nsheads, entry) { - if (h->ns_id != nsid) + list_for_each_entry(h, &ctrl->subsys->nsheads, entry) { + /* + * Private namespaces can share NSIDs under some conditions. + * In that case we can't use the same ns_head for namespaces + * with the same NSID. + */ + if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h)) continue; if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) return h; @@ -3829,7 +3834,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, } mutex_lock(&ctrl->subsys->lock); - head = nvme_find_ns_head(ctrl->subsys, nsid); + head = nvme_find_ns_head(ctrl, nsid); if (!head) { ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids); if (ret) { diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index c97d7f843977..ba90555124c4 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -482,10 +482,11 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) /* * Add a multipath node if the subsystems supports multiple controllers. - * We also do this for private namespaces as the namespace sharing data could - * change after a rescan. + * We also do this for private namespaces as the namespace sharing flag + * could change after a rescan. */ - if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath) + if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || + !nvme_is_unique_nsid(ctrl, head) || !multipath) return 0; head->disk = blk_alloc_disk(ctrl->numa_node); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1ea908d43e17..1552a48719d6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -722,6 +722,25 @@ static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, return queue_live; return __nvme_check_ready(ctrl, rq, queue_live); } + +/* + * NSID shall be unique for all shared namespaces, or if at least one of the + * following conditions is met: + * 1. Namespace Management is supported by the controller + * 2. ANA is supported by the controller + * 3. NVM Set are supported by the controller + * + * In other case, private namespace are not required to report a unique NSID. + */ +static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl, + struct nvme_ns_head *head) +{ + return head->shared || + (ctrl->oacs & NVME_CTRL_OACS_NS_MNGT_SUPP) || + (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) || + (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS); +} + int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9dbc3ef4daf7..2dcee34d467d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -345,6 +345,7 @@ enum { NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, -- cgit v1.2.3 From d6d6742772d712ed2238f5071b96baf4924f5fad Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Mon, 21 Mar 2022 15:43:04 -0700 Subject: nvme: fix RCU hole that allowed for endless looping in multipath round robin Make nvme_ns_remove match the assumptions elsewhere. 1) !NVME_NS_READY needs to be srcu synchronized to make sure nothing is running in __nvme_find_path or nvme_round_robin_path that will re-assign this ns to current_path. 2) Any matching current_path entries need to be cleared before removing from the siblings list, to prevent calling nvme_round_robin_path with an "old" ns that's off list. 3) Finally the list_del_rcu can happen, and then synchronize again before releasing any reference counts. Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6dc05c1fa7db..a7d72ef01171 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4031,6 +4031,16 @@ static void nvme_ns_remove(struct nvme_ns *ns) set_capacity(ns->disk, 0); nvme_fault_inject_fini(&ns->fault_inject); + /* + * Ensure that !NVME_NS_READY is seen by other threads to prevent + * this ns going back into current_path. + */ + synchronize_srcu(&ns->head->srcu); + + /* wait for concurrent submissions */ + if (nvme_mpath_clear_current_path(ns)) + synchronize_srcu(&ns->head->srcu); + mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); if (list_empty(&ns->head->list)) { @@ -4042,10 +4052,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) /* guarantee not available in head->list */ synchronize_rcu(); - /* wait for concurrent submissions */ - if (nvme_mpath_clear_current_path(ns)) - synchronize_srcu(&ns->head->srcu); - if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); del_gendisk(ns->disk); -- cgit v1.2.3 From a4a6f3c8f61c3cfbda4998ad94596059ad7e4332 Mon Sep 17 00:00:00 2001 From: Anton Eidelman Date: Thu, 24 Mar 2022 13:05:11 -0600 Subject: nvme-multipath: fix hang when disk goes live over reconnect nvme_mpath_init_identify() invoked from nvme_init_identify() fetches a fresh ANA log from the ctrl. This is essential to have an up to date path states for both existing namespaces and for those scan_work may discover once the ctrl is up. This happens in the following cases: 1) A new ctrl is being connected. 2) An existing ctrl is successfully reconnected. 3) An existing ctrl is being reset. While in (1) ctrl->namespaces is empty, (2 & 3) may have namespaces, and nvme_read_ana_log() may call nvme_update_ns_ana_state(). This result in a hang when the ANA state of an existing namespace changes and makes the disk live: nvme_mpath_set_live() issues IO to the namespace through the ctrl, which does NOT have IO queues yet. See sample hang below. Solution: - nvme_update_ns_ana_state() to call set_live only if ctrl is live - nvme_read_ana_log() call from nvme_mpath_init_identify() therefore only fetches and parses the ANA log; any erros in this process will fail the ctrl setup as appropriate; - a separate function nvme_mpath_update() is called in nvme_start_ctrl(); this parses the ANA log without fetching it. At this point the ctrl is live, therefore, disks can be set live normally. Sample failure: nvme nvme0: starting error recovery nvme nvme0: Reconnecting in 10 seconds... block nvme0n6: no usable path - requeuing I/O INFO: task kworker/u8:3:312 blocked for more than 122 seconds. Tainted: G E 5.14.5-1.el7.elrepo.x86_64 #1 Workqueue: nvme-wq nvme_tcp_reconnect_ctrl_work [nvme_tcp] Call Trace: __schedule+0x2a2/0x7e0 schedule+0x4e/0xb0 io_schedule+0x16/0x40 wait_on_page_bit_common+0x15c/0x3e0 do_read_cache_page+0x1e0/0x410 read_cache_page+0x12/0x20 read_part_sector+0x46/0x100 read_lba+0x121/0x240 efi_partition+0x1d2/0x6a0 bdev_disk_changed.part.0+0x1df/0x430 bdev_disk_changed+0x18/0x20 blkdev_get_whole+0x77/0xe0 blkdev_get_by_dev+0xd2/0x3a0 __device_add_disk+0x1ed/0x310 device_add_disk+0x13/0x20 nvme_mpath_set_live+0x138/0x1b0 [nvme_core] nvme_update_ns_ana_state+0x2b/0x30 [nvme_core] nvme_update_ana_state+0xca/0xe0 [nvme_core] nvme_parse_ana_log+0xac/0x170 [nvme_core] nvme_read_ana_log+0x7d/0xe0 [nvme_core] nvme_mpath_init_identify+0x105/0x150 [nvme_core] nvme_init_identify+0x2df/0x4d0 [nvme_core] nvme_init_ctrl_finish+0x8d/0x3b0 [nvme_core] nvme_tcp_setup_ctrl+0x337/0x390 [nvme_tcp] nvme_tcp_reconnect_ctrl_work+0x24/0x40 [nvme_tcp] process_one_work+0x1bd/0x360 worker_thread+0x50/0x3d0 Signed-off-by: Anton Eidelman Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/multipath.c | 25 +++++++++++++++++++++++-- drivers/nvme/host/nvme.h | 4 ++++ 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a7d72ef01171..f204c6f78b5b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4522,6 +4522,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); nvme_start_queues(ctrl); + nvme_mpath_update(ctrl); } nvme_change_uevent(ctrl, "NVME_EVENT=connected"); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index ba90555124c4..7fc58e1f6b09 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -613,8 +613,17 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); - - if (nvme_state_is_live(ns->ana_state)) + /* + * nvme_mpath_set_live() will trigger I/O to the multipath path device + * and in turn to this path device. However we cannot accept this I/O + * if the controller is not live. This may deadlock if called from + * nvme_mpath_init_identify() and the ctrl will never complete + * initialization, preventing I/O from completing. For this case we + * will reprocess the ANA log page in nvme_mpath_update() once the + * controller is ready. + */ + if (nvme_state_is_live(ns->ana_state) && + ns->ctrl->state == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); } @@ -701,6 +710,18 @@ static void nvme_ana_work(struct work_struct *work) nvme_read_ana_log(ctrl); } +void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ + u32 nr_change_groups = 0; + + if (!ctrl->ana_log_buf) + return; + + mutex_lock(&ctrl->ana_lock); + nvme_parse_ana_log(ctrl, &nr_change_groups, nvme_update_ana_state); + mutex_unlock(&ctrl->ana_lock); +} + static void nvme_anatt_timeout(struct timer_list *t) { struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1552a48719d6..a1f8403ffd78 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -800,6 +800,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); +void nvme_mpath_update(struct nvme_ctrl *ctrl); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); @@ -871,6 +872,9 @@ static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n"); return 0; } +static inline void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ +} static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { } -- cgit v1.2.3