From fef912bf860e8e7e48a2bfb978a356bba743a8b7 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 28 Sep 2018 08:17:19 +0200 Subject: block: genhd: add 'groups' argument to device_add_disk Update device_add_disk() to take a 'groups' argument so that individual drivers can register a device with additional sysfs attributes. This avoids the race condition the driver would otherwise have if these groups were to be created with sysfs_add_groups(). Signed-off-by: Martin Wilck Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/multipath.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dd8ec1dd9219..0e824e8c8fd7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3099,7 +3099,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_get_ctrl(ctrl); - device_add_disk(ctrl->device, ns->disk); + device_add_disk(ctrl->device, ns->disk, NULL); if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_id_attr_group)) pr_warn("%s: failed to create sysfs group for identification\n", diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5a9562881d4e..477af51d01e8 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -283,7 +283,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) return; if (!(head->disk->flags & GENHD_FL_UP)) { - device_add_disk(&head->subsys->dev, head->disk); + device_add_disk(&head->subsys->dev, head->disk, NULL); if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, &nvme_ns_id_attr_group)) dev_warn(&head->subsys->dev, -- cgit v1.2.3 From 33b14f67a4e1eabd219fd6543da8f15ed86b641c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 28 Sep 2018 08:17:20 +0200 Subject: nvme: register ns_id attributes as default sysfs groups We 
should be registering the ns_id attribute as default sysfs attribute groups, otherwise we have a race condition between the uevent and the attributes appearing in sysfs. Suggested-by: Bart van Assche Reviewed-by: Keith Busch Signed-off-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 21 ++++----- drivers/nvme/host/lightnvm.c | 105 ++++++++++++++++++------------------------ drivers/nvme/host/multipath.c | 15 ++---- drivers/nvme/host/nvme.h | 10 +--- 4 files changed, 59 insertions(+), 92 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0e824e8c8fd7..e0a9e1c5b30e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2734,6 +2734,14 @@ const struct attribute_group nvme_ns_id_attr_group = { .is_visible = nvme_ns_id_attrs_are_visible, }; +const struct attribute_group *nvme_ns_id_attr_groups[] = { + &nvme_ns_id_attr_group, +#ifdef CONFIG_NVM + &nvme_nvm_attr_group, +#endif + NULL, +}; + #define nvme_show_str_function(field) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -3099,14 +3107,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_get_ctrl(ctrl); - device_add_disk(ctrl->device, ns->disk, NULL); - if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_id_attr_group)) - pr_warn("%s: failed to create sysfs group for identification\n", - ns->disk->disk_name); - if (ns->ndev && nvme_nvm_register_sysfs(ns)) - pr_warn("%s: failed to register lightnvm sysfs group for identification\n", - ns->disk->disk_name); + device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups); nvme_mpath_add_disk(ns, id); nvme_fault_inject_init(ns); @@ -3132,10 +3133,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_fault_inject_fini(ns); if (ns->disk && ns->disk->flags & GENHD_FL_UP) { - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_id_attr_group); - if (ns->ndev) - 
nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); if (blk_get_integrity(ns->disk)) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 6fe5923c95d4..1e4f97538838 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -1190,10 +1190,29 @@ static NVM_DEV_ATTR_12_RO(multiplane_modes); static NVM_DEV_ATTR_12_RO(media_capabilities); static NVM_DEV_ATTR_12_RO(max_phys_secs); -static struct attribute *nvm_dev_attrs_12[] = { +/* 2.0 values */ +static NVM_DEV_ATTR_20_RO(groups); +static NVM_DEV_ATTR_20_RO(punits); +static NVM_DEV_ATTR_20_RO(chunks); +static NVM_DEV_ATTR_20_RO(clba); +static NVM_DEV_ATTR_20_RO(ws_min); +static NVM_DEV_ATTR_20_RO(ws_opt); +static NVM_DEV_ATTR_20_RO(maxoc); +static NVM_DEV_ATTR_20_RO(maxocpu); +static NVM_DEV_ATTR_20_RO(mw_cunits); +static NVM_DEV_ATTR_20_RO(write_typ); +static NVM_DEV_ATTR_20_RO(write_max); +static NVM_DEV_ATTR_20_RO(reset_typ); +static NVM_DEV_ATTR_20_RO(reset_max); + +static struct attribute *nvm_dev_attrs[] = { + /* version agnostic attrs */ &dev_attr_version.attr, &dev_attr_capabilities.attr, + &dev_attr_read_typ.attr, + &dev_attr_read_max.attr, + /* 1.2 attrs */ &dev_attr_vendor_opcode.attr, &dev_attr_device_mode.attr, &dev_attr_media_manager.attr, @@ -1208,8 +1227,6 @@ static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_page_size.attr, &dev_attr_hw_sector_size.attr, &dev_attr_oob_sector_size.attr, - &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, &dev_attr_prog_typ.attr, &dev_attr_prog_max.attr, &dev_attr_erase_typ.attr, @@ -1218,33 +1235,7 @@ static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_media_capabilities.attr, &dev_attr_max_phys_secs.attr, - NULL, -}; - -static const struct attribute_group nvm_dev_attr_group_12 = { - .name = "lightnvm", - .attrs = nvm_dev_attrs_12, -}; - -/* 2.0 values */ -static NVM_DEV_ATTR_20_RO(groups); -static NVM_DEV_ATTR_20_RO(punits); -static NVM_DEV_ATTR_20_RO(chunks); -static 
NVM_DEV_ATTR_20_RO(clba); -static NVM_DEV_ATTR_20_RO(ws_min); -static NVM_DEV_ATTR_20_RO(ws_opt); -static NVM_DEV_ATTR_20_RO(maxoc); -static NVM_DEV_ATTR_20_RO(maxocpu); -static NVM_DEV_ATTR_20_RO(mw_cunits); -static NVM_DEV_ATTR_20_RO(write_typ); -static NVM_DEV_ATTR_20_RO(write_max); -static NVM_DEV_ATTR_20_RO(reset_typ); -static NVM_DEV_ATTR_20_RO(reset_max); - -static struct attribute *nvm_dev_attrs_20[] = { - &dev_attr_version.attr, - &dev_attr_capabilities.attr, - + /* 2.0 attrs */ &dev_attr_groups.attr, &dev_attr_punits.attr, &dev_attr_chunks.attr, @@ -1255,8 +1246,6 @@ static struct attribute *nvm_dev_attrs_20[] = { &dev_attr_maxocpu.attr, &dev_attr_mw_cunits.attr, - &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, &dev_attr_write_typ.attr, &dev_attr_write_max.attr, &dev_attr_reset_typ.attr, @@ -1265,44 +1254,38 @@ static struct attribute *nvm_dev_attrs_20[] = { NULL, }; -static const struct attribute_group nvm_dev_attr_group_20 = { - .name = "lightnvm", - .attrs = nvm_dev_attrs_20, -}; - -int nvme_nvm_register_sysfs(struct nvme_ns *ns) +static umode_t nvm_dev_attrs_visible(struct kobject *kobj, + struct attribute *attr, int index) { + struct device *dev = container_of(kobj, struct device, kobj); + struct gendisk *disk = dev_to_disk(dev); + struct nvme_ns *ns = disk->private_data; struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; + struct device_attribute *dev_attr = + container_of(attr, typeof(*dev_attr), attr); if (!ndev) - return -EINVAL; - - switch (geo->major_ver_id) { - case 1: - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_12); - case 2: - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_20); - } - - return -EINVAL; -} + return 0; -void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) -{ - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; + if (dev_attr->show == nvm_dev_attr_show) + return attr->mode; - switch (geo->major_ver_id) { + switch 
(ndev->geo.major_ver_id) { case 1: - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_12); + if (dev_attr->show == nvm_dev_attr_show_12) + return attr->mode; break; case 2: - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_20); + if (dev_attr->show == nvm_dev_attr_show_20) + return attr->mode; break; } + + return 0; } + +const struct attribute_group nvme_nvm_attr_group = { + .name = "lightnvm", + .attrs = nvm_dev_attrs, + .is_visible = nvm_dev_attrs_visible, +}; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 477af51d01e8..8e846095c42d 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -282,13 +282,9 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) if (!head->disk) return; - if (!(head->disk->flags & GENHD_FL_UP)) { - device_add_disk(&head->subsys->dev, head->disk, NULL); - if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, - &nvme_ns_id_attr_group)) - dev_warn(&head->subsys->dev, - "failed to create id group.\n"); - } + if (!(head->disk->flags & GENHD_FL_UP)) + device_add_disk(&head->subsys->dev, head->disk, + nvme_ns_id_attr_groups); kblockd_schedule_work(&ns->head->requeue_work); } @@ -494,11 +490,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) return; - if (head->disk->flags & GENHD_FL_UP) { - sysfs_remove_group(&disk_to_dev(head->disk)->kobj, - &nvme_ns_id_attr_group); + if (head->disk->flags & GENHD_FL_UP) del_gendisk(head->disk); - } blk_set_queue_dying(head->disk->queue); /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index bb4a2003c097..2503f8fd54da 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -459,7 +459,7 @@ int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, void *log, size_t size, u64 offset); -extern 
const struct attribute_group nvme_ns_id_attr_group; +extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH @@ -551,8 +551,7 @@ static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl) void nvme_nvm_update_nvm_info(struct nvme_ns *ns); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); -int nvme_nvm_register_sysfs(struct nvme_ns *ns); -void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); +extern const struct attribute_group nvme_nvm_attr_group; int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); #else static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {}; @@ -563,11 +562,6 @@ static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, } static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}; -static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns) -{ - return 0; -} -static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {}; static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) { -- cgit v1.2.3 From 53b3a66163ea6cc7a86e0a3a04b1166d96665824 Mon Sep 17 00:00:00 2001 From: "Milan P. Gandhi" Date: Thu, 9 Aug 2018 21:49:24 +0530 Subject: nvme: fix typo in nvme_identify_ns_descs Signed-off-by: Milan P. 
Gandhi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e0a9e1c5b30e..f0778d3dd2f8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -971,7 +971,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, uuid_copy(&ids->uuid, data + pos + sizeof(*cur)); break; default: - /* Skip unnkown types */ + /* Skip unknown types */ len = cur->nidl; break; } -- cgit v1.2.3 From d93cb3927ca5528bd21f2635a2300f9a1426ac46 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 10 Sep 2018 17:39:33 -0700 Subject: nvmet: remove redundant module prefix This patch removes the redundant module prefix used in the pr_err() when nvmet_get_smart_log_nsid() failed to find the namespace provided as a part of smart-log command. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 2008fa62a373..7a45f4477679 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -58,7 +58,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid); if (!ns) { - pr_err("nvmet : Could not find namespace id : %d\n", + pr_err("Could not find namespace id : %d\n", le32_to_cpu(req->cmd->get_log_page.nsid)); return NVME_SC_INVALID_NS; } -- cgit v1.2.3 From d4e4230c8f5646a19a0e58765a30fb2bab5f1dcc Mon Sep 17 00:00:00 2001 From: "Milan P. Gandhi" Date: Fri, 10 Aug 2018 14:54:02 +0530 Subject: nvme-fc: fix for a minor typos Signed-off-by: Milan P. 
Gandhi Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 4 ++-- drivers/nvme/target/fc.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 611e70cae754..fd700073d312 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1385,7 +1385,7 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) __nvme_fc_finish_ls_req(lsop); - /* fc-nvme iniator doesn't care about success or failure of cmd */ + /* fc-nvme initiator doesn't care about success or failure of cmd */ kfree(lsop); } @@ -3159,7 +3159,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) substring_t wwn = { name, &name[sizeof(name)-1] }; int nnoffset, pnoffset; - /* validate it string one of the 2 allowed formats */ + /* validate if string is one of the 2 allowed formats */ if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 29b4b236afd8..a3905673e17f 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2468,7 +2468,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) substring_t wwn = { name, &name[sizeof(name)-1] }; int nnoffset, pnoffset; - /* validate it string one of the 2 allowed formats */ + /* validate if string is one of the 2 allowed formats */ if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], -- cgit v1.2.3 From ea96d6496ff59b2b26dc9e13dc8f57d77731eb37 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 9 Aug 2018 16:48:14 -0700 Subject: nvmet_fc: support target port removal with nvmet layer Currently, if a targetport has been connected to via the nvmet config (in other words, the add_port() 
transport routine called, and the nvmet port pointer stored for use in upcalls on new io), and if the targetport is then removed (say the lldd driver decides to unload or fully reset its hardware) and then re-added (the lldd driver reloads or reinits its hardware), the port pointer has been lost so there's no way to continue to post commands up to nvmet via the transport port. Correct this by allocating a small "port context" structure that will be linked to by the targetport. The context will save the targetport WWNs and the nvmet port pointer to use for it. Initial allocation will occur when the targetport is bound to via add_port. The context will be deallocated when remove_port() is called. If a targetport is removed while nvmet has the active port context, the targetport will be unlinked from the port context before removal. If a new targetport is registered, the port contexts without a binding are looked through and if the WWNs match (so it's the same as nvmet's port context) the port context is linked to the new target port. Thus new io can be received on the new targetport and operation resumes with nvmet. This also resolves nvmet configuration changing out from underneath the nvme-fc target port (for example: an nvmetcli clear). 
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 128 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 120 insertions(+), 8 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index a3905673e17f..ef286b72d958 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -110,11 +110,19 @@ struct nvmet_fc_tgtport { struct list_head ls_busylist; struct list_head assoc_list; struct ida assoc_cnt; - struct nvmet_port *port; + struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; }; +struct nvmet_fc_port_entry { + struct nvmet_fc_tgtport *tgtport; + struct nvmet_port *port; + u64 node_name; + u64 port_name; + struct list_head pe_list; +}; + struct nvmet_fc_defer_fcp_req { struct list_head req_list; struct nvmefc_tgt_fcp_req *fcp_req; @@ -132,7 +140,6 @@ struct nvmet_fc_tgt_queue { atomic_t zrspcnt; atomic_t rsn; spinlock_t qlock; - struct nvmet_port *port; struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; struct nvmet_fc_tgt_assoc *assoc; @@ -221,6 +228,7 @@ static DEFINE_SPINLOCK(nvmet_fc_tgtlock); static LIST_HEAD(nvmet_fc_target_list); static DEFINE_IDA(nvmet_fc_tgtport_cnt); +static LIST_HEAD(nvmet_fc_portentry_list); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); @@ -645,7 +653,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, queue->qid = qid; queue->sqsize = sqsize; queue->assoc = assoc; - queue->port = assoc->tgtport->port; queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid); INIT_LIST_HEAD(&queue->fod_list); INIT_LIST_HEAD(&queue->avail_defer_list); @@ -957,6 +964,83 @@ nvmet_fc_find_target_assoc(struct nvmet_fc_tgtport *tgtport, return ret; } +static void +nvmet_fc_portentry_bind(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_port_entry *pe, + struct nvmet_port *port) +{ + lockdep_assert_held(&nvmet_fc_tgtlock); + + pe->tgtport = tgtport; + tgtport->pe = pe; + + pe->port = port; + 
port->priv = pe; + + pe->node_name = tgtport->fc_target_port.node_name; + pe->port_name = tgtport->fc_target_port.port_name; + INIT_LIST_HEAD(&pe->pe_list); + + list_add_tail(&pe->pe_list, &nvmet_fc_portentry_list); +} + +static void +nvmet_fc_portentry_unbind(struct nvmet_fc_port_entry *pe) +{ + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + if (pe->tgtport) + pe->tgtport->pe = NULL; + list_del(&pe->pe_list); + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} + +/* + * called when a targetport deregisters. Breaks the relationship + * with the nvmet port, but leaves the port_entry in place so that + * re-registration can resume operation. + */ +static void +nvmet_fc_portentry_unbind_tgt(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_port_entry *pe; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + pe = tgtport->pe; + if (pe) + pe->tgtport = NULL; + tgtport->pe = NULL; + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} + +/* + * called when a new targetport is registered. Looks in the + * existing nvmet port_entries to see if the nvmet layer is + * configured for the targetport's wwn's. (the targetport existed, + * nvmet configured, the lldd unregistered the tgtport, and is now + * reregistering the same targetport). If so, set the nvmet port + * port entry on the targetport. 
+ */ +static void +nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_port_entry *pe; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + list_for_each_entry(pe, &nvmet_fc_portentry_list, pe_list) { + if (tgtport->fc_target_port.node_name == pe->node_name && + tgtport->fc_target_port.port_name == pe->port_name) { + WARN_ON(pe->tgtport); + tgtport->pe = pe; + pe->tgtport = tgtport; + break; + } + } + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} /** * nvme_fc_register_targetport - transport entry point called by an @@ -1034,6 +1118,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, goto out_free_newrec; } + nvmet_fc_portentry_rebind_tgt(newrec); + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); list_add_tail(&newrec->tgt_list, &nvmet_fc_target_list); spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); @@ -1171,6 +1257,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) { struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + nvmet_fc_portentry_unbind_tgt(tgtport); + /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); @@ -2147,7 +2235,7 @@ nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req) /* - * Actual processing routine for received FC-NVME LS Requests from the LLD + * Actual processing routine for received FC-NVME I/O Requests from the LLD */ static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, @@ -2157,6 +2245,13 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, u32 xfrlen = be32_to_cpu(cmdiu->data_len); int ret; + /* + * if there is no nvmet mapping to the targetport there + * shouldn't be requests. just terminate them. + */ + if (!tgtport->pe) + goto transport_error; + /* * Fused commands are currently not supported in the linux * implementation. 
@@ -2184,7 +2279,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.rsp = &fod->rspiubuf.cqe; - fod->req.port = fod->queue->port; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); @@ -2508,6 +2603,7 @@ static int nvmet_fc_add_port(struct nvmet_port *port) { struct nvmet_fc_tgtport *tgtport; + struct nvmet_fc_port_entry *pe; struct nvmet_fc_traddr traddr = { 0L, 0L }; unsigned long flags; int ret; @@ -2524,24 +2620,40 @@ nvmet_fc_add_port(struct nvmet_port *port) if (ret) return ret; + pe = kzalloc(sizeof(*pe), GFP_KERNEL); + if (!pe) + return -ENOMEM; + ret = -ENXIO; spin_lock_irqsave(&nvmet_fc_tgtlock, flags); list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) { if ((tgtport->fc_target_port.node_name == traddr.nn) && (tgtport->fc_target_port.port_name == traddr.pn)) { - tgtport->port = port; - ret = 0; + /* a FC port can only be 1 nvmet port id */ + if (!tgtport->pe) { + nvmet_fc_portentry_bind(tgtport, pe, port); + ret = 0; + } else + ret = -EALREADY; break; } } spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); + + if (ret) + kfree(pe); + return ret; } static void nvmet_fc_remove_port(struct nvmet_port *port) { - /* nothing to do */ + struct nvmet_fc_port_entry *pe = port->priv; + + nvmet_fc_portentry_unbind(pe); + + kfree(pe); } static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { -- cgit v1.2.3 From 97faec531460c949d7120672b8c77e2f41f8d6d7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 13 Sep 2018 16:17:38 -0700 Subject: nvme_fc: add 'nvme_discovery' sysfs attribute to fc transport device The fc transport device should allow for a rediscovery, as userspace might have lost the events. Example is udev events not handled during system startup. This patch add a sysfs entry 'nvme_discovery' on the fc class to have it replay all udev discovery events for all local port/remote port address pairs. 
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 104 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 9 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index fd700073d312..9d201b35397d 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -122,6 +122,7 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; struct list_head ls_req_list; + struct list_head disc_list; struct device *dev; /* physical device for dma */ struct nvme_fc_lport *lport; spinlock_t lock; @@ -210,7 +211,6 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt); * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. */ -static struct class *fc_class; static struct device *fc_udev_device; @@ -507,6 +507,7 @@ nvme_fc_free_rport(struct kref *ref) list_del(&rport->endp_list); spin_unlock_irqrestore(&nvme_fc_lock, flags); + WARN_ON(!list_empty(&rport->disc_list)); ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); kfree(rport); @@ -694,6 +695,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, INIT_LIST_HEAD(&newrec->endp_list); INIT_LIST_HEAD(&newrec->ctrl_list); INIT_LIST_HEAD(&newrec->ls_req_list); + INIT_LIST_HEAD(&newrec->disc_list); kref_init(&newrec->ref); atomic_set(&newrec->act_ctrl_cnt, 0); spin_lock_init(&newrec->lock); @@ -3254,6 +3256,90 @@ static struct nvmf_transport_ops nvme_fc_transport = { .create_ctrl = nvme_fc_create_ctrl, }; +/* Arbitrary successive failures max. 
With lots of subsystems could be high */ +#define DISCOVERY_MAX_FAIL 20 + +static ssize_t nvme_fc_nvme_discovery_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long flags; + LIST_HEAD(local_disc_list); + struct nvme_fc_lport *lport; + struct nvme_fc_rport *rport; + int failcnt = 0; + + spin_lock_irqsave(&nvme_fc_lock, flags); +restart: + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { + list_for_each_entry(rport, &lport->endp_list, endp_list) { + if (!nvme_fc_lport_get(lport)) + continue; + if (!nvme_fc_rport_get(rport)) { + /* + * This is a temporary condition. Upon restart + * this rport will be gone from the list. + * + * Revert the lport put and retry. Anything + * added to the list already will be skipped (as + * they are no longer list_empty). Loops should + * resume at rports that were not yet seen. + */ + nvme_fc_lport_put(lport); + + if (failcnt++ < DISCOVERY_MAX_FAIL) + goto restart; + + pr_err("nvme_discovery: too many reference " + "failures\n"); + goto process_local_list; + } + if (list_empty(&rport->disc_list)) + list_add_tail(&rport->disc_list, + &local_disc_list); + } + } + +process_local_list: + while (!list_empty(&local_disc_list)) { + rport = list_first_entry(&local_disc_list, + struct nvme_fc_rport, disc_list); + list_del_init(&rport->disc_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + lport = rport->lport; + /* signal discovery. 
Won't hurt if it repeats */ + nvme_fc_signal_discovery_scan(lport, rport); + nvme_fc_rport_put(rport); + nvme_fc_lport_put(lport); + + spin_lock_irqsave(&nvme_fc_lock, flags); + } + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return count; +} +static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); + +static struct attribute *nvme_fc_attrs[] = { + &dev_attr_nvme_discovery.attr, + NULL +}; + +static struct attribute_group nvme_fc_attr_group = { + .attrs = nvme_fc_attrs, +}; + +static const struct attribute_group *nvme_fc_attr_groups[] = { + &nvme_fc_attr_group, + NULL +}; + +static struct class fc_class = { + .name = "fc", + .dev_groups = nvme_fc_attr_groups, + .owner = THIS_MODULE, +}; + static int __init nvme_fc_init_module(void) { int ret; @@ -3272,16 +3358,16 @@ static int __init nvme_fc_init_module(void) * put in place, this code will move to a more generic * location for the class. */ - fc_class = class_create(THIS_MODULE, "fc"); - if (IS_ERR(fc_class)) { + ret = class_register(&fc_class); + if (ret) { pr_err("couldn't register class fc\n"); - return PTR_ERR(fc_class); + return ret; } /* * Create a device for the FC-centric udev events */ - fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL, + fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL, "fc_udev_device"); if (IS_ERR(fc_udev_device)) { pr_err("couldn't create fc_udev device!\n"); @@ -3296,9 +3382,9 @@ static int __init nvme_fc_init_module(void) return 0; out_destroy_device: - device_destroy(fc_class, MKDEV(0, 0)); + device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: - class_destroy(fc_class); + class_unregister(&fc_class); return ret; } @@ -3313,8 +3399,8 @@ static void __exit nvme_fc_exit_module(void) ida_destroy(&nvme_fc_local_port_cnt); ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(fc_class, MKDEV(0, 0)); - class_destroy(fc_class); + device_destroy(&fc_class, MKDEV(0, 0)); + class_unregister(&fc_class); } 
module_init(nvme_fc_init_module); -- cgit v1.2.3 From 09bd1ff4b15143bc0e6dd2adf39f59f6ab6e2621 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 17 Sep 2018 10:47:06 -0700 Subject: nvme-core: add async event trace helper This patch adds a new event for nvme async event notification. We print the async event in the decoded format when we recognize the event otherwise we just dump the result. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 11 +++++++++-- drivers/nvme/host/trace.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f0778d3dd2f8..089d744e5065 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3408,16 +3408,21 @@ static void nvme_fw_act_work(struct work_struct *work) static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) { - switch ((result & 0xff00) >> 8) { + u32 aer_notice_type = (result & 0xff00) >> 8; + + switch (aer_notice_type) { case NVME_AER_NOTICE_NS_CHANGED: + trace_nvme_async_event(ctrl, aer_notice_type); set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events); nvme_queue_scan(ctrl); break; case NVME_AER_NOTICE_FW_ACT_STARTING: + trace_nvme_async_event(ctrl, aer_notice_type); queue_work(nvme_wq, &ctrl->fw_act_work); break; #ifdef CONFIG_NVME_MULTIPATH case NVME_AER_NOTICE_ANA: + trace_nvme_async_event(ctrl, aer_notice_type); if (!ctrl->ana_log_buf) break; queue_work(nvme_wq, &ctrl->ana_work); @@ -3432,11 +3437,12 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, volatile union nvme_result *res) { u32 result = le32_to_cpu(res->u32); + u32 aer_type = result & 0x07; if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS) return; - switch (result & 0x7) { + switch (aer_type) { case NVME_AER_NOTICE: nvme_handle_aen_notice(ctrl, result); break; @@ -3444,6 +3450,7 @@ void nvme_complete_async_event(struct 
nvme_ctrl *ctrl, __le16 status, case NVME_AER_SMART: case NVME_AER_CSS: case NVME_AER_VS: + trace_nvme_async_event(ctrl, aer_type); ctrl->aen_result = result; break; default: diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index a490790d6691..196d5bd56718 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -156,6 +156,34 @@ TRACE_EVENT(nvme_complete_rq, ); +#define aer_name(aer) { aer, #aer } + +TRACE_EVENT(nvme_async_event, + TP_PROTO(struct nvme_ctrl *ctrl, u32 result), + TP_ARGS(ctrl, result), + TP_STRUCT__entry( + __field(int, ctrl_id) + __field(u32, result) + ), + TP_fast_assign( + __entry->ctrl_id = ctrl->instance; + __entry->result = result; + ), + TP_printk("nvme%d: NVME_AEN=%#08x [%s]", + __entry->ctrl_id, __entry->result, + __print_symbolic(__entry->result, + aer_name(NVME_AER_NOTICE_NS_CHANGED), + aer_name(NVME_AER_NOTICE_ANA), + aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), + aer_name(NVME_AER_ERROR), + aer_name(NVME_AER_SMART), + aer_name(NVME_AER_CSS), + aer_name(NVME_AER_VS)) + ) +); + +#undef aer_name + #endif /* _TRACE_NVME_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 783f4a4408e1251d17f333ad56abac24dde988b9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 27 Sep 2018 16:58:54 -0700 Subject: nvme: call nvme_complete_rq when nvmf_check_ready fails for mpath I/O When an io is rejected by nvmf_check_ready() due to validation of the controller state, the nvmf_fail_nonready_command() will normally return BLK_STS_RESOURCE to requeue and retry. However, if the controller is dying or the I/O is marked for NVMe multipath, the I/O is failed so that the controller can terminate or so that the io can be issued on a different path. Unfortunately, as this reject point is before the transport has accepted the command, blk-mq ends up completing the I/O and never calls nvme_complete_rq(), which is where multipath may preserve or re-route the I/O. The end result is, the device user ends up seeing an EIO error. 
Example: single path connectivity, controller is under load, and a reset is induced. An I/O is received: a) while the reset state has been set but the queues have yet to be stopped; or b) after queues are started (at end of reset) but before the reconnect has completed. The I/O finishes with an EIO status. This patch makes the following changes: - Adds the HOST_PATH_ERROR pathing status from TP4028 - Modifies the reject point such that it appears to queue successfully, but actually completes the io with the new pathing status and calls nvme_complete_rq(). - nvme_complete_rq() recognizes the new status, avoids resetting the controller (likely was already done in order to get this new status), and calls the multipather to clear the current path that errored. This allows the next command (retry or new command) to select a new path if there is one. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 7 +++++-- drivers/nvme/host/multipath.c | 7 +++++++ include/linux/nvme.h | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 206d63cb1afc..bcd09d3a44da 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -552,8 +552,11 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, ctrl->state != NVME_CTRL_DEAD && !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; - nvme_req(rq)->status = NVME_SC_ABORT_REQ; - return BLK_STS_IOERR; + + nvme_req(rq)->status = NVME_SC_HOST_PATH_ERROR; + blk_mq_start_request(rq); + nvme_complete_rq(rq); + return BLK_STS_OK; } EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index bfbc6d5b1d93..ac16093a7928 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -77,6 +77,13 @@ void nvme_failover_req(struct 
request *req) queue_work(nvme_wq, &ns->ctrl->ana_work); } break; + case NVME_SC_HOST_PATH_ERROR: + /* + * Temporary transport disruption in talking to the controller. + * Try to send on a new path. + */ + nvme_mpath_clear_current_path(ns); + break; default: /* * Reset the controller for any non-ANA error as we don't know diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 68e91ef5494c..818dbe9331be 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1241,6 +1241,7 @@ enum { NVME_SC_ANA_PERSISTENT_LOSS = 0x301, NVME_SC_ANA_INACCESSIBLE = 0x302, NVME_SC_ANA_TRANSITION = 0x303, + NVME_SC_HOST_PATH_ERROR = 0x370, NVME_SC_DNR = 0x4000, }; -- cgit v1.2.3 From 73383adfad245bb84e6d6ef7830f01048fcfc217 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 28 Sep 2018 15:40:43 -0700 Subject: nvmet: don't split large I/Os unconditionally If we know that the I/O size exceeds our inline bio vec, there is no point in using it and splitting off the rest to begin with. We could in theory reuse the inline bio and only allocate the bio_vec, but it's really not worth optimizing for.
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-bdev.c | 9 +++++++-- drivers/nvme/target/nvmet.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 7bc9f6240432..f93fb5711142 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -58,7 +58,7 @@ static void nvmet_bio_done(struct bio *bio) static void nvmet_bdev_execute_rw(struct nvmet_req *req) { int sg_cnt = req->sg_cnt; - struct bio *bio = &req->b.inline_bio; + struct bio *bio; struct scatterlist *sg; sector_t sector; blk_qc_t cookie; @@ -81,7 +81,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector = le64_to_cpu(req->cmd->rw.slba); sector <<= (req->ns->blksize_shift - 9); - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) { + bio = &req->b.inline_bio; + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + } else { + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + } bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; bio->bi_private = req; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index ec9af4ee03b6..08f7b57a1203 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -264,6 +264,7 @@ struct nvmet_fabrics_ops { }; #define NVMET_MAX_INLINE_BIOVEC 8 +#define NVMET_MAX_INLINE_DATA_LEN NVMET_MAX_INLINE_BIOVEC * PAGE_SIZE struct nvmet_req { struct nvme_command *cmd; -- cgit v1.2.3 From f333444708f82c4a4d3ccac004da0bfd9cfdfa42 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Sep 2018 09:51:29 +0200 Subject: nvme: take node locality into account when selecting a path Make current_path an array with an entry for every possible node, and cache the best path on a per-node basis. Take the node distance into account when selecting it. 
This is primarily useful for dual-ported PCIe devices which are connected to PCIe root ports on different sockets. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Hannes Reinecke --- drivers/nvme/host/core.c | 7 +++++- drivers/nvme/host/multipath.c | 50 +++++++++++++++++++++++++++++++++---------- drivers/nvme/host/nvme.h | 25 ++++++++-------------- 3 files changed, 54 insertions(+), 28 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 089d744e5065..2db33a752e2b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2908,9 +2908,14 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_id_ns *id) { struct nvme_ns_head *head; + size_t size = sizeof(*head); int ret = -ENOMEM; - head = kzalloc(sizeof(*head), GFP_KERNEL); +#ifdef CONFIG_NVME_MULTIPATH + size += num_possible_nodes() * sizeof(struct nvme_ns *); +#endif + + head = kzalloc(size, GFP_KERNEL); if (!head) goto out; ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index ac16093a7928..52987052b7fc 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -117,29 +117,55 @@ static const char *nvme_ana_state_names[] = { [NVME_ANA_CHANGE] = "change", }; -static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head) +void nvme_mpath_clear_current_path(struct nvme_ns *ns) { - struct nvme_ns *ns, *fallback = NULL; + struct nvme_ns_head *head = ns->head; + int node; + + if (!head) + return; + + for_each_node(node) { + if (ns == rcu_access_pointer(head->current_path[node])) + rcu_assign_pointer(head->current_path[node], NULL); + } +} + +static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) +{ + int found_distance = INT_MAX, fallback_distance = INT_MAX, distance; + struct nvme_ns *found = NULL, *fallback = NULL, *ns; 
list_for_each_entry_rcu(ns, &head->list, siblings) { if (ns->ctrl->state != NVME_CTRL_LIVE || test_bit(NVME_NS_ANA_PENDING, &ns->flags)) continue; + + distance = node_distance(node, dev_to_node(ns->ctrl->dev)); + switch (ns->ana_state) { case NVME_ANA_OPTIMIZED: - rcu_assign_pointer(head->current_path, ns); - return ns; + if (distance < found_distance) { + found_distance = distance; + found = ns; + } + break; case NVME_ANA_NONOPTIMIZED: - fallback = ns; + if (distance < fallback_distance) { + fallback_distance = distance; + fallback = ns; + } break; default: break; } } - if (fallback) - rcu_assign_pointer(head->current_path, fallback); - return fallback; + if (!found) + found = fallback; + if (found) + rcu_assign_pointer(head->current_path[node], found); + return found; } static inline bool nvme_path_is_optimized(struct nvme_ns *ns) @@ -150,10 +176,12 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns) inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) { - struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu); + int node = numa_node_id(); + struct nvme_ns *ns; + ns = srcu_dereference(head->current_path[node], &head->srcu); if (unlikely(!ns || !nvme_path_is_optimized(ns))) - ns = __nvme_find_path(head); + ns = __nvme_find_path(head, node); return ns; } @@ -200,7 +228,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) int srcu_idx; srcu_idx = srcu_read_lock(&head->srcu); - ns = srcu_dereference(head->current_path, &head->srcu); + ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu); if (likely(ns && nvme_path_is_optimized(ns))) found = ns->queue->poll_fn(q, qc); srcu_read_unlock(&head->srcu, srcu_idx); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2503f8fd54da..9fefba039d1e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -277,14 +277,6 @@ struct nvme_ns_ids { * only ever has a single entry for private namespaces. 
*/ struct nvme_ns_head { -#ifdef CONFIG_NVME_MULTIPATH - struct gendisk *disk; - struct nvme_ns __rcu *current_path; - struct bio_list requeue_list; - spinlock_t requeue_lock; - struct work_struct requeue_work; - struct mutex lock; -#endif struct list_head list; struct srcu_struct srcu; struct nvme_subsystem *subsys; @@ -293,6 +285,14 @@ struct nvme_ns_head { struct list_head entry; struct kref ref; int instance; +#ifdef CONFIG_NVME_MULTIPATH + struct gendisk *disk; + struct bio_list requeue_list; + spinlock_t requeue_lock; + struct work_struct requeue_work; + struct mutex lock; + struct nvme_ns __rcu *current_path[]; +#endif }; #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS @@ -474,14 +474,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); - -static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) -{ - struct nvme_ns_head *head = ns->head; - - if (head && ns == rcu_access_pointer(head->current_path)) - rcu_assign_pointer(head->current_path, NULL); -} +void nvme_mpath_clear_current_path(struct nvme_ns *ns); struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) -- cgit v1.2.3 From 2acf70ade79d26b97611a8df52eb22aa33814cd4 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 27 Sep 2018 11:00:31 -0700 Subject: nvmet-rdma: use a private workqueue for delete Queue deletion is done asynchronously when the last reference on the queue is dropped. Thus, in order to make sure we don't over-allocate under a connect/disconnect storm, we let queue deletion complete before making forward progress. However, given that we flush the system_wq from rdma_cm context which runs from a workqueue context, we can have a circular locking complaint [1]. Fix that by using a private workqueue for queue deletion.
[1]: ====================================================== WARNING: possible circular locking dependency detected 4.19.0-rc4-dbg+ #3 Not tainted ------------------------------------------------------ kworker/5:0/39 is trying to acquire lock: 00000000a10b6db9 (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x6f/0x440 [rdma_cm] but task is already holding lock: 00000000331b4e2c ((work_completion)(&queue->release_work)){+.+.}, at: process_one_work+0x3ed/0xa20 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 ((work_completion)(&queue->release_work)){+.+.}: process_one_work+0x474/0xa20 worker_thread+0x63/0x5a0 kthread+0x1cf/0x1f0 ret_from_fork+0x24/0x30 -> #2 ((wq_completion)"events"){+.+.}: flush_workqueue+0xf3/0x970 nvmet_rdma_cm_handler+0x133d/0x1734 [nvmet_rdma] cma_ib_req_handler+0x72f/0xf90 [rdma_cm] cm_process_work+0x2e/0x110 [ib_cm] cm_req_handler+0x135b/0x1c30 [ib_cm] cm_work_handler+0x2b7/0x38cd [ib_cm] process_one_work+0x4ae/0xa20 nvmet_rdma:nvmet_rdma_cm_handler: nvmet_rdma: disconnected (10): status 0 id 0000000040357082 worker_thread+0x63/0x5a0 kthread+0x1cf/0x1f0 ret_from_fork+0x24/0x30 nvme nvme0: Reconnecting in 10 seconds... 
-> #1 (&id_priv->handler_mutex/1){+.+.}: __mutex_lock+0xfe/0xbe0 mutex_lock_nested+0x1b/0x20 cma_ib_req_handler+0x6aa/0xf90 [rdma_cm] cm_process_work+0x2e/0x110 [ib_cm] cm_req_handler+0x135b/0x1c30 [ib_cm] cm_work_handler+0x2b7/0x38cd [ib_cm] process_one_work+0x4ae/0xa20 worker_thread+0x63/0x5a0 kthread+0x1cf/0x1f0 ret_from_fork+0x24/0x30 -> #0 (&id_priv->handler_mutex){+.+.}: lock_acquire+0xc5/0x200 __mutex_lock+0xfe/0xbe0 mutex_lock_nested+0x1b/0x20 rdma_destroy_id+0x6f/0x440 [rdma_cm] nvmet_rdma_release_queue_work+0x8e/0x1b0 [nvmet_rdma] process_one_work+0x4ae/0xa20 worker_thread+0x63/0x5a0 kthread+0x1cf/0x1f0 ret_from_fork+0x24/0x30 Fixes: 777dc82395de ("nvmet-rdma: occasionally flush ongoing controller teardown") Reported-by: Bart Van Assche Signed-off-by: Sagi Grimberg Tested-by: Bart Van Assche Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index bfc4da660bb4..5becca88ccbe 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -122,6 +122,7 @@ struct nvmet_rdma_device { int inline_page_count; }; +struct workqueue_struct *nvmet_rdma_delete_wq; static bool nvmet_rdma_use_srq; module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); MODULE_PARM_DESC(use_srq, "Use shared receive queue."); @@ -1267,12 +1268,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_rdma_delete_wq); } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) { - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); /* Destroying rdma_cm id is not needed here */ return 0; } @@ -1337,7 +1338,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) if (disconnect) { 
rdma_disconnect(queue->cm_id); - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); } } @@ -1367,7 +1368,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, mutex_unlock(&nvmet_rdma_queue_mutex); pr_err("failed to connect queue %d\n", queue->idx); - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); } /** @@ -1649,8 +1650,17 @@ static int __init nvmet_rdma_init(void) if (ret) goto err_ib_client; + nvmet_rdma_delete_wq = alloc_workqueue("nvmet-rdma-delete-wq", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + if (!nvmet_rdma_delete_wq) { + ret = -ENOMEM; + goto err_unreg_transport; + } + return 0; +err_unreg_transport: + nvmet_unregister_transport(&nvmet_rdma_ops); err_ib_client: ib_unregister_client(&nvmet_rdma_ib_client); return ret; @@ -1658,6 +1668,7 @@ err_ib_client: static void __exit nvmet_rdma_exit(void) { + destroy_workqueue(nvmet_rdma_delete_wq); nvmet_unregister_transport(&nvmet_rdma_ops); ib_unregister_client(&nvmet_rdma_ib_client); WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list)); -- cgit v1.2.3 From aff3fb18f957de93e629c7d3d2c4ef1f360aa511 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Tue, 9 Oct 2018 13:11:36 +0200 Subject: lightnvm: move bad block and chunk state logic to core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk implements two data paths for recovery line state. One for 1.2 and another for 2.0, instead of having pblk implement these, combine them in the core to reduce complexity and make available to other targets. The new interface will adhere to the 2.0 chunk definition, including managing open chunks with an active write pointer. To provide this interface, a 1.2 device recovers the state of the chunks by manually detecting if a chunk is either free/open/close/offline, and if open, scanning the flash pages sequentially to find the next writeable page. 
This process takes on average ~10 seconds on a device with 64 dies, 1024 blocks and 60us read access time. The process can be parallelized but is left out for maintenance simplicity, as the 1.2 specification is deprecated. For 2.0 devices, the logic is maintained internally in the drive and retrieved through the 2.0 interface. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 309 +++++++++++++++++++++++++++++++++++-------- drivers/lightnvm/pblk-core.c | 6 +- drivers/lightnvm/pblk-init.c | 116 +--------------- drivers/lightnvm/pblk.h | 2 +- drivers/nvme/host/lightnvm.c | 4 +- include/linux/lightnvm.h | 15 +-- 6 files changed, 265 insertions(+), 187 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 964352720a03..8df188e0767e 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -717,46 +717,6 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); } -int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta, - struct ppa_addr ppa, int nchks) -{ - struct nvm_dev *dev = tgt_dev->parent; - - nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); - - return dev->ops->get_chk_meta(tgt_dev->parent, meta, - (sector_t)ppa.ppa, nchks); -} -EXPORT_SYMBOL(nvm_get_chunk_meta); - -int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, - int nr_ppas, int type) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_rq rqd; - int ret; - - if (nr_ppas > NVM_MAX_VLBA) { - pr_err("nvm: unable to update all blocks atomically\n"); - return -EINVAL; - } - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); - nvm_rq_tgt_to_dev(tgt_dev, &rqd); - - ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); - nvm_free_rqd_ppalist(tgt_dev, &rqd); - if (ret) { - pr_err("nvm: failed bb mark\n"); - return -EINVAL; - } - - return 0; 
-} -EXPORT_SYMBOL(nvm_set_tgt_bb_tbl); - static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd) { int flags = 0; @@ -830,27 +790,159 @@ void nvm_end_io(struct nvm_rq *rqd) } EXPORT_SYMBOL(nvm_end_io); +static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd) +{ + if (!dev->ops->submit_io_sync) + return -ENODEV; + + rqd->flags = nvm_set_flags(&dev->geo, rqd); + + return dev->ops->submit_io_sync(dev, rqd); +} + +static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa) +{ + struct nvm_rq rqd = { NULL }; + struct bio bio; + struct bio_vec bio_vec; + struct page *page; + int ret; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + bio_init(&bio, &bio_vec, 1); + bio_add_page(&bio, page, PAGE_SIZE, 0); + bio_set_op_attrs(&bio, REQ_OP_READ, 0); + + rqd.bio = &bio; + rqd.opcode = NVM_OP_PREAD; + rqd.is_seq = 1; + rqd.nr_ppas = 1; + rqd.ppa_addr = generic_to_dev_addr(dev, ppa); + + ret = nvm_submit_io_sync_raw(dev, &rqd); + if (ret) + return ret; + + __free_page(page); + + return rqd.error; +} + /* - * folds a bad block list from its plane representation to its virtual - * block representation. The fold is done in place and reduced size is - * returned. - * - * If any of the planes status are bad or grown bad block, the virtual block - * is marked bad. If not bad, the first plane state acts as the block state. + * Scans a 1.2 chunk first and last page to determine if its state. + * If the chunk is found to be open, also scan it to update the write + * pointer. 
*/ -int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) +static int nvm_bb_chunk_scan(struct nvm_dev *dev, struct ppa_addr ppa, + struct nvm_chk_meta *meta) { struct nvm_geo *geo = &dev->geo; - int blk, offset, pl, blktype; + int ret, pg, pl; - if (nr_blks != geo->num_chk * geo->pln_mode) - return -EINVAL; + /* sense first page */ + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) /* valid data */ + meta->state = NVM_CHK_ST_OPEN; + else if (ret > 0) { + /* + * If empty page, the chunk is free, else it is an + * actual io error. In that case, mark it offline. + */ + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + meta->state = NVM_CHK_ST_FREE; + return 0; + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->state = NVM_CHK_ST_OPEN; + goto scan; + default: + return -ret; /* other io error */ + } + } + + /* sense last page */ + ppa.g.pg = geo->num_pg - 1; + ppa.g.pl = geo->num_pln - 1; + + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) { /* Chunk fully written */ + meta->state = NVM_CHK_ST_CLOSED; + meta->wp = geo->clba; + return 0; + } else if (ret > 0) { + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->state = NVM_CHK_ST_OPEN; + break; + default: + return -ret; /* other io error */ + } + } + +scan: + /* + * chunk is open, we scan sequentially to update the write pointer. + * We make the assumption that targets write data across all planes + * before moving to the next page. 
+ */ + for (pg = 0; pg < geo->num_pg; pg++) { + for (pl = 0; pl < geo->num_pln; pl++) { + ppa.g.pg = pg; + ppa.g.pl = pl; + + ret = nvm_bb_chunk_sense(dev, ppa); + if (ret < 0) /* io error */ + return ret; + else if (ret == 0) { + meta->wp += geo->ws_min; + } else if (ret > 0) { + switch (ret) { + case NVM_RSP_ERR_EMPTYPAGE: + return 0; + case NVM_RSP_ERR_FAILCRC: + case NVM_RSP_ERR_FAILECC: + case NVM_RSP_WARN_HIGHECC: + meta->wp += geo->ws_min; + break; + default: + return -ret; /* other io error */ + } + } + } + } + + return 0; +} + +/* + * folds a bad block list from its plane representation to its + * chunk representation. + * + * If any of the planes status are bad or grown bad, the chunk is marked + * offline. If not bad, the first plane state acts as the chunk state. + */ +static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa, + u8 *blks, int nr_blks, struct nvm_chk_meta *meta) +{ + struct nvm_geo *geo = &dev->geo; + int ret, blk, pl, offset, blktype; for (blk = 0; blk < geo->num_chk; blk++) { offset = blk * geo->pln_mode; blktype = blks[offset]; - /* Bad blocks on any planes take precedence over other types */ for (pl = 0; pl < geo->pln_mode; pl++) { if (blks[offset + pl] & (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { @@ -859,23 +951,124 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) } } - blks[blk] = blktype; + ppa.g.blk = blk; + + meta->wp = 0; + meta->type = NVM_CHK_TP_W_SEQ; + meta->wi = 0; + meta->slba = generic_to_dev_addr(dev, ppa).ppa; + meta->cnlb = dev->geo.clba; + + if (blktype == NVM_BLK_T_FREE) { + ret = nvm_bb_chunk_scan(dev, ppa, meta); + if (ret) + return ret; + } else { + meta->state = NVM_CHK_ST_OFFLINE; + } + + meta++; } - return geo->num_chk; + return 0; +} + +static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba, + int nchks, struct nvm_chk_meta *meta) +{ + struct nvm_geo *geo = &dev->geo; + struct ppa_addr ppa; + u8 *blks; + int ch, lun, nr_blks; + int ret; + + ppa.ppa = slba; + ppa = 
dev_to_generic_addr(dev, ppa); + + if (ppa.g.blk != 0) + return -EINVAL; + + if ((nchks % geo->num_chk) != 0) + return -EINVAL; + + nr_blks = geo->num_chk * geo->pln_mode; + + blks = kmalloc(nr_blks, GFP_KERNEL); + if (!blks) + return -ENOMEM; + + for (ch = ppa.g.ch; ch < geo->num_ch; ch++) { + for (lun = ppa.g.lun; lun < geo->num_lun; lun++) { + struct ppa_addr ppa_gen, ppa_dev; + + if (!nchks) + goto done; + + ppa_gen.ppa = 0; + ppa_gen.g.ch = ch; + ppa_gen.g.lun = lun; + ppa_dev = generic_to_dev_addr(dev, ppa_gen); + + ret = dev->ops->get_bb_tbl(dev, ppa_dev, blks); + if (ret) + goto done; + + ret = nvm_bb_to_chunk(dev, ppa_gen, blks, nr_blks, + meta); + if (ret) + goto done; + + meta += geo->num_chk; + nchks -= geo->num_chk; + } + } +done: + kfree(blks); + return ret; } -EXPORT_SYMBOL(nvm_bb_tbl_fold); -int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, - u8 *blks) +int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, + int nchks, struct nvm_chk_meta *meta) { struct nvm_dev *dev = tgt_dev->parent; nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); - return dev->ops->get_bb_tbl(dev, ppa, blks); + if (dev->geo.version == NVM_OCSSD_SPEC_12) + return nvm_get_bb_meta(dev, (sector_t)ppa.ppa, nchks, meta); + + return dev->ops->get_chk_meta(dev, (sector_t)ppa.ppa, nchks, meta); +} +EXPORT_SYMBOL_GPL(nvm_get_chunk_meta); + +int nvm_set_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, + int nr_ppas, int type) +{ + struct nvm_dev *dev = tgt_dev->parent; + struct nvm_rq rqd; + int ret; + + if (dev->geo.version == NVM_OCSSD_SPEC_20) + return 0; + + if (nr_ppas > NVM_MAX_VLBA) { + pr_err("nvm: unable to update all blocks atomically\n"); + return -EINVAL; + } + + memset(&rqd, 0, sizeof(struct nvm_rq)); + + nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); + nvm_rq_tgt_to_dev(tgt_dev, &rqd); + + ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); + nvm_free_rqd_ppalist(tgt_dev, &rqd); + if (ret) + return -EINVAL; + + 
return 0; } -EXPORT_SYMBOL(nvm_get_tgt_bb_tbl); +EXPORT_SYMBOL_GPL(nvm_set_chunk_meta); static int nvm_core_init(struct nvm_dev *dev) { diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 72de7456845b..e0b513d07e14 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -27,7 +27,7 @@ static void pblk_line_mark_bb(struct work_struct *work) struct ppa_addr *ppa = line_ws->priv; int ret; - ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); + ret = nvm_set_chunk_meta(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); if (ret) { struct pblk_line *line; int pos; @@ -110,7 +110,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd) * * The caller is responsible for freeing the returned structure */ -struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk) +struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -126,7 +126,7 @@ struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk) if (!meta) return ERR_PTR(-ENOMEM); - ret = nvm_get_chunk_meta(dev, meta, ppa, geo->all_chunks); + ret = nvm_get_chunk_meta(dev, ppa, geo->all_chunks, meta); if (ret) { kfree(meta); return ERR_PTR(-EIO); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 039f62d05e84..53bd52114aee 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -540,67 +540,6 @@ static void pblk_lines_free(struct pblk *pblk) kfree(pblk->lines); } -static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun, - u8 *blks, int nr_blks) -{ - struct ppa_addr ppa; - int ret; - - ppa.ppa = 0; - ppa.g.ch = rlun->bppa.g.ch; - ppa.g.lun = rlun->bppa.g.lun; - - ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); - if (ret) - return ret; - - nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); - if (nr_blks < 0) - return -EIO; - - return 0; -} - -static void *pblk_bb_get_meta(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct 
nvm_geo *geo = &dev->geo; - u8 *meta; - int i, nr_blks, blk_per_lun; - int ret; - - blk_per_lun = geo->num_chk * geo->pln_mode; - nr_blks = blk_per_lun * geo->all_luns; - - meta = kmalloc(nr_blks, GFP_KERNEL); - if (!meta) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < geo->all_luns; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - u8 *meta_pos = meta + i * blk_per_lun; - - ret = pblk_bb_get_tbl(dev, rlun, meta_pos, blk_per_lun); - if (ret) { - kfree(meta); - return ERR_PTR(-EIO); - } - } - - return meta; -} - -static void *pblk_chunk_get_meta(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - if (geo->version == NVM_OCSSD_SPEC_12) - return pblk_bb_get_meta(pblk); - else - return pblk_chunk_get_info(pblk); -} - static int pblk_luns_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; @@ -699,51 +638,7 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); } -static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line, - void *chunk_meta) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - int i, chk_per_lun, nr_bad_chks = 0; - - chk_per_lun = geo->num_chk * geo->pln_mode; - - for (i = 0; i < lm->blk_per_line; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - struct nvm_chk_meta *chunk; - int pos = pblk_ppa_to_pos(geo, rlun->bppa); - u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun; - - chunk = &line->chks[pos]; - - /* - * In 1.2 spec. chunk state is not persisted by the device. Thus - * some of the values are reset each time pblk is instantiated, - * so we have to assume that the block is closed. 
- */ - if (lun_bb_meta[line->id] == NVM_BLK_T_FREE) - chunk->state = NVM_CHK_ST_CLOSED; - else - chunk->state = NVM_CHK_ST_OFFLINE; - - chunk->type = NVM_CHK_TP_W_SEQ; - chunk->wi = 0; - chunk->slba = -1; - chunk->cnlb = geo->clba; - chunk->wp = 0; - - if (!(chunk->state & NVM_CHK_ST_OFFLINE)) - continue; - - set_bit(pos, line->blk_bitmap); - nr_bad_chks++; - } - - return nr_bad_chks; -} - -static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, +static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, struct nvm_chk_meta *meta) { struct nvm_tgt_dev *dev = pblk->dev; @@ -790,8 +685,6 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, void *chunk_meta, int line_id) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; long nr_bad_chks, chk_in_line; @@ -804,10 +697,7 @@ static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, line->vsc = &l_mg->vsc_list[line_id]; spin_lock_init(&line->lock); - if (geo->version == NVM_OCSSD_SPEC_12) - nr_bad_chks = pblk_setup_line_meta_12(pblk, line, chunk_meta); - else - nr_bad_chks = pblk_setup_line_meta_20(pblk, line, chunk_meta); + nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta); chk_in_line = lm->blk_per_line - nr_bad_chks; if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line || @@ -1058,7 +948,7 @@ static int pblk_lines_init(struct pblk *pblk) if (ret) goto fail_free_meta; - chunk_meta = pblk_chunk_get_meta(pblk); + chunk_meta = pblk_get_chunk_meta(pblk); if (IS_ERR(chunk_meta)) { ret = PTR_ERR(chunk_meta); goto fail_free_luns; diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 48b3035df3c4..579b4ea9716c 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -774,7 +774,7 @@ void pblk_set_sec_per_write(struct 
pblk *pblk, int sec_per_write); int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx); void pblk_discard(struct pblk *pblk, struct bio *bio); -struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk); +struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, struct nvm_chk_meta *lp, struct ppa_addr ppa); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 1e4f97538838..e42af7771fe5 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -567,8 +567,8 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas, * Expect the lba in device format */ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, - struct nvm_chk_meta *meta, - sector_t slba, int nchks) + sector_t slba, int nchks, + struct nvm_chk_meta *meta) { struct nvm_geo *geo = &ndev->geo; struct nvme_ns *ns = ndev->q->queuedata; diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index f4a84694e5e2..0106984400bc 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -86,8 +86,8 @@ struct nvm_chk_meta; typedef int (nvm_id_fn)(struct nvm_dev *); typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); -typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, struct nvm_chk_meta *, - sector_t, int); +typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int, + struct nvm_chk_meta *); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *); typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); @@ -532,18 +532,13 @@ extern struct nvm_dev *nvm_alloc_dev(int); extern int nvm_register(struct nvm_dev *); extern void nvm_unregister(struct nvm_dev *); - -extern int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, - struct 
nvm_chk_meta *meta, struct ppa_addr ppa, - int nchks); - -extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, +extern int nvm_get_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr, + int, struct nvm_chk_meta *); +extern int nvm_set_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr *, int, int); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *); extern void nvm_end_io(struct nvm_rq *); -extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); -extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); #else /* CONFIG_NVM */ struct nvm_dev_ops; -- cgit v1.2.3 From 090ee26fd51270cc3bd54a0efbc716ede320ad27 Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:01 +0200 Subject: lightnvm: use internal allocation for chunk log page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lightnvm subsystem provides helpers to retrieve chunk metadata, where the target needs to provide a buffer to store the metadata. An implicit assumption is that this buffer is contiguous and can be used to retrieve the data from the device. If the device exposes too many chunks, then kmalloc might fail, thus failing instance creation. This patch removes this assumption by implementing an internal buffer in the lightnvm subsystem to retrieve chunk metadata. Targets can then use virtual memory allocations. Since this is a target API change, adapt pblk accordingly. 
Signed-off-by: Javier González Reviewed-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 4 ++-- drivers/lightnvm/pblk-init.c | 2 +- drivers/nvme/host/lightnvm.c | 25 +++++++++++++++++-------- 3 files changed, 20 insertions(+), 11 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 84f3b4912b92..875f3cf615ac 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -120,7 +120,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd) /* * Get information for all chunks from the device. * - * The caller is responsible for freeing the returned structure + * The caller is responsible for freeing (vmalloc) the returned structure */ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk) { @@ -134,7 +134,7 @@ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk) ppa.ppa = 0; len = geo->all_chunks * sizeof(*meta); - meta = kzalloc(len, GFP_KERNEL); + meta = vzalloc(len); if (!meta) return ERR_PTR(-ENOMEM); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index fb66bc84d5ca..7ef8249108f0 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1039,7 +1039,7 @@ static int pblk_lines_init(struct pblk *pblk) pblk_set_provision(pblk, nr_free_chks); - kfree(chunk_meta); + vfree(chunk_meta); return 0; fail_free_lines: diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index e42af7771fe5..7d0a4d3b0a48 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -573,7 +573,7 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, struct nvm_geo *geo = &ndev->geo; struct nvme_ns *ns = ndev->q->queuedata; struct nvme_ctrl *ctrl = ns->ctrl; - struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta; + struct nvme_nvm_chk_meta *dev_meta, *dev_meta_off; struct ppa_addr ppa; size_t left = nchks * sizeof(struct 
nvme_nvm_chk_meta); size_t log_pos, offset, len; @@ -585,6 +585,10 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, */ max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024); + dev_meta = kmalloc(max_len, GFP_KERNEL); + if (!dev_meta) + return -ENOMEM; + /* Normalize lba address space to obtain log offset */ ppa.ppa = slba; ppa = dev_to_generic_addr(ndev, ppa); @@ -598,6 +602,9 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, while (left) { len = min_t(unsigned int, left, max_len); + memset(dev_meta, 0, max_len); + dev_meta_off = dev_meta; + ret = nvme_get_log(ctrl, ns->head->ns_id, NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len, offset); @@ -607,21 +614,23 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, } for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) { - meta->state = dev_meta->state; - meta->type = dev_meta->type; - meta->wi = dev_meta->wi; - meta->slba = le64_to_cpu(dev_meta->slba); - meta->cnlb = le64_to_cpu(dev_meta->cnlb); - meta->wp = le64_to_cpu(dev_meta->wp); + meta->state = dev_meta_off->state; + meta->type = dev_meta_off->type; + meta->wi = dev_meta_off->wi; + meta->slba = le64_to_cpu(dev_meta_off->slba); + meta->cnlb = le64_to_cpu(dev_meta_off->cnlb); + meta->wp = le64_to_cpu(dev_meta_off->wp); meta++; - dev_meta++; + dev_meta_off++; } offset += len; left -= len; } + kfree(dev_meta); + return ret; } -- cgit v1.2.3 From 6fd05cad5ee1290b276dd8ed90a1e019b1fa577a Mon Sep 17 00:00:00 2001 From: Javier González Date: Tue, 9 Oct 2018 13:12:12 +0200 Subject: lightnvm: do no update csecs and sos on 1.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.2 devices expose their data and metadata size through the separate identify command. Make sure that the NVMe LBA format does not override these values.
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/nvme/host/lightnvm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 7d0a4d3b0a48..a4f3b263cd6c 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -977,6 +977,9 @@ void nvme_nvm_update_nvm_info(struct nvme_ns *ns) struct nvm_dev *ndev = ns->ndev; struct nvm_geo *geo = &ndev->geo; + if (geo->version == NVM_OCSSD_SPEC_12) + return; + geo->csecs = 1 << ns->lba_shift; geo->sos = ns->ms; } -- cgit v1.2.3 From 886fabf693263e8651c0c4ab84fc626ad6d3a6e7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 5 Oct 2018 09:49:37 -0600 Subject: nvme: update node paths after adding new path The nvme namespace paths were being updated only when the current path was not set or nonoptimized. If a new path comes online that is a better path for its NUMA node, the multipath selector may continue using the previously set path on a potentially further node. This patch re-runs the path assignment after successfully adding a new optimized path. 
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 52987052b7fc..5e3cc8c59a39 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -321,6 +321,15 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) device_add_disk(&head->subsys->dev, head->disk, nvme_ns_id_attr_groups); + if (nvme_path_is_optimized(ns)) { + int node, srcu_idx; + + srcu_idx = srcu_read_lock(&head->srcu); + for_each_node(node) + __nvme_find_path(head, node); + srcu_read_unlock(&head->srcu, srcu_idx); + } + kblockd_schedule_work(&ns->head->requeue_work); } -- cgit v1.2.3 From 48440ab6dc275a3144474e8c5f45fab854d6e20f Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 22 Aug 2018 19:58:45 -0700 Subject: nvmet: remove unreachable code Get rid of the unreachable code in the nvmet_parse_discovery_cmd(). 
Keep the error message identical to the admin-cmd.c and io-cmd*.c Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/discovery.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index eae29f493a07..d1954f4ca28d 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -219,12 +219,10 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } default: - pr_err("unsupported cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } - pr_err("unhandled cmd %d\n", cmd->common.opcode); - return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } int __init nvmet_init_discovery(void) -- cgit v1.2.3 From 43a6f8fb619730fe5989e2430669626b2b5e13a0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:49 -0700 Subject: nvmet: use strcmp() instead of strncmp() for subsystem lookup strncmp() stops comparing when either the end of one of the first two arguments is reached or when 'n' characters have been compared, whichever comes first. That means that strncmp(s1, s2, n) is equivalent to strcmp(s1, s2) if n exceeds the length of s1 or the length of s2. Since that is the case in nvmet_find_get_subsys(), change strncmp() into strcmp(). 
This patch avoids that the following warning is reported by smatch: drivers/nvme/target/core.c:940:1 nvmet_find_get_subsys() error: strncmp() '"nqn.2014-08.org.nvmexpress.discovery"' too small (37 vs 223) Signed-off-by: Bart Van Assche Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b5ec96abd048..0acdff9e6842 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1105,8 +1105,7 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, if (!port) return NULL; - if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn, - NVMF_NQN_SIZE)) { + if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) { if (!kref_get_unless_zero(&nvmet_disc_subsys->ref)) return NULL; return nvmet_disc_subsys; -- cgit v1.2.3 From 35da77d556c17980f9bd6892828a70d7a1a8a145 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:54 -0700 Subject: nvmet-rdma: check for timeout in nvme_rdma_wait_for_cm() Check whether queue->cm_error holds a value before reading it. This patch addresses Coverity ID 1373774: unchecked return value. 
Signed-off-by: Bart Van Assche Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index dc042017c293..e7be903041a8 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -233,8 +233,15 @@ static void nvme_rdma_qp_event(struct ib_event *event, void *context) static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue) { - wait_for_completion_interruptible_timeout(&queue->cm_done, + int ret; + + ret = wait_for_completion_interruptible_timeout(&queue->cm_done, msecs_to_jiffies(NVME_RDMA_CONNECT_TIMEOUT_MS) + 1); + if (ret < 0) + return ret; + if (ret == 0) + return -ETIMEDOUT; + WARN_ON_ONCE(queue->cm_error > 0); return queue->cm_error; } -- cgit v1.2.3 From eb090c4c948ccc7a051451261cf1426edf83f3eb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:39 -0700 Subject: nvme-core: declare local symbols static This patch avoids that sparse complains about missing declarations. 
Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2db33a752e2b..63932dea74a1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2729,7 +2729,7 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, return a->mode; } -const struct attribute_group nvme_ns_id_attr_group = { +static const struct attribute_group nvme_ns_id_attr_group = { .attrs = nvme_ns_id_attrs, .is_visible = nvme_ns_id_attrs_are_visible, }; -- cgit v1.2.3 From bb2a1d4e804aa41eef0003a192a674f844dbca23 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:41 -0700 Subject: nvme-core: rework a NQN copying operation Although it is easy to see that the code in nvme_init_subnqn() guarantees that the subsys->subnqn string is '\0'-terminated, apparently Coverity is not smart enough to see this. Make it easier for Coverity to analyze this code by changing the strncpy() call into a strlcpy() call. This patch does not change the behavior of the code but fixes Coverity ID 1423720.
Signed-off-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 63932dea74a1..8cecb36b5af1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2076,7 +2076,7 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { - strncpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); + strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); return; } -- cgit v1.2.3 From 40581d1a91a1527e1e15350e479156810a389a96 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:43 -0700 Subject: nvme-pci: fix nvme_suspend_queue() kernel-doc header This patch avoids that the kernel-doc tool complains about the nvme_suspend_queue() function header when building with W=1. 
Signed-off-by: Bart Van Assche Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d668682f91df..450481c2fd17 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1249,7 +1249,7 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) /** * nvme_suspend_queue - put queue into suspended state - * @nvmeq - queue to suspend + * @nvmeq: queue to suspend */ static int nvme_suspend_queue(struct nvme_queue *nvmeq) { -- cgit v1.2.3 From 5eadc9cce17100caef88e972abeeeca7ef6d8a92 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:51 -0700 Subject: nvmet: use strlcpy() instead of strcpy() Although the code modified by this patch looks fine to me, this patch avoids that Coverity reports the following complaint (ID 1364971 and ID 1364973): "You might overrun the 256-character fixed-size string id->subnqn". 
Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/discovery.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 7a45f4477679..1179f6314323 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -353,7 +353,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); - strcpy(id->subnqn, ctrl->subsys->subsysnqn); + strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); /* Max command capsule size is sqe + single page of in-capsule data */ id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index d1954f4ca28d..bc0aa0bf1543 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -174,7 +174,7 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); - strcpy(id->subnqn, ctrl->subsys->subsysnqn); + strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); -- cgit v1.2.3 From 0d3ebdec9394c984f3aa59ea97541f2243952b55 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:53 -0700 Subject: nvmet-rdma: declare local symbols static This patch avoids that sparse complains about missing declarations. 
Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 5becca88ccbe..bd265aceb90c 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -122,7 +122,7 @@ struct nvmet_rdma_device { int inline_page_count; }; -struct workqueue_struct *nvmet_rdma_delete_wq; +static struct workqueue_struct *nvmet_rdma_delete_wq; static bool nvmet_rdma_use_srq; module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); MODULE_PARM_DESC(use_srq, "Use shared receive queue."); -- cgit v1.2.3 From 8eacd1bd21d6913ec27e6120e9a8733352e191d3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:52 -0700 Subject: nvmet: avoid integer overflow in the discard code Although I'm not sure whether it is a good idea to support large discard commands, I think integer overflow for discard ranges larger than 4 GB should be avoided. This patch avoids that smatch reports the following: drivers/nvme/target/io-cmd-file.c:249:1 nvmet_file_execute_discard() warn: should '((range.nlb)) << req->ns->blksize_shift' be a 64 bit type? 
Fixes: d5eff33ee6f8 ("nvmet: add simple file backed ns support") Signed-off-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 81a9dc5290a8..39d972e2595f 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -246,7 +246,8 @@ static void nvmet_file_execute_discard(struct nvmet_req *req) break; offset = le64_to_cpu(range.slba) << req->ns->blksize_shift; - len = le32_to_cpu(range.nlb) << req->ns->blksize_shift; + len = le32_to_cpu(range.nlb); + len <<= req->ns->blksize_shift; if (offset + len > req->ns->size) { ret = NVME_SC_LBA_RANGE | NVME_SC_DNR; break; -- cgit v1.2.3 From 76c910c7cf6d2d325c24439855a606cf1d414d29 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:44 -0700 Subject: nvme-fc: fix kernel-doc headers This patch avoids that the kernel-doc tool complains about several multiple function headers when building with W=1. Signed-off-by: Bart Van Assche Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9d201b35397d..d838987fffe1 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -317,7 +317,7 @@ out_done: * @template: LLDD entrypoints and operational parameters for the port * @dev: physical hardware device node port corresponds to. Will be * used for DMA mappings - * @lport_p: pointer to a local port pointer. Upon success, the routine + * @portptr: pointer to a local port pointer. Upon success, the routine * will allocate a nvme_fc_local_port structure and place its * address in the local port pointer. Upon failure, local port * pointer will be set to 0. 
@@ -425,8 +425,7 @@ EXPORT_SYMBOL_GPL(nvme_fc_register_localport); * nvme_fc_unregister_localport - transport entry point called by an * LLDD to deregister/remove a previously * registered a NVME host FC port. - * @localport: pointer to the (registered) local port that is to be - * deregistered. + * @portptr: pointer to the (registered) local port that is to be deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -632,7 +631,7 @@ __nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport, * @localport: pointer to the (registered) local port that the remote * subsystem port is connected to. * @pinfo: pointer to information about the port to be registered - * @rport_p: pointer to a remote port pointer. Upon success, the routine + * @portptr: pointer to a remote port pointer. Upon success, the routine * will allocate a nvme_fc_remote_port structure and place its * address in the remote port pointer. Upon failure, remote port * pointer will be set to 0. @@ -809,8 +808,8 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) * nvme_fc_unregister_remoteport - transport entry point called by an * LLDD to deregister/remove a previously * registered a NVME subsystem FC port. - * @remoteport: pointer to the (registered) remote port that is to be - * deregistered. + * @portptr: pointer to the (registered) remote port that is to be + * deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno -- cgit v1.2.3 From d3d0bc78be300098104d9fde9ca1330694a70f45 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:45 -0700 Subject: nvme-fc: introduce struct nvme_fcp_op_w_sgl This patch does not change any functionality but makes the intent of the code more clear. 
Signed-off-by: Bart Van Assche Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index d838987fffe1..fdadc9464f6f 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "nvme.h" #include "fabrics.h" @@ -104,6 +105,12 @@ struct nvme_fc_fcp_op { struct nvme_fc_ersp_iu rsp_iu; }; +struct nvme_fcp_op_w_sgl { + struct nvme_fc_fcp_op op; + struct scatterlist sgl[SG_CHUNK_SIZE]; + uint8_t priv[0]; +}; + struct nvme_fc_lport { struct nvme_fc_local_port localport; @@ -1686,6 +1693,8 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, struct request *rq, u32 rqno) { + struct nvme_fcp_op_w_sgl *op_w_sgl = + container_of(op, typeof(*op_w_sgl), op); struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; int ret = 0; @@ -1695,7 +1704,7 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, op->fcp_req.rspaddr = &op->rsp_iu; op->fcp_req.rsplen = sizeof(op->rsp_iu); op->fcp_req.done = nvme_fc_fcpio_done; - op->fcp_req.first_sgl = (struct scatterlist *)&op[1]; + op->fcp_req.first_sgl = &op_w_sgl->sgl[0]; op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; op->ctrl = ctrl; op->queue = queue; @@ -1734,12 +1743,12 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { struct nvme_fc_ctrl *ctrl = set->driver_data; - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0; struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; nvme_req(rq)->ctrl = &ctrl->ctrl; - return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); + return __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); } static int @@ -2423,10 +2432,9 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ctrl->tag_set.reserved_tags = 1; /* fabric connect */ ctrl->tag_set.numa_node = NUMA_NO_NODE; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; + ctrl->tag_set.cmd_size = + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz); ctrl->tag_set.driver_data = ctrl; ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; @@ -3028,10 +3036,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; + ctrl->admin_tag_set.cmd_size = + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz); ctrl->admin_tag_set.driver_data = ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; -- cgit v1.2.3 From 0d2bdf9f4134582bc7c4b82cb516cb27952127d0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:46 -0700 Subject: nvme-fc: rework the request initialization code Instead of setting and then clearing the first_sgl pointer for AEN requests, leave that pointer zero. 
This patch does not change how requests are initialized but avoids that Coverity reports the following complaint for nvme_fc_init_aen_ops(): CID 1418400 (#1 of 1): Out-of-bounds access (OVERRUN) 4. overrun-buffer-val: Overrunning buffer pointed to by aen_op of 312 bytes by passing it to a function which accesses it at byte offset 312. Signed-off-by: Bart Van Assche Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index fdadc9464f6f..e52b9d3c0bd6 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1704,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, op->fcp_req.rspaddr = &op->rsp_iu; op->fcp_req.rsplen = sizeof(op->rsp_iu); op->fcp_req.done = nvme_fc_fcpio_done; - op->fcp_req.first_sgl = &op_w_sgl->sgl[0]; op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; op->ctrl = ctrl; op->queue = queue; @@ -1746,9 +1745,14 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0; struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; + int res; nvme_req(rq)->ctrl = &ctrl->ctrl; - return __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); + res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); + if (res) + return res; + op->op.fcp_req.first_sgl = &op->sgl[0]; + return res; } static int @@ -1778,7 +1782,6 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) } aen_op->flags = FCOP_FLAGS_AEN; - aen_op->fcp_req.first_sgl = NULL; /* no sg list */ aen_op->fcp_req.private = private; memset(sqe, 0, sizeof(*sqe)); -- cgit v1.2.3 From 1c4665272ca73335a662a0fb6a9604ec76983756 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Oct 2018 14:28:47 -0700 Subject: nvmet-fc: fix kernel-doc headers This patch avoids that the kernel-doc tool complains about two function headers when building with W=1. Signed-off-by: Bart Van Assche Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index ef286b72d958..409081a03b24 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1245,8 +1245,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) * nvme_fc_unregister_targetport - transport entry point called by an * LLDD to deregister/remove a previously * registered a local NVME subsystem FC port. - * @tgtport: pointer to the (registered) target port that is to be - * deregistered. + * @target_port: pointer to the (registered) target port that is to be + * deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -1749,7 +1749,7 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work) * * If this routine returns error, the LLDD should abort the exchange. 
* - * @tgtport: pointer to the (registered) target port the LS was + * @target_port: pointer to the (registered) target port the LS was * received on. * @lsreq: pointer to a lsreq request structure to be used to reference * the exchange corresponding to the LS. -- cgit v1.2.3 From 202359c007f6b1d6247a062c0682d6d5bcd3e7d7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 10 Oct 2018 08:08:19 -0700 Subject: nvme-core: make implicit seed truncation explicit The nvme_user_io.slba field is 64 bits wide. That value is copied into the 32-bit bio_integrity_payload.bip_iter.bi_sector field. Make that truncation explicit to avoid that Coverity complains about implicit truncation. See also Coverity ID 1056486 on http://scan.coverity.com/projects/linux. Signed-off-by: Bart Van Assche Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8cecb36b5af1..65c42448e904 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1132,7 +1132,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) return nvme_submit_user_cmd(ns->queue, &c, (void __user *)(uintptr_t)io.addr, length, - metadata, meta_len, io.slba, NULL, 0); + metadata, meta_len, lower_32_bits(io.slba), NULL, 0); } static u32 nvme_known_admin_effects(u8 opcode) -- cgit v1.2.3 From 1216e9ef18b84f4fb5934792368fb01eb3540520 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 10 Oct 2018 08:08:20 -0700 Subject: nvmet-fcloop: suppress a compiler warning Building with W=1 enables the compiler warning -Wimplicit-fallthrough=3. That option does not recognize the fall-through comment in the fcloop driver. Add a fall-through comment that is recognized for -Wimplicit-fallthrough=3. 
This patch avoids that the compiler reports the following warning when building with W=1: drivers/nvme/target/fcloop.c:647:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (op == NVMET_FCOP_READDATA) ^ Signed-off-by: Bart Van Assche Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 5251689a1d9a..291f4121f516 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -648,6 +648,7 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, break; /* Fall-Thru to RSP handling */ + /* FALLTHRU */ case NVMET_FCOP_RSP: if (fcpreq) { -- cgit v1.2.3 From cb4bfda62afa25b4eee3d635d33fccdd9485dd7c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 15 Oct 2018 10:19:06 -0600 Subject: nvme-pci: fix hot removal during error handling A removal waits for the reset_work to complete. If a surprise removal occurs around the same time as an error triggered controller reset, and reset work happened to dispatch a command to the removed controller, the command won't be recovered since the timeout work doesn't do anything during error recovery. We wouldn't want to wait for timeout handling anyway, so this patch fixes this by disabling the controller and killing admin queues prior to syncing with the reset_work. 
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 450481c2fd17..72737009b82d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2564,13 +2564,12 @@ static void nvme_remove(struct pci_dev *pdev) struct nvme_dev *dev = pci_get_drvdata(pdev); nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); - - cancel_work_sync(&dev->ctrl.reset_work); pci_set_drvdata(pdev, NULL); if (!pci_device_is_present(pdev)) { nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); nvme_dev_disable(dev, true); + nvme_dev_remove_admin(dev); } flush_work(&dev->ctrl.reset_work); -- cgit v1.2.3 From 3045c0d05e728134aefb8adbbc56a4d876a0bdce Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 17 Oct 2018 11:34:15 -0700 Subject: nvme-pci: remove duplicate check This is a cleanup patch that doesn't change any functionality. It removes the duplicate call to blk_integrity_rq() in nvme_map_data().
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 72737009b82d..4e023cd007e1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -772,10 +772,10 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) goto out_unmap; - } - if (blk_integrity_rq(req)) cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); + } + return BLK_STS_OK; out_unmap: -- cgit v1.2.3 From bb59b8e57493465fac8658bba103f7c4cc5d874a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 19 Oct 2018 00:50:29 -0700 Subject: nvme-rdma: always have a valid trsvcid If not passed, we set the default trsvcid. We can rely on having trsvcid and can simplify the controller matching logic. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index e7be903041a8..03fff72b96f1 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1860,26 +1860,11 @@ static inline bool __nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl, struct nvmf_ctrl_options *opts) { - char *stdport = __stringify(NVME_RDMA_IP_PORT); - - if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) || - strcmp(opts->traddr, ctrl->ctrl.opts->traddr)) + strcmp(opts->traddr, ctrl->ctrl.opts->traddr) || + strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid)) return false; - if (opts->mask & NVMF_OPT_TRSVCID && - ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid)) - return false; - } else if (opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(opts->trsvcid, 
stdport)) - return false; - } else if (ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(stdport, ctrl->ctrl.opts->trsvcid)) - return false; - } - /* else, it's a match as both have stdport. Fall to next checks */ - /* * checking the local address is rough. In most cases, one * is not specified and the host port is selected by the stack. @@ -1939,7 +1924,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvme_rdma_ctrl *ctrl; int ret; bool changed; - char *port; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) @@ -1947,15 +1931,21 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - if (opts->mask & NVMF_OPT_TRSVCID) - port = opts->trsvcid; - else - port = __stringify(NVME_RDMA_IP_PORT); + if (!(opts->mask & NVMF_OPT_TRSVCID)) { + opts->trsvcid = + kstrdup(__stringify(NVME_RDMA_IP_PORT), GFP_KERNEL); + if (!opts->trsvcid) { + ret = -ENOMEM; + goto out_free_ctrl; + } + opts->mask |= NVMF_OPT_TRSVCID; + } ret = inet_pton_with_scope(&init_net, AF_UNSPEC, - opts->traddr, port, &ctrl->addr); + opts->traddr, opts->trsvcid, &ctrl->addr); if (ret) { - pr_err("malformed address passed: %s:%s\n", opts->traddr, port); + pr_err("malformed address passed: %s:%s\n", + opts->traddr, opts->trsvcid); goto out_free_ctrl; } -- cgit v1.2.3 From b7c7be6f6bd28ffea7f608ac2d806b8a4bdc82fe Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 18 Oct 2018 17:40:40 -0700 Subject: nvme-fabrics: move controller options matching to fabrics IP transports will most likely use the same controller options matching when detecting a duplicate connect. Move it to fabrics. 
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 30 ++++++++++++++++++++++++++++++ drivers/nvme/host/fabrics.h | 2 ++ drivers/nvme/host/rdma.c | 35 +---------------------------------- 3 files changed, 33 insertions(+), 34 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index bcd09d3a44da..bd0969db6225 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -868,6 +868,36 @@ static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts, return 0; } +bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, + struct nvmf_ctrl_options *opts) +{ + if (!nvmf_ctlr_matches_baseopts(ctrl, opts) || + strcmp(opts->traddr, ctrl->opts->traddr) || + strcmp(opts->trsvcid, ctrl->opts->trsvcid)) + return false; + + /* + * Checking the local address is rough. In most cases, none is specified + * and the host port is selected by the stack. + * + * Assume no match if: + * - local address is specified and address is not the same + * - local address is not specified but remote is, or vice versa + * (admin using specific host_traddr when it matters). 
+ */ + if ((opts->mask & NVMF_OPT_HOST_TRADDR) && + (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { + if (strcmp(opts->host_traddr, ctrl->opts->host_traddr)) + return false; + } else if ((opts->mask & NVMF_OPT_HOST_TRADDR) || + (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(nvmf_ip_options_match); + static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts, unsigned int allowed_opts) { diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index aa2fdb2a2e8f..6ea6275f332a 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -166,6 +166,8 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, struct request *rq); bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live); +bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, + struct nvmf_ctrl_options *opts); static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 03fff72b96f1..d181cafedc58 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1856,39 +1856,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .stop_ctrl = nvme_rdma_stop_ctrl, }; -static inline bool -__nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl, - struct nvmf_ctrl_options *opts) -{ - if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) || - strcmp(opts->traddr, ctrl->ctrl.opts->traddr) || - strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid)) - return false; - - /* - * checking the local address is rough. In most cases, one - * is not specified and the host port is selected by the stack. - * - * Assume no match if: - * local address is specified and address is not the same - * local address is not specified but remote is, or vice versa - * (admin using specific host_traddr when it matters). 
- */ - if (opts->mask & NVMF_OPT_HOST_TRADDR && - ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) { - if (strcmp(opts->host_traddr, ctrl->ctrl.opts->host_traddr)) - return false; - } else if (opts->mask & NVMF_OPT_HOST_TRADDR || - ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) - return false; - /* - * if neither controller had an host port specified, assume it's - * a match as everything else matched. - */ - - return true; -} - /* * Fails a connection request if it matches an existing controller * (association) with the same tuple: @@ -1909,7 +1876,7 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts) mutex_lock(&nvme_rdma_ctrl_mutex); list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { - found = __nvme_rdma_options_match(ctrl, opts); + found = nvmf_ip_options_match(&ctrl->ctrl, opts); if (found) break; } -- cgit v1.2.3