diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-17 16:57:47 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-17 16:57:47 -0700 |
commit | 7ad67ca5534ee7c958559c4ad610f05c4578e361 (patch) | |
tree | dc6b6a8a6b70b5f25b07bcdc06d8e77e705f6822 /drivers/nvme/host/core.c | |
parent | 5260c2b863ef1152445ce93476c95d8c8a727eef (diff) | |
parent | 9c7eddf1b080f98fed1aadb74fe784f29bf77a08 (diff) | |
download | linux-7ad67ca5534ee7c958559c4ad610f05c4578e361.tar.bz2 |
Merge tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
- Two NVMe pull requests:
- ana log parse fix from Anton
- nvme quirks support for Apple devices from Ben
- fix missing bio completion tracing for multipath stack devices
from Hannes and Mikhail
- IP TOS settings for nvme rdma and tcp transports from Israel
- rq_dma_dir cleanups from Israel
- tracing for Get LBA Status command from Minwoo
- Some nvme-tcp cleanups from Minwoo, Potnuri and Myself
- Some consolidation between the fabrics transports for handling
the CAP register
- reset race with ns scanning fix for fabrics (move fabrics
commands to a dedicated request queue with a different lifetime
from the admin request queue)."
- controller reset and namespace scan races fixes
- nvme discovery log change uevent support
- naming improvements from Keith
- multiple discovery controllers reject fix from James
- some regular cleanups from various people
- Series fixing (and re-fixing) null_blk debug printing and nr_devices
checks (André)
- A few pull requests from Song, with fixes from Andy, Guoqing,
Guilherme, Neil, Nigel, and Yufen.
- REQ_OP_ZONE_RESET_ALL support (Chaitanya)
- Bio merge handling unification (Christoph)
- Pick default elevator correctly for devices with special needs
(Damien)
- Block stats fixes (Hou)
- Timeout and support devices nbd fixes (Mike)
- Series fixing races around elevator switching and device add/remove
(Ming)
- sed-opal cleanups (Revanth)
- Per device weight support for BFQ (Fam)
- Support for blk-iocost, a new model that can properly account cost of
IO workloads. (Tejun)
- blk-cgroup writeback fixes (Tejun)
- paride queue init fixes (zhengbin)
- blk_set_runtime_active() cleanup (Stanley)
- Block segment mapping optimizations (Bart)
- lightnvm fixes (Hans/Minwoo/YueHaibing)
- Various little fixes and cleanups
* tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block: (186 commits)
null_blk: format pr_* logs with pr_fmt
null_blk: match the type of parameter nr_devices
null_blk: do not fail the module load with zero devices
block: also check RQF_STATS in blk_mq_need_time_stamp()
block: make rq sector size accessible for block stats
bfq: Fix bfq linkage error
raid5: use bio_end_sector in r5_next_bio
raid5: remove STRIPE_OPS_REQ_PENDING
md: add feature flag MD_FEATURE_RAID0_LAYOUT
md/raid0: avoid RAID0 data corruption due to layout confusion.
raid5: don't set STRIPE_HANDLE to stripe which is in batch list
raid5: don't increment read_errors on EILSEQ return
nvmet: fix a wrong error status returned in error log page
nvme: send discovery log page change events to userspace
nvme: add uevent variables for controller devices
nvme: enable aen regardless of the presence of I/O queues
nvme-fabrics: allow discovery subsystems accept a kato
nvmet: Use PTR_ERR_OR_ZERO() in nvmet_init_discovery()
nvme: Remove redundant assignment of cq vector
nvme: Assign subsys instance from first ctrl
...
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r-- | drivers/nvme/host/core.c | 201 |
1 files changed, 140 insertions, 61 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d3d6b7bd6903..1ede1763a5ee 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -22,12 +22,12 @@ #include <linux/pm_qos.h> #include <asm/unaligned.h> -#define CREATE_TRACE_POINTS -#include "trace.h" - #include "nvme.h" #include "fabrics.h" +#define CREATE_TRACE_POINTS +#include "trace.h" + #define NVME_MINORS (1U << MINORBITS) unsigned int admin_timeout = 60; @@ -81,7 +81,6 @@ EXPORT_SYMBOL_GPL(nvme_reset_wq); struct workqueue_struct *nvme_delete_wq; EXPORT_SYMBOL_GPL(nvme_delete_wq); -static DEFINE_IDA(nvme_subsystems_ida); static LIST_HEAD(nvme_subsystems); static DEFINE_MUTEX(nvme_subsystems_lock); @@ -197,9 +196,9 @@ static inline bool nvme_ns_has_pi(struct nvme_ns *ns) return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); } -static blk_status_t nvme_error_status(struct request *req) +static blk_status_t nvme_error_status(u16 status) { - switch (nvme_req(req)->status & 0x7ff) { + switch (status & 0x7ff) { case NVME_SC_SUCCESS: return BLK_STS_OK; case NVME_SC_CAP_EXCEEDED: @@ -226,6 +225,8 @@ static blk_status_t nvme_error_status(struct request *req) return BLK_STS_PROTECTION; case NVME_SC_RESERVATION_CONFLICT: return BLK_STS_NEXUS; + case NVME_SC_HOST_PATH_ERROR: + return BLK_STS_TRANSPORT; default: return BLK_STS_IOERR; } @@ -260,7 +261,7 @@ static void nvme_retry_req(struct request *req) void nvme_complete_rq(struct request *req) { - blk_status_t status = nvme_error_status(req); + blk_status_t status = nvme_error_status(nvme_req(req)->status); trace_nvme_complete_rq(req); @@ -279,6 +280,8 @@ void nvme_complete_rq(struct request *req) return; } } + + nvme_trace_bio_complete(req, status); blk_mq_end_request(req, status); } EXPORT_SYMBOL_GPL(nvme_complete_rq); @@ -288,8 +291,12 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved) dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, "Cancelling I/O %d", req->tag); - nvme_req(req)->status = NVME_SC_ABORT_REQ; - blk_mq_complete_request_sync(req); + /* don't abort one completed request */ + if (blk_mq_request_completed(req)) + return true; + + nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR; + blk_mq_complete_request(req); return true; } EXPORT_SYMBOL_GPL(nvme_cancel_request); @@ -1088,10 +1095,9 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n NVME_IDENTIFY_DATA_SIZE); } -static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, - unsigned nsid) +static int nvme_identify_ns(struct nvme_ctrl *ctrl, + unsigned nsid, struct nvme_id_ns **id) { - struct nvme_id_ns *id; struct nvme_command c = { }; int error; @@ -1100,18 +1106,17 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, c.identify.nsid = cpu_to_le32(nsid); c.identify.cns = NVME_ID_CNS_NS; - id = kmalloc(sizeof(*id), GFP_KERNEL); - if (!id) - return NULL; + *id = kmalloc(sizeof(**id), GFP_KERNEL); + if (!*id) + return -ENOMEM; - error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id)); + error = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id)); if (error) { dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error); - kfree(id); - return NULL; + kfree(*id); } - return id; + return error; } static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, @@ -1180,7 +1185,8 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) EXPORT_SYMBOL_GPL(nvme_set_queue_count); #define NVME_AEN_SUPPORTED \ - (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE) + (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | \ + NVME_AEN_CFG_ANA_CHANGE | NVME_AEN_CFG_DISC_CHANGE) static void nvme_enable_aen(struct nvme_ctrl *ctrl) { @@ -1195,6 +1201,8 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl) if (status) dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n", supported_aens); + + queue_work(nvme_wq, &ctrl->async_event_work); } static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) @@ -1594,9 +1602,11 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors); } -static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, +static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, struct nvme_id_ns *id, struct nvme_ns_ids *ids) { + int ret = 0; + memset(ids, 0, sizeof(*ids)); if (ctrl->vs >= NVME_VS(1, 1, 0)) @@ -1607,10 +1617,12 @@ static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, /* Don't treat error as fatal we potentially * already have a NGUID or EUI-64 */ - if (nvme_identify_ns_descs(ctrl, nsid, ids)) + ret = nvme_identify_ns_descs(ctrl, nsid, ids); + if (ret) dev_warn(ctrl->device, - "%s: Identify Descriptors failed\n", __func__); + "Identify Descriptors failed (%d)\n", ret); } + return ret; } static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) @@ -1738,25 +1750,37 @@ static int nvme_revalidate_disk(struct gendisk *disk) return -ENODEV; } - id = nvme_identify_ns(ctrl, ns->head->ns_id); - if (!id) - return -ENODEV; + ret = nvme_identify_ns(ctrl, ns->head->ns_id, &id); + if (ret) + goto out; if (id->ncap == 0) { ret = -ENODEV; - goto out; + goto free_id; } __nvme_revalidate_disk(disk, id); - nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); + ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); + if (ret) + goto free_id; + if (!nvme_ns_ids_equal(&ns->head->ids, &ids)) { dev_err(ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); ret = -ENODEV; } -out: +free_id: kfree(id); +out: + /* + * Only fail the function if we got a fatal error back from the + * device, otherwise ignore the error and just move on. + */ + if (ret == -ENOMEM || (ret > 0 && !(ret & NVME_SC_DNR))) + ret = 0; + else if (ret > 0) + ret = blk_status_to_errno(nvme_error_status(ret)); return ret; } @@ -1952,7 +1976,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) * bits', but doing so may cause the device to complete commands to the * admin queue ... and we don't know what memory that might be pointing at! */ -int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap) +int nvme_disable_ctrl(struct nvme_ctrl *ctrl) { int ret; @@ -1966,20 +1990,27 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap) if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) msleep(NVME_QUIRK_DELAY_AMOUNT); - return nvme_wait_ready(ctrl, cap, false); + return nvme_wait_ready(ctrl, ctrl->cap, false); } EXPORT_SYMBOL_GPL(nvme_disable_ctrl); -int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) +int nvme_enable_ctrl(struct nvme_ctrl *ctrl) { /* * Default to a 4K page size, with the intention to update this * path in the future to accomodate architectures with differing * kernel and IO page sizes. */ - unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12; + unsigned dev_page_min, page_shift = 12; int ret; + ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap); + if (ret) { + dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret); + return ret; + } + dev_page_min = NVME_CAP_MPSMIN(ctrl->cap) + 12; + if (page_shift < dev_page_min) { dev_err(ctrl->device, "Minimum device page size %u too large for host (%u)\n", @@ -1998,7 +2029,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); if (ret) return ret; - return nvme_wait_ready(ctrl, cap, true); + return nvme_wait_ready(ctrl, ctrl->cap, true); } EXPORT_SYMBOL_GPL(nvme_enable_ctrl); @@ -2332,7 +2363,8 @@ static void nvme_release_subsystem(struct device *dev) struct nvme_subsystem *subsys = container_of(dev, struct nvme_subsystem, dev); - ida_simple_remove(&nvme_subsystems_ida, subsys->instance); + if (subsys->instance >= 0) + ida_simple_remove(&nvme_instance_ida, subsys->instance); kfree(subsys); } @@ -2361,6 +2393,17 @@ static struct nvme_subsystem *__nvme_find_get_subsystem(const char *subsysnqn) lockdep_assert_held(&nvme_subsystems_lock); + /* + * Fail matches for discovery subsystems. This results + * in each discovery controller bound to a unique subsystem. + * This avoids issues with validating controller values + * that can only be true when there is a single unique subsystem. + * There may be multiple and completely independent entities + * that provide discovery controllers. + */ + if (!strcmp(subsysnqn, NVME_DISC_SUBSYS_NAME)) + return NULL; + list_for_each_entry(subsys, &nvme_subsystems, entry) { if (strcmp(subsys->subnqn, subsysnqn)) continue; @@ -2461,12 +2504,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); if (!subsys) return -ENOMEM; - ret = ida_simple_get(&nvme_subsystems_ida, 0, 0, GFP_KERNEL); - if (ret < 0) { - kfree(subsys); - return ret; - } - subsys->instance = ret; + + subsys->instance = -1; mutex_init(&subsys->lock); kref_init(&subsys->ref); INIT_LIST_HEAD(&subsys->ctrls); @@ -2485,7 +2524,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) subsys->dev.class = nvme_subsys_class; subsys->dev.release = nvme_release_subsystem; subsys->dev.groups = nvme_subsys_attrs_groups; - dev_set_name(&subsys->dev, "nvme-subsys%d", subsys->instance); + dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance); device_initialize(&subsys->dev); mutex_lock(&nvme_subsystems_lock); @@ -2517,6 +2556,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) goto out_put_subsystem; } + if (!found) + subsys->instance = ctrl->instance; ctrl->subsys = subsys; list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); mutex_unlock(&nvme_subsystems_lock); @@ -2574,7 +2615,6 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl) int nvme_init_identify(struct nvme_ctrl *ctrl) { struct nvme_id_ctrl *id; - u64 cap; int ret, page_shift; u32 max_hw_sectors; bool prev_apst_enabled; @@ -2584,16 +2624,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) dev_err(ctrl->device, "Reading VS failed (%d)\n", ret); return ret; } - - ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap); - if (ret) { - dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret); - return ret; - } - page_shift = NVME_CAP_MPSMIN(cap) + 12; + page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; + ctrl->sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize); if (ctrl->vs >= NVME_VS(1, 1, 0)) - ctrl->subsystem = NVME_CAP_NSSRC(cap); + ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap); ret = nvme_identify_ctrl(ctrl, &id); if (ret) { @@ -3184,7 +3219,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->ns_id = nsid; kref_init(&head->ref); - nvme_report_ns_ids(ctrl, nsid, id, &head->ids); + ret = nvme_report_ns_ids(ctrl, nsid, id, &head->ids); + if (ret) + goto out_cleanup_srcu; ret = __nvme_check_ids(ctrl->subsys, head); if (ret) { @@ -3209,6 +3246,8 @@ out_ida_remove: out_free_head: kfree(head); out: + if (ret > 0) + ret = blk_status_to_errno(nvme_error_status(ret)); return ERR_PTR(ret); } @@ -3232,7 +3271,10 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, } else { struct nvme_ns_ids ids; - nvme_report_ns_ids(ctrl, nsid, id, &ids); + ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); + if (ret) + goto out_unlock; + if (!nvme_ns_ids_equal(&head->ids, &ids)) { dev_err(ctrl->device, "IDs don't match for shared namespace %d\n", @@ -3247,6 +3289,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, out_unlock: mutex_unlock(&ctrl->subsys->lock); + if (ret > 0) + ret = blk_status_to_errno(nvme_error_status(ret)); return ret; } @@ -3338,11 +3382,9 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); nvme_set_queue_limits(ctrl, ns->queue); - id = nvme_identify_ns(ctrl, nsid); - if (!id) { - ret = -EIO; + ret = nvme_identify_ns(ctrl, nsid, &id); + if (ret) goto out_free_queue; - } if (id->ncap == 0) { ret = -EINVAL; @@ -3404,6 +3446,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) blk_cleanup_queue(ns->queue); out_free_ns: kfree(ns); + if (ret > 0) + ret = blk_status_to_errno(nvme_error_status(ret)); return ret; } @@ -3617,6 +3661,33 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_remove_namespaces); +static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct nvme_ctrl *ctrl = + container_of(dev, struct nvme_ctrl, ctrl_device); + struct nvmf_ctrl_options *opts = ctrl->opts; + int ret; + + ret = add_uevent_var(env, "NVME_TRTYPE=%s", ctrl->ops->name); + if (ret) + return ret; + + if (opts) { + ret = add_uevent_var(env, "NVME_TRADDR=%s", opts->traddr); + if (ret) + return ret; + + ret = add_uevent_var(env, "NVME_TRSVCID=%s", + opts->trsvcid ?: "none"); + if (ret) + return ret; + + ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s", + opts->host_traddr ?: "none"); + } + return ret; +} + static void nvme_aen_uevent(struct nvme_ctrl *ctrl) { char *envp[2] = { NULL, NULL }; @@ -3723,6 +3794,9 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) queue_work(nvme_wq, &ctrl->ana_work); break; #endif + case NVME_AER_NOTICE_DISC_CHANGED: + ctrl->aen_result = result; + break; default: dev_warn(ctrl->device, "async event result %08x\n", result); } @@ -3769,10 +3843,10 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->kato) nvme_start_keep_alive(ctrl); + nvme_enable_aen(ctrl); + if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); - nvme_enable_aen(ctrl); - queue_work(nvme_wq, &ctrl->async_event_work); nvme_start_queues(ctrl); } } @@ -3792,7 +3866,9 @@ static void nvme_free_ctrl(struct device *dev) container_of(dev, struct nvme_ctrl, ctrl_device); struct nvme_subsystem *subsys = ctrl->subsys; - ida_simple_remove(&nvme_instance_ida, ctrl->instance); + if (subsys && ctrl->instance != subsys->instance) + ida_simple_remove(&nvme_instance_ida, ctrl->instance); + kfree(ctrl->effects); nvme_mpath_uninit(ctrl); __free_page(ctrl->discard_page); @@ -3992,6 +4068,9 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl) list_for_each_entry(ns, &ctrl->namespaces, list) blk_sync_queue(ns->queue); up_read(&ctrl->namespaces_rwsem); + + if (ctrl->admin_q) + blk_sync_queue(ctrl->admin_q); } EXPORT_SYMBOL_GPL(nvme_sync_queues); @@ -4050,6 +4129,7 @@ static int __init nvme_core_init(void) result = PTR_ERR(nvme_class); goto unregister_chrdev; } + nvme_class->dev_uevent = nvme_class_uevent; nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem"); if (IS_ERR(nvme_subsys_class)) { @@ -4074,7 +4154,6 @@ out: static void __exit nvme_core_exit(void) { - ida_destroy(&nvme_subsystems_ida); class_destroy(nvme_subsys_class); class_destroy(nvme_class); unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); |