diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-13 10:19:16 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-13 10:19:16 -0800 |
commit | 36869cb93d36269f34800b3384ba7991060a69cf (patch) | |
tree | 1ff266dcb3386bb1403494aa89647a96fd2396cd /drivers/nvme | |
parent | 9439b3710df688d853eb6cb4851256f2c92b1797 (diff) | |
parent | 7cd54aa8438947602cf68eda1db327822b9b8e6b (diff) | |
download | linux-36869cb93d36269f34800b3384ba7991060a69cf.tar.bz2 |
Merge branch 'for-4.10/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
"This is the main block pull request this series. Contrary to previous
release, I've kept the core and driver changes in the same branch. We
always ended up having dependencies between the two for obvious
reasons, so makes more sense to keep them together. That said, I'll
probably try and keep more topical branches going forward, especially
for cycles that end up being as busy as this one.
The major parts of this pull request is:
- Improved support for O_DIRECT on block devices, with a small
private implementation instead of using the pig that is
fs/direct-io.c. From Christoph.
- Request completion tracking in a scalable fashion. This is utilized
by two components in this pull, the new hybrid polling and the
writeback queue throttling code.
- Improved support for polling with O_DIRECT, adding a hybrid mode
that combines pure polling with an initial sleep. From me.
- Support for automatic throttling of writeback queues on the block
side. This uses feedback from the device completion latencies to
scale the queue on the block side up or down. From me.
- Support from SMR drives in the block layer and for SD. From Hannes
and Shaun.
- Multi-connection support for nbd. From Josef.
- Cleanup of request and bio flags, so we have a clear split between
which are bio (or rq) private, and which ones are shared. From
Christoph.
- A set of patches from Bart, that improve how we handle queue
stopping and starting in blk-mq.
- Support for WRITE_ZEROES from Chaitanya.
- Lightnvm updates from Javier/Matias.
- Supoort for FC for the nvme-over-fabrics code. From James Smart.
- A bunch of fixes from a whole slew of people, too many to name
here"
* 'for-4.10/block' of git://git.kernel.dk/linux-block: (182 commits)
blk-stat: fix a few cases of missing batch flushing
blk-flush: run the queue when inserting blk-mq flush
elevator: make the rqhash helpers exported
blk-mq: abstract out blk_mq_dispatch_rq_list() helper
blk-mq: add blk_mq_start_stopped_hw_queue()
block: improve handling of the magic discard payload
blk-wbt: don't throttle discard or write zeroes
nbd: use dev_err_ratelimited in io path
nbd: reset the setup task for NBD_CLEAR_SOCK
nvme-fabrics: Add FC LLDD loopback driver to test FC-NVME
nvme-fabrics: Add target support for FC transport
nvme-fabrics: Add host support for FC transport
nvme-fabrics: Add FC transport LLDD api definitions
nvme-fabrics: Add FC transport FC-NVME definitions
nvme-fabrics: Add FC transport error codes to nvme.h
Add type 0x28 NVME type code to scsi fc headers
nvme-fabrics: patch target code in prep for FC transport support
nvme-fabrics: set sqe.command_id in core not transports
parser: add u64 number parser
nvme-rdma: align to generic ib_event logging helper
...
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/Kconfig | 17 | ||||
-rw-r--r-- | drivers/nvme/host/Makefile | 3 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 150 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 33 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 2586 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 223 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 51 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 47 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 43 | ||||
-rw-r--r-- | drivers/nvme/host/scsi.c | 11 | ||||
-rw-r--r-- | drivers/nvme/target/Kconfig | 24 | ||||
-rw-r--r-- | drivers/nvme/target/Makefile | 4 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 3 | ||||
-rw-r--r-- | drivers/nvme/target/configfs.c | 20 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 22 | ||||
-rw-r--r-- | drivers/nvme/target/fabrics-cmd.c | 14 | ||||
-rw-r--r-- | drivers/nvme/target/fc.c | 2288 | ||||
-rw-r--r-- | drivers/nvme/target/fcloop.c | 1148 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd.c | 39 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 26 | ||||
-rw-r--r-- | drivers/nvme/target/nvmet.h | 8 | ||||
-rw-r--r-- | drivers/nvme/target/rdma.c | 8 |
22 files changed, 6520 insertions, 248 deletions
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index f7d37a62f874..90745a616df7 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -43,3 +43,20 @@ config NVME_RDMA from https://github.com/linux-nvme/nvme-cli. If unsure, say N. + +config NVME_FC + tristate "NVM Express over Fabrics FC host driver" + depends on BLOCK + depends on HAS_DMA + select NVME_CORE + select NVME_FABRICS + select SG_POOL + help + This provides support for the NVMe over Fabrics protocol using + the FC transport. This allows you to use remote block devices + exported using the NVMe protocol set. + + To configure a NVMe over Fabrics controller use the nvme-cli tool + from https://github.com/linux-nvme/nvme-cli. + + If unsure, say N. diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 47abcec23514..f1a7d945fbb6 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_NVME_CORE) += nvme-core.o obj-$(CONFIG_BLK_DEV_NVME) += nvme.o obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o +obj-$(CONFIG_NVME_FC) += nvme-fc.o nvme-core-y := core.o nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o @@ -12,3 +13,5 @@ nvme-y += pci.o nvme-fabrics-y += fabrics.o nvme-rdma-y += rdma.o + +nvme-fc-y += fc.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 79e679d12f3b..35b3fee5a453 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -201,13 +201,7 @@ fail: void nvme_requeue_req(struct request *req) { - unsigned long flags; - - blk_mq_requeue_request(req); - spin_lock_irqsave(req->q->queue_lock, flags); - if (!blk_queue_stopped(req->q)) - blk_mq_kick_requeue_list(req->q); - spin_unlock_irqrestore(req->q->queue_lock, flags); + blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q)); } EXPORT_SYMBOL_GPL(nvme_requeue_req); @@ -227,8 +221,7 @@ struct request *nvme_alloc_request(struct request_queue *q, req->cmd_type = REQ_TYPE_DRV_PRIV; req->cmd_flags |= REQ_FAILFAST_DRIVER; - req->cmd = (unsigned char *)cmd; - req->cmd_len = sizeof(struct nvme_command); + nvme_req(req)->cmd = cmd; return req; } @@ -246,8 +239,6 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd) { struct nvme_dsm_range *range; - struct page *page; - int offset; unsigned int nr_bytes = blk_rq_bytes(req); range = kmalloc(sizeof(*range), GFP_ATOMIC); @@ -264,19 +255,27 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, cmnd->dsm.nr = 0; cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - req->completion_data = range; - page = virt_to_page(range); - offset = offset_in_page(range); - blk_add_request_payload(req, page, offset, sizeof(*range)); + req->special_vec.bv_page = virt_to_page(range); + req->special_vec.bv_offset = offset_in_page(range); + req->special_vec.bv_len = sizeof(*range); + req->rq_flags |= RQF_SPECIAL_PAYLOAD; - /* - * we set __data_len back to the size of the area to be discarded - * on disk. This allows us to report completion on the full amount - * of blocks described by the request. - */ - req->__data_len = nr_bytes; + return BLK_MQ_RQ_QUEUE_OK; +} - return 0; +static inline void nvme_setup_write_zeroes(struct nvme_ns *ns, + struct request *req, struct nvme_command *cmnd) +{ + struct nvme_write_zeroes_cmd *write_zeroes = &cmnd->write_zeroes; + + memset(cmnd, 0, sizeof(*cmnd)); + write_zeroes->opcode = nvme_cmd_write_zeroes; + write_zeroes->nsid = cpu_to_le32(ns->ns_id); + write_zeroes->slba = + cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + write_zeroes->length = + cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); + write_zeroes->control = 0; } static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req, @@ -295,7 +294,6 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req, memset(cmnd, 0, sizeof(*cmnd)); cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); - cmnd->rw.command_id = req->tag; cmnd->rw.nsid = cpu_to_le32(ns->ns_id); cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); @@ -324,17 +322,21 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req, int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd) { - int ret = 0; + int ret = BLK_MQ_RQ_QUEUE_OK; if (req->cmd_type == REQ_TYPE_DRV_PRIV) - memcpy(cmd, req->cmd, sizeof(*cmd)); + memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd)); else if (req_op(req) == REQ_OP_FLUSH) nvme_setup_flush(ns, cmd); else if (req_op(req) == REQ_OP_DISCARD) ret = nvme_setup_discard(ns, req, cmd); + else if (req_op(req) == REQ_OP_WRITE_ZEROES) + nvme_setup_write_zeroes(ns, req, cmd); else nvme_setup_rw(ns, req, cmd); + cmd->common.command_id = req->tag; + return ret; } EXPORT_SYMBOL_GPL(nvme_setup_cmd); @@ -344,7 +346,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd); * if the result is positive, it's an NVM Express status code */ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - struct nvme_completion *cqe, void *buffer, unsigned bufflen, + union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, int flags) { struct request *req; @@ -355,7 +357,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, return PTR_ERR(req); req->timeout = timeout ? timeout : ADMIN_TIMEOUT; - req->special = cqe; if (buffer && bufflen) { ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); @@ -364,6 +365,8 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, } blk_execute_rq(req->q, NULL, req, at_head); + if (result) + *result = nvme_req(req)->result; ret = req->errors; out: blk_mq_free_request(req); @@ -385,7 +388,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, u32 *result, unsigned timeout) { bool write = nvme_is_write(cmd); - struct nvme_completion cqe; struct nvme_ns *ns = q->queuedata; struct gendisk *disk = ns ? ns->disk : NULL; struct request *req; @@ -398,7 +400,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, return PTR_ERR(req); req->timeout = timeout ? timeout : ADMIN_TIMEOUT; - req->special = &cqe; if (ubuffer && bufflen) { ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, @@ -453,7 +454,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, blk_execute_rq(req->q, disk, req, 0); ret = req->errors; if (result) - *result = le32_to_cpu(cqe.result); + *result = le32_to_cpu(nvme_req(req)->result.u32); if (meta && !ret && !write) { if (copy_to_user(meta_buffer, meta, meta_len)) ret = -EFAULT; @@ -602,7 +603,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, void *buffer, size_t buflen, u32 *result) { struct nvme_command c; - struct nvme_completion cqe; + union nvme_result res; int ret; memset(&c, 0, sizeof(c)); @@ -610,10 +611,10 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, c.features.nsid = cpu_to_le32(nsid); c.features.fid = cpu_to_le32(fid); - ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, buffer, buflen, 0, + ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0, NVME_QID_ANY, 0, 0); if (ret >= 0 && result) - *result = le32_to_cpu(cqe.result); + *result = le32_to_cpu(res.u32); return ret; } @@ -621,7 +622,7 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, void *buffer, size_t buflen, u32 *result) { struct nvme_command c; - struct nvme_completion cqe; + union nvme_result res; int ret; memset(&c, 0, sizeof(c)); @@ -629,10 +630,10 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); - ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, + ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0, NVME_QID_ANY, 0, 0); if (ret >= 0 && result) - *result = le32_to_cpu(cqe.result); + *result = le32_to_cpu(res.u32); return ret; } @@ -951,6 +952,10 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) nvme_config_discard(ns); + if (ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) + blk_queue_max_write_zeroes_sectors(ns->queue, + ((u32)(USHRT_MAX + 1) * bs) >> 9); + blk_mq_unfreeze_queue(disk->queue); } @@ -1683,27 +1688,24 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (nvme_revalidate_ns(ns, &id)) goto out_free_queue; - if (nvme_nvm_ns_supported(ns, id)) { - if (nvme_nvm_register(ns, disk_name, node, - &nvme_ns_attr_group)) { - dev_warn(ctrl->dev, "%s: LightNVM init failure\n", - __func__); - goto out_free_id; - } - } else { - disk = alloc_disk_node(0, node); - if (!disk) - goto out_free_id; + if (nvme_nvm_ns_supported(ns, id) && + nvme_nvm_register(ns, disk_name, node)) { + dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__); + goto out_free_id; + } - disk->fops = &nvme_fops; - disk->private_data = ns; - disk->queue = ns->queue; - disk->flags = GENHD_FL_EXT_DEVT; - memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); - ns->disk = disk; + disk = alloc_disk_node(0, node); + if (!disk) + goto out_free_id; - __nvme_revalidate_disk(disk, id); - } + disk->fops = &nvme_fops; + disk->private_data = ns; + disk->queue = ns->queue; + disk->flags = GENHD_FL_EXT_DEVT; + memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); + ns->disk = disk; + + __nvme_revalidate_disk(disk, id); mutex_lock(&ctrl->namespaces_mutex); list_add_tail(&ns->list, &ctrl->namespaces); @@ -1713,14 +1715,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) kfree(id); - if (ns->ndev) - return; - device_add_disk(ctrl->device, ns->disk); if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_attr_group)) pr_warn("%s: failed to create sysfs group for identification\n", ns->disk->disk_name); + if (ns->ndev && nvme_nvm_register_sysfs(ns)) + pr_warn("%s: failed to register lightnvm sysfs group for identification\n", + ns->disk->disk_name); return; out_free_id: kfree(id); @@ -1742,6 +1744,8 @@ static void nvme_ns_remove(struct nvme_ns *ns) blk_integrity_unregister(ns->disk); sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_attr_group); + if (ns->ndev) + nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); blk_mq_abort_requeue_list(ns->queue); blk_cleanup_queue(ns->queue); @@ -1905,18 +1909,25 @@ static void nvme_async_event_work(struct work_struct *work) spin_unlock_irq(&ctrl->lock); } -void nvme_complete_async_event(struct nvme_ctrl *ctrl, - struct nvme_completion *cqe) +void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, + union nvme_result *res) { - u16 status = le16_to_cpu(cqe->status) >> 1; - u32 result = le32_to_cpu(cqe->result); + u32 result = le32_to_cpu(res->u32); + bool done = true; - if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) { + switch (le16_to_cpu(status) >> 1) { + case NVME_SC_SUCCESS: + done = false; + /*FALLTHRU*/ + case NVME_SC_ABORT_REQ: ++ctrl->event_limit; schedule_work(&ctrl->async_event_work); + break; + default: + break; } - if (status != NVME_SC_SUCCESS) + if (done) return; switch (result & 0xff07) { @@ -2078,14 +2089,8 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl) struct nvme_ns *ns; mutex_lock(&ctrl->namespaces_mutex); - list_for_each_entry(ns, &ctrl->namespaces, list) { - spin_lock_irq(ns->queue->queue_lock); - queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue); - spin_unlock_irq(ns->queue->queue_lock); - - blk_mq_cancel_requeue_work(ns->queue); - blk_mq_stop_hw_queues(ns->queue); - } + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_quiesce_queue(ns->queue); mutex_unlock(&ctrl->namespaces_mutex); } EXPORT_SYMBOL_GPL(nvme_stop_queues); @@ -2096,7 +2101,6 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry(ns, &ctrl->namespaces, list) { - queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue); blk_mq_start_stopped_hw_queues(ns->queue, true); blk_mq_kick_requeue_list(ns->queue); } diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5a3f008d3480..916d13608059 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -161,7 +161,7 @@ EXPORT_SYMBOL_GPL(nvmf_get_subsysnqn); int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) { struct nvme_command cmd; - struct nvme_completion cqe; + union nvme_result res; int ret; memset(&cmd, 0, sizeof(cmd)); @@ -169,11 +169,11 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) cmd.prop_get.fctype = nvme_fabrics_type_property_get; cmd.prop_get.offset = cpu_to_le32(off); - ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &cqe, NULL, 0, 0, + ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0, NVME_QID_ANY, 0, 0); if (ret >= 0) - *val = le64_to_cpu(cqe.result64); + *val = le64_to_cpu(res.u64); if (unlikely(ret != 0)) dev_err(ctrl->device, "Property Get error: %d, offset %#x\n", @@ -207,7 +207,7 @@ EXPORT_SYMBOL_GPL(nvmf_reg_read32); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) { struct nvme_command cmd; - struct nvme_completion cqe; + union nvme_result res; int ret; memset(&cmd, 0, sizeof(cmd)); @@ -216,11 +216,11 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) cmd.prop_get.attrib = 1; cmd.prop_get.offset = cpu_to_le32(off); - ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &cqe, NULL, 0, 0, + ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0, NVME_QID_ANY, 0, 0); if (ret >= 0) - *val = le64_to_cpu(cqe.result64); + *val = le64_to_cpu(res.u64); if (unlikely(ret != 0)) dev_err(ctrl->device, "Property Get error: %d, offset %#x\n", @@ -368,7 +368,7 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl, int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) { struct nvme_command cmd; - struct nvme_completion cqe; + union nvme_result res; struct nvmf_connect_data *data; int ret; @@ -400,16 +400,16 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); - ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &cqe, + ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, data, sizeof(*data), 0, NVME_QID_ANY, 1, BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); if (ret) { - nvmf_log_connect_error(ctrl, ret, le32_to_cpu(cqe.result), + nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); goto out_free_data; } - ctrl->cntlid = le16_to_cpu(cqe.result16); + ctrl->cntlid = le16_to_cpu(res.u16); out_free_data: kfree(data); @@ -441,7 +441,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) { struct nvme_command cmd; struct nvmf_connect_data *data; - struct nvme_completion cqe; + union nvme_result res; int ret; memset(&cmd, 0, sizeof(cmd)); @@ -459,11 +459,11 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); - ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &cqe, + ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res, data, sizeof(*data), 0, qid, 1, BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); if (ret) { - nvmf_log_connect_error(ctrl, ret, le32_to_cpu(cqe.result), + nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); } kfree(data); @@ -576,7 +576,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, nqnlen = strlen(opts->subsysnqn); if (nqnlen >= NVMF_NQN_SIZE) { pr_err("%s needs to be < %d bytes\n", - opts->subsysnqn, NVMF_NQN_SIZE); + opts->subsysnqn, NVMF_NQN_SIZE); ret = -EINVAL; goto out; } @@ -666,10 +666,12 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, if (nqnlen >= NVMF_NQN_SIZE) { pr_err("%s needs to be < %d bytes\n", p, NVMF_NQN_SIZE); + kfree(p); ret = -EINVAL; goto out; } opts->host = nvmf_host_add(p); + kfree(p); if (!opts->host) { ret = -ENOMEM; goto out; @@ -825,8 +827,7 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) out_unlock: mutex_unlock(&nvmf_transports_mutex); out_free_opts: - nvmf_host_put(opts->host); - kfree(opts); + nvmf_free_options(opts); return ERR_PTR(ret); } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c new file mode 100644 index 000000000000..771e2e761872 --- /dev/null +++ b/drivers/nvme/host/fc.c @@ -0,0 +1,2586 @@ +/* + * Copyright (c) 2016 Avago Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful. + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, + * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO + * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. + * See the GNU General Public License for more details, a copy of which + * can be found in the file COPYING included with this package + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/parser.h> +#include <uapi/scsi/fc/fc_fs.h> +#include <uapi/scsi/fc/fc_els.h> + +#include "nvme.h" +#include "fabrics.h" +#include <linux/nvme-fc-driver.h> +#include <linux/nvme-fc.h> + + +/* *************************** Data Structures/Defines ****************** */ + + +/* + * We handle AEN commands ourselves and don't even let the + * block layer know about them. + */ +#define NVME_FC_NR_AEN_COMMANDS 1 +#define NVME_FC_AQ_BLKMQ_DEPTH \ + (NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS) +#define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1) + +enum nvme_fc_queue_flags { + NVME_FC_Q_CONNECTED = (1 << 0), +}; + +#define NVMEFC_QUEUE_DELAY 3 /* ms units */ + +struct nvme_fc_queue { + struct nvme_fc_ctrl *ctrl; + struct device *dev; + struct blk_mq_hw_ctx *hctx; + void *lldd_handle; + int queue_size; + size_t cmnd_capsule_len; + u32 qnum; + u32 rqcnt; + u32 seqno; + + u64 connection_id; + atomic_t csn; + + unsigned long flags; +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + +struct nvmefc_ls_req_op { + struct nvmefc_ls_req ls_req; + + struct nvme_fc_ctrl *ctrl; + struct nvme_fc_queue *queue; + struct request *rq; + + int ls_error; + struct completion ls_done; + struct list_head lsreq_list; /* ctrl->ls_req_list */ + bool req_queued; +}; + +enum nvme_fcpop_state { + FCPOP_STATE_UNINIT = 0, + FCPOP_STATE_IDLE = 1, + FCPOP_STATE_ACTIVE = 2, + FCPOP_STATE_ABORTED = 3, +}; + +struct nvme_fc_fcp_op { + struct nvme_request nreq; /* + * nvme/host/core.c + * requires this to be + * the 1st element in the + * private structure + * associated with the + * request. + */ + struct nvmefc_fcp_req fcp_req; + + struct nvme_fc_ctrl *ctrl; + struct nvme_fc_queue *queue; + struct request *rq; + + atomic_t state; + u32 rqno; + u32 nents; + + struct nvme_fc_cmd_iu cmd_iu; + struct nvme_fc_ersp_iu rsp_iu; +}; + +struct nvme_fc_lport { + struct nvme_fc_local_port localport; + + struct ida endp_cnt; + struct list_head port_list; /* nvme_fc_port_list */ + struct list_head endp_list; + struct device *dev; /* physical device for dma */ + struct nvme_fc_port_template *ops; + struct kref ref; +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + +struct nvme_fc_rport { + struct nvme_fc_remote_port remoteport; + + struct list_head endp_list; /* for lport->endp_list */ + struct list_head ctrl_list; + spinlock_t lock; + struct kref ref; +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + +enum nvme_fcctrl_state { + FCCTRL_INIT = 0, + FCCTRL_ACTIVE = 1, +}; + +struct nvme_fc_ctrl { + spinlock_t lock; + struct nvme_fc_queue *queues; + u32 queue_count; + + struct device *dev; + struct nvme_fc_lport *lport; + struct nvme_fc_rport *rport; + u32 cnum; + + u64 association_id; + + u64 cap; + + struct list_head ctrl_list; /* rport->ctrl_list */ + struct list_head ls_req_list; + + struct blk_mq_tag_set admin_tag_set; + struct blk_mq_tag_set tag_set; + + struct work_struct delete_work; + struct kref ref; + int state; + + struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; + + struct nvme_ctrl ctrl; +}; + +static inline struct nvme_fc_ctrl * +to_fc_ctrl(struct nvme_ctrl *ctrl) +{ + return container_of(ctrl, struct nvme_fc_ctrl, ctrl); +} + +static inline struct nvme_fc_lport * +localport_to_lport(struct nvme_fc_local_port *portptr) +{ + return container_of(portptr, struct nvme_fc_lport, localport); +} + +static inline struct nvme_fc_rport * +remoteport_to_rport(struct nvme_fc_remote_port *portptr) +{ + return container_of(portptr, struct nvme_fc_rport, remoteport); +} + +static inline struct nvmefc_ls_req_op * +ls_req_to_lsop(struct nvmefc_ls_req *lsreq) +{ + return container_of(lsreq, struct nvmefc_ls_req_op, ls_req); +} + +static inline struct nvme_fc_fcp_op * +fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq) +{ + return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req); +} + + + +/* *************************** Globals **************************** */ + + +static DEFINE_SPINLOCK(nvme_fc_lock); + +static LIST_HEAD(nvme_fc_lport_list); +static DEFINE_IDA(nvme_fc_local_port_cnt); +static DEFINE_IDA(nvme_fc_ctrl_cnt); + +static struct workqueue_struct *nvme_fc_wq; + + + +/* *********************** FC-NVME Port Management ************************ */ + +static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *); +static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, + struct nvme_fc_queue *, unsigned int); + + +/** + * nvme_fc_register_localport - transport entry point called by an + * LLDD to register the existence of a NVME + * host FC port. + * @pinfo: pointer to information about the port to be registered + * @template: LLDD entrypoints and operational parameters for the port + * @dev: physical hardware device node port corresponds to. Will be + * used for DMA mappings + * @lport_p: pointer to a local port pointer. Upon success, the routine + * will allocate a nvme_fc_local_port structure and place its + * address in the local port pointer. Upon failure, local port + * pointer will be set to 0. + * + * Returns: + * a completion status. Must be 0 upon success; a negative errno + * (ex: -ENXIO) upon failure. + */ +int +nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, + struct nvme_fc_port_template *template, + struct device *dev, + struct nvme_fc_local_port **portptr) +{ + struct nvme_fc_lport *newrec; + unsigned long flags; + int ret, idx; + + if (!template->localport_delete || !template->remoteport_delete || + !template->ls_req || !template->fcp_io || + !template->ls_abort || !template->fcp_abort || + !template->max_hw_queues || !template->max_sgl_segments || + !template->max_dif_sgl_segments || !template->dma_boundary) { + ret = -EINVAL; + goto out_reghost_failed; + } + + newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz), + GFP_KERNEL); + if (!newrec) { + ret = -ENOMEM; + goto out_reghost_failed; + } + + idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL); + if (idx < 0) { + ret = -ENOSPC; + goto out_fail_kfree; + } + + if (!get_device(dev) && dev) { + ret = -ENODEV; + goto out_ida_put; + } + + INIT_LIST_HEAD(&newrec->port_list); + INIT_LIST_HEAD(&newrec->endp_list); + kref_init(&newrec->ref); + newrec->ops = template; + newrec->dev = dev; + ida_init(&newrec->endp_cnt); + newrec->localport.private = &newrec[1]; + newrec->localport.node_name = pinfo->node_name; + newrec->localport.port_name = pinfo->port_name; + newrec->localport.port_role = pinfo->port_role; + newrec->localport.port_id = pinfo->port_id; + newrec->localport.port_state = FC_OBJSTATE_ONLINE; + newrec->localport.port_num = idx; + + spin_lock_irqsave(&nvme_fc_lock, flags); + list_add_tail(&newrec->port_list, &nvme_fc_lport_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + if (dev) + dma_set_seg_boundary(dev, template->dma_boundary); + + *portptr = &newrec->localport; + return 0; + +out_ida_put: + ida_simple_remove(&nvme_fc_local_port_cnt, idx); +out_fail_kfree: + kfree(newrec); +out_reghost_failed: + *portptr = NULL; + + return ret; +} +EXPORT_SYMBOL_GPL(nvme_fc_register_localport); + +static void +nvme_fc_free_lport(struct kref *ref) +{ + struct nvme_fc_lport *lport = + container_of(ref, struct nvme_fc_lport, ref); + unsigned long flags; + + WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); + WARN_ON(!list_empty(&lport->endp_list)); + + /* remove from transport list */ + spin_lock_irqsave(&nvme_fc_lock, flags); + list_del(&lport->port_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + /* let the LLDD know we've finished tearing it down */ + lport->ops->localport_delete(&lport->localport); + + ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); + ida_destroy(&lport->endp_cnt); + + put_device(lport->dev); + + kfree(lport); +} + +static void +nvme_fc_lport_put(struct nvme_fc_lport *lport) +{ + kref_put(&lport->ref, nvme_fc_free_lport); +} + +static int +nvme_fc_lport_get(struct nvme_fc_lport *lport) +{ + return kref_get_unless_zero(&lport->ref); +} + +/** + * nvme_fc_unregister_localport - transport entry point called by an + * LLDD to deregister/remove a previously + * registered a NVME host FC port. + * @localport: pointer to the (registered) local port that is to be + * deregistered. + * + * Returns: + * a completion status. Must be 0 upon success; a negative errno + * (ex: -ENXIO) upon failure. + */ +int +nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr) +{ + struct nvme_fc_lport *lport = localport_to_lport(portptr); + unsigned long flags; + + if (!portptr) + return -EINVAL; + + spin_lock_irqsave(&nvme_fc_lock, flags); + + if (portptr->port_state != FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&nvme_fc_lock, flags); + return -EINVAL; + } + portptr->port_state = FC_OBJSTATE_DELETED; + + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + nvme_fc_lport_put(lport); + + return 0; +} +EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport); + +/** + * nvme_fc_register_remoteport - transport entry point called by an + * LLDD to register the existence of a NVME + * subsystem FC port on its fabric. + * @localport: pointer to the (registered) local port that the remote + * subsystem port is connected to. + * @pinfo: pointer to information about the port to be registered + * @rport_p: pointer to a remote port pointer. Upon success, the routine + * will allocate a nvme_fc_remote_port structure and place its + * address in the remote port pointer. Upon failure, remote port + * pointer will be set to 0. + * + * Returns: + * a completion status. Must be 0 upon success; a negative errno + * (ex: -ENXIO) upon failure. + */ +int +nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, + struct nvme_fc_port_info *pinfo, + struct nvme_fc_remote_port **portptr) +{ + struct nvme_fc_lport *lport = localport_to_lport(localport); + struct nvme_fc_rport *newrec; + unsigned long flags; + int ret, idx; + + newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), + GFP_KERNEL); + if (!newrec) { + ret = -ENOMEM; + goto out_reghost_failed; + } + + if (!nvme_fc_lport_get(lport)) { + ret = -ESHUTDOWN; + goto out_kfree_rport; + } + + idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); + if (idx < 0) { + ret = -ENOSPC; + goto out_lport_put; + } + + INIT_LIST_HEAD(&newrec->endp_list); + INIT_LIST_HEAD(&newrec->ctrl_list); + kref_init(&newrec->ref); + spin_lock_init(&newrec->lock); + newrec->remoteport.localport = &lport->localport; + newrec->remoteport.private = &newrec[1]; + newrec->remoteport.port_role = pinfo->port_role; + newrec->remoteport.node_name = pinfo->node_name; + newrec->remoteport.port_name = pinfo->port_name; + newrec->remoteport.port_id = pinfo->port_id; + newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; + newrec->remoteport.port_num = idx; + + spin_lock_irqsave(&nvme_fc_lock, flags); + list_add_tail(&newrec->endp_list, &lport->endp_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + *portptr = &newrec->remoteport; + return 0; + +out_lport_put: + nvme_fc_lport_put(lport); +out_kfree_rport: + kfree(newrec); +out_reghost_failed: + *portptr = NULL; + return ret; + +} +EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); + +static void +nvme_fc_free_rport(struct kref *ref) +{ + struct nvme_fc_rport *rport = + container_of(ref, struct nvme_fc_rport, ref); + struct nvme_fc_lport *lport = + localport_to_lport(rport->remoteport.localport); + unsigned long flags; + + WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED); + WARN_ON(!list_empty(&rport->ctrl_list)); + + /* remove from lport list */ + spin_lock_irqsave(&nvme_fc_lock, flags); + list_del(&rport->endp_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + /* let the LLDD know we've finished tearing it down */ + lport->ops->remoteport_delete(&rport->remoteport); + + ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); + + kfree(rport); + + nvme_fc_lport_put(lport); +} + +static void +nvme_fc_rport_put(struct nvme_fc_rport *rport) +{ + kref_put(&rport->ref, nvme_fc_free_rport); +} + +static int +nvme_fc_rport_get(struct nvme_fc_rport *rport) +{ + return kref_get_unless_zero(&rport->ref); +} + +/** + * nvme_fc_unregister_remoteport - transport entry point called by an + * LLDD to deregister/remove a previously + * registered a NVME subsystem FC port. + * @remoteport: pointer to the (registered) remote port that is to be + * deregistered. + * + * Returns: + * a completion status. Must be 0 upon success; a negative errno + * (ex: -ENXIO) upon failure. + */ +int +nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) +{ + struct nvme_fc_rport *rport = remoteport_to_rport(portptr); + struct nvme_fc_ctrl *ctrl; + unsigned long flags; + + if (!portptr) + return -EINVAL; + + spin_lock_irqsave(&rport->lock, flags); + + if (portptr->port_state != FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&rport->lock, flags); + return -EINVAL; + } + portptr->port_state = FC_OBJSTATE_DELETED; + + /* tear down all associations to the remote port */ + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) + __nvme_fc_del_ctrl(ctrl); + + spin_unlock_irqrestore(&rport->lock, flags); + + nvme_fc_rport_put(rport); + return 0; +} +EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); + + +/* *********************** FC-NVME DMA Handling **************************** */ + +/* + * The fcloop device passes in a NULL device pointer. Real LLD's will + * pass in a valid device pointer. If NULL is passed to the dma mapping + * routines, depending on the platform, it may or may not succeed, and + * may crash. + * + * As such: + * Wrapper all the dma routines and check the dev pointer. + * + * If simple mappings (return just a dma address, we'll noop them, + * returning a dma address of 0. + * + * On more complex mappings (dma_map_sg), a pseudo routine fills + * in the scatter list, setting all dma addresses to 0. + */ + +static inline dma_addr_t +fc_dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction dir) +{ + return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L; +} + +static inline int +fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dev ? dma_mapping_error(dev, dma_addr) : 0; +} + +static inline void +fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + if (dev) + dma_unmap_single(dev, addr, size, dir); +} + +static inline void +fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + if (dev) + dma_sync_single_for_cpu(dev, addr, size, dir); +} + +static inline void +fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + if (dev) + dma_sync_single_for_device(dev, addr, size, dir); +} + +/* pseudo dma_map_sg call */ +static int +fc_map_sg(struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + WARN_ON(nents == 0 || sg[0].length == 0); + + for_each_sg(sg, s, nents, i) { + s->dma_address = 0L; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + s->dma_length = s->length; +#endif + } + return nents; +} + +static inline int +fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir) +{ + return dev ? dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents); +} + +static inline void +fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir) +{ + if (dev) + dma_unmap_sg(dev, sg, nents, dir); +} + + +/* *********************** FC-NVME LS Handling **************************** */ + +static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); +static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); + + +static void +__nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl, + struct nvmefc_ls_req_op *lsop) +{ + struct nvmefc_ls_req *lsreq = &lsop->ls_req; + unsigned long flags; + + spin_lock_irqsave(&ctrl->lock, flags); + + if (!lsop->req_queued) { + spin_unlock_irqrestore(&ctrl->lock, flags); + return; + } + + list_del(&lsop->lsreq_list); + + lsop->req_queued = false; + + spin_unlock_irqrestore(&ctrl->lock, flags); + + fc_dma_unmap_single(ctrl->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + DMA_BIDIRECTIONAL); + + nvme_fc_ctrl_put(ctrl); +} + +static int +__nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, + struct nvmefc_ls_req_op *lsop, + void (*done)(struct nvmefc_ls_req *req, int status)) +{ + struct nvmefc_ls_req *lsreq = &lsop->ls_req; + unsigned long flags; + int ret; + + if (!nvme_fc_ctrl_get(ctrl)) + return -ESHUTDOWN; + + lsreq->done = done; + lsop->ctrl = ctrl; + lsop->req_queued = false; + INIT_LIST_HEAD(&lsop->lsreq_list); + init_completion(&lsop->ls_done); + + lsreq->rqstdma = fc_dma_map_single(ctrl->dev, lsreq->rqstaddr, + lsreq->rqstlen + lsreq->rsplen, + DMA_BIDIRECTIONAL); + if (fc_dma_mapping_error(ctrl->dev, lsreq->rqstdma)) { + nvme_fc_ctrl_put(ctrl); + dev_err(ctrl->dev, + "els request command failed EFAULT.\n"); + return -EFAULT; + } + lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; + + spin_lock_irqsave(&ctrl->lock, flags); + + list_add_tail(&lsop->lsreq_list, &ctrl->ls_req_list); + + lsop->req_queued = true; + + spin_unlock_irqrestore(&ctrl->lock, flags); + + ret = ctrl->lport->ops->ls_req(&ctrl->lport->localport, + &ctrl->rport->remoteport, lsreq); + if (ret) + lsop->ls_error = ret; + + return ret; +} + +static void +nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) +{ + struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); + + lsop->ls_error = status; + complete(&lsop->ls_done); +} + +static int +nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) +{ + struct nvmefc_ls_req *lsreq = &lsop->ls_req; + struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; + int ret; + + ret = __nvme_fc_send_ls_req(ctrl, lsop, nvme_fc_send_ls_req_done); + + if (!ret) + /* + * No timeout/not interruptible as we need the struct + * to exist until the lldd calls us back. Thus mandate + * wait until driver calls back. lldd responsible for + * the timeout action + */ + wait_for_completion(&lsop->ls_done); + + __nvme_fc_finish_ls_req(ctrl, lsop); + + if (ret) { + dev_err(ctrl->dev, + "ls request command failed (%d).\n", ret); + return ret; + } + + /* ACC or RJT payload ? */ + if (rjt->w0.ls_cmd == FCNVME_LS_RJT) + return -ENXIO; + + return 0; +} + +static void +nvme_fc_send_ls_req_async(struct nvme_fc_ctrl *ctrl, + struct nvmefc_ls_req_op *lsop, + void (*done)(struct nvmefc_ls_req *req, int status)) +{ + int ret; + + ret = __nvme_fc_send_ls_req(ctrl, lsop, done); + + /* don't wait for completion */ + + if (ret) + done(&lsop->ls_req, ret); +} + +/* Validation Error indexes into the string table below */ +enum { + VERR_NO_ERROR = 0, + VERR_LSACC = 1, + VERR_LSDESC_RQST = 2, + VERR_LSDESC_RQST_LEN = 3, + VERR_ASSOC_ID = 4, + VERR_ASSOC_ID_LEN = 5, + VERR_CONN_ID = 6, + VERR_CONN_ID_LEN = 7, + VERR_CR_ASSOC = 8, + VERR_CR_ASSOC_ACC_LEN = 9, + VERR_CR_CONN = 10, + VERR_CR_CONN_ACC_LEN = 11, + VERR_DISCONN = 12, + VERR_DISCONN_ACC_LEN = 13, +}; + +static char *validation_errors[] = { + "OK", + "Not LS_ACC", + "Not LSDESC_RQST", + "Bad LSDESC_RQST Length", + "Not Association ID", + "Bad Association ID Length", + "Not Connection ID", + "Bad Connection ID Length", + "Not CR_ASSOC Rqst", + "Bad CR_ASSOC ACC Length", + "Not CR_CONN Rqst", + "Bad CR_CONN ACC Length", + "Not Disconnect Rqst", + "Bad Disconnect ACC Length", +}; + +static int +nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, + struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) +{ + struct nvmefc_ls_req_op *lsop; + struct nvmefc_ls_req *lsreq; + struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; + struct fcnvme_ls_cr_assoc_acc *assoc_acc; + int ret, fcret = 0; + + lsop = kzalloc((sizeof(*lsop) + + ctrl->lport->ops->lsrqst_priv_sz + + sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); + if (!lsop) { + ret = -ENOMEM; + goto out_no_memory; + } + lsreq = &lsop->ls_req; + + lsreq->private = (void *)&lsop[1]; + assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) + (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1]; + + assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; + assoc_rqst->desc_list_len = + cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); + + assoc_rqst->assoc_cmd.desc_tag = + cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD); + assoc_rqst->assoc_cmd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); + + assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); + assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize); + /* Linux supports only Dynamic controllers */ + assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff); + memcpy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id, + min_t(size_t, FCNVME_ASSOC_HOSTID_LEN, sizeof(uuid_be))); + strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn, + min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE)); + strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn, + min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE)); + + lsop->queue = queue; + lsreq->rqstaddr = assoc_rqst; + lsreq->rqstlen = sizeof(*assoc_rqst); + lsreq->rspaddr = assoc_acc; + lsreq->rsplen = sizeof(*assoc_acc); + lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + + ret = nvme_fc_send_ls_req(ctrl, lsop); + if (ret) + goto out_free_buffer; + + /* process connect LS completion */ + + /* validate the ACC response */ + if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) + fcret = VERR_LSACC; + if (assoc_acc->hdr.desc_list_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_ls_cr_assoc_acc))) + fcret = VERR_CR_ASSOC_ACC_LEN; + if (assoc_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) + fcret = VERR_LSDESC_RQST; + else if (assoc_acc->hdr.rqst.desc_len != + fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) + fcret = VERR_LSDESC_RQST_LEN; + else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION) + fcret = VERR_CR_ASSOC; + else if (assoc_acc->associd.desc_tag != + cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) + fcret = VERR_ASSOC_ID; + else if (assoc_acc->associd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id))) + fcret = VERR_ASSOC_ID_LEN; + else if (assoc_acc->connectid.desc_tag != + cpu_to_be32(FCNVME_LSDESC_CONN_ID)) + fcret = VERR_CONN_ID; + else if (assoc_acc->connectid.desc_len != + fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) + fcret = VERR_CONN_ID_LEN; + + if (fcret) { + ret = -EBADF; + dev_err(ctrl->dev, + "q %d connect failed: %s\n", + queue->qnum, validation_errors[fcret]); + } else { + ctrl->association_id = + be64_to_cpu(assoc_acc->associd.association_id); + queue->connection_id = + be64_to_cpu(assoc_acc->connectid.connection_id); + set_bit(NVME_FC_Q_CONNECTED, &queue->flags); + } + +out_free_buffer: + kfree(lsop); +out_no_memory: + if (ret) + dev_err(ctrl->dev, + "queue %d connect admin queue failed (%d).\n", + queue->qnum, ret); + return ret; +} + +static int +nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, + u16 qsize, u16 ersp_ratio) +{ + struct nvmefc_ls_req_op *lsop; + struct nvmefc_ls_req *lsreq; + struct fcnvme_ls_cr_conn_rqst *conn_rqst; + struct fcnvme_ls_cr_conn_acc *conn_acc; + int ret, fcret = 0; + + lsop = kzalloc((sizeof(*lsop) + + ctrl->lport->ops->lsrqst_priv_sz + + sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); + if (!lsop) { + ret = -ENOMEM; + goto out_no_memory; + } + lsreq = &lsop->ls_req; + + lsreq->private = (void *)&lsop[1]; + conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) + (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; + + conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; + conn_rqst->desc_list_len = cpu_to_be32( + sizeof(struct fcnvme_lsdesc_assoc_id) + + sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); + + conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); + conn_rqst->associd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id)); + conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); + conn_rqst->connect_cmd.desc_tag = + cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD); + conn_rqst->connect_cmd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); + conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); + conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum); + conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize); + + lsop->queue = queue; + lsreq->rqstaddr = conn_rqst; + lsreq->rqstlen = sizeof(*conn_rqst); + lsreq->rspaddr = conn_acc; + lsreq->rsplen = sizeof(*conn_acc); + lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + + ret = nvme_fc_send_ls_req(ctrl, lsop); + if (ret) + goto out_free_buffer; + + /* process connect LS completion */ + + /* validate the ACC response */ + if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) + fcret = VERR_LSACC; + if (conn_acc->hdr.desc_list_len != + fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc))) + fcret = VERR_CR_CONN_ACC_LEN; + if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) + fcret = VERR_LSDESC_RQST; + else if (conn_acc->hdr.rqst.desc_len != + fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) + fcret = VERR_LSDESC_RQST_LEN; + else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION) + fcret = VERR_CR_CONN; + else if (conn_acc->connectid.desc_tag != + cpu_to_be32(FCNVME_LSDESC_CONN_ID)) + fcret = VERR_CONN_ID; + else if (conn_acc->connectid.desc_len != + fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) + fcret = VERR_CONN_ID_LEN; + + if (fcret) { + ret = -EBADF; + dev_err(ctrl->dev, + "q %d connect failed: %s\n", + queue->qnum, validation_errors[fcret]); + } else { + queue->connection_id = + be64_to_cpu(conn_acc->connectid.connection_id); + set_bit(NVME_FC_Q_CONNECTED, &queue->flags); + } + +out_free_buffer: + kfree(lsop); +out_no_memory: + if (ret) + dev_err(ctrl->dev, + "queue %d connect command failed (%d).\n", + queue->qnum, ret); + return ret; +} + +static void +nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) +{ + struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); + struct nvme_fc_ctrl *ctrl = lsop->ctrl; + + __nvme_fc_finish_ls_req(ctrl, lsop); + + if (status) + dev_err(ctrl->dev, + "disconnect assoc ls request command failed (%d).\n", + status); + + /* fc-nvme iniator doesn't care about success or failure of cmd */ + + kfree(lsop); +} + +/* + * This routine sends a FC-NVME LS to disconnect (aka terminate) + * the FC-NVME Association. Terminating the association also + * terminates the FC-NVME connections (per queue, both admin and io + * queues) that are part of the association. E.g. things are torn + * down, and the related FC-NVME Association ID and Connection IDs + * become invalid. + * + * The behavior of the fc-nvme initiator is such that it's + * understanding of the association and connections will implicitly + * be torn down. The action is implicit as it may be due to a loss of + * connectivity with the fc-nvme target, so you may never get a + * response even if you tried. As such, the action of this routine + * is to asynchronously send the LS, ignore any results of the LS, and + * continue on with terminating the association. If the fc-nvme target + * is present and receives the LS, it too can tear down. + */ +static void +nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) +{ + struct fcnvme_ls_disconnect_rqst *discon_rqst; + struct fcnvme_ls_disconnect_acc *discon_acc; + struct nvmefc_ls_req_op *lsop; + struct nvmefc_ls_req *lsreq; + + lsop = kzalloc((sizeof(*lsop) + + ctrl->lport->ops->lsrqst_priv_sz + + sizeof(*discon_rqst) + sizeof(*discon_acc)), + GFP_KERNEL); + if (!lsop) + /* couldn't sent it... too bad */ + return; + + lsreq = &lsop->ls_req; + + lsreq->private = (void *)&lsop[1]; + discon_rqst = (struct fcnvme_ls_disconnect_rqst *) + (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1]; + + discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT; + discon_rqst->desc_list_len = cpu_to_be32( + sizeof(struct fcnvme_lsdesc_assoc_id) + + sizeof(struct fcnvme_lsdesc_disconn_cmd)); + + discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); + discon_rqst->associd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id)); + + discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); + + discon_rqst->discon_cmd.desc_tag = cpu_to_be32( + FCNVME_LSDESC_DISCONN_CMD); + discon_rqst->discon_cmd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_disconn_cmd)); + discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION; + discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id); + + lsreq->rqstaddr = discon_rqst; + lsreq->rqstlen = sizeof(*discon_rqst); + lsreq->rspaddr = discon_acc; + lsreq->rsplen = sizeof(*discon_acc); + lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + + nvme_fc_send_ls_req_async(ctrl, lsop, nvme_fc_disconnect_assoc_done); + + /* only meaningful part to terminating the association */ + ctrl->association_id = 0; +} + + +/* *********************** NVME Ctrl Routines **************************** */ + + +static int +nvme_fc_reinit_request(void *data, struct request *rq) +{ + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; + + memset(cmdiu, 0, sizeof(*cmdiu)); + cmdiu->scsi_id = NVME_CMD_SCSI_ID; + cmdiu->fc_id = NVME_CMD_FC_ID; + cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); + memset(&op->rsp_iu, 0, sizeof(op->rsp_iu)); + + return 0; +} + +static void +__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, + struct nvme_fc_fcp_op *op) +{ + fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma, + sizeof(op->rsp_iu), DMA_FROM_DEVICE); + fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma, + sizeof(op->cmd_iu), DMA_TO_DEVICE); + + atomic_set(&op->state, FCPOP_STATE_UNINIT); +} + +static void +nvme_fc_exit_request(void *data, struct request *rq, + unsigned int hctx_idx, unsigned int rq_idx) +{ + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + + return __nvme_fc_exit_request(data, op); +} + +static void +nvme_fc_exit_aen_ops(struct nvme_fc_ctrl *ctrl) +{ + struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; + int i; + + for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + if (atomic_read(&aen_op->state) == FCPOP_STATE_UNINIT) + continue; + __nvme_fc_exit_request(ctrl, aen_op); + nvme_fc_ctrl_put(ctrl); + } +} + +void +nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) +{ + struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); + struct request *rq = op->rq; + struct nvmefc_fcp_req *freq = &op->fcp_req; + struct nvme_fc_ctrl *ctrl = op->ctrl; + struct nvme_fc_queue *queue = op->queue; + struct nvme_completion *cqe = &op->rsp_iu.cqe; + u16 status; + + /* + * WARNING: + * The current linux implementation of a nvme controller + * allocates a single tag set for all io queues and sizes + * the io queues to fully hold all possible tags. Thus, the + * implementation does not reference or care about the sqhd + * value as it never needs to use the sqhd/sqtail pointers + * for submission pacing. + * + * This affects the FC-NVME implementation in two ways: + * 1) As the value doesn't matter, we don't need to waste + * cycles extracting it from ERSPs and stamping it in the + * cases where the transport fabricates CQEs on successful + * completions. + * 2) The FC-NVME implementation requires that delivery of + * ERSP completions are to go back to the nvme layer in order + * relative to the rsn, such that the sqhd value will always + * be "in order" for the nvme layer. As the nvme layer in + * linux doesn't care about sqhd, there's no need to return + * them in order. + * + * Additionally: + * As the core nvme layer in linux currently does not look at + * every field in the cqe - in cases where the FC transport must + * fabricate a CQE, the following fields will not be set as they + * are not referenced: + * cqe.sqid, cqe.sqhd, cqe.command_id + */ + + fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma, + sizeof(op->rsp_iu), DMA_FROM_DEVICE); + + if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) + status = NVME_SC_ABORT_REQ | NVME_SC_DNR; + else + status = freq->status; + + /* + * For the linux implementation, if we have an unsuccesful + * status, they blk-mq layer can typically be called with the + * non-zero status and the content of the cqe isn't important. + */ + if (status) + goto done; + + /* + * command completed successfully relative to the wire + * protocol. However, validate anything received and + * extract the status and result from the cqe (create it + * where necessary). + */ + + switch (freq->rcv_rsplen) { + + case 0: + case NVME_FC_SIZEOF_ZEROS_RSP: + /* + * No response payload or 12 bytes of payload (which + * should all be zeros) are considered successful and + * no payload in the CQE by the transport. + */ + if (freq->transferred_length != + be32_to_cpu(op->cmd_iu.data_len)) { + status = -EIO; + goto done; + } + op->nreq.result.u64 = 0; + break; + + case sizeof(struct nvme_fc_ersp_iu): + /* + * The ERSP IU contains a full completion with CQE. + * Validate ERSP IU and look at cqe. + */ + if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) != + (freq->rcv_rsplen / 4) || + be32_to_cpu(op->rsp_iu.xfrd_len) != + freq->transferred_length || + op->rqno != le16_to_cpu(cqe->command_id))) { + status = -EIO; + goto done; + } + op->nreq.result = cqe->result; + status = le16_to_cpu(cqe->status) >> 1; + break; + + default: + status = -EIO; + goto done; + } + +done: + if (!queue->qnum && op->rqno >= AEN_CMDID_BASE) { + nvme_complete_async_event(&queue->ctrl->ctrl, status, + &op->nreq.result); + nvme_fc_ctrl_put(ctrl); + return; + } + + blk_mq_complete_request(rq, status); +} + +static int +__nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, + struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, + struct request *rq, u32 rqno) +{ + struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; + int ret = 0; + + memset(op, 0, sizeof(*op)); + op->fcp_req.cmdaddr = &op->cmd_iu; + op->fcp_req.cmdlen = sizeof(op->cmd_iu); + op->fcp_req.rspaddr = &op->rsp_iu; + op->fcp_req.rsplen = sizeof(op->rsp_iu); + op->fcp_req.done = nvme_fc_fcpio_done; + op->fcp_req.first_sgl = (struct scatterlist *)&op[1]; + op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; + op->ctrl = ctrl; + op->queue = queue; + op->rq = rq; + op->rqno = rqno; + + cmdiu->scsi_id = NVME_CMD_SCSI_ID; + cmdiu->fc_id = NVME_CMD_FC_ID; + cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); + + op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev, + &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE); + if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) { + dev_err(ctrl->dev, + "FCP Op failed - cmdiu dma mapping failed.\n"); + ret = EFAULT; + goto out_on_error; + } + + op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev, + &op->rsp_iu, sizeof(op->rsp_iu), + DMA_FROM_DEVICE); + if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) { + dev_err(ctrl->dev, + "FCP Op failed - rspiu dma mapping failed.\n"); + ret = EFAULT; + } + + atomic_set(&op->state, FCPOP_STATE_IDLE); +out_on_error: + return ret; +} + +static int +nvme_fc_init_request(void *data, struct request *rq, + unsigned int hctx_idx, unsigned int rq_idx, + unsigned int numa_node) +{ + struct nvme_fc_ctrl *ctrl = data; + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1]; + + return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); +} + +static int +nvme_fc_init_admin_request(void *data, struct request *rq, + unsigned int hctx_idx, unsigned int rq_idx, + unsigned int numa_node) +{ + struct nvme_fc_ctrl *ctrl = data; + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fc_queue *queue = &ctrl->queues[0]; + + return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); +} + +static int +nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) +{ + struct nvme_fc_fcp_op *aen_op; + struct nvme_fc_cmd_iu *cmdiu; + struct nvme_command *sqe; + int i, ret; + + aen_op = ctrl->aen_ops; + for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + cmdiu = &aen_op->cmd_iu; + sqe = &cmdiu->sqe; + ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], + aen_op, (struct request *)NULL, + (AEN_CMDID_BASE + i)); + if (ret) + return ret; + + memset(sqe, 0, sizeof(*sqe)); + sqe->common.opcode = nvme_admin_async_event; + sqe->common.command_id = AEN_CMDID_BASE + i; + } + return 0; +} + + +static inline void +__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, + unsigned int qidx) +{ + struct nvme_fc_queue *queue = &ctrl->queues[qidx]; + + hctx->driver_data = queue; + queue->hctx = hctx; +} + +static int +nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int hctx_idx) +{ + struct nvme_fc_ctrl *ctrl = data; + + __nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1); + + return 0; +} + +static int +nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int hctx_idx) +{ + struct nvme_fc_ctrl *ctrl = data; + + __nvme_fc_init_hctx(hctx, ctrl, hctx_idx); + + return 0; +} + +static void +nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size) +{ + struct nvme_fc_queue *queue; + + queue = &ctrl->queues[idx]; + memset(queue, 0, sizeof(*queue)); + queue->ctrl = ctrl; + queue->qnum = idx; + atomic_set(&queue->csn, 1); + queue->dev = ctrl->dev; + + if (idx > 0) + queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16; + else + queue->cmnd_capsule_len = sizeof(struct nvme_command); + + queue->queue_size = queue_size; + + /* + * Considered whether we should allocate buffers for all SQEs + * and CQEs and dma map them - mapping their respective entries + * into the request structures (kernel vm addr and dma address) + * thus the driver could use the buffers/mappings directly. + * It only makes sense if the LLDD would use them for its + * messaging api. It's very unlikely most adapter api's would use + * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload + * structures were used instead. + */ +} + +/* + * This routine terminates a queue at the transport level. + * The transport has already ensured that all outstanding ios on + * the queue have been terminated. + * The transport will send a Disconnect LS request to terminate + * the queue's connection. Termination of the admin queue will also + * terminate the association at the target. + */ +static void +nvme_fc_free_queue(struct nvme_fc_queue *queue) +{ + if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) + return; + + /* + * Current implementation never disconnects a single queue. + * It always terminates a whole association. So there is never + * a disconnect(queue) LS sent to the target. + */ + + queue->connection_id = 0; + clear_bit(NVME_FC_Q_CONNECTED, &queue->flags); +} + +static void +__nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, + struct nvme_fc_queue *queue, unsigned int qidx) +{ + if (ctrl->lport->ops->delete_queue) + ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx, + queue->lldd_handle); + queue->lldd_handle = NULL; +} + +static void +nvme_fc_destroy_admin_queue(struct nvme_fc_ctrl *ctrl) +{ + __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); + blk_cleanup_queue(ctrl->ctrl.admin_q); + blk_mq_free_tag_set(&ctrl->admin_tag_set); + nvme_fc_free_queue(&ctrl->queues[0]); +} + +static void +nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) +{ + int i; + + for (i = 1; i < ctrl->queue_count; i++) + nvme_fc_free_queue(&ctrl->queues[i]); +} + +static int +__nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl, + struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize) +{ + int ret = 0; + + queue->lldd_handle = NULL; + if (ctrl->lport->ops->create_queue) + ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport, + qidx, qsize, &queue->lldd_handle); + + return ret; +} + +static void +nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl) +{ + struct nvme_fc_queue *queue = &ctrl->queues[ctrl->queue_count - 1]; + int i; + + for (i = ctrl->queue_count - 1; i >= 1; i--, queue--) + __nvme_fc_delete_hw_queue(ctrl, queue, i); +} + +static int +nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) +{ + struct nvme_fc_queue *queue = &ctrl->queues[1]; + int i, j, ret; + + for (i = 1; i < ctrl->queue_count; i++, queue++) { + ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); + if (ret) { + for (j = i-1; j >= 0; j--) + __nvme_fc_delete_hw_queue(ctrl, + &ctrl->queues[j], j); + return ret; + } + } + + return 0; +} + +static int +nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) +{ + int i, ret = 0; + + for (i = 1; i < ctrl->queue_count; i++) { + ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize, + (qsize / 5)); + if (ret) + break; + ret = nvmf_connect_io_queue(&ctrl->ctrl, i); + if (ret) + break; + } + + return ret; +} + +static void +nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl) +{ + int i; + + for (i = 1; i < ctrl->queue_count; i++) + nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize); +} + +static void +nvme_fc_ctrl_free(struct kref *ref) +{ + struct nvme_fc_ctrl *ctrl = + container_of(ref, struct nvme_fc_ctrl, ref); + unsigned long flags; + + if (ctrl->state != FCCTRL_INIT) { + /* remove from rport list */ + spin_lock_irqsave(&ctrl->rport->lock, flags); + list_del(&ctrl->ctrl_list); + spin_unlock_irqrestore(&ctrl->rport->lock, flags); + } + + put_device(ctrl->dev); + nvme_fc_rport_put(ctrl->rport); + + kfree(ctrl->queues); + ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); + nvmf_free_options(ctrl->ctrl.opts); + kfree(ctrl); +} + +static void +nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl) +{ + kref_put(&ctrl->ref, nvme_fc_ctrl_free); +} + +static int +nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) +{ + return kref_get_unless_zero(&ctrl->ref); +} + +/* + * All accesses from nvme core layer done - can now free the + * controller. Called after last nvme_put_ctrl() call + */ +static void +nvme_fc_free_nvme_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + + WARN_ON(nctrl != &ctrl->ctrl); + + /* + * Tear down the association, which will generate link + * traffic to terminate connections + */ + + if (ctrl->state != FCCTRL_INIT) { + /* send a Disconnect(association) LS to fc-nvme target */ + nvme_fc_xmt_disconnect_assoc(ctrl); + + if (ctrl->ctrl.tagset) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(&ctrl->tag_set); + nvme_fc_delete_hw_io_queues(ctrl); + nvme_fc_free_io_queues(ctrl); + } + + nvme_fc_exit_aen_ops(ctrl); + + nvme_fc_destroy_admin_queue(ctrl); + } + + nvme_fc_ctrl_put(ctrl); +} + + +static int +__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) +{ + int state; + + state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); + if (state != FCPOP_STATE_ACTIVE) { + atomic_set(&op->state, state); + return -ECANCELED; /* fail */ + } + + ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, + &ctrl->rport->remoteport, + op->queue->lldd_handle, + &op->fcp_req); + + return 0; +} + +enum blk_eh_timer_return +nvme_fc_timeout(struct request *rq, bool reserved) +{ + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fc_ctrl *ctrl = op->ctrl; + int ret; + + if (reserved) + return BLK_EH_RESET_TIMER; + + ret = __nvme_fc_abort_op(ctrl, op); + if (ret) + /* io wasn't active to abort consider it done */ + return BLK_EH_HANDLED; + + /* + * TODO: force a controller reset + * when that happens, queues will be torn down and outstanding + * ios will be terminated, and the above abort, on a single io + * will no longer be needed. + */ + + return BLK_EH_HANDLED; +} + +static int +nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, + struct nvme_fc_fcp_op *op) +{ + struct nvmefc_fcp_req *freq = &op->fcp_req; + u32 map_len = nvme_map_len(rq); + enum dma_data_direction dir; + int ret; + + freq->sg_cnt = 0; + + if (!map_len) + return 0; + + freq->sg_table.sgl = freq->first_sgl; + ret = sg_alloc_table_chained(&freq->sg_table, rq->nr_phys_segments, + freq->sg_table.sgl); + if (ret) + return -ENOMEM; + + op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl); + WARN_ON(op->nents > rq->nr_phys_segments); + dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; + freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, + op->nents, dir); + if (unlikely(freq->sg_cnt <= 0)) { + sg_free_table_chained(&freq->sg_table, true); + freq->sg_cnt = 0; + return -EFAULT; + } + + /* + * TODO: blk_integrity_rq(rq) for DIF + */ + return 0; +} + +static void +nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, + struct nvme_fc_fcp_op *op) +{ + struct nvmefc_fcp_req *freq = &op->fcp_req; + + if (!freq->sg_cnt) + return; + + fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, + ((rq_data_dir(rq) == WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE)); + + nvme_cleanup_cmd(rq); + + sg_free_table_chained(&freq->sg_table, true); + + freq->sg_cnt = 0; +} + +/* + * In FC, the queue is a logical thing. At transport connect, the target + * creates its "queue" and returns a handle that is to be given to the + * target whenever it posts something to the corresponding SQ. When an + * SQE is sent on a SQ, FC effectively considers the SQE, or rather the + * command contained within the SQE, an io, and assigns a FC exchange + * to it. The SQE and the associated SQ handle are sent in the initial + * CMD IU sents on the exchange. All transfers relative to the io occur + * as part of the exchange. The CQE is the last thing for the io, + * which is transferred (explicitly or implicitly) with the RSP IU + * sent on the exchange. After the CQE is received, the FC exchange is + * terminaed and the Exchange may be used on a different io. + * + * The transport to LLDD api has the transport making a request for a + * new fcp io request to the LLDD. The LLDD then allocates a FC exchange + * resource and transfers the command. The LLDD will then process all + * steps to complete the io. Upon completion, the transport done routine + * is called. + * + * So - while the operation is outstanding to the LLDD, there is a link + * level FC exchange resource that is also outstanding. This must be + * considered in all cleanup operations. + */ +static int +nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, + struct nvme_fc_fcp_op *op, u32 data_len, + enum nvmefc_fcp_datadir io_dir) +{ + struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; + struct nvme_command *sqe = &cmdiu->sqe; + u32 csn; + int ret; + + if (!nvme_fc_ctrl_get(ctrl)) + return BLK_MQ_RQ_QUEUE_ERROR; + + /* format the FC-NVME CMD IU and fcp_req */ + cmdiu->connection_id = cpu_to_be64(queue->connection_id); + csn = atomic_inc_return(&queue->csn); + cmdiu->csn = cpu_to_be32(csn); + cmdiu->data_len = cpu_to_be32(data_len); + switch (io_dir) { + case NVMEFC_FCP_WRITE: + cmdiu->flags = FCNVME_CMD_FLAGS_WRITE; + break; + case NVMEFC_FCP_READ: + cmdiu->flags = FCNVME_CMD_FLAGS_READ; + break; + case NVMEFC_FCP_NODATA: + cmdiu->flags = 0; + break; + } + op->fcp_req.payload_length = data_len; + op->fcp_req.io_dir = io_dir; + op->fcp_req.transferred_length = 0; + op->fcp_req.rcv_rsplen = 0; + op->fcp_req.status = 0; + op->fcp_req.sqid = cpu_to_le16(queue->qnum); + + /* + * validate per fabric rules, set fields mandated by fabric spec + * as well as those by FC-NVME spec. + */ + WARN_ON_ONCE(sqe->common.metadata); + WARN_ON_ONCE(sqe->common.dptr.prp1); + WARN_ON_ONCE(sqe->common.dptr.prp2); + sqe->common.flags |= NVME_CMD_SGL_METABUF; + + /* + * format SQE DPTR field per FC-NVME rules + * type=data block descr; subtype=offset; + * offset is currently 0. + */ + sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET; + sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); + sqe->rw.dptr.sgl.addr = 0; + + /* odd that we set the command_id - should come from nvme-fabrics */ + WARN_ON_ONCE(sqe->common.command_id != cpu_to_le16(op->rqno)); + + if (op->rq) { /* skipped on aens */ + ret = nvme_fc_map_data(ctrl, op->rq, op); + if (ret < 0) { + dev_err(queue->ctrl->ctrl.device, + "Failed to map data (%d)\n", ret); + nvme_cleanup_cmd(op->rq); + nvme_fc_ctrl_put(ctrl); + return (ret == -ENOMEM || ret == -EAGAIN) ? + BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR; + } + } + + fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma, + sizeof(op->cmd_iu), DMA_TO_DEVICE); + + atomic_set(&op->state, FCPOP_STATE_ACTIVE); + + if (op->rq) + blk_mq_start_request(op->rq); + + ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, + &ctrl->rport->remoteport, + queue->lldd_handle, &op->fcp_req); + + if (ret) { + dev_err(ctrl->dev, + "Send nvme command failed - lldd returned %d.\n", ret); + + if (op->rq) { /* normal request */ + nvme_fc_unmap_data(ctrl, op->rq, op); + nvme_cleanup_cmd(op->rq); + } + /* else - aen. no cleanup needed */ + + nvme_fc_ctrl_put(ctrl); + + if (ret != -EBUSY) + return BLK_MQ_RQ_QUEUE_ERROR; + + if (op->rq) { + blk_mq_stop_hw_queues(op->rq->q); + blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY); + } + return BLK_MQ_RQ_QUEUE_BUSY; + } + + return BLK_MQ_RQ_QUEUE_OK; +} + +static int +nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct nvme_ns *ns = hctx->queue->queuedata; + struct nvme_fc_queue *queue = hctx->driver_data; + struct nvme_fc_ctrl *ctrl = queue->ctrl; + struct request *rq = bd->rq; + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; + struct nvme_command *sqe = &cmdiu->sqe; + enum nvmefc_fcp_datadir io_dir; + u32 data_len; + int ret; + + ret = nvme_setup_cmd(ns, rq, sqe); + if (ret) + return ret; + + data_len = nvme_map_len(rq); + if (data_len) + io_dir = ((rq_data_dir(rq) == WRITE) ? + NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); + else + io_dir = NVMEFC_FCP_NODATA; + + return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); +} + +static struct blk_mq_tags * +nvme_fc_tagset(struct nvme_fc_queue *queue) +{ + if (queue->qnum == 0) + return queue->ctrl->admin_tag_set.tags[queue->qnum]; + + return queue->ctrl->tag_set.tags[queue->qnum - 1]; +} + +static int +nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) + +{ + struct nvme_fc_queue *queue = hctx->driver_data; + struct nvme_fc_ctrl *ctrl = queue->ctrl; + struct request *req; + struct nvme_fc_fcp_op *op; + + req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); + if (!req) { + dev_err(queue->ctrl->ctrl.device, + "tag 0x%x on QNum %#x not found\n", + tag, queue->qnum); + return 0; + } + + op = blk_mq_rq_to_pdu(req); + + if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) && + (ctrl->lport->ops->poll_queue)) + ctrl->lport->ops->poll_queue(&ctrl->lport->localport, + queue->lldd_handle); + + return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE)); +} + +static void +nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) +{ + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); + struct nvme_fc_fcp_op *aen_op; + int ret; + + if (aer_idx > NVME_FC_NR_AEN_COMMANDS) + return; + + aen_op = &ctrl->aen_ops[aer_idx]; + + ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, + NVMEFC_FCP_NODATA); + if (ret) + dev_err(ctrl->ctrl.device, + "failed async event work [%d]\n", aer_idx); +} + +static void +nvme_fc_complete_rq(struct request *rq) +{ + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fc_ctrl *ctrl = op->ctrl; + int error = 0, state; + + state = atomic_xchg(&op->state, FCPOP_STATE_IDLE); + + nvme_cleanup_cmd(rq); + + nvme_fc_unmap_data(ctrl, rq, op); + + if (unlikely(rq->errors)) { + if (nvme_req_needs_retry(rq, rq->errors)) { + nvme_requeue_req(rq); + return; + } + + if (rq->cmd_type == REQ_TYPE_DRV_PRIV) + error = rq->errors; + else + error = nvme_error_status(rq->errors); + } + + nvme_fc_ctrl_put(ctrl); + + blk_mq_end_request(rq, error); +} + +static struct blk_mq_ops nvme_fc_mq_ops = { + .queue_rq = nvme_fc_queue_rq, + .complete = nvme_fc_complete_rq, + .init_request = nvme_fc_init_request, + .exit_request = nvme_fc_exit_request, + .reinit_request = nvme_fc_reinit_request, + .init_hctx = nvme_fc_init_hctx, + .poll = nvme_fc_poll, + .timeout = nvme_fc_timeout, +}; + +static struct blk_mq_ops nvme_fc_admin_mq_ops = { + .queue_rq = nvme_fc_queue_rq, + .complete = nvme_fc_complete_rq, + .init_request = nvme_fc_init_admin_request, + .exit_request = nvme_fc_exit_request, + .reinit_request = nvme_fc_reinit_request, + .init_hctx = nvme_fc_init_admin_hctx, + .timeout = nvme_fc_timeout, +}; + +static int +nvme_fc_configure_admin_queue(struct nvme_fc_ctrl *ctrl) +{ + u32 segs; + int error; + + nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); + + error = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], + NVME_FC_AQ_BLKMQ_DEPTH, + (NVME_FC_AQ_BLKMQ_DEPTH / 4)); + if (error) + return error; + + memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); + ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; + ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; + ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ + ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; + ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + (SG_CHUNK_SIZE * + sizeof(struct scatterlist)) + + ctrl->lport->ops->fcprqst_priv_sz; + ctrl->admin_tag_set.driver_data = ctrl; + ctrl->admin_tag_set.nr_hw_queues = 1; + ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; + + error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); + if (error) + goto out_free_queue; + + ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); + if (IS_ERR(ctrl->ctrl.admin_q)) { + error = PTR_ERR(ctrl->ctrl.admin_q); + goto out_free_tagset; + } + + error = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, + NVME_FC_AQ_BLKMQ_DEPTH); + if (error) + goto out_cleanup_queue; + + error = nvmf_connect_admin_queue(&ctrl->ctrl); + if (error) + goto out_delete_hw_queue; + + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + if (error) { + dev_err(ctrl->ctrl.device, + "prop_get NVME_REG_CAP failed\n"); + goto out_delete_hw_queue; + } + + ctrl->ctrl.sqsize = + min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); + + error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + if (error) + goto out_delete_hw_queue; + + segs = min_t(u32, NVME_FC_MAX_SEGMENTS, + ctrl->lport->ops->max_sgl_segments); + ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); + + error = nvme_init_identify(&ctrl->ctrl); + if (error) + goto out_delete_hw_queue; + + nvme_start_keep_alive(&ctrl->ctrl); + + return 0; + +out_delete_hw_queue: + __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); +out_cleanup_queue: + blk_cleanup_queue(ctrl->ctrl.admin_q); +out_free_tagset: + blk_mq_free_tag_set(&ctrl->admin_tag_set); +out_free_queue: + nvme_fc_free_queue(&ctrl->queues[0]); + return error; +} + +/* + * This routine is used by the transport when it needs to find active + * io on a queue that is to be terminated. The transport uses + * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke + * this routine to kill them on a 1 by 1 basis. + * + * As FC allocates FC exchange for each io, the transport must contact + * the LLDD to terminate the exchange, thus releasing the FC exchange. + * After terminating the exchange the LLDD will call the transport's + * normal io done path for the request, but it will have an aborted + * status. The done path will return the io request back to the block + * layer with an error status. + */ +static void +nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +{ + struct nvme_ctrl *nctrl = data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); +int status; + + if (!blk_mq_request_started(req)) + return; + + /* this performs an ABTS-LS on the FC exchange for the io */ + status = __nvme_fc_abort_op(ctrl, op); + /* + * if __nvme_fc_abort_op failed: io wasn't active to abort + * consider it done. Assume completion path already completing + * in parallel + */ + if (status) + /* io wasn't active to abort consider it done */ + /* assume completion path already completing in parallel */ + return; +} + + +/* + * This routine stops operation of the controller. Admin and IO queues + * are stopped, outstanding ios on them terminated, and the nvme ctrl + * is shutdown. + */ +static void +nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) +{ + /* + * If io queues are present, stop them and terminate all outstanding + * ios on them. As FC allocates FC exchange for each io, the + * transport must contact the LLDD to terminate the exchange, + * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() + * to tell us what io's are busy and invoke a transport routine + * to kill them with the LLDD. After terminating the exchange + * the LLDD will call the transport's normal io done path, but it + * will have an aborted status. The done path will return the + * io requests back to the block layer as part of normal completions + * (but with error status). + */ + if (ctrl->queue_count > 1) { + nvme_stop_queues(&ctrl->ctrl); + blk_mq_tagset_busy_iter(&ctrl->tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); + } + + if (ctrl->ctrl.state == NVME_CTRL_LIVE) + nvme_shutdown_ctrl(&ctrl->ctrl); + + /* + * now clean up the admin queue. Same thing as above. + * use blk_mq_tagset_busy_itr() and the transport routine to + * terminate the exchanges. + */ + blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); + blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); +} + +/* + * Called to teardown an association. + * May be called with association fully in place or partially in place. + */ +static void +__nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl) +{ + nvme_stop_keep_alive(&ctrl->ctrl); + + /* stop and terminate ios on admin and io queues */ + nvme_fc_shutdown_ctrl(ctrl); + + /* + * tear down the controller + * This will result in the last reference on the nvme ctrl to + * expire, calling the transport nvme_fc_free_nvme_ctrl() callback. + * From there, the transport will tear down it's logical queues and + * association. + */ + nvme_uninit_ctrl(&ctrl->ctrl); + + nvme_put_ctrl(&ctrl->ctrl); +} + +static void +nvme_fc_del_ctrl_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, delete_work); + + __nvme_fc_remove_ctrl(ctrl); +} + +static int +__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) +{ + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) + return -EBUSY; + + if (!queue_work(nvme_fc_wq, &ctrl->delete_work)) + return -EBUSY; + + return 0; +} + +/* + * Request from nvme core layer to delete the controller + */ +static int +nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + struct nvme_fc_rport *rport = ctrl->rport; + unsigned long flags; + int ret; + + spin_lock_irqsave(&rport->lock, flags); + ret = __nvme_fc_del_ctrl(ctrl); + spin_unlock_irqrestore(&rport->lock, flags); + if (ret) + return ret; + + flush_work(&ctrl->delete_work); + + return 0; +} + +static int +nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) +{ + return -EIO; +} + +static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { + .name = "fc", + .module = THIS_MODULE, + .is_fabrics = true, + .reg_read32 = nvmf_reg_read32, + .reg_read64 = nvmf_reg_read64, + .reg_write32 = nvmf_reg_write32, + .reset_ctrl = nvme_fc_reset_nvme_ctrl, + .free_ctrl = nvme_fc_free_nvme_ctrl, + .submit_async_event = nvme_fc_submit_async_event, + .delete_ctrl = nvme_fc_del_nvme_ctrl, + .get_subsysnqn = nvmf_get_subsysnqn, + .get_address = nvmf_get_address, +}; + +static int +nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + int ret; + + ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + if (ret) { + dev_info(ctrl->ctrl.device, + "set_queue_count failed: %d\n", ret); + return ret; + } + + ctrl->queue_count = opts->nr_io_queues + 1; + if (!opts->nr_io_queues) + return 0; + + dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", + opts->nr_io_queues); + + nvme_fc_init_io_queues(ctrl); + + memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); + ctrl->tag_set.ops = &nvme_fc_mq_ops; + ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; + ctrl->tag_set.reserved_tags = 1; /* fabric connect */ + ctrl->tag_set.numa_node = NUMA_NO_NODE; + ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + (SG_CHUNK_SIZE * + sizeof(struct scatterlist)) + + ctrl->lport->ops->fcprqst_priv_sz; + ctrl->tag_set.driver_data = ctrl; + ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.timeout = NVME_IO_TIMEOUT; + + ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + if (ret) + return ret; + + ctrl->ctrl.tagset = &ctrl->tag_set; + + ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); + if (IS_ERR(ctrl->ctrl.connect_q)) { + ret = PTR_ERR(ctrl->ctrl.connect_q); + goto out_free_tag_set; + } + + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_cleanup_blk_queue; + + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_delete_hw_queues; + + return 0; + +out_delete_hw_queues: + nvme_fc_delete_hw_io_queues(ctrl); +out_cleanup_blk_queue: + nvme_stop_keep_alive(&ctrl->ctrl); + blk_cleanup_queue(ctrl->ctrl.connect_q); +out_free_tag_set: + blk_mq_free_tag_set(&ctrl->tag_set); + nvme_fc_free_io_queues(ctrl); + + /* force put free routine to ignore io queues */ + ctrl->ctrl.tagset = NULL; + + return ret; +} + + +static struct nvme_ctrl * +__nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, + struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) +{ + struct nvme_fc_ctrl *ctrl; + unsigned long flags; + int ret, idx; + bool changed; + + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); + if (!ctrl) { + ret = -ENOMEM; + goto out_fail; + } + + idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); + if (idx < 0) { + ret = -ENOSPC; + goto out_free_ctrl; + } + + ctrl->ctrl.opts = opts; + INIT_LIST_HEAD(&ctrl->ctrl_list); + INIT_LIST_HEAD(&ctrl->ls_req_list); + ctrl->lport = lport; + ctrl->rport = rport; + ctrl->dev = lport->dev; + ctrl->state = FCCTRL_INIT; + ctrl->cnum = idx; + + ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); + if (ret) + goto out_free_ida; + + get_device(ctrl->dev); + kref_init(&ctrl->ref); + + INIT_WORK(&ctrl->delete_work, nvme_fc_del_ctrl_work); + spin_lock_init(&ctrl->lock); + + /* io queue count */ + ctrl->queue_count = min_t(unsigned int, + opts->nr_io_queues, + lport->ops->max_hw_queues); + opts->nr_io_queues = ctrl->queue_count; /* so opts has valid value */ + ctrl->queue_count++; /* +1 for admin queue */ + + ctrl->ctrl.sqsize = opts->queue_size - 1; + ctrl->ctrl.kato = opts->kato; + + ret = -ENOMEM; + ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue), + GFP_KERNEL); + if (!ctrl->queues) + goto out_uninit_ctrl; + + ret = nvme_fc_configure_admin_queue(ctrl); + if (ret) + goto out_uninit_ctrl; + + /* sanity checks */ + + /* FC-NVME supports 64-byte SQE only */ + if (ctrl->ctrl.ioccsz != 4) { + dev_err(ctrl->ctrl.device, "ioccsz %d is not supported!\n", + ctrl->ctrl.ioccsz); + goto out_remove_admin_queue; + } + /* FC-NVME supports 16-byte CQE only */ + if (ctrl->ctrl.iorcsz != 1) { + dev_err(ctrl->ctrl.device, "iorcsz %d is not supported!\n", + ctrl->ctrl.iorcsz); + goto out_remove_admin_queue; + } + /* FC-NVME does not have other data in the capsule */ + if (ctrl->ctrl.icdoff) { + dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", + ctrl->ctrl.icdoff); + goto out_remove_admin_queue; + } + + /* FC-NVME supports normal SGL Data Block Descriptors */ + + if (opts->queue_size > ctrl->ctrl.maxcmd) { + /* warn if maxcmd is lower than queue_size */ + dev_warn(ctrl->ctrl.device, + "queue_size %zu > ctrl maxcmd %u, reducing " + "to queue_size\n", + opts->queue_size, ctrl->ctrl.maxcmd); + opts->queue_size = ctrl->ctrl.maxcmd; + } + + ret = nvme_fc_init_aen_ops(ctrl); + if (ret) + goto out_exit_aen_ops; + + if (ctrl->queue_count > 1) { + ret = nvme_fc_create_io_queues(ctrl); + if (ret) + goto out_exit_aen_ops; + } + + spin_lock_irqsave(&ctrl->lock, flags); + ctrl->state = FCCTRL_ACTIVE; + spin_unlock_irqrestore(&ctrl->lock, flags); + + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + WARN_ON_ONCE(!changed); + + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: new ctrl: NQN \"%s\" (%p)\n", + ctrl->cnum, ctrl->ctrl.opts->subsysnqn, &ctrl); + + kref_get(&ctrl->ctrl.kref); + + spin_lock_irqsave(&rport->lock, flags); + list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); + spin_unlock_irqrestore(&rport->lock, flags); + + if (opts->nr_io_queues) { + nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); + } + + return &ctrl->ctrl; + +out_exit_aen_ops: + nvme_fc_exit_aen_ops(ctrl); +out_remove_admin_queue: + /* send a Disconnect(association) LS to fc-nvme target */ + nvme_fc_xmt_disconnect_assoc(ctrl); + nvme_stop_keep_alive(&ctrl->ctrl); + nvme_fc_destroy_admin_queue(ctrl); +out_uninit_ctrl: + nvme_uninit_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); + if (ret > 0) + ret = -EIO; + /* exit via here will follow ctlr ref point callbacks to free */ + return ERR_PTR(ret); + +out_free_ida: + ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); +out_free_ctrl: + kfree(ctrl); +out_fail: + nvme_fc_rport_put(rport); + /* exit via here doesn't follow ctlr ref points */ + return ERR_PTR(ret); +} + +enum { + FCT_TRADDR_ERR = 0, + FCT_TRADDR_WWNN = 1 << 0, + FCT_TRADDR_WWPN = 1 << 1, +}; + +struct nvmet_fc_traddr { + u64 nn; + u64 pn; +}; + +static const match_table_t traddr_opt_tokens = { + { FCT_TRADDR_WWNN, "nn-%s" }, + { FCT_TRADDR_WWPN, "pn-%s" }, + { FCT_TRADDR_ERR, NULL } +}; + +static int +nvme_fc_parse_address(struct nvmet_fc_traddr *traddr, char *buf) +{ + substring_t args[MAX_OPT_ARGS]; + char *options, *o, *p; + int token, ret = 0; + u64 token64; + + options = o = kstrdup(buf, GFP_KERNEL); + if (!options) + return -ENOMEM; + + while ((p = strsep(&o, ":\n")) != NULL) { + if (!*p) + continue; + + token = match_token(p, traddr_opt_tokens, args); + switch (token) { + case FCT_TRADDR_WWNN: + if (match_u64(args, &token64)) { + ret = -EINVAL; + goto out; + } + traddr->nn = token64; + break; + case FCT_TRADDR_WWPN: + if (match_u64(args, &token64)) { + ret = -EINVAL; + goto out; + } + traddr->pn = token64; + break; + default: + pr_warn("unknown traddr token or missing value '%s'\n", + p); + ret = -EINVAL; + goto out; + } + } + +out: + kfree(options); + return ret; +} + +static struct nvme_ctrl * +nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) +{ + struct nvme_fc_lport *lport; + struct nvme_fc_rport *rport; + struct nvmet_fc_traddr laddr = { 0L, 0L }; + struct nvmet_fc_traddr raddr = { 0L, 0L }; + unsigned long flags; + int ret; + + ret = nvme_fc_parse_address(&raddr, opts->traddr); + if (ret || !raddr.nn || !raddr.pn) + return ERR_PTR(-EINVAL); + + ret = nvme_fc_parse_address(&laddr, opts->host_traddr); + if (ret || !laddr.nn || !laddr.pn) + return ERR_PTR(-EINVAL); + + /* find the host and remote ports to connect together */ + spin_lock_irqsave(&nvme_fc_lock, flags); + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { + if (lport->localport.node_name != laddr.nn || + lport->localport.port_name != laddr.pn) + continue; + + list_for_each_entry(rport, &lport->endp_list, endp_list) { + if (rport->remoteport.node_name != raddr.nn || + rport->remoteport.port_name != raddr.pn) + continue; + + /* if fail to get reference fall through. Will error */ + if (!nvme_fc_rport_get(rport)) + break; + + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return __nvme_fc_create_ctrl(dev, opts, lport, rport); + } + } + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return ERR_PTR(-ENOENT); +} + + +static struct nvmf_transport_ops nvme_fc_transport = { + .name = "fc", + .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, + .allowed_opts = NVMF_OPT_RECONNECT_DELAY, + .create_ctrl = nvme_fc_create_ctrl, +}; + +static int __init nvme_fc_init_module(void) +{ + nvme_fc_wq = create_workqueue("nvme_fc_wq"); + if (!nvme_fc_wq) + return -ENOMEM; + + nvmf_register_transport(&nvme_fc_transport); + return 0; +} + +static void __exit nvme_fc_exit_module(void) +{ + /* sanity check - all lports should be removed */ + if (!list_empty(&nvme_fc_lport_list)) + pr_warn("%s: localport list not empty\n", __func__); + + nvmf_unregister_transport(&nvme_fc_transport); + + destroy_workqueue(nvme_fc_wq); + + ida_destroy(&nvme_fc_local_port_cnt); + ida_destroy(&nvme_fc_ctrl_cnt); +} + +module_init(nvme_fc_init_module); +module_exit(nvme_fc_exit_module); + +MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 5daf2f4be0cd..588d4a34c083 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -146,14 +146,6 @@ struct nvme_nvm_command { }; }; -struct nvme_nvm_completion { - __le64 result; /* Used by LightNVM to return ppa completions */ - __le16 sq_head; /* how much of this queue may be reclaimed */ - __le16 sq_id; /* submission queue that generated this entry */ - __u16 command_id; /* of the command which completed */ - __le16 status; /* did the command fail, and if so, why? */ -}; - #define NVME_NVM_LP_MLC_PAIRS 886 struct nvme_nvm_lp_mlc { __le16 num_pairs; @@ -360,6 +352,7 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb, while (nlb) { u32 cmd_nlb = min(nlb_pr_rq, nlb); + u64 elba = slba + cmd_nlb; c.l2p.slba = cpu_to_le64(cmd_slba); c.l2p.nlb = cpu_to_le32(cmd_nlb); @@ -373,6 +366,14 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb, goto out; } + if (unlikely(elba > nvmdev->total_secs)) { + pr_err("nvm: L2P data from device is out of bounds!\n"); + return -EINVAL; + } + + /* Transform physical address to target address space */ + nvmdev->mt->part_to_tgt(nvmdev, entries, cmd_nlb); + if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) { ret = -EINTR; goto out; @@ -391,11 +392,12 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, u8 *blks) { struct request_queue *q = nvmdev->q; + struct nvm_geo *geo = &nvmdev->geo; struct nvme_ns *ns = q->queuedata; struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_nvm_command c = {}; struct nvme_nvm_bb_tbl *bb_tbl; - int nr_blks = nvmdev->blks_per_lun * nvmdev->plane_mode; + int nr_blks = geo->blks_per_lun * geo->plane_mode; int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks; int ret = 0; @@ -436,7 +438,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, goto out; } - memcpy(blks, bb_tbl->blk, nvmdev->blks_per_lun * nvmdev->plane_mode); + memcpy(blks, bb_tbl->blk, geo->blks_per_lun * geo->plane_mode); out: kfree(bb_tbl); return ret; @@ -481,14 +483,11 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd, static void nvme_nvm_end_io(struct request *rq, int error) { struct nvm_rq *rqd = rq->end_io_data; - struct nvme_nvm_completion *cqe = rq->special; - - if (cqe) - rqd->ppa_status = le64_to_cpu(cqe->result); + rqd->ppa_status = nvme_req(rq)->result.u64; nvm_end_io(rqd, error); - kfree(rq->cmd); + kfree(nvme_req(rq)->cmd); blk_mq_free_request(rq); } @@ -500,20 +499,18 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) struct bio *bio = rqd->bio; struct nvme_nvm_command *cmd; - rq = blk_mq_alloc_request(q, bio_data_dir(bio), 0); - if (IS_ERR(rq)) + cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL); + if (!cmd) return -ENOMEM; - cmd = kzalloc(sizeof(struct nvme_nvm_command) + - sizeof(struct nvme_nvm_completion), GFP_KERNEL); - if (!cmd) { - blk_mq_free_request(rq); + rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY); + if (IS_ERR(rq)) { + kfree(cmd); return -ENOMEM; } + rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; - rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->ioprio = bio_prio(bio); - if (bio_has_data(bio)) rq->nr_phys_segments = bio_phys_segments(q, bio); @@ -522,10 +519,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) nvme_nvm_rqtocmd(rq, rqd, ns, cmd); - rq->cmd = (unsigned char *)cmd; - rq->cmd_len = sizeof(struct nvme_nvm_command); - rq->special = cmd + 1; - rq->end_io_data = rqd; blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io); @@ -543,6 +536,7 @@ static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd) c.erase.nsid = cpu_to_le32(ns->ns_id); c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa); c.erase.length = cpu_to_le16(rqd->nr_ppas - 1); + c.erase.control = cpu_to_le16(rqd->flags); return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0); } @@ -592,12 +586,10 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .max_phys_sect = 64, }; -int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node, - const struct attribute_group *attrs) +int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) { struct request_queue *q = ns->queue; struct nvm_dev *dev; - int ret; dev = nvm_alloc_dev(node); if (!dev) @@ -606,18 +598,10 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node, dev->q = q; memcpy(dev->name, disk_name, DISK_NAME_LEN); dev->ops = &nvme_nvm_dev_ops; - dev->parent_dev = ns->ctrl->device; dev->private_data = ns; ns->ndev = dev; - ret = nvm_register(dev); - - ns->lba_shift = ilog2(dev->sec_size); - - if (sysfs_create_group(&dev->dev.kobj, attrs)) - pr_warn("%s: failed to create sysfs group for identification\n", - disk_name); - return ret; + return nvm_register(dev); } void nvme_nvm_unregister(struct nvme_ns *ns) @@ -625,6 +609,167 @@ void nvme_nvm_unregister(struct nvme_ns *ns) nvm_unregister(ns->ndev); } +static ssize_t nvm_dev_attr_show(struct device *dev, + struct device_attribute *dattr, char *page) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + struct nvm_dev *ndev = ns->ndev; + struct nvm_id *id; + struct nvm_id_group *grp; + struct attribute *attr; + + if (!ndev) + return 0; + + id = &ndev->identity; + grp = &id->groups[0]; + attr = &dattr->attr; + + if (strcmp(attr->name, "version") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", id->ver_id); + } else if (strcmp(attr->name, "vendor_opcode") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", id->vmnt); + } else if (strcmp(attr->name, "capabilities") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", id->cap); + } else if (strcmp(attr->name, "device_mode") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", id->dom); + } else if (strcmp(attr->name, "media_manager") == 0) { + if (!ndev->mt) + return scnprintf(page, PAGE_SIZE, "%s\n", "none"); + return scnprintf(page, PAGE_SIZE, "%s\n", ndev->mt->name); + } else if (strcmp(attr->name, "ppa_format") == 0) { + return scnprintf(page, PAGE_SIZE, + "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + id->ppaf.ch_offset, id->ppaf.ch_len, + id->ppaf.lun_offset, id->ppaf.lun_len, + id->ppaf.pln_offset, id->ppaf.pln_len, + id->ppaf.blk_offset, id->ppaf.blk_len, + id->ppaf.pg_offset, id->ppaf.pg_len, + id->ppaf.sect_offset, id->ppaf.sect_len); + } else if (strcmp(attr->name, "media_type") == 0) { /* u8 */ + return scnprintf(page, PAGE_SIZE, "%u\n", grp->mtype); + } else if (strcmp(attr->name, "flash_media_type") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->fmtype); + } else if (strcmp(attr->name, "num_channels") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_ch); + } else if (strcmp(attr->name, "num_luns") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_lun); + } else if (strcmp(attr->name, "num_planes") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln); + } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */ + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_blk); + } else if (strcmp(attr->name, "num_pages") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg); + } else if (strcmp(attr->name, "page_size") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->fpg_sz); + } else if (strcmp(attr->name, "hw_sector_size") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->csecs); + } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */ + return scnprintf(page, PAGE_SIZE, "%u\n", grp->sos); + } else if (strcmp(attr->name, "read_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdt); + } else if (strcmp(attr->name, "read_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdm); + } else if (strcmp(attr->name, "prog_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprt); + } else if (strcmp(attr->name, "prog_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprm); + } else if (strcmp(attr->name, "erase_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbet); + } else if (strcmp(attr->name, "erase_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbem); + } else if (strcmp(attr->name, "multiplane_modes") == 0) { + return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mpos); + } else if (strcmp(attr->name, "media_capabilities") == 0) { + return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mccap); + } else if (strcmp(attr->name, "max_phys_secs") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", + ndev->ops->max_phys_sect); + } else { + return scnprintf(page, + PAGE_SIZE, + "Unhandled attr(%s) in `nvm_dev_attr_show`\n", + attr->name); + } +} + +#define NVM_DEV_ATTR_RO(_name) \ + DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL) + +static NVM_DEV_ATTR_RO(version); +static NVM_DEV_ATTR_RO(vendor_opcode); +static NVM_DEV_ATTR_RO(capabilities); +static NVM_DEV_ATTR_RO(device_mode); +static NVM_DEV_ATTR_RO(ppa_format); +static NVM_DEV_ATTR_RO(media_manager); + +static NVM_DEV_ATTR_RO(media_type); +static NVM_DEV_ATTR_RO(flash_media_type); +static NVM_DEV_ATTR_RO(num_channels); +static NVM_DEV_ATTR_RO(num_luns); +static NVM_DEV_ATTR_RO(num_planes); +static NVM_DEV_ATTR_RO(num_blocks); +static NVM_DEV_ATTR_RO(num_pages); +static NVM_DEV_ATTR_RO(page_size); +static NVM_DEV_ATTR_RO(hw_sector_size); +static NVM_DEV_ATTR_RO(oob_sector_size); +static NVM_DEV_ATTR_RO(read_typ); +static NVM_DEV_ATTR_RO(read_max); +static NVM_DEV_ATTR_RO(prog_typ); +static NVM_DEV_ATTR_RO(prog_max); +static NVM_DEV_ATTR_RO(erase_typ); +static NVM_DEV_ATTR_RO(erase_max); +static NVM_DEV_ATTR_RO(multiplane_modes); +static NVM_DEV_ATTR_RO(media_capabilities); +static NVM_DEV_ATTR_RO(max_phys_secs); + +static struct attribute *nvm_dev_attrs[] = { + &dev_attr_version.attr, + &dev_attr_vendor_opcode.attr, + &dev_attr_capabilities.attr, + &dev_attr_device_mode.attr, + &dev_attr_media_manager.attr, + + &dev_attr_ppa_format.attr, + &dev_attr_media_type.attr, + &dev_attr_flash_media_type.attr, + &dev_attr_num_channels.attr, + &dev_attr_num_luns.attr, + &dev_attr_num_planes.attr, + &dev_attr_num_blocks.attr, + &dev_attr_num_pages.attr, + &dev_attr_page_size.attr, + &dev_attr_hw_sector_size.attr, + &dev_attr_oob_sector_size.attr, + &dev_attr_read_typ.attr, + &dev_attr_read_max.attr, + &dev_attr_prog_typ.attr, + &dev_attr_prog_max.attr, + &dev_attr_erase_typ.attr, + &dev_attr_erase_max.attr, + &dev_attr_multiplane_modes.attr, + &dev_attr_media_capabilities.attr, + &dev_attr_max_phys_secs.attr, + NULL, +}; + +static const struct attribute_group nvm_dev_attr_group = { + .name = "lightnvm", + .attrs = nvm_dev_attrs, +}; + +int nvme_nvm_register_sysfs(struct nvme_ns *ns) +{ + return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, + &nvm_dev_attr_group); +} + +void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) +{ + sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, + &nvm_dev_attr_group); +} + /* move to shared place when used in multiple places. */ #define PCI_VENDOR_ID_CNEX 0x1d1d #define PCI_DEVICE_ID_CNEX_WL 0x2807 diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index d47f5a5d18c7..bd5321441d12 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -79,6 +79,20 @@ enum nvme_quirks { NVME_QUIRK_DELAY_BEFORE_CHK_RDY = (1 << 3), }; +/* + * Common request structure for NVMe passthrough. All drivers must have + * this structure as the first member of their request-private data. + */ +struct nvme_request { + struct nvme_command *cmd; + union nvme_result result; +}; + +static inline struct nvme_request *nvme_req(struct request *req) +{ + return blk_mq_rq_to_pdu(req); +} + /* The below value is the specific amount of delay needed before checking * readiness in case of the PCI_DEVICE(0x1c58, 0x0003), which needs the * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was @@ -222,8 +236,10 @@ static inline unsigned nvme_map_len(struct request *rq) static inline void nvme_cleanup_cmd(struct request *req) { - if (req_op(req) == REQ_OP_DISCARD) - kfree(req->completion_data); + if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { + kfree(page_address(req->special_vec.bv_page) + + req->special_vec.bv_offset); + } } static inline int nvme_error_status(u16 status) @@ -261,8 +277,8 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); #define NVME_NR_AERS 1 -void nvme_complete_async_event(struct nvme_ctrl *ctrl, - struct nvme_completion *cqe); +void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, + union nvme_result *res); void nvme_queue_async_events(struct nvme_ctrl *ctrl); void nvme_stop_queues(struct nvme_ctrl *ctrl); @@ -278,7 +294,7 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - struct nvme_completion *cqe, void *buffer, unsigned bufflen, + union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, int flags); int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, u32 *result, @@ -307,36 +323,33 @@ int nvme_sg_get_version_num(int __user *ip); #ifdef CONFIG_NVM int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id); -int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node, - const struct attribute_group *attrs); +int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); - -static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) -{ - if (dev->type->devnode) - return dev_to_disk(dev)->private_data; - - return (container_of(dev, struct nvm_dev, dev))->private_data; -} +int nvme_nvm_register_sysfs(struct nvme_ns *ns); +void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); #else static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, - int node, - const struct attribute_group *attrs) + int node) { return 0; } static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}; - +static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns) +{ + return 0; +} +static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {}; static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) { return 0; } +#endif /* CONFIG_NVM */ + static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) { return dev_to_disk(dev)->private_data; } -#endif /* CONFIG_NVM */ int __init nvme_core_init(void); void nvme_core_exit(void); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5e52034ab010..3ba38f4d774d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -141,6 +141,7 @@ struct nvme_queue { * allocated to store the PRP list. */ struct nvme_iod { + struct nvme_request req; struct nvme_queue *nvmeq; int aborted; int npages; /* In the PRP list. 0 means small pool in use */ @@ -302,14 +303,14 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq, static __le64 **iod_list(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - return (__le64 **)(iod->sg + req->nr_phys_segments); + return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req)); } static int nvme_init_iod(struct request *rq, unsigned size, struct nvme_dev *dev) { struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); - int nseg = rq->nr_phys_segments; + int nseg = blk_rq_nr_phys_segments(rq); if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); @@ -324,11 +325,11 @@ static int nvme_init_iod(struct request *rq, unsigned size, iod->nents = 0; iod->length = size; - if (!(rq->cmd_flags & REQ_DONTPREP)) { + if (!(rq->rq_flags & RQF_DONTPREP)) { rq->retries = 0; - rq->cmd_flags |= REQ_DONTPREP; + rq->rq_flags |= RQF_DONTPREP; } - return 0; + return BLK_MQ_RQ_QUEUE_OK; } static void nvme_free_iod(struct nvme_dev *dev, struct request *req) @@ -339,8 +340,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req) __le64 **list = iod_list(req); dma_addr_t prp_dma = iod->first_dma; - nvme_cleanup_cmd(req); - if (iod->npages == 0) dma_pool_free(dev->prp_small_pool, list[0], prp_dma); for (i = 0; i < iod->npages; i++) { @@ -510,7 +509,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req, DMA_TO_DEVICE : DMA_FROM_DEVICE; int ret = BLK_MQ_RQ_QUEUE_ERROR; - sg_init_table(iod->sg, req->nr_phys_segments); + sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); iod->nents = blk_rq_map_sg(q, req, iod->sg); if (!iod->nents) goto out; @@ -566,6 +565,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) } } + nvme_cleanup_cmd(req); nvme_free_iod(dev, req); } @@ -596,22 +596,21 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } } - map_len = nvme_map_len(req); - ret = nvme_init_iod(req, map_len, dev); - if (ret) + ret = nvme_setup_cmd(ns, req, &cmnd); + if (ret != BLK_MQ_RQ_QUEUE_OK) return ret; - ret = nvme_setup_cmd(ns, req, &cmnd); - if (ret) - goto out; + map_len = nvme_map_len(req); + ret = nvme_init_iod(req, map_len, dev); + if (ret != BLK_MQ_RQ_QUEUE_OK) + goto out_free_cmd; - if (req->nr_phys_segments) + if (blk_rq_nr_phys_segments(req)) ret = nvme_map_data(dev, req, map_len, &cmnd); - if (ret) - goto out; + if (ret != BLK_MQ_RQ_QUEUE_OK) + goto out_cleanup_iod; - cmnd.common.command_id = req->tag; blk_mq_start_request(req); spin_lock_irq(&nvmeq->q_lock); @@ -621,14 +620,16 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, else ret = BLK_MQ_RQ_QUEUE_ERROR; spin_unlock_irq(&nvmeq->q_lock); - goto out; + goto out_cleanup_iod; } __nvme_submit_cmd(nvmeq, &cmnd); nvme_process_cq(nvmeq); spin_unlock_irq(&nvmeq->q_lock); return BLK_MQ_RQ_QUEUE_OK; -out: +out_cleanup_iod: nvme_free_iod(dev, req); +out_free_cmd: + nvme_cleanup_cmd(req); return ret; } @@ -703,13 +704,13 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) */ if (unlikely(nvmeq->qid == 0 && cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) { - nvme_complete_async_event(&nvmeq->dev->ctrl, &cqe); + nvme_complete_async_event(&nvmeq->dev->ctrl, + cqe.status, &cqe.result); continue; } req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); - if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) - memcpy(req->special, &cqe, sizeof(cqe)); + nvme_req(req)->result = cqe.result; blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3d25add36d91..f42ab70ffa38 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -28,7 +28,6 @@ #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> -#include <rdma/ib_cm.h> #include <linux/nvme-rdma.h> #include "nvme.h" @@ -66,6 +65,7 @@ struct nvme_rdma_qe { struct nvme_rdma_queue; struct nvme_rdma_request { + struct nvme_request req; struct ib_mr *mr; struct nvme_rdma_qe sqe; struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; @@ -241,7 +241,9 @@ out_free_ring: static void nvme_rdma_qp_event(struct ib_event *event, void *context) { - pr_debug("QP event %d\n", event->event); + pr_debug("QP event %s (%d)\n", + ib_event_msg(event->event), event->event); + } static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue) @@ -963,8 +965,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; - int nents, count; - int ret; + int count, ret; req->num_sge = 1; req->inline_data = false; @@ -976,16 +977,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, return nvme_rdma_set_sg_null(c); req->sg_table.sgl = req->first_sgl; - ret = sg_alloc_table_chained(&req->sg_table, rq->nr_phys_segments, - req->sg_table.sgl); + ret = sg_alloc_table_chained(&req->sg_table, + blk_rq_nr_phys_segments(rq), req->sg_table.sgl); if (ret) return -ENOMEM; - nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl); - BUG_ON(nents > rq->nr_phys_segments); - req->nents = nents; + req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl); - count = ib_dma_map_sg(ibdev, req->sg_table.sgl, nents, + count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents, rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (unlikely(count <= 0)) { sg_free_table_chained(&req->sg_table, true); @@ -1130,13 +1129,10 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx) static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, struct nvme_completion *cqe, struct ib_wc *wc, int tag) { - u16 status = le16_to_cpu(cqe->status); struct request *rq; struct nvme_rdma_request *req; int ret = 0; - status >>= 1; - rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id); if (!rq) { dev_err(queue->ctrl->ctrl.device, @@ -1147,9 +1143,6 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, } req = blk_mq_rq_to_pdu(rq); - if (rq->cmd_type == REQ_TYPE_DRV_PRIV && rq->special) - memcpy(rq->special, cqe, sizeof(*cqe)); - if (rq->tag == tag) ret = 1; @@ -1157,8 +1150,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, wc->ex.invalidate_rkey == req->mr->rkey) req->mr->need_inval = false; - blk_mq_complete_request(rq, status); - + req->req.result = cqe->result; + blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); return ret; } @@ -1186,7 +1179,8 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag) */ if (unlikely(nvme_rdma_queue_idx(queue) == 0 && cqe->command_id >= NVME_RDMA_AQ_BLKMQ_DEPTH)) - nvme_complete_async_event(&queue->ctrl->ctrl, cqe); + nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, + &cqe->result); else ret = nvme_rdma_process_nvme_rsp(queue, cqe, wc, tag); ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE); @@ -1433,10 +1427,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, sizeof(struct nvme_command), DMA_TO_DEVICE); ret = nvme_setup_cmd(ns, rq, c); - if (ret) + if (ret != BLK_MQ_RQ_QUEUE_OK) return ret; - c->common.command_id = rq->tag; blk_mq_start_request(rq); map_len = nvme_map_len(rq); @@ -1944,6 +1937,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, opts->queue_size = ctrl->ctrl.maxcmd; } + if (opts->queue_size > ctrl->ctrl.sqsize + 1) { + /* warn if sqsize is lower than queue_size */ + dev_warn(ctrl->ctrl.device, + "queue_size %zu > ctrl sqsize %u, clamping down\n", + opts->queue_size, ctrl->ctrl.sqsize + 1); + opts->queue_size = ctrl->ctrl.sqsize + 1; + } + if (opts->nr_io_queues) { ret = nvme_rdma_create_io_queues(ctrl); if (ret) diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index 3eaa4d27801e..b71e95044b43 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -1280,10 +1280,6 @@ static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10, static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list, u16 idx, u16 bd_len, u8 llbaa) { - u16 bd_num; - - bd_num = bd_len / ((llbaa == 0) ? - SHORT_DESC_BLOCK : LONG_DESC_BLOCK); /* Store block descriptor info if a FORMAT UNIT comes later */ /* TODO Saving 1st BD info; what to do if multiple BD received? */ if (llbaa == 0) { @@ -1528,7 +1524,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, int nvme_sc; struct nvme_id_ns *id_ns; u8 i; - u8 flbas, nlbaf; + u8 nlbaf; u8 selected_lbaf = 0xFF; u32 cdw10 = 0; struct nvme_command c; @@ -1539,7 +1535,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, if (res) return res; - flbas = (id_ns->flbas) & 0x0F; nlbaf = id_ns->nlbaf; for (i = 0; i < nlbaf; i++) { @@ -2168,12 +2163,10 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - u8 immed, pcmod, no_flush, start; + u8 immed, no_flush; immed = cmd[1] & 0x01; - pcmod = cmd[3] & 0x0f; no_flush = cmd[4] & 0x04; - start = cmd[4] & 0x01; if (immed != 0) { return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 3a5b9d0576cb..03e4ab65fe77 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -34,3 +34,27 @@ config NVME_TARGET_RDMA devices over RDMA. If unsure, say N. + +config NVME_TARGET_FC + tristate "NVMe over Fabrics FC target driver" + depends on NVME_TARGET + depends on HAS_DMA + help + This enables the NVMe FC target support, which allows exporting NVMe + devices over FC. + + If unsure, say N. + +config NVME_TARGET_FCLOOP + tristate "NVMe over Fabrics FC Transport Loopback Test driver" + depends on NVME_TARGET + select NVME_CORE + select NVME_FABRICS + select SG_POOL + depends on NVME_FC + depends on NVME_TARGET_FC + help + This enables the NVMe FC loopback test support, which can be useful + to test NVMe-FC transport interfaces. + + If unsure, say N. diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile index b7a06232c9da..fecc14f535b2 100644 --- a/drivers/nvme/target/Makefile +++ b/drivers/nvme/target/Makefile @@ -2,8 +2,12 @@ obj-$(CONFIG_NVME_TARGET) += nvmet.o obj-$(CONFIG_NVME_TARGET_LOOP) += nvme-loop.o obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o +obj-$(CONFIG_NVME_TARGET_FC) += nvmet-fc.o +obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o nvmet-y += core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \ discovery.o nvme-loop-y += loop.o nvmet-rdma-y += rdma.o +nvmet-fc-y += fc.o +nvme-fcloop-y += fcloop.o diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 6fe4c48a21e4..ec1ad2aa0a4c 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -237,7 +237,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); id->nn = cpu_to_le32(ctrl->subsys->max_nsid); - id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM); + id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM | + NVME_CTRL_ONCS_WRITE_ZEROES); /* XXX: don't report vwc if the underlying device is write through */ id->vwc = NVME_CTRL_VWC_PRESENT; diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index af5e2dc4a3d5..d0f60c36d576 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -37,6 +37,8 @@ static ssize_t nvmet_addr_adrfam_show(struct config_item *item, return sprintf(page, "ipv6\n"); case NVMF_ADDR_FAMILY_IB: return sprintf(page, "ib\n"); + case NVMF_ADDR_FAMILY_FC: + return sprintf(page, "fc\n"); default: return sprintf(page, "\n"); } @@ -59,6 +61,8 @@ static ssize_t nvmet_addr_adrfam_store(struct config_item *item, port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP6; } else if (sysfs_streq(page, "ib")) { port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IB; + } else if (sysfs_streq(page, "fc")) { + port->disc_addr.adrfam = NVMF_ADDR_FAMILY_FC; } else { pr_err("Invalid value '%s' for adrfam\n", page); return -EINVAL; @@ -209,6 +213,8 @@ static ssize_t nvmet_addr_trtype_show(struct config_item *item, return sprintf(page, "rdma\n"); case NVMF_TRTYPE_LOOP: return sprintf(page, "loop\n"); + case NVMF_TRTYPE_FC: + return sprintf(page, "fc\n"); default: return sprintf(page, "\n"); } @@ -229,6 +235,12 @@ static void nvmet_port_init_tsas_loop(struct nvmet_port *port) memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); } +static void nvmet_port_init_tsas_fc(struct nvmet_port *port) +{ + port->disc_addr.trtype = NVMF_TRTYPE_FC; + memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); +} + static ssize_t nvmet_addr_trtype_store(struct config_item *item, const char *page, size_t count) { @@ -244,6 +256,8 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item, nvmet_port_init_tsas_rdma(port); } else if (sysfs_streq(page, "loop")) { nvmet_port_init_tsas_loop(port); + } else if (sysfs_streq(page, "fc")) { + nvmet_port_init_tsas_fc(port); } else { pr_err("Invalid value '%s' for trtype\n", page); return -EINVAL; @@ -271,7 +285,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item, mutex_lock(&subsys->lock); ret = -EBUSY; - if (nvmet_ns_enabled(ns)) + if (ns->enabled) goto out_unlock; kfree(ns->device_path); @@ -307,7 +321,7 @@ static ssize_t nvmet_ns_device_nguid_store(struct config_item *item, int ret = 0; mutex_lock(&subsys->lock); - if (nvmet_ns_enabled(ns)) { + if (ns->enabled) { ret = -EBUSY; goto out_unlock; } @@ -339,7 +353,7 @@ CONFIGFS_ATTR(nvmet_ns_, device_nguid); static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", nvmet_ns_enabled(to_nvmet_ns(item))); + return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled); } static ssize_t nvmet_ns_enable_store(struct config_item *item, diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a21437a33adb..b1d66ed655c9 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -264,7 +264,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) int ret = 0; mutex_lock(&subsys->lock); - if (!list_empty(&ns->dev_link)) + if (ns->enabled) goto out_unlock; ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, @@ -309,6 +309,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0); + ns->enabled = true; ret = 0; out_unlock: mutex_unlock(&subsys->lock); @@ -325,11 +326,11 @@ void nvmet_ns_disable(struct nvmet_ns *ns) struct nvmet_ctrl *ctrl; mutex_lock(&subsys->lock); - if (list_empty(&ns->dev_link)) { - mutex_unlock(&subsys->lock); - return; - } - list_del_init(&ns->dev_link); + if (!ns->enabled) + goto out_unlock; + + ns->enabled = false; + list_del_rcu(&ns->dev_link); mutex_unlock(&subsys->lock); /* @@ -351,6 +352,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns) if (ns->bdev) blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ); +out_unlock: mutex_unlock(&subsys->lock); } @@ -617,7 +619,7 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, if (!subsys) { pr_warn("connect request for invalid subsystem %s!\n", subsysnqn); - req->rsp->result = IPO_IATTR_CONNECT_DATA(subsysnqn); + req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; } @@ -638,7 +640,7 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, pr_warn("could not find controller %d for subsys %s / host %s\n", cntlid, subsysnqn, hostnqn); - req->rsp->result = IPO_IATTR_CONNECT_DATA(cntlid); + req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; out: @@ -700,7 +702,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!subsys) { pr_warn("connect request for invalid subsystem %s!\n", subsysnqn); - req->rsp->result = IPO_IATTR_CONNECT_DATA(subsysnqn); + req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); goto out; } @@ -709,7 +711,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!nvmet_host_allowed(req, subsys, hostnqn)) { pr_info("connect by host %s for subsystem %s not allowed\n", hostnqn, subsysnqn); - req->rsp->result = IPO_IATTR_CONNECT_DATA(hostnqn); + req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); up_read(&nvmet_config_sem); goto out_put_subsystem; } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 9a97ae67e656..f4088198cd0d 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -69,7 +69,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) } } - req->rsp->result64 = cpu_to_le64(val); + req->rsp->result.u64 = cpu_to_le64(val); nvmet_req_complete(req, status); } @@ -125,7 +125,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) d = kmap(sg_page(req->sg)) + req->sg->offset; /* zero out initial completion result, assign values as needed */ - req->rsp->result = 0; + req->rsp->result.u32 = 0; if (c->recfmt != 0) { pr_warn("invalid connect version (%d).\n", @@ -138,7 +138,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) pr_warn("connect attempt for invalid controller ID %#x\n", d->cntlid); status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; - req->rsp->result = IPO_IATTR_CONNECT_DATA(cntlid); + req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); goto out; } @@ -155,7 +155,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) pr_info("creating controller %d for NQN %s.\n", ctrl->cntlid, ctrl->hostnqn); - req->rsp->result16 = cpu_to_le16(ctrl->cntlid); + req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); out: kunmap(sg_page(req->sg)); @@ -173,7 +173,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) d = kmap(sg_page(req->sg)) + req->sg->offset; /* zero out initial completion result, assign values as needed */ - req->rsp->result = 0; + req->rsp->result.u32 = 0; if (c->recfmt != 0) { pr_warn("invalid connect version (%d).\n", @@ -191,14 +191,14 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) if (unlikely(qid > ctrl->subsys->max_qid)) { pr_warn("invalid queue id (%d)\n", qid); status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; - req->rsp->result = IPO_IATTR_CONNECT_SQE(qid); + req->rsp->result.u32 = IPO_IATTR_CONNECT_SQE(qid); goto out_ctrl_put; } status = nvmet_install_queue(ctrl, req); if (status) { /* pass back cntlid that had the issue of installing queue */ - req->rsp->result16 = cpu_to_le16(ctrl->cntlid); + req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); goto out_ctrl_put; } diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c new file mode 100644 index 000000000000..173e842f19c9 --- /dev/null +++ b/drivers/nvme/target/fc.c @@ -0,0 +1,2288 @@ +/* + * Copyright (c) 2016 Avago Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful. + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, + * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO + * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. + * See the GNU General Public License for more details, a copy of which + * can be found in the file COPYING included with this package + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/blk-mq.h> +#include <linux/parser.h> +#include <linux/random.h> +#include <uapi/scsi/fc/fc_fs.h> +#include <uapi/scsi/fc/fc_els.h> + +#include "nvmet.h" +#include <linux/nvme-fc-driver.h> +#include <linux/nvme-fc.h> + + +/* *************************** Data Structures/Defines ****************** */ + + +#define NVMET_LS_CTX_COUNT 4 + +/* for this implementation, assume small single frame rqst/rsp */ +#define NVME_FC_MAX_LS_BUFFER_SIZE 2048 + +struct nvmet_fc_tgtport; +struct nvmet_fc_tgt_assoc; + +struct nvmet_fc_ls_iod { + struct nvmefc_tgt_ls_req *lsreq; + struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */ + + struct list_head ls_list; /* tgtport->ls_list */ + + struct nvmet_fc_tgtport *tgtport; + struct nvmet_fc_tgt_assoc *assoc; + + u8 *rqstbuf; + u8 *rspbuf; + u16 rqstdatalen; + dma_addr_t rspdma; + + struct scatterlist sg[2]; + + struct work_struct work; +} __aligned(sizeof(unsigned long long)); + +#define NVMET_FC_MAX_KB_PER_XFR 256 + +enum nvmet_fcp_datadir { + NVMET_FCP_NODATA, + NVMET_FCP_WRITE, + NVMET_FCP_READ, + NVMET_FCP_ABORTED, +}; + +struct nvmet_fc_fcp_iod { + struct nvmefc_tgt_fcp_req *fcpreq; + + struct nvme_fc_cmd_iu cmdiubuf; + struct nvme_fc_ersp_iu rspiubuf; + dma_addr_t rspdma; + struct scatterlist *data_sg; + struct scatterlist *next_sg; + int data_sg_cnt; + u32 next_sg_offset; + u32 total_length; + u32 offset; + enum nvmet_fcp_datadir io_dir; + bool active; + bool abort; + spinlock_t flock; + + struct nvmet_req req; + struct work_struct work; + + struct nvmet_fc_tgtport *tgtport; + struct nvmet_fc_tgt_queue *queue; + + struct list_head fcp_list; /* tgtport->fcp_list */ +}; + +struct nvmet_fc_tgtport { + + struct nvmet_fc_target_port fc_target_port; + + struct list_head tgt_list; /* nvmet_fc_target_list */ + struct device *dev; /* dev for dma mapping */ + struct nvmet_fc_target_template *ops; + + struct nvmet_fc_ls_iod *iod; + spinlock_t lock; + struct list_head ls_list; + struct list_head ls_busylist; + struct list_head assoc_list; + struct ida assoc_cnt; + struct nvmet_port *port; + struct kref ref; +}; + +struct nvmet_fc_tgt_queue { + bool ninetypercent; + u16 qid; + u16 sqsize; + u16 ersp_ratio; + u16 sqhd; + int cpu; + atomic_t connected; + atomic_t sqtail; + atomic_t zrspcnt; + atomic_t rsn; + spinlock_t qlock; + struct nvmet_port *port; + struct nvmet_cq nvme_cq; + struct nvmet_sq nvme_sq; + struct nvmet_fc_tgt_assoc *assoc; + struct nvmet_fc_fcp_iod *fod; /* array of fcp_iods */ + struct list_head fod_list; + struct workqueue_struct *work_q; + struct kref ref; +} __aligned(sizeof(unsigned long long)); + +struct nvmet_fc_tgt_assoc { + u64 association_id; + u32 a_id; + struct nvmet_fc_tgtport *tgtport; + struct list_head a_list; + struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES]; + struct kref ref; +}; + + +static inline int +nvmet_fc_iodnum(struct nvmet_fc_ls_iod *iodptr) +{ + return (iodptr - iodptr->tgtport->iod); +} + +static inline int +nvmet_fc_fodnum(struct nvmet_fc_fcp_iod *fodptr) +{ + return (fodptr - fodptr->queue->fod); +} + + +/* + * Association and Connection IDs: + * + * Association ID will have random number in upper 6 bytes and zero + * in lower 2 bytes + * + * Connection IDs will be Association ID with QID or'd in lower 2 bytes + * + * note: Association ID = Connection ID for queue 0 + */ +#define BYTES_FOR_QID sizeof(u16) +#define BYTES_FOR_QID_SHIFT (BYTES_FOR_QID * 8) +#define NVMET_FC_QUEUEID_MASK ((u64)((1 << BYTES_FOR_QID_SHIFT) - 1)) + +static inline u64 +nvmet_fc_makeconnid(struct nvmet_fc_tgt_assoc *assoc, u16 qid) +{ + return (assoc->association_id | qid); +} + +static inline u64 +nvmet_fc_getassociationid(u64 connectionid) +{ + return connectionid & ~NVMET_FC_QUEUEID_MASK; +} + +static inline u16 +nvmet_fc_getqueueid(u64 connectionid) +{ + return (u16)(connectionid & NVMET_FC_QUEUEID_MASK); +} + +static inline struct nvmet_fc_tgtport * +targetport_to_tgtport(struct nvmet_fc_target_port *targetport) +{ + return container_of(targetport, struct nvmet_fc_tgtport, + fc_target_port); +} + +static inline struct nvmet_fc_fcp_iod * +nvmet_req_to_fod(struct nvmet_req *nvme_req) +{ + return container_of(nvme_req, struct nvmet_fc_fcp_iod, req); +} + + +/* *************************** Globals **************************** */ + + +static DEFINE_SPINLOCK(nvmet_fc_tgtlock); + +static LIST_HEAD(nvmet_fc_target_list); +static DEFINE_IDA(nvmet_fc_tgtport_cnt); + + +static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); +static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work); +static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc); +static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); +static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); +static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); +static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); +static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); + + +/* *********************** FC-NVME DMA Handling **************************** */ + +/* + * The fcloop device passes in a NULL device pointer. Real LLD's will + * pass in a valid device pointer. If NULL is passed to the dma mapping + * routines, depending on the platform, it may or may not succeed, and + * may crash. + * + * As such: + * Wrapper all the dma routines and check the dev pointer. + * + * If simple mappings (return just a dma address, we'll noop them, + * returning a dma address of 0. + * + * On more complex mappings (dma_map_sg), a pseudo routine fills + * in the scatter list, setting all dma addresses to 0. + */ + +static inline dma_addr_t +fc_dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction dir) +{ + return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L; +} + +static inline int +fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dev ? dma_mapping_error(dev, dma_addr) : 0; +} + +static inline void +fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + if (dev) + dma_unmap_single(dev, addr, size, dir); +} + +static inline void +fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + if (dev) + dma_sync_single_for_cpu(dev, addr, size, dir); +} + +static inline void +fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + if (dev) + dma_sync_single_for_device(dev, addr, size, dir); +} + +/* pseudo dma_map_sg call */ +static int +fc_map_sg(struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + WARN_ON(nents == 0 || sg[0].length == 0); + + for_each_sg(sg, s, nents, i) { + s->dma_address = 0L; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + s->dma_length = s->length; +#endif + } + return nents; +} + +static inline int +fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir) +{ + return dev ? dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents); +} + +static inline void +fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir) +{ + if (dev) + dma_unmap_sg(dev, sg, nents, dir); +} + + +/* *********************** FC-NVME Port Management ************************ */ + + +static int +nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_ls_iod *iod; + int i; + + iod = kcalloc(NVMET_LS_CTX_COUNT, sizeof(struct nvmet_fc_ls_iod), + GFP_KERNEL); + if (!iod) + return -ENOMEM; + + tgtport->iod = iod; + + for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) { + INIT_WORK(&iod->work, nvmet_fc_handle_ls_rqst_work); + iod->tgtport = tgtport; + list_add_tail(&iod->ls_list, &tgtport->ls_list); + + iod->rqstbuf = kcalloc(2, NVME_FC_MAX_LS_BUFFER_SIZE, + GFP_KERNEL); + if (!iod->rqstbuf) + goto out_fail; + + iod->rspbuf = iod->rqstbuf + NVME_FC_MAX_LS_BUFFER_SIZE; + + iod->rspdma = fc_dma_map_single(tgtport->dev, iod->rspbuf, + NVME_FC_MAX_LS_BUFFER_SIZE, + DMA_TO_DEVICE); + if (fc_dma_mapping_error(tgtport->dev, iod->rspdma)) + goto out_fail; + } + + return 0; + +out_fail: + kfree(iod->rqstbuf); + list_del(&iod->ls_list); + for (iod--, i--; i >= 0; iod--, i--) { + fc_dma_unmap_single(tgtport->dev, iod->rspdma, + NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + kfree(iod->rqstbuf); + list_del(&iod->ls_list); + } + + kfree(iod); + + return -EFAULT; +} + +static void +nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_ls_iod *iod = tgtport->iod; + int i; + + for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) { + fc_dma_unmap_single(tgtport->dev, + iod->rspdma, NVME_FC_MAX_LS_BUFFER_SIZE, + DMA_TO_DEVICE); + kfree(iod->rqstbuf); + list_del(&iod->ls_list); + } + kfree(tgtport->iod); +} + +static struct nvmet_fc_ls_iod * +nvmet_fc_alloc_ls_iod(struct nvmet_fc_tgtport *tgtport) +{ + static struct nvmet_fc_ls_iod *iod; + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + iod = list_first_entry_or_null(&tgtport->ls_list, + struct nvmet_fc_ls_iod, ls_list); + if (iod) + list_move_tail(&iod->ls_list, &tgtport->ls_busylist); + spin_unlock_irqrestore(&tgtport->lock, flags); + return iod; +} + + +static void +nvmet_fc_free_ls_iod(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_iod *iod) +{ + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + list_move(&iod->ls_list, &tgtport->ls_list); + spin_unlock_irqrestore(&tgtport->lock, flags); +} + +static void +nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_tgt_queue *queue) +{ + struct nvmet_fc_fcp_iod *fod = queue->fod; + int i; + + for (i = 0; i < queue->sqsize; fod++, i++) { + INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work); + fod->tgtport = tgtport; + fod->queue = queue; + fod->active = false; + list_add_tail(&fod->fcp_list, &queue->fod_list); + spin_lock_init(&fod->flock); + + fod->rspdma = fc_dma_map_single(tgtport->dev, &fod->rspiubuf, + sizeof(fod->rspiubuf), DMA_TO_DEVICE); + if (fc_dma_mapping_error(tgtport->dev, fod->rspdma)) { + list_del(&fod->fcp_list); + for (fod--, i--; i >= 0; fod--, i--) { + fc_dma_unmap_single(tgtport->dev, fod->rspdma, + sizeof(fod->rspiubuf), + DMA_TO_DEVICE); + fod->rspdma = 0L; + list_del(&fod->fcp_list); + } + + return; + } + } +} + +static void +nvmet_fc_destroy_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_tgt_queue *queue) +{ + struct nvmet_fc_fcp_iod *fod = queue->fod; + int i; + + for (i = 0; i < queue->sqsize; fod++, i++) { + if (fod->rspdma) + fc_dma_unmap_single(tgtport->dev, fod->rspdma, + sizeof(fod->rspiubuf), DMA_TO_DEVICE); + } +} + +static struct nvmet_fc_fcp_iod * +nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) +{ + static struct nvmet_fc_fcp_iod *fod; + unsigned long flags; + + spin_lock_irqsave(&queue->qlock, flags); + fod = list_first_entry_or_null(&queue->fod_list, + struct nvmet_fc_fcp_iod, fcp_list); + if (fod) { + list_del(&fod->fcp_list); + fod->active = true; + fod->abort = false; + /* + * no queue reference is taken, as it was taken by the + * queue lookup just prior to the allocation. The iod + * will "inherit" that reference. + */ + } + spin_unlock_irqrestore(&queue->qlock, flags); + return fod; +} + + +static void +nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, + struct nvmet_fc_fcp_iod *fod) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->qlock, flags); + list_add_tail(&fod->fcp_list, &fod->queue->fod_list); + fod->active = false; + spin_unlock_irqrestore(&queue->qlock, flags); + + /* + * release the reference taken at queue lookup and fod allocation + */ + nvmet_fc_tgt_q_put(queue); +} + +static int +nvmet_fc_queue_to_cpu(struct nvmet_fc_tgtport *tgtport, int qid) +{ + int cpu, idx, cnt; + + if (!(tgtport->ops->target_features & + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED) || + tgtport->ops->max_hw_queues == 1) + return WORK_CPU_UNBOUND; + + /* Simple cpu selection based on qid modulo active cpu count */ + idx = !qid ? 0 : (qid - 1) % num_active_cpus(); + + /* find the n'th active cpu */ + for (cpu = 0, cnt = 0; ; ) { + if (cpu_active(cpu)) { + if (cnt == idx) + break; + cnt++; + } + cpu = (cpu + 1) % num_possible_cpus(); + } + + return cpu; +} + +static struct nvmet_fc_tgt_queue * +nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, + u16 qid, u16 sqsize) +{ + struct nvmet_fc_tgt_queue *queue; + unsigned long flags; + int ret; + + if (qid >= NVMET_NR_QUEUES) + return NULL; + + queue = kzalloc((sizeof(*queue) + + (sizeof(struct nvmet_fc_fcp_iod) * sqsize)), + GFP_KERNEL); + if (!queue) + return NULL; + + if (!nvmet_fc_tgt_a_get(assoc)) + goto out_free_queue; + + queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0, + assoc->tgtport->fc_target_port.port_num, + assoc->a_id, qid); + if (!queue->work_q) + goto out_a_put; + + queue->fod = (struct nvmet_fc_fcp_iod *)&queue[1]; + queue->qid = qid; + queue->sqsize = sqsize; + queue->assoc = assoc; + queue->port = assoc->tgtport->port; + queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid); + INIT_LIST_HEAD(&queue->fod_list); + atomic_set(&queue->connected, 0); + atomic_set(&queue->sqtail, 0); + atomic_set(&queue->rsn, 1); + atomic_set(&queue->zrspcnt, 0); + spin_lock_init(&queue->qlock); + kref_init(&queue->ref); + + nvmet_fc_prep_fcp_iodlist(assoc->tgtport, queue); + + ret = nvmet_sq_init(&queue->nvme_sq); + if (ret) + goto out_fail_iodlist; + + WARN_ON(assoc->queues[qid]); + spin_lock_irqsave(&assoc->tgtport->lock, flags); + assoc->queues[qid] = queue; + spin_unlock_irqrestore(&assoc->tgtport->lock, flags); + + return queue; + +out_fail_iodlist: + nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); + destroy_workqueue(queue->work_q); +out_a_put: + nvmet_fc_tgt_a_put(assoc); +out_free_queue: + kfree(queue); + return NULL; +} + + +static void +nvmet_fc_tgt_queue_free(struct kref *ref) +{ + struct nvmet_fc_tgt_queue *queue = + container_of(ref, struct nvmet_fc_tgt_queue, ref); + unsigned long flags; + + spin_lock_irqsave(&queue->assoc->tgtport->lock, flags); + queue->assoc->queues[queue->qid] = NULL; + spin_unlock_irqrestore(&queue->assoc->tgtport->lock, flags); + + nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue); + + nvmet_fc_tgt_a_put(queue->assoc); + + destroy_workqueue(queue->work_q); + + kfree(queue); +} + +static void +nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue) +{ + kref_put(&queue->ref, nvmet_fc_tgt_queue_free); +} + +static int +nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue) +{ + return kref_get_unless_zero(&queue->ref); +} + + +static void +nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq) +{ + int ret; + + fcpreq->op = NVMET_FCOP_ABORT; + fcpreq->offset = 0; + fcpreq->timeout = 0; + fcpreq->transfer_length = 0; + fcpreq->transferred_length = 0; + fcpreq->fcp_error = 0; + fcpreq->sg_cnt = 0; + + ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fcpreq); + if (ret) + /* should never reach here !! */ + WARN_ON(1); +} + + +static void +nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) +{ + struct nvmet_fc_fcp_iod *fod = queue->fod; + unsigned long flags; + int i; + bool disconnect; + + disconnect = atomic_xchg(&queue->connected, 0); + + spin_lock_irqsave(&queue->qlock, flags); + /* about outstanding io's */ + for (i = 0; i < queue->sqsize; fod++, i++) { + if (fod->active) { + spin_lock(&fod->flock); + fod->abort = true; + spin_unlock(&fod->flock); + } + } + spin_unlock_irqrestore(&queue->qlock, flags); + + flush_workqueue(queue->work_q); + + if (disconnect) + nvmet_sq_destroy(&queue->nvme_sq); + + nvmet_fc_tgt_q_put(queue); +} + +static struct nvmet_fc_tgt_queue * +nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, + u64 connection_id) +{ + struct nvmet_fc_tgt_assoc *assoc; + struct nvmet_fc_tgt_queue *queue; + u64 association_id = nvmet_fc_getassociationid(connection_id); + u16 qid = nvmet_fc_getqueueid(connection_id); + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry(assoc, &tgtport->assoc_list, a_list) { + if (association_id == assoc->association_id) { + queue = assoc->queues[qid]; + if (queue && + (!atomic_read(&queue->connected) || + !nvmet_fc_tgt_q_get(queue))) + queue = NULL; + spin_unlock_irqrestore(&tgtport->lock, flags); + return queue; + } + } + spin_unlock_irqrestore(&tgtport->lock, flags); + return NULL; +} + +static struct nvmet_fc_tgt_assoc * +nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_tgt_assoc *assoc, *tmpassoc; + unsigned long flags; + u64 ran; + int idx; + bool needrandom = true; + + assoc = kzalloc(sizeof(*assoc), GFP_KERNEL); + if (!assoc) + return NULL; + + idx = ida_simple_get(&tgtport->assoc_cnt, 0, 0, GFP_KERNEL); + if (idx < 0) + goto out_free_assoc; + + if (!nvmet_fc_tgtport_get(tgtport)) + goto out_ida_put; + + assoc->tgtport = tgtport; + assoc->a_id = idx; + INIT_LIST_HEAD(&assoc->a_list); + kref_init(&assoc->ref); + + while (needrandom) { + get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); + ran = ran << BYTES_FOR_QID_SHIFT; + + spin_lock_irqsave(&tgtport->lock, flags); + needrandom = false; + list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) + if (ran == tmpassoc->association_id) { + needrandom = true; + break; + } + if (!needrandom) { + assoc->association_id = ran; + list_add_tail(&assoc->a_list, &tgtport->assoc_list); + } + spin_unlock_irqrestore(&tgtport->lock, flags); + } + + return assoc; + +out_ida_put: + ida_simple_remove(&tgtport->assoc_cnt, idx); +out_free_assoc: + kfree(assoc); + return NULL; +} + +static void +nvmet_fc_target_assoc_free(struct kref *ref) +{ + struct nvmet_fc_tgt_assoc *assoc = + container_of(ref, struct nvmet_fc_tgt_assoc, ref); + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + list_del(&assoc->a_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id); + kfree(assoc); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc) +{ + kref_put(&assoc->ref, nvmet_fc_target_assoc_free); +} + +static int +nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc) +{ + return kref_get_unless_zero(&assoc->ref); +} + +static void +nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; + struct nvmet_fc_tgt_queue *queue; + unsigned long flags; + int i; + + spin_lock_irqsave(&tgtport->lock, flags); + for (i = NVMET_NR_QUEUES - 1; i >= 0; i--) { + queue = assoc->queues[i]; + if (queue) { + if (!nvmet_fc_tgt_q_get(queue)) + continue; + spin_unlock_irqrestore(&tgtport->lock, flags); + nvmet_fc_delete_target_queue(queue); + nvmet_fc_tgt_q_put(queue); + spin_lock_irqsave(&tgtport->lock, flags); + } + } + spin_unlock_irqrestore(&tgtport->lock, flags); + + nvmet_fc_tgt_a_put(assoc); +} + +static struct nvmet_fc_tgt_assoc * +nvmet_fc_find_target_assoc(struct nvmet_fc_tgtport *tgtport, + u64 association_id) +{ + struct nvmet_fc_tgt_assoc *assoc; + struct nvmet_fc_tgt_assoc *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry(assoc, &tgtport->assoc_list, a_list) { + if (association_id == assoc->association_id) { + ret = assoc; + nvmet_fc_tgt_a_get(assoc); + break; + } + } + spin_unlock_irqrestore(&tgtport->lock, flags); + + return ret; +} + + +/** + * nvme_fc_register_targetport - transport entry point called by an + * LLDD to register the existence of a local + * NVME subystem FC port. + * @pinfo: pointer to information about the port to be registered + * @template: LLDD entrypoints and operational parameters for the port + * @dev: physical hardware device node port corresponds to. Will be + * used for DMA mappings + * @portptr: pointer to a local port pointer. Upon success, the routine + * will allocate a nvme_fc_local_port structure and place its + * address in the local port pointer. Upon failure, local port + * pointer will be set to NULL. + * + * Returns: + * a completion status. Must be 0 upon success; a negative errno + * (ex: -ENXIO) upon failure. + */ +int +nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, + struct nvmet_fc_target_template *template, + struct device *dev, + struct nvmet_fc_target_port **portptr) +{ + struct nvmet_fc_tgtport *newrec; + unsigned long flags; + int ret, idx; + + if (!template->xmt_ls_rsp || !template->fcp_op || + !template->targetport_delete || + !template->max_hw_queues || !template->max_sgl_segments || + !template->max_dif_sgl_segments || !template->dma_boundary) { + ret = -EINVAL; + goto out_regtgt_failed; + } + + newrec = kzalloc((sizeof(*newrec) + template->target_priv_sz), + GFP_KERNEL); + if (!newrec) { + ret = -ENOMEM; + goto out_regtgt_failed; + } + + idx = ida_simple_get(&nvmet_fc_tgtport_cnt, 0, 0, GFP_KERNEL); + if (idx < 0) { + ret = -ENOSPC; + goto out_fail_kfree; + } + + if (!get_device(dev) && dev) { + ret = -ENODEV; + goto out_ida_put; + } + + newrec->fc_target_port.node_name = pinfo->node_name; + newrec->fc_target_port.port_name = pinfo->port_name; + newrec->fc_target_port.private = &newrec[1]; + newrec->fc_target_port.port_id = pinfo->port_id; + newrec->fc_target_port.port_num = idx; + INIT_LIST_HEAD(&newrec->tgt_list); + newrec->dev = dev; + newrec->ops = template; + spin_lock_init(&newrec->lock); + INIT_LIST_HEAD(&newrec->ls_list); + INIT_LIST_HEAD(&newrec->ls_busylist); + INIT_LIST_HEAD(&newrec->assoc_list); + kref_init(&newrec->ref); + ida_init(&newrec->assoc_cnt); + + ret = nvmet_fc_alloc_ls_iodlist(newrec); + if (ret) { + ret = -ENOMEM; + goto out_free_newrec; + } + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + list_add_tail(&newrec->tgt_list, &nvmet_fc_target_list); + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); + + *portptr = &newrec->fc_target_port; + return 0; + +out_free_newrec: + put_device(dev); +out_ida_put: + ida_simple_remove(&nvmet_fc_tgtport_cnt, idx); +out_fail_kfree: + kfree(newrec); +out_regtgt_failed: + *portptr = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(nvmet_fc_register_targetport); + + +static void +nvmet_fc_free_tgtport(struct kref *ref) +{ + struct nvmet_fc_tgtport *tgtport = + container_of(ref, struct nvmet_fc_tgtport, ref); + struct device *dev = tgtport->dev; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + list_del(&tgtport->tgt_list); + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); + + nvmet_fc_free_ls_iodlist(tgtport); + + /* let the LLDD know we've finished tearing it down */ + tgtport->ops->targetport_delete(&tgtport->fc_target_port); + + ida_simple_remove(&nvmet_fc_tgtport_cnt, + tgtport->fc_target_port.port_num); + + ida_destroy(&tgtport->assoc_cnt); + + kfree(tgtport); + + put_device(dev); +} + +static void +nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport) +{ + kref_put(&tgtport->ref, nvmet_fc_free_tgtport); +} + +static int +nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport) +{ + return kref_get_unless_zero(&tgtport->ref); +} + +static void +__nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_tgt_assoc *assoc, *next; + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry_safe(assoc, next, + &tgtport->assoc_list, a_list) { + if (!nvmet_fc_tgt_a_get(assoc)) + continue; + spin_unlock_irqrestore(&tgtport->lock, flags); + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); + spin_lock_irqsave(&tgtport->lock, flags); + } + spin_unlock_irqrestore(&tgtport->lock, flags); +} + +/* + * nvmet layer has called to terminate an association + */ +static void +nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) +{ + struct nvmet_fc_tgtport *tgtport, *next; + struct nvmet_fc_tgt_assoc *assoc; + struct nvmet_fc_tgt_queue *queue; + unsigned long flags; + bool found_ctrl = false; + + /* this is a bit ugly, but don't want to make locks layered */ + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + list_for_each_entry_safe(tgtport, next, &nvmet_fc_target_list, + tgt_list) { + if (!nvmet_fc_tgtport_get(tgtport)) + continue; + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); + + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry(assoc, &tgtport->assoc_list, a_list) { + queue = assoc->queues[0]; + if (queue && queue->nvme_sq.ctrl == ctrl) { + if (nvmet_fc_tgt_a_get(assoc)) + found_ctrl = true; + break; + } + } + spin_unlock_irqrestore(&tgtport->lock, flags); + + nvmet_fc_tgtport_put(tgtport); + + if (found_ctrl) { + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); + return; + } + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + } + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} + +/** + * nvme_fc_unregister_targetport - transport entry point called by an + * LLDD to deregister/remove a previously + * registered a local NVME subsystem FC port. + * @tgtport: pointer to the (registered) target port that is to be + * deregistered. + * + * Returns: + * a completion status. Must be 0 upon success; a negative errno + * (ex: -ENXIO) upon failure. + */ +int +nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) +{ + struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + + /* terminate any outstanding associations */ + __nvmet_fc_free_assocs(tgtport); + + nvmet_fc_tgtport_put(tgtport); + + return 0; +} +EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport); + + +/* *********************** FC-NVME LS Handling **************************** */ + + +static void +nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, u32 desc_len, u8 rqst_ls_cmd) +{ + struct fcnvme_ls_acc_hdr *acc = buf; + + acc->w0.ls_cmd = ls_cmd; + acc->desc_list_len = desc_len; + acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST); + acc->rqst.desc_len = + fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)); + acc->rqst.w0.ls_cmd = rqst_ls_cmd; +} + +static int +nvmet_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd, + u8 reason, u8 explanation, u8 vendor) +{ + struct fcnvme_ls_rjt *rjt = buf; + + nvmet_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST, + fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)), + ls_cmd); + rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT); + rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt)); + rjt->rjt.reason_code = reason; + rjt->rjt.reason_explanation = explanation; + rjt->rjt.vendor = vendor; + + return sizeof(struct fcnvme_ls_rjt); +} + +/* Validation Error indexes into the string table below */ +enum { + VERR_NO_ERROR = 0, + VERR_CR_ASSOC_LEN = 1, + VERR_CR_ASSOC_RQST_LEN = 2, + VERR_CR_ASSOC_CMD = 3, + VERR_CR_ASSOC_CMD_LEN = 4, + VERR_ERSP_RATIO = 5, + VERR_ASSOC_ALLOC_FAIL = 6, + VERR_QUEUE_ALLOC_FAIL = 7, + VERR_CR_CONN_LEN = 8, + VERR_CR_CONN_RQST_LEN = 9, + VERR_ASSOC_ID = 10, + VERR_ASSOC_ID_LEN = 11, + VERR_NO_ASSOC = 12, + VERR_CONN_ID = 13, + VERR_CONN_ID_LEN = 14, + VERR_NO_CONN = 15, + VERR_CR_CONN_CMD = 16, + VERR_CR_CONN_CMD_LEN = 17, + VERR_DISCONN_LEN = 18, + VERR_DISCONN_RQST_LEN = 19, + VERR_DISCONN_CMD = 20, + VERR_DISCONN_CMD_LEN = 21, + VERR_DISCONN_SCOPE = 22, + VERR_RS_LEN = 23, + VERR_RS_RQST_LEN = 24, + VERR_RS_CMD = 25, + VERR_RS_CMD_LEN = 26, + VERR_RS_RCTL = 27, + VERR_RS_RO = 28, +}; + +static char *validation_errors[] = { + "OK", + "Bad CR_ASSOC Length", + "Bad CR_ASSOC Rqst Length", + "Not CR_ASSOC Cmd", + "Bad CR_ASSOC Cmd Length", + "Bad Ersp Ratio", + "Association Allocation Failed", + "Queue Allocation Failed", + "Bad CR_CONN Length", + "Bad CR_CONN Rqst Length", + "Not Association ID", + "Bad Association ID Length", + "No Association", + "Not Connection ID", + "Bad Connection ID Length", + "No Connection", + "Not CR_CONN Cmd", + "Bad CR_CONN Cmd Length", + "Bad DISCONN Length", + "Bad DISCONN Rqst Length", + "Not DISCONN Cmd", + "Bad DISCONN Cmd Length", + "Bad Disconnect Scope", + "Bad RS Length", + "Bad RS Rqst Length", + "Not RS Cmd", + "Bad RS Cmd Length", + "Bad RS R_CTL", + "Bad RS Relative Offset", +}; + +static void +nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_iod *iod) +{ + struct fcnvme_ls_cr_assoc_rqst *rqst = + (struct fcnvme_ls_cr_assoc_rqst *)iod->rqstbuf; + struct fcnvme_ls_cr_assoc_acc *acc = + (struct fcnvme_ls_cr_assoc_acc *)iod->rspbuf; + struct nvmet_fc_tgt_queue *queue; + int ret = 0; + + memset(acc, 0, sizeof(*acc)); + + if (iod->rqstdatalen < sizeof(struct fcnvme_ls_cr_assoc_rqst)) + ret = VERR_CR_ASSOC_LEN; + else if (rqst->desc_list_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_ls_cr_assoc_rqst))) + ret = VERR_CR_ASSOC_RQST_LEN; + else if (rqst->assoc_cmd.desc_tag != + cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD)) + ret = VERR_CR_ASSOC_CMD; + else if (rqst->assoc_cmd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_cr_assoc_cmd))) + ret = VERR_CR_ASSOC_CMD_LEN; + else if (!rqst->assoc_cmd.ersp_ratio || + (be16_to_cpu(rqst->assoc_cmd.ersp_ratio) >= + be16_to_cpu(rqst->assoc_cmd.sqsize))) + ret = VERR_ERSP_RATIO; + + else { + /* new association w/ admin queue */ + iod->assoc = nvmet_fc_alloc_target_assoc(tgtport); + if (!iod->assoc) + ret = VERR_ASSOC_ALLOC_FAIL; + else { + queue = nvmet_fc_alloc_target_queue(iod->assoc, 0, + be16_to_cpu(rqst->assoc_cmd.sqsize)); + if (!queue) + ret = VERR_QUEUE_ALLOC_FAIL; + } + } + + if (ret) { + dev_err(tgtport->dev, + "Create Association LS failed: %s\n", + validation_errors[ret]); + iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, + NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + ELS_RJT_LOGIC, + ELS_EXPL_NONE, 0); + return; + } + + queue->ersp_ratio = be16_to_cpu(rqst->assoc_cmd.ersp_ratio); + atomic_set(&queue->connected, 1); + queue->sqhd = 0; /* best place to init value */ + + /* format a response */ + + iod->lsreq->rsplen = sizeof(*acc); + + nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + fcnvme_lsdesc_len( + sizeof(struct fcnvme_ls_cr_assoc_acc)), + FCNVME_LS_CREATE_ASSOCIATION); + acc->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); + acc->associd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id)); + acc->associd.association_id = + cpu_to_be64(nvmet_fc_makeconnid(iod->assoc, 0)); + acc->connectid.desc_tag = cpu_to_be32(FCNVME_LSDESC_CONN_ID); + acc->connectid.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_conn_id)); + acc->connectid.connection_id = acc->associd.association_id; +} + +static void +nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_iod *iod) +{ + struct fcnvme_ls_cr_conn_rqst *rqst = + (struct fcnvme_ls_cr_conn_rqst *)iod->rqstbuf; + struct fcnvme_ls_cr_conn_acc *acc = + (struct fcnvme_ls_cr_conn_acc *)iod->rspbuf; + struct nvmet_fc_tgt_queue *queue; + int ret = 0; + + memset(acc, 0, sizeof(*acc)); + + if (iod->rqstdatalen < sizeof(struct fcnvme_ls_cr_conn_rqst)) + ret = VERR_CR_CONN_LEN; + else if (rqst->desc_list_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_ls_cr_conn_rqst))) + ret = VERR_CR_CONN_RQST_LEN; + else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) + ret = VERR_ASSOC_ID; + else if (rqst->associd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id))) + ret = VERR_ASSOC_ID_LEN; + else if (rqst->connect_cmd.desc_tag != + cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD)) + ret = VERR_CR_CONN_CMD; + else if (rqst->connect_cmd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_cr_conn_cmd))) + ret = VERR_CR_CONN_CMD_LEN; + else if (!rqst->connect_cmd.ersp_ratio || + (be16_to_cpu(rqst->connect_cmd.ersp_ratio) >= + be16_to_cpu(rqst->connect_cmd.sqsize))) + ret = VERR_ERSP_RATIO; + + else { + /* new io queue */ + iod->assoc = nvmet_fc_find_target_assoc(tgtport, + be64_to_cpu(rqst->associd.association_id)); + if (!iod->assoc) + ret = VERR_NO_ASSOC; + else { + queue = nvmet_fc_alloc_target_queue(iod->assoc, + be16_to_cpu(rqst->connect_cmd.qid), + be16_to_cpu(rqst->connect_cmd.sqsize)); + if (!queue) + ret = VERR_QUEUE_ALLOC_FAIL; + + /* release get taken in nvmet_fc_find_target_assoc */ + nvmet_fc_tgt_a_put(iod->assoc); + } + } + + if (ret) { + dev_err(tgtport->dev, + "Create Connection LS failed: %s\n", + validation_errors[ret]); + iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, + NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + (ret == VERR_NO_ASSOC) ? + ELS_RJT_PROT : ELS_RJT_LOGIC, + ELS_EXPL_NONE, 0); + return; + } + + queue->ersp_ratio = be16_to_cpu(rqst->connect_cmd.ersp_ratio); + atomic_set(&queue->connected, 1); + queue->sqhd = 0; /* best place to init value */ + + /* format a response */ + + iod->lsreq->rsplen = sizeof(*acc); + + nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)), + FCNVME_LS_CREATE_CONNECTION); + acc->connectid.desc_tag = cpu_to_be32(FCNVME_LSDESC_CONN_ID); + acc->connectid.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_conn_id)); + acc->connectid.connection_id = + cpu_to_be64(nvmet_fc_makeconnid(iod->assoc, + be16_to_cpu(rqst->connect_cmd.qid))); +} + +static void +nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_iod *iod) +{ + struct fcnvme_ls_disconnect_rqst *rqst = + (struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf; + struct fcnvme_ls_disconnect_acc *acc = + (struct fcnvme_ls_disconnect_acc *)iod->rspbuf; + struct nvmet_fc_tgt_queue *queue; + struct nvmet_fc_tgt_assoc *assoc; + int ret = 0; + bool del_assoc = false; + + memset(acc, 0, sizeof(*acc)); + + if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_rqst)) + ret = VERR_DISCONN_LEN; + else if (rqst->desc_list_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_ls_disconnect_rqst))) + ret = VERR_DISCONN_RQST_LEN; + else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) + ret = VERR_ASSOC_ID; + else if (rqst->associd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id))) + ret = VERR_ASSOC_ID_LEN; + else if (rqst->discon_cmd.desc_tag != + cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD)) + ret = VERR_DISCONN_CMD; + else if (rqst->discon_cmd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_disconn_cmd))) + ret = VERR_DISCONN_CMD_LEN; + else if ((rqst->discon_cmd.scope != FCNVME_DISCONN_ASSOCIATION) && + (rqst->discon_cmd.scope != FCNVME_DISCONN_CONNECTION)) + ret = VERR_DISCONN_SCOPE; + else { + /* match an active association */ + assoc = nvmet_fc_find_target_assoc(tgtport, + be64_to_cpu(rqst->associd.association_id)); + iod->assoc = assoc; + if (!assoc) + ret = VERR_NO_ASSOC; + } + + if (ret) { + dev_err(tgtport->dev, + "Disconnect LS failed: %s\n", + validation_errors[ret]); + iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, + NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + (ret == 8) ? ELS_RJT_PROT : ELS_RJT_LOGIC, + ELS_EXPL_NONE, 0); + return; + } + + /* format a response */ + + iod->lsreq->rsplen = sizeof(*acc); + + nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + fcnvme_lsdesc_len( + sizeof(struct fcnvme_ls_disconnect_acc)), + FCNVME_LS_DISCONNECT); + + + if (rqst->discon_cmd.scope == FCNVME_DISCONN_CONNECTION) { + queue = nvmet_fc_find_target_queue(tgtport, + be64_to_cpu(rqst->discon_cmd.id)); + if (queue) { + int qid = queue->qid; + + nvmet_fc_delete_target_queue(queue); + + /* release the get taken by find_target_queue */ + nvmet_fc_tgt_q_put(queue); + + /* tear association down if io queue terminated */ + if (!qid) + del_assoc = true; + } + } + + /* release get taken in nvmet_fc_find_target_assoc */ + nvmet_fc_tgt_a_put(iod->assoc); + + if (del_assoc) + nvmet_fc_delete_target_assoc(iod->assoc); +} + + +/* *********************** NVME Ctrl Routines **************************** */ + + +static void nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req); + +static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops; + +static void +nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq) +{ + struct nvmet_fc_ls_iod *iod = lsreq->nvmet_fc_private; + struct nvmet_fc_tgtport *tgtport = iod->tgtport; + + fc_dma_sync_single_for_cpu(tgtport->dev, iod->rspdma, + NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + nvmet_fc_free_ls_iod(tgtport, iod); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_iod *iod) +{ + int ret; + + fc_dma_sync_single_for_device(tgtport->dev, iod->rspdma, + NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + + ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsreq); + if (ret) + nvmet_fc_xmt_ls_rsp_done(iod->lsreq); +} + +/* + * Actual processing routine for received FC-NVME LS Requests from the LLD + */ +static void +nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_iod *iod) +{ + struct fcnvme_ls_rqst_w0 *w0 = + (struct fcnvme_ls_rqst_w0 *)iod->rqstbuf; + + iod->lsreq->nvmet_fc_private = iod; + iod->lsreq->rspbuf = iod->rspbuf; + iod->lsreq->rspdma = iod->rspdma; + iod->lsreq->done = nvmet_fc_xmt_ls_rsp_done; + /* Be preventative. handlers will later set to valid length */ + iod->lsreq->rsplen = 0; + + iod->assoc = NULL; + + /* + * handlers: + * parse request input, execute the request, and format the + * LS response + */ + switch (w0->ls_cmd) { + case FCNVME_LS_CREATE_ASSOCIATION: + /* Creates Association and initial Admin Queue/Connection */ + nvmet_fc_ls_create_association(tgtport, iod); + break; + case FCNVME_LS_CREATE_CONNECTION: + /* Creates an IO Queue/Connection */ + nvmet_fc_ls_create_connection(tgtport, iod); + break; + case FCNVME_LS_DISCONNECT: + /* Terminate a Queue/Connection or the Association */ + nvmet_fc_ls_disconnect(tgtport, iod); + break; + default: + iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf, + NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, + ELS_RJT_INVAL, ELS_EXPL_NONE, 0); + } + + nvmet_fc_xmt_ls_rsp(tgtport, iod); +} + +/* + * Actual processing routine for received FC-NVME LS Requests from the LLD + */ +static void +nvmet_fc_handle_ls_rqst_work(struct work_struct *work) +{ + struct nvmet_fc_ls_iod *iod = + container_of(work, struct nvmet_fc_ls_iod, work); + struct nvmet_fc_tgtport *tgtport = iod->tgtport; + + nvmet_fc_handle_ls_rqst(tgtport, iod); +} + + +/** + * nvmet_fc_rcv_ls_req - transport entry point called by an LLDD + * upon the reception of a NVME LS request. + * + * The nvmet-fc layer will copy payload to an internal structure for + * processing. As such, upon completion of the routine, the LLDD may + * immediately free/reuse the LS request buffer passed in the call. + * + * If this routine returns error, the LLDD should abort the exchange. + * + * @tgtport: pointer to the (registered) target port the LS was + * received on. + * @lsreq: pointer to a lsreq request structure to be used to reference + * the exchange corresponding to the LS. + * @lsreqbuf: pointer to the buffer containing the LS Request + * @lsreqbuf_len: length, in bytes, of the received LS request + */ +int +nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, + struct nvmefc_tgt_ls_req *lsreq, + void *lsreqbuf, u32 lsreqbuf_len) +{ + struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + struct nvmet_fc_ls_iod *iod; + + if (lsreqbuf_len > NVME_FC_MAX_LS_BUFFER_SIZE) + return -E2BIG; + + if (!nvmet_fc_tgtport_get(tgtport)) + return -ESHUTDOWN; + + iod = nvmet_fc_alloc_ls_iod(tgtport); + if (!iod) { + nvmet_fc_tgtport_put(tgtport); + return -ENOENT; + } + + iod->lsreq = lsreq; + iod->fcpreq = NULL; + memcpy(iod->rqstbuf, lsreqbuf, lsreqbuf_len); + iod->rqstdatalen = lsreqbuf_len; + + schedule_work(&iod->work); + + return 0; +} +EXPORT_SYMBOL_GPL(nvmet_fc_rcv_ls_req); + + +/* + * ********************** + * Start of FCP handling + * ********************** + */ + +static int +nvmet_fc_alloc_tgt_pgs(struct nvmet_fc_fcp_iod *fod) +{ + struct scatterlist *sg; + struct page *page; + unsigned int nent; + u32 page_len, length; + int i = 0; + + length = fod->total_length; + nent = DIV_ROUND_UP(length, PAGE_SIZE); + sg = kmalloc_array(nent, sizeof(struct scatterlist), GFP_KERNEL); + if (!sg) + goto out; + + sg_init_table(sg, nent); + + while (length) { + page_len = min_t(u32, length, PAGE_SIZE); + + page = alloc_page(GFP_KERNEL); + if (!page) + goto out_free_pages; + + sg_set_page(&sg[i], page, page_len, 0); + length -= page_len; + i++; + } + + fod->data_sg = sg; + fod->data_sg_cnt = nent; + fod->data_sg_cnt = fc_dma_map_sg(fod->tgtport->dev, sg, nent, + ((fod->io_dir == NVMET_FCP_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE)); + /* note: write from initiator perspective */ + + return 0; + +out_free_pages: + while (i > 0) { + i--; + __free_page(sg_page(&sg[i])); + } + kfree(sg); + fod->data_sg = NULL; + fod->data_sg_cnt = 0; +out: + return NVME_SC_INTERNAL; +} + +static void +nvmet_fc_free_tgt_pgs(struct nvmet_fc_fcp_iod *fod) +{ + struct scatterlist *sg; + int count; + + if (!fod->data_sg || !fod->data_sg_cnt) + return; + + fc_dma_unmap_sg(fod->tgtport->dev, fod->data_sg, fod->data_sg_cnt, + ((fod->io_dir == NVMET_FCP_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE)); + for_each_sg(fod->data_sg, sg, fod->data_sg_cnt, count) + __free_page(sg_page(sg)); + kfree(fod->data_sg); +} + + +static bool +queue_90percent_full(struct nvmet_fc_tgt_queue *q, u32 sqhd) +{ + u32 sqtail, used; + + /* egad, this is ugly. And sqtail is just a best guess */ + sqtail = atomic_read(&q->sqtail) % q->sqsize; + + used = (sqtail < sqhd) ? (sqtail + q->sqsize - sqhd) : (sqtail - sqhd); + return ((used * 10) >= (((u32)(q->sqsize - 1) * 9))); +} + +/* + * Prep RSP payload. + * May be a NVMET_FCOP_RSP or NVMET_FCOP_READDATA_RSP op + */ +static void +nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod) +{ + struct nvme_fc_ersp_iu *ersp = &fod->rspiubuf; + struct nvme_common_command *sqe = &fod->cmdiubuf.sqe.common; + struct nvme_completion *cqe = &ersp->cqe; + u32 *cqewd = (u32 *)cqe; + bool send_ersp = false; + u32 rsn, rspcnt, xfr_length; + + if (fod->fcpreq->op == NVMET_FCOP_READDATA_RSP) + xfr_length = fod->total_length; + else + xfr_length = fod->offset; + + /* + * check to see if we can send a 0's rsp. + * Note: to send a 0's response, the NVME-FC host transport will + * recreate the CQE. The host transport knows: sq id, SQHD (last + * seen in an ersp), and command_id. Thus it will create a + * zero-filled CQE with those known fields filled in. Transport + * must send an ersp for any condition where the cqe won't match + * this. + * + * Here are the FC-NVME mandated cases where we must send an ersp: + * every N responses, where N=ersp_ratio + * force fabric commands to send ersp's (not in FC-NVME but good + * practice) + * normal cmds: any time status is non-zero, or status is zero + * but words 0 or 1 are non-zero. + * the SQ is 90% or more full + * the cmd is a fused command + * transferred data length not equal to cmd iu length + */ + rspcnt = atomic_inc_return(&fod->queue->zrspcnt); + if (!(rspcnt % fod->queue->ersp_ratio) || + sqe->opcode == nvme_fabrics_command || + xfr_length != fod->total_length || + (le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] || + (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) || + queue_90percent_full(fod->queue, cqe->sq_head)) + send_ersp = true; + + /* re-set the fields */ + fod->fcpreq->rspaddr = ersp; + fod->fcpreq->rspdma = fod->rspdma; + + if (!send_ersp) { + memset(ersp, 0, NVME_FC_SIZEOF_ZEROS_RSP); + fod->fcpreq->rsplen = NVME_FC_SIZEOF_ZEROS_RSP; + } else { + ersp->iu_len = cpu_to_be16(sizeof(*ersp)/sizeof(u32)); + rsn = atomic_inc_return(&fod->queue->rsn); + ersp->rsn = cpu_to_be32(rsn); + ersp->xfrd_len = cpu_to_be32(xfr_length); + fod->fcpreq->rsplen = sizeof(*ersp); + } + + fc_dma_sync_single_for_device(tgtport->dev, fod->rspdma, + sizeof(fod->rspiubuf), DMA_TO_DEVICE); +} + +static void nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq); + +static void +nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod) +{ + int ret; + + fod->fcpreq->op = NVMET_FCOP_RSP; + fod->fcpreq->timeout = 0; + + nvmet_fc_prep_fcp_rsp(tgtport, fod); + + ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fod->fcpreq); + if (ret) + nvmet_fc_abort_op(tgtport, fod->fcpreq); +} + +static void +nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod, u8 op) +{ + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct scatterlist *sg, *datasg; + u32 tlen, sg_off; + int ret; + + fcpreq->op = op; + fcpreq->offset = fod->offset; + fcpreq->timeout = NVME_FC_TGTOP_TIMEOUT_SEC; + tlen = min_t(u32, (NVMET_FC_MAX_KB_PER_XFR * 1024), + (fod->total_length - fod->offset)); + tlen = min_t(u32, tlen, NVME_FC_MAX_SEGMENTS * PAGE_SIZE); + tlen = min_t(u32, tlen, fod->tgtport->ops->max_sgl_segments + * PAGE_SIZE); + fcpreq->transfer_length = tlen; + fcpreq->transferred_length = 0; + fcpreq->fcp_error = 0; + fcpreq->rsplen = 0; + + fcpreq->sg_cnt = 0; + + datasg = fod->next_sg; + sg_off = fod->next_sg_offset; + + for (sg = fcpreq->sg ; tlen; sg++) { + *sg = *datasg; + if (sg_off) { + sg->offset += sg_off; + sg->length -= sg_off; + sg->dma_address += sg_off; + sg_off = 0; + } + if (tlen < sg->length) { + sg->length = tlen; + fod->next_sg = datasg; + fod->next_sg_offset += tlen; + } else if (tlen == sg->length) { + fod->next_sg_offset = 0; + fod->next_sg = sg_next(datasg); + } else { + fod->next_sg_offset = 0; + datasg = sg_next(datasg); + } + tlen -= sg->length; + fcpreq->sg_cnt++; + } + + /* + * If the last READDATA request: check if LLDD supports + * combined xfr with response. + */ + if ((op == NVMET_FCOP_READDATA) && + ((fod->offset + fcpreq->transfer_length) == fod->total_length) && + (tgtport->ops->target_features & NVMET_FCTGTFEAT_READDATA_RSP)) { + fcpreq->op = NVMET_FCOP_READDATA_RSP; + nvmet_fc_prep_fcp_rsp(tgtport, fod); + } + + ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fod->fcpreq); + if (ret) { + /* + * should be ok to set w/o lock as its in the thread of + * execution (not an async timer routine) and doesn't + * contend with any clearing action + */ + fod->abort = true; + + if (op == NVMET_FCOP_WRITEDATA) + nvmet_req_complete(&fod->req, + NVME_SC_FC_TRANSPORT_ERROR); + else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { + fcpreq->fcp_error = ret; + fcpreq->transferred_length = 0; + nvmet_fc_xmt_fcp_op_done(fod->fcpreq); + } + } +} + +static void +nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmet_fc_tgtport *tgtport = fod->tgtport; + unsigned long flags; + bool abort; + + spin_lock_irqsave(&fod->flock, flags); + abort = fod->abort; + spin_unlock_irqrestore(&fod->flock, flags); + + /* if in the middle of an io and we need to tear down */ + if (abort && fcpreq->op != NVMET_FCOP_ABORT) { + /* data no longer needed */ + nvmet_fc_free_tgt_pgs(fod); + + if (fcpreq->fcp_error || abort) + nvmet_req_complete(&fod->req, fcpreq->fcp_error); + + return; + } + + switch (fcpreq->op) { + + case NVMET_FCOP_WRITEDATA: + if (abort || fcpreq->fcp_error || + fcpreq->transferred_length != fcpreq->transfer_length) { + nvmet_req_complete(&fod->req, + NVME_SC_FC_TRANSPORT_ERROR); + return; + } + + fod->offset += fcpreq->transferred_length; + if (fod->offset != fod->total_length) { + /* transfer the next chunk */ + nvmet_fc_transfer_fcp_data(tgtport, fod, + NVMET_FCOP_WRITEDATA); + return; + } + + /* data transfer complete, resume with nvmet layer */ + + fod->req.execute(&fod->req); + + break; + + case NVMET_FCOP_READDATA: + case NVMET_FCOP_READDATA_RSP: + if (abort || fcpreq->fcp_error || + fcpreq->transferred_length != fcpreq->transfer_length) { + /* data no longer needed */ + nvmet_fc_free_tgt_pgs(fod); + + nvmet_fc_abort_op(tgtport, fod->fcpreq); + return; + } + + /* success */ + + if (fcpreq->op == NVMET_FCOP_READDATA_RSP) { + /* data no longer needed */ + nvmet_fc_free_tgt_pgs(fod); + fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, + sizeof(fod->rspiubuf), DMA_TO_DEVICE); + nvmet_fc_free_fcp_iod(fod->queue, fod); + return; + } + + fod->offset += fcpreq->transferred_length; + if (fod->offset != fod->total_length) { + /* transfer the next chunk */ + nvmet_fc_transfer_fcp_data(tgtport, fod, + NVMET_FCOP_READDATA); + return; + } + + /* data transfer complete, send response */ + + /* data no longer needed */ + nvmet_fc_free_tgt_pgs(fod); + + nvmet_fc_xmt_fcp_rsp(tgtport, fod); + + break; + + case NVMET_FCOP_RSP: + case NVMET_FCOP_ABORT: + fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, + sizeof(fod->rspiubuf), DMA_TO_DEVICE); + nvmet_fc_free_fcp_iod(fod->queue, fod); + break; + + default: + nvmet_fc_free_tgt_pgs(fod); + nvmet_fc_abort_op(tgtport, fod->fcpreq); + break; + } +} + +/* + * actual completion handler after execution by the nvmet layer + */ +static void +__nvmet_fc_fcp_nvme_cmd_done(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod, int status) +{ + struct nvme_common_command *sqe = &fod->cmdiubuf.sqe.common; + struct nvme_completion *cqe = &fod->rspiubuf.cqe; + unsigned long flags; + bool abort; + + spin_lock_irqsave(&fod->flock, flags); + abort = fod->abort; + spin_unlock_irqrestore(&fod->flock, flags); + + /* if we have a CQE, snoop the last sq_head value */ + if (!status) + fod->queue->sqhd = cqe->sq_head; + + if (abort) { + /* data no longer needed */ + nvmet_fc_free_tgt_pgs(fod); + + nvmet_fc_abort_op(tgtport, fod->fcpreq); + return; + } + + /* if an error handling the cmd post initial parsing */ + if (status) { + /* fudge up a failed CQE status for our transport error */ + memset(cqe, 0, sizeof(*cqe)); + cqe->sq_head = fod->queue->sqhd; /* echo last cqe sqhd */ + cqe->sq_id = cpu_to_le16(fod->queue->qid); + cqe->command_id = sqe->command_id; + cqe->status = cpu_to_le16(status); + } else { + + /* + * try to push the data even if the SQE status is non-zero. + * There may be a status where data still was intended to + * be moved + */ + if ((fod->io_dir == NVMET_FCP_READ) && (fod->data_sg_cnt)) { + /* push the data over before sending rsp */ + nvmet_fc_transfer_fcp_data(tgtport, fod, + NVMET_FCOP_READDATA); + return; + } + + /* writes & no data - fall thru */ + } + + /* data no longer needed */ + nvmet_fc_free_tgt_pgs(fod); + + nvmet_fc_xmt_fcp_rsp(tgtport, fod); +} + + +static void +nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req) +{ + struct nvmet_fc_fcp_iod *fod = nvmet_req_to_fod(nvme_req); + struct nvmet_fc_tgtport *tgtport = fod->tgtport; + + __nvmet_fc_fcp_nvme_cmd_done(tgtport, fod, 0); +} + + +/* + * Actual processing routine for received FC-NVME LS Requests from the LLD + */ +void +nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod) +{ + struct nvme_fc_cmd_iu *cmdiu = &fod->cmdiubuf; + int ret; + + /* + * Fused commands are currently not supported in the linux + * implementation. + * + * As such, the implementation of the FC transport does not + * look at the fused commands and order delivery to the upper + * layer until we have both based on csn. + */ + + fod->fcpreq->done = nvmet_fc_xmt_fcp_op_done; + + fod->total_length = be32_to_cpu(cmdiu->data_len); + if (cmdiu->flags & FCNVME_CMD_FLAGS_WRITE) { + fod->io_dir = NVMET_FCP_WRITE; + if (!nvme_is_write(&cmdiu->sqe)) + goto transport_error; + } else if (cmdiu->flags & FCNVME_CMD_FLAGS_READ) { + fod->io_dir = NVMET_FCP_READ; + if (nvme_is_write(&cmdiu->sqe)) + goto transport_error; + } else { + fod->io_dir = NVMET_FCP_NODATA; + if (fod->total_length) + goto transport_error; + } + + fod->req.cmd = &fod->cmdiubuf.sqe; + fod->req.rsp = &fod->rspiubuf.cqe; + fod->req.port = fod->queue->port; + + /* ensure nvmet handlers will set cmd handler callback */ + fod->req.execute = NULL; + + /* clear any response payload */ + memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); + + ret = nvmet_req_init(&fod->req, + &fod->queue->nvme_cq, + &fod->queue->nvme_sq, + &nvmet_fc_tgt_fcp_ops); + if (!ret) { /* bad SQE content */ + nvmet_fc_abort_op(tgtport, fod->fcpreq); + return; + } + + /* keep a running counter of tail position */ + atomic_inc(&fod->queue->sqtail); + + fod->data_sg = NULL; + fod->data_sg_cnt = 0; + if (fod->total_length) { + ret = nvmet_fc_alloc_tgt_pgs(fod); + if (ret) { + nvmet_req_complete(&fod->req, ret); + return; + } + } + fod->req.sg = fod->data_sg; + fod->req.sg_cnt = fod->data_sg_cnt; + fod->offset = 0; + fod->next_sg = fod->data_sg; + fod->next_sg_offset = 0; + + if (fod->io_dir == NVMET_FCP_WRITE) { + /* pull the data over before invoking nvmet layer */ + nvmet_fc_transfer_fcp_data(tgtport, fod, NVMET_FCOP_WRITEDATA); + return; + } + + /* + * Reads or no data: + * + * can invoke the nvmet_layer now. If read data, cmd completion will + * push the data + */ + + fod->req.execute(&fod->req); + + return; + +transport_error: + nvmet_fc_abort_op(tgtport, fod->fcpreq); +} + +/* + * Actual processing routine for received FC-NVME LS Requests from the LLD + */ +static void +nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) +{ + struct nvmet_fc_fcp_iod *fod = + container_of(work, struct nvmet_fc_fcp_iod, work); + struct nvmet_fc_tgtport *tgtport = fod->tgtport; + + nvmet_fc_handle_fcp_rqst(tgtport, fod); +} + +/** + * nvmet_fc_rcv_fcp_req - transport entry point called by an LLDD + * upon the reception of a NVME FCP CMD IU. + * + * Pass a FC-NVME FCP CMD IU received from the FC link to the nvmet-fc + * layer for processing. + * + * The nvmet-fc layer will copy cmd payload to an internal structure for + * processing. As such, upon completion of the routine, the LLDD may + * immediately free/reuse the CMD IU buffer passed in the call. + * + * If this routine returns error, the lldd should abort the exchange. + * + * @target_port: pointer to the (registered) target port the FCP CMD IU + * was receive on. + * @fcpreq: pointer to a fcpreq request structure to be used to reference + * the exchange corresponding to the FCP Exchange. + * @cmdiubuf: pointer to the buffer containing the FCP CMD IU + * @cmdiubuf_len: length, in bytes, of the received FCP CMD IU + */ +int +nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, + struct nvmefc_tgt_fcp_req *fcpreq, + void *cmdiubuf, u32 cmdiubuf_len) +{ + struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + struct nvme_fc_cmd_iu *cmdiu = cmdiubuf; + struct nvmet_fc_tgt_queue *queue; + struct nvmet_fc_fcp_iod *fod; + + /* validate iu, so the connection id can be used to find the queue */ + if ((cmdiubuf_len != sizeof(*cmdiu)) || + (cmdiu->scsi_id != NVME_CMD_SCSI_ID) || + (cmdiu->fc_id != NVME_CMD_FC_ID) || + (be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4))) + return -EIO; + + + queue = nvmet_fc_find_target_queue(tgtport, + be64_to_cpu(cmdiu->connection_id)); + if (!queue) + return -ENOTCONN; + + /* + * note: reference taken by find_target_queue + * After successful fod allocation, the fod will inherit the + * ownership of that reference and will remove the reference + * when the fod is freed. + */ + + fod = nvmet_fc_alloc_fcp_iod(queue); + if (!fod) { + /* release the queue lookup reference */ + nvmet_fc_tgt_q_put(queue); + return -ENOENT; + } + + fcpreq->nvmet_fc_private = fod; + fod->fcpreq = fcpreq; + /* + * put all admin cmds on hw queue id 0. All io commands go to + * the respective hw queue based on a modulo basis + */ + fcpreq->hwqid = queue->qid ? + ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; + memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); + + queue_work_on(queue->cpu, queue->work_q, &fod->work); + + return 0; +} +EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req); + +enum { + FCT_TRADDR_ERR = 0, + FCT_TRADDR_WWNN = 1 << 0, + FCT_TRADDR_WWPN = 1 << 1, +}; + +struct nvmet_fc_traddr { + u64 nn; + u64 pn; +}; + +static const match_table_t traddr_opt_tokens = { + { FCT_TRADDR_WWNN, "nn-%s" }, + { FCT_TRADDR_WWPN, "pn-%s" }, + { FCT_TRADDR_ERR, NULL } +}; + +static int +nvmet_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf) +{ + substring_t args[MAX_OPT_ARGS]; + char *options, *o, *p; + int token, ret = 0; + u64 token64; + + options = o = kstrdup(buf, GFP_KERNEL); + if (!options) + return -ENOMEM; + + while ((p = strsep(&o, ",\n")) != NULL) { + if (!*p) + continue; + + token = match_token(p, traddr_opt_tokens, args); + switch (token) { + case FCT_TRADDR_WWNN: + if (match_u64(args, &token64)) { + ret = -EINVAL; + goto out; + } + traddr->nn = token64; + break; + case FCT_TRADDR_WWPN: + if (match_u64(args, &token64)) { + ret = -EINVAL; + goto out; + } + traddr->pn = token64; + break; + default: + pr_warn("unknown traddr token or missing value '%s'\n", + p); + ret = -EINVAL; + goto out; + } + } + +out: + kfree(options); + return ret; +} + +static int +nvmet_fc_add_port(struct nvmet_port *port) +{ + struct nvmet_fc_tgtport *tgtport; + struct nvmet_fc_traddr traddr = { 0L, 0L }; + unsigned long flags; + int ret; + + /* validate the address info */ + if ((port->disc_addr.trtype != NVMF_TRTYPE_FC) || + (port->disc_addr.adrfam != NVMF_ADDR_FAMILY_FC)) + return -EINVAL; + + /* map the traddr address info to a target port */ + + ret = nvmet_fc_parse_traddr(&traddr, port->disc_addr.traddr); + if (ret) + return ret; + + ret = -ENXIO; + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) { + if ((tgtport->fc_target_port.node_name == traddr.nn) && + (tgtport->fc_target_port.port_name == traddr.pn)) { + /* a FC port can only be 1 nvmet port id */ + if (!tgtport->port) { + tgtport->port = port; + port->priv = tgtport; + ret = 0; + } else + ret = -EALREADY; + break; + } + } + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); + return ret; +} + +static void +nvmet_fc_remove_port(struct nvmet_port *port) +{ + struct nvmet_fc_tgtport *tgtport = port->priv; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + if (tgtport->port == port) { + nvmet_fc_tgtport_put(tgtport); + tgtport->port = NULL; + } + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} + +static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { + .owner = THIS_MODULE, + .type = NVMF_TRTYPE_FC, + .msdbd = 1, + .add_port = nvmet_fc_add_port, + .remove_port = nvmet_fc_remove_port, + .queue_response = nvmet_fc_fcp_nvme_cmd_done, + .delete_ctrl = nvmet_fc_delete_ctrl, +}; + +static int __init nvmet_fc_init_module(void) +{ + return nvmet_register_transport(&nvmet_fc_tgt_fcp_ops); +} + +static void __exit nvmet_fc_exit_module(void) +{ + /* sanity check - all lports should be removed */ + if (!list_empty(&nvmet_fc_target_list)) + pr_warn("%s: targetport list not empty\n", __func__); + + nvmet_unregister_transport(&nvmet_fc_tgt_fcp_ops); + + ida_destroy(&nvmet_fc_tgtport_cnt); +} + +module_init(nvmet_fc_init_module); +module_exit(nvmet_fc_exit_module); + +MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c new file mode 100644 index 000000000000..bcb8ebeb01c5 --- /dev/null +++ b/drivers/nvme/target/fcloop.c @@ -0,0 +1,1148 @@ +/* + * Copyright (c) 2016 Avago Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful. + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, + * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO + * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. + * See the GNU General Public License for more details, a copy of which + * can be found in the file COPYING included with this package + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/parser.h> +#include <uapi/scsi/fc/fc_fs.h> + +#include "../host/nvme.h" +#include "../target/nvmet.h" +#include <linux/nvme-fc-driver.h> +#include <linux/nvme-fc.h> + + +enum { + NVMF_OPT_ERR = 0, + NVMF_OPT_WWNN = 1 << 0, + NVMF_OPT_WWPN = 1 << 1, + NVMF_OPT_ROLES = 1 << 2, + NVMF_OPT_FCADDR = 1 << 3, + NVMF_OPT_LPWWNN = 1 << 4, + NVMF_OPT_LPWWPN = 1 << 5, +}; + +struct fcloop_ctrl_options { + int mask; + u64 wwnn; + u64 wwpn; + u32 roles; + u32 fcaddr; + u64 lpwwnn; + u64 lpwwpn; +}; + +static const match_table_t opt_tokens = { + { NVMF_OPT_WWNN, "wwnn=%s" }, + { NVMF_OPT_WWPN, "wwpn=%s" }, + { NVMF_OPT_ROLES, "roles=%d" }, + { NVMF_OPT_FCADDR, "fcaddr=%x" }, + { NVMF_OPT_LPWWNN, "lpwwnn=%s" }, + { NVMF_OPT_LPWWPN, "lpwwpn=%s" }, + { NVMF_OPT_ERR, NULL } +}; + +static int +fcloop_parse_options(struct fcloop_ctrl_options *opts, + const char *buf) +{ + substring_t args[MAX_OPT_ARGS]; + char *options, *o, *p; + int token, ret = 0; + u64 token64; + + options = o = kstrdup(buf, GFP_KERNEL); + if (!options) + return -ENOMEM; + + while ((p = strsep(&o, ",\n")) != NULL) { + if (!*p) + continue; + + token = match_token(p, opt_tokens, args); + opts->mask |= token; + switch (token) { + case NVMF_OPT_WWNN: + if (match_u64(args, &token64)) { + ret = -EINVAL; + goto out_free_options; + } + opts->wwnn = token64; + break; + case NVMF_OPT_WWPN: + if (match_u64(args, &token64)) { + ret = -EINVAL; + goto out_free_options; + } + opts->wwpn = token64; + break; + case NVMF_OPT_ROLES: + if (match_int(args, &token)) { + ret = -EINVAL; + goto out_free_options; + } + opts->roles = token; + break; + case NVMF_OPT_FCADDR: + if (match_hex(args, &token)) { + ret = -EINVAL; + goto out_free_options; + } + opts->fcaddr = token; + break; + case NVMF_OPT_LPWWNN: + if (match_u64(args, &token64)) { + ret = -EINVAL; + goto out_free_options; + } + opts->lpwwnn = token64; + break; + case NVMF_OPT_LPWWPN: + if (match_u64(args, &token64)) { + ret = -EINVAL; + goto out_free_options; + } + opts->lpwwpn = token64; + break; + default: + pr_warn("unknown parameter or missing value '%s'\n", p); + ret = -EINVAL; + goto out_free_options; + } + } + +out_free_options: + kfree(options); + return ret; +} + + +static int +fcloop_parse_nm_options(struct device *dev, u64 *nname, u64 *pname, + const char *buf) +{ + substring_t args[MAX_OPT_ARGS]; + char *options, *o, *p; + int token, ret = 0; + u64 token64; + + *nname = -1; + *pname = -1; + + options = o = kstrdup(buf, GFP_KERNEL); + if (!options) + return -ENOMEM; + + while ((p = strsep(&o, ",\n")) != NULL) { + if (!*p) + continue; + + token = match_token(p, opt_tokens, args); + switch (token) { + case NVMF_OPT_WWNN: + if (match_u64(args, &token64)) { + ret = -EINVAL; + goto out_free_options; + } + *nname = token64; + break; + case NVMF_OPT_WWPN: + if (match_u64(args, &token64)) { + ret = -EINVAL; + goto out_free_options; + } + *pname = token64; + break; + default: + pr_warn("unknown parameter or missing value '%s'\n", p); + ret = -EINVAL; + goto out_free_options; + } + } + +out_free_options: + kfree(options); + + if (!ret) { + if (*nname == -1) + return -EINVAL; + if (*pname == -1) + return -EINVAL; + } + + return ret; +} + + +#define LPORT_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN) + +#define RPORT_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN | \ + NVMF_OPT_LPWWNN | NVMF_OPT_LPWWPN) + +#define TGTPORT_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN) + +#define ALL_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN | NVMF_OPT_ROLES | \ + NVMF_OPT_FCADDR | NVMF_OPT_LPWWNN | NVMF_OPT_LPWWPN) + + +static DEFINE_SPINLOCK(fcloop_lock); +static LIST_HEAD(fcloop_lports); +static LIST_HEAD(fcloop_nports); + +struct fcloop_lport { + struct nvme_fc_local_port *localport; + struct list_head lport_list; + struct completion unreg_done; +}; + +struct fcloop_rport { + struct nvme_fc_remote_port *remoteport; + struct nvmet_fc_target_port *targetport; + struct fcloop_nport *nport; + struct fcloop_lport *lport; +}; + +struct fcloop_tport { + struct nvmet_fc_target_port *targetport; + struct nvme_fc_remote_port *remoteport; + struct fcloop_nport *nport; + struct fcloop_lport *lport; +}; + +struct fcloop_nport { + struct fcloop_rport *rport; + struct fcloop_tport *tport; + struct fcloop_lport *lport; + struct list_head nport_list; + struct kref ref; + struct completion rport_unreg_done; + struct completion tport_unreg_done; + u64 node_name; + u64 port_name; + u32 port_role; + u32 port_id; +}; + +struct fcloop_lsreq { + struct fcloop_tport *tport; + struct nvmefc_ls_req *lsreq; + struct work_struct work; + struct nvmefc_tgt_ls_req tgt_ls_req; + int status; +}; + +struct fcloop_fcpreq { + struct fcloop_tport *tport; + struct nvmefc_fcp_req *fcpreq; + u16 status; + struct work_struct work; + struct nvmefc_tgt_fcp_req tgt_fcp_req; +}; + + +static inline struct fcloop_lsreq * +tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq) +{ + return container_of(tgt_lsreq, struct fcloop_lsreq, tgt_ls_req); +} + +static inline struct fcloop_fcpreq * +tgt_fcp_req_to_fcpreq(struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + return container_of(tgt_fcpreq, struct fcloop_fcpreq, tgt_fcp_req); +} + + +static int +fcloop_create_queue(struct nvme_fc_local_port *localport, + unsigned int qidx, u16 qsize, + void **handle) +{ + *handle = localport; + return 0; +} + +static void +fcloop_delete_queue(struct nvme_fc_local_port *localport, + unsigned int idx, void *handle) +{ +} + + +/* + * Transmit of LS RSP done (e.g. buffers all set). call back up + * initiator "done" flows. + */ +static void +fcloop_tgt_lsrqst_done_work(struct work_struct *work) +{ + struct fcloop_lsreq *tls_req = + container_of(work, struct fcloop_lsreq, work); + struct fcloop_tport *tport = tls_req->tport; + struct nvmefc_ls_req *lsreq = tls_req->lsreq; + + if (tport->remoteport) + lsreq->done(lsreq, tls_req->status); +} + +static int +fcloop_ls_req(struct nvme_fc_local_port *localport, + struct nvme_fc_remote_port *remoteport, + struct nvmefc_ls_req *lsreq) +{ + struct fcloop_lsreq *tls_req = lsreq->private; + struct fcloop_rport *rport = remoteport->private; + int ret = 0; + + tls_req->lsreq = lsreq; + INIT_WORK(&tls_req->work, fcloop_tgt_lsrqst_done_work); + + if (!rport->targetport) { + tls_req->status = -ECONNREFUSED; + schedule_work(&tls_req->work); + return ret; + } + + tls_req->status = 0; + tls_req->tport = rport->targetport->private; + ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req, + lsreq->rqstaddr, lsreq->rqstlen); + + return ret; +} + +static int +fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, + struct nvmefc_tgt_ls_req *tgt_lsreq) +{ + struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq); + struct nvmefc_ls_req *lsreq = tls_req->lsreq; + + memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf, + ((lsreq->rsplen < tgt_lsreq->rsplen) ? + lsreq->rsplen : tgt_lsreq->rsplen)); + tgt_lsreq->done(tgt_lsreq); + + schedule_work(&tls_req->work); + + return 0; +} + +/* + * FCP IO operation done. call back up initiator "done" flows. + */ +static void +fcloop_tgt_fcprqst_done_work(struct work_struct *work) +{ + struct fcloop_fcpreq *tfcp_req = + container_of(work, struct fcloop_fcpreq, work); + struct fcloop_tport *tport = tfcp_req->tport; + struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + + if (tport->remoteport) { + fcpreq->status = tfcp_req->status; + fcpreq->done(fcpreq); + } +} + + +static int +fcloop_fcp_req(struct nvme_fc_local_port *localport, + struct nvme_fc_remote_port *remoteport, + void *hw_queue_handle, + struct nvmefc_fcp_req *fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = fcpreq->private; + struct fcloop_rport *rport = remoteport->private; + int ret = 0; + + INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); + + if (!rport->targetport) { + tfcp_req->status = NVME_SC_FC_TRANSPORT_ERROR; + schedule_work(&tfcp_req->work); + return ret; + } + + tfcp_req->fcpreq = fcpreq; + tfcp_req->tport = rport->targetport->private; + + ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req, + fcpreq->cmdaddr, fcpreq->cmdlen); + + return ret; +} + +static void +fcloop_fcp_copy_data(u8 op, struct scatterlist *data_sg, + struct scatterlist *io_sg, u32 offset, u32 length) +{ + void *data_p, *io_p; + u32 data_len, io_len, tlen; + + io_p = sg_virt(io_sg); + io_len = io_sg->length; + + for ( ; offset; ) { + tlen = min_t(u32, offset, io_len); + offset -= tlen; + io_len -= tlen; + if (!io_len) { + io_sg = sg_next(io_sg); + io_p = sg_virt(io_sg); + io_len = io_sg->length; + } else + io_p += tlen; + } + + data_p = sg_virt(data_sg); + data_len = data_sg->length; + + for ( ; length; ) { + tlen = min_t(u32, io_len, data_len); + tlen = min_t(u32, tlen, length); + + if (op == NVMET_FCOP_WRITEDATA) + memcpy(data_p, io_p, tlen); + else + memcpy(io_p, data_p, tlen); + + length -= tlen; + + io_len -= tlen; + if ((!io_len) && (length)) { + io_sg = sg_next(io_sg); + io_p = sg_virt(io_sg); + io_len = io_sg->length; + } else + io_p += tlen; + + data_len -= tlen; + if ((!data_len) && (length)) { + data_sg = sg_next(data_sg); + data_p = sg_virt(data_sg); + data_len = data_sg->length; + } else + data_p += tlen; + } +} + +static int +fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + u32 rsplen = 0, xfrlen = 0; + int fcp_err = 0; + u8 op = tgt_fcpreq->op; + + switch (op) { + case NVMET_FCOP_WRITEDATA: + xfrlen = tgt_fcpreq->transfer_length; + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, + tgt_fcpreq->offset, xfrlen); + fcpreq->transferred_length += xfrlen; + break; + + case NVMET_FCOP_READDATA: + case NVMET_FCOP_READDATA_RSP: + xfrlen = tgt_fcpreq->transfer_length; + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, + tgt_fcpreq->offset, xfrlen); + fcpreq->transferred_length += xfrlen; + if (op == NVMET_FCOP_READDATA) + break; + + /* Fall-Thru to RSP handling */ + + case NVMET_FCOP_RSP: + rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ? + fcpreq->rsplen : tgt_fcpreq->rsplen); + memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen); + if (rsplen < tgt_fcpreq->rsplen) + fcp_err = -E2BIG; + fcpreq->rcv_rsplen = rsplen; + fcpreq->status = 0; + tfcp_req->status = 0; + break; + + case NVMET_FCOP_ABORT: + tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; + break; + + default: + fcp_err = -EINVAL; + break; + } + + tgt_fcpreq->transferred_length = xfrlen; + tgt_fcpreq->fcp_error = fcp_err; + tgt_fcpreq->done(tgt_fcpreq); + + if ((!fcp_err) && (op == NVMET_FCOP_RSP || + op == NVMET_FCOP_READDATA_RSP || + op == NVMET_FCOP_ABORT)) + schedule_work(&tfcp_req->work); + + return 0; +} + +static void +fcloop_ls_abort(struct nvme_fc_local_port *localport, + struct nvme_fc_remote_port *remoteport, + struct nvmefc_ls_req *lsreq) +{ +} + +static void +fcloop_fcp_abort(struct nvme_fc_local_port *localport, + struct nvme_fc_remote_port *remoteport, + void *hw_queue_handle, + struct nvmefc_fcp_req *fcpreq) +{ +} + +static void +fcloop_localport_delete(struct nvme_fc_local_port *localport) +{ + struct fcloop_lport *lport = localport->private; + + /* release any threads waiting for the unreg to complete */ + complete(&lport->unreg_done); +} + +static void +fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport) +{ + struct fcloop_rport *rport = remoteport->private; + + /* release any threads waiting for the unreg to complete */ + complete(&rport->nport->rport_unreg_done); +} + +static void +fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) +{ + struct fcloop_tport *tport = targetport->private; + + /* release any threads waiting for the unreg to complete */ + complete(&tport->nport->tport_unreg_done); +} + +#define FCLOOP_HW_QUEUES 4 +#define FCLOOP_SGL_SEGS 256 +#define FCLOOP_DMABOUND_4G 0xFFFFFFFF + +struct nvme_fc_port_template fctemplate = { + .localport_delete = fcloop_localport_delete, + .remoteport_delete = fcloop_remoteport_delete, + .create_queue = fcloop_create_queue, + .delete_queue = fcloop_delete_queue, + .ls_req = fcloop_ls_req, + .fcp_io = fcloop_fcp_req, + .ls_abort = fcloop_ls_abort, + .fcp_abort = fcloop_fcp_abort, + .max_hw_queues = FCLOOP_HW_QUEUES, + .max_sgl_segments = FCLOOP_SGL_SEGS, + .max_dif_sgl_segments = FCLOOP_SGL_SEGS, + .dma_boundary = FCLOOP_DMABOUND_4G, + /* sizes of additional private data for data structures */ + .local_priv_sz = sizeof(struct fcloop_lport), + .remote_priv_sz = sizeof(struct fcloop_rport), + .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), + .fcprqst_priv_sz = sizeof(struct fcloop_fcpreq), +}; + +struct nvmet_fc_target_template tgttemplate = { + .targetport_delete = fcloop_targetport_delete, + .xmt_ls_rsp = fcloop_xmt_ls_rsp, + .fcp_op = fcloop_fcp_op, + .max_hw_queues = FCLOOP_HW_QUEUES, + .max_sgl_segments = FCLOOP_SGL_SEGS, + .max_dif_sgl_segments = FCLOOP_SGL_SEGS, + .dma_boundary = FCLOOP_DMABOUND_4G, + /* optional features */ + .target_features = NVMET_FCTGTFEAT_READDATA_RSP | + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED, + /* sizes of additional private data for data structures */ + .target_priv_sz = sizeof(struct fcloop_tport), +}; + +static ssize_t +fcloop_create_local_port(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct nvme_fc_port_info pinfo; + struct fcloop_ctrl_options *opts; + struct nvme_fc_local_port *localport; + struct fcloop_lport *lport; + int ret; + + opts = kzalloc(sizeof(*opts), GFP_KERNEL); + if (!opts) + return -ENOMEM; + + ret = fcloop_parse_options(opts, buf); + if (ret) + goto out_free_opts; + + /* everything there ? */ + if ((opts->mask & LPORT_OPTS) != LPORT_OPTS) { + ret = -EINVAL; + goto out_free_opts; + } + + pinfo.node_name = opts->wwnn; + pinfo.port_name = opts->wwpn; + pinfo.port_role = opts->roles; + pinfo.port_id = opts->fcaddr; + + ret = nvme_fc_register_localport(&pinfo, &fctemplate, NULL, &localport); + if (!ret) { + unsigned long flags; + + /* success */ + lport = localport->private; + lport->localport = localport; + INIT_LIST_HEAD(&lport->lport_list); + + spin_lock_irqsave(&fcloop_lock, flags); + list_add_tail(&lport->lport_list, &fcloop_lports); + spin_unlock_irqrestore(&fcloop_lock, flags); + + /* mark all of the input buffer consumed */ + ret = count; + } + +out_free_opts: + kfree(opts); + return ret ? ret : count; +} + + +static void +__unlink_local_port(struct fcloop_lport *lport) +{ + list_del(&lport->lport_list); +} + +static int +__wait_localport_unreg(struct fcloop_lport *lport) +{ + int ret; + + init_completion(&lport->unreg_done); + + ret = nvme_fc_unregister_localport(lport->localport); + + wait_for_completion(&lport->unreg_done); + + return ret; +} + + +static ssize_t +fcloop_delete_local_port(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fcloop_lport *tlport, *lport = NULL; + u64 nodename, portname; + unsigned long flags; + int ret; + + ret = fcloop_parse_nm_options(dev, &nodename, &portname, buf); + if (ret) + return ret; + + spin_lock_irqsave(&fcloop_lock, flags); + + list_for_each_entry(tlport, &fcloop_lports, lport_list) { + if (tlport->localport->node_name == nodename && + tlport->localport->port_name == portname) { + lport = tlport; + __unlink_local_port(lport); + break; + } + } + spin_unlock_irqrestore(&fcloop_lock, flags); + + if (!lport) + return -ENOENT; + + ret = __wait_localport_unreg(lport); + + return ret ? ret : count; +} + +static void +fcloop_nport_free(struct kref *ref) +{ + struct fcloop_nport *nport = + container_of(ref, struct fcloop_nport, ref); + unsigned long flags; + + spin_lock_irqsave(&fcloop_lock, flags); + list_del(&nport->nport_list); + spin_unlock_irqrestore(&fcloop_lock, flags); + + kfree(nport); +} + +static void +fcloop_nport_put(struct fcloop_nport *nport) +{ + kref_put(&nport->ref, fcloop_nport_free); +} + +static int +fcloop_nport_get(struct fcloop_nport *nport) +{ + return kref_get_unless_zero(&nport->ref); +} + +static struct fcloop_nport * +fcloop_alloc_nport(const char *buf, size_t count, bool remoteport) +{ + struct fcloop_nport *newnport, *nport = NULL; + struct fcloop_lport *tmplport, *lport = NULL; + struct fcloop_ctrl_options *opts; + unsigned long flags; + u32 opts_mask = (remoteport) ? RPORT_OPTS : TGTPORT_OPTS; + int ret; + + opts = kzalloc(sizeof(*opts), GFP_KERNEL); + if (!opts) + return NULL; + + ret = fcloop_parse_options(opts, buf); + if (ret) + goto out_free_opts; + + /* everything there ? */ + if ((opts->mask & opts_mask) != opts_mask) { + ret = -EINVAL; + goto out_free_opts; + } + + newnport = kzalloc(sizeof(*newnport), GFP_KERNEL); + if (!newnport) + goto out_free_opts; + + INIT_LIST_HEAD(&newnport->nport_list); + newnport->node_name = opts->wwnn; + newnport->port_name = opts->wwpn; + if (opts->mask & NVMF_OPT_ROLES) + newnport->port_role = opts->roles; + if (opts->mask & NVMF_OPT_FCADDR) + newnport->port_id = opts->fcaddr; + kref_init(&newnport->ref); + + spin_lock_irqsave(&fcloop_lock, flags); + + list_for_each_entry(tmplport, &fcloop_lports, lport_list) { + if (tmplport->localport->node_name == opts->wwnn && + tmplport->localport->port_name == opts->wwpn) + goto out_invalid_opts; + + if (tmplport->localport->node_name == opts->lpwwnn && + tmplport->localport->port_name == opts->lpwwpn) + lport = tmplport; + } + + if (remoteport) { + if (!lport) + goto out_invalid_opts; + newnport->lport = lport; + } + + list_for_each_entry(nport, &fcloop_nports, nport_list) { + if (nport->node_name == opts->wwnn && + nport->port_name == opts->wwpn) { + if ((remoteport && nport->rport) || + (!remoteport && nport->tport)) { + nport = NULL; + goto out_invalid_opts; + } + + fcloop_nport_get(nport); + + spin_unlock_irqrestore(&fcloop_lock, flags); + + if (remoteport) + nport->lport = lport; + if (opts->mask & NVMF_OPT_ROLES) + nport->port_role = opts->roles; + if (opts->mask & NVMF_OPT_FCADDR) + nport->port_id = opts->fcaddr; + goto out_free_newnport; + } + } + + list_add_tail(&newnport->nport_list, &fcloop_nports); + + spin_unlock_irqrestore(&fcloop_lock, flags); + + kfree(opts); + return newnport; + +out_invalid_opts: + spin_unlock_irqrestore(&fcloop_lock, flags); +out_free_newnport: + kfree(newnport); +out_free_opts: + kfree(opts); + return nport; +} + +static ssize_t +fcloop_create_remote_port(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct nvme_fc_remote_port *remoteport; + struct fcloop_nport *nport; + struct fcloop_rport *rport; + struct nvme_fc_port_info pinfo; + int ret; + + nport = fcloop_alloc_nport(buf, count, true); + if (!nport) + return -EIO; + + pinfo.node_name = nport->node_name; + pinfo.port_name = nport->port_name; + pinfo.port_role = nport->port_role; + pinfo.port_id = nport->port_id; + + ret = nvme_fc_register_remoteport(nport->lport->localport, + &pinfo, &remoteport); + if (ret || !remoteport) { + fcloop_nport_put(nport); + return ret; + } + + /* success */ + rport = remoteport->private; + rport->remoteport = remoteport; + rport->targetport = (nport->tport) ? nport->tport->targetport : NULL; + if (nport->tport) { + nport->tport->remoteport = remoteport; + nport->tport->lport = nport->lport; + } + rport->nport = nport; + rport->lport = nport->lport; + nport->rport = rport; + + return ret ? ret : count; +} + + +static struct fcloop_rport * +__unlink_remote_port(struct fcloop_nport *nport) +{ + struct fcloop_rport *rport = nport->rport; + + if (rport && nport->tport) + nport->tport->remoteport = NULL; + nport->rport = NULL; + + return rport; +} + +static int +__wait_remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport) +{ + int ret; + + if (!rport) + return -EALREADY; + + init_completion(&nport->rport_unreg_done); + + ret = nvme_fc_unregister_remoteport(rport->remoteport); + if (ret) + return ret; + + wait_for_completion(&nport->rport_unreg_done); + + fcloop_nport_put(nport); + + return ret; +} + +static ssize_t +fcloop_delete_remote_port(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fcloop_nport *nport = NULL, *tmpport; + static struct fcloop_rport *rport; + u64 nodename, portname; + unsigned long flags; + int ret; + + ret = fcloop_parse_nm_options(dev, &nodename, &portname, buf); + if (ret) + return ret; + + spin_lock_irqsave(&fcloop_lock, flags); + + list_for_each_entry(tmpport, &fcloop_nports, nport_list) { + if (tmpport->node_name == nodename && + tmpport->port_name == portname && tmpport->rport) { + nport = tmpport; + rport = __unlink_remote_port(nport); + break; + } + } + + spin_unlock_irqrestore(&fcloop_lock, flags); + + if (!nport) + return -ENOENT; + + ret = __wait_remoteport_unreg(nport, rport); + + return ret ? ret : count; +} + +static ssize_t +fcloop_create_target_port(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct nvmet_fc_target_port *targetport; + struct fcloop_nport *nport; + struct fcloop_tport *tport; + struct nvmet_fc_port_info tinfo; + int ret; + + nport = fcloop_alloc_nport(buf, count, false); + if (!nport) + return -EIO; + + tinfo.node_name = nport->node_name; + tinfo.port_name = nport->port_name; + tinfo.port_id = nport->port_id; + + ret = nvmet_fc_register_targetport(&tinfo, &tgttemplate, NULL, + &targetport); + if (ret) { + fcloop_nport_put(nport); + return ret; + } + + /* success */ + tport = targetport->private; + tport->targetport = targetport; + tport->remoteport = (nport->rport) ? nport->rport->remoteport : NULL; + if (nport->rport) + nport->rport->targetport = targetport; + tport->nport = nport; + tport->lport = nport->lport; + nport->tport = tport; + + return ret ? ret : count; +} + + +static struct fcloop_tport * +__unlink_target_port(struct fcloop_nport *nport) +{ + struct fcloop_tport *tport = nport->tport; + + if (tport && nport->rport) + nport->rport->targetport = NULL; + nport->tport = NULL; + + return tport; +} + +static int +__wait_targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport) +{ + int ret; + + if (!tport) + return -EALREADY; + + init_completion(&nport->tport_unreg_done); + + ret = nvmet_fc_unregister_targetport(tport->targetport); + if (ret) + return ret; + + wait_for_completion(&nport->tport_unreg_done); + + fcloop_nport_put(nport); + + return ret; +} + +static ssize_t +fcloop_delete_target_port(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fcloop_nport *nport = NULL, *tmpport; + struct fcloop_tport *tport; + u64 nodename, portname; + unsigned long flags; + int ret; + + ret = fcloop_parse_nm_options(dev, &nodename, &portname, buf); + if (ret) + return ret; + + spin_lock_irqsave(&fcloop_lock, flags); + + list_for_each_entry(tmpport, &fcloop_nports, nport_list) { + if (tmpport->node_name == nodename && + tmpport->port_name == portname && tmpport->tport) { + nport = tmpport; + tport = __unlink_target_port(nport); + break; + } + } + + spin_unlock_irqrestore(&fcloop_lock, flags); + + if (!nport) + return -ENOENT; + + ret = __wait_targetport_unreg(nport, tport); + + return ret ? ret : count; +} + + +static DEVICE_ATTR(add_local_port, 0200, NULL, fcloop_create_local_port); +static DEVICE_ATTR(del_local_port, 0200, NULL, fcloop_delete_local_port); +static DEVICE_ATTR(add_remote_port, 0200, NULL, fcloop_create_remote_port); +static DEVICE_ATTR(del_remote_port, 0200, NULL, fcloop_delete_remote_port); +static DEVICE_ATTR(add_target_port, 0200, NULL, fcloop_create_target_port); +static DEVICE_ATTR(del_target_port, 0200, NULL, fcloop_delete_target_port); + +static struct attribute *fcloop_dev_attrs[] = { + &dev_attr_add_local_port.attr, + &dev_attr_del_local_port.attr, + &dev_attr_add_remote_port.attr, + &dev_attr_del_remote_port.attr, + &dev_attr_add_target_port.attr, + &dev_attr_del_target_port.attr, + NULL +}; + +static struct attribute_group fclopp_dev_attrs_group = { + .attrs = fcloop_dev_attrs, +}; + +static const struct attribute_group *fcloop_dev_attr_groups[] = { + &fclopp_dev_attrs_group, + NULL, +}; + +static struct class *fcloop_class; +static struct device *fcloop_device; + + +static int __init fcloop_init(void) +{ + int ret; + + fcloop_class = class_create(THIS_MODULE, "fcloop"); + if (IS_ERR(fcloop_class)) { + pr_err("couldn't register class fcloop\n"); + ret = PTR_ERR(fcloop_class); + return ret; + } + + fcloop_device = device_create_with_groups( + fcloop_class, NULL, MKDEV(0, 0), NULL, + fcloop_dev_attr_groups, "ctl"); + if (IS_ERR(fcloop_device)) { + pr_err("couldn't create ctl device!\n"); + ret = PTR_ERR(fcloop_device); + goto out_destroy_class; + } + + get_device(fcloop_device); + + return 0; + +out_destroy_class: + class_destroy(fcloop_class); + return ret; +} + +static void __exit fcloop_exit(void) +{ + struct fcloop_lport *lport; + struct fcloop_nport *nport; + struct fcloop_tport *tport; + struct fcloop_rport *rport; + unsigned long flags; + int ret; + + spin_lock_irqsave(&fcloop_lock, flags); + + for (;;) { + nport = list_first_entry_or_null(&fcloop_nports, + typeof(*nport), nport_list); + if (!nport) + break; + + tport = __unlink_target_port(nport); + rport = __unlink_remote_port(nport); + + spin_unlock_irqrestore(&fcloop_lock, flags); + + ret = __wait_targetport_unreg(nport, tport); + if (ret) + pr_warn("%s: Failed deleting target port\n", __func__); + + ret = __wait_remoteport_unreg(nport, rport); + if (ret) + pr_warn("%s: Failed deleting remote port\n", __func__); + + spin_lock_irqsave(&fcloop_lock, flags); + } + + for (;;) { + lport = list_first_entry_or_null(&fcloop_lports, + typeof(*lport), lport_list); + if (!lport) + break; + + __unlink_local_port(lport); + + spin_unlock_irqrestore(&fcloop_lock, flags); + + ret = __wait_localport_unreg(lport); + if (ret) + pr_warn("%s: Failed deleting local port\n", __func__); + + spin_lock_irqsave(&fcloop_lock, flags); + } + + spin_unlock_irqrestore(&fcloop_lock, flags); + + put_device(fcloop_device); + + device_destroy(fcloop_class, MKDEV(0, 0)); + class_destroy(fcloop_class); +} + +module_init(fcloop_init); +module_exit(fcloop_exit); + +MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 4a96c2049b7b..4195115c7e54 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -37,9 +37,7 @@ static void nvmet_inline_bio_init(struct nvmet_req *req) { struct bio *bio = &req->inline_bio; - bio_init(bio); - bio->bi_max_vecs = NVMET_MAX_INLINE_BIOVEC; - bio->bi_io_vec = req->inline_bvec; + bio_init(bio, req->inline_bvec, NVMET_MAX_INLINE_BIOVEC); } static void nvmet_execute_rw(struct nvmet_req *req) @@ -58,7 +56,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) if (req->cmd->rw.opcode == nvme_cmd_write) { op = REQ_OP_WRITE; - op_flags = WRITE_ODIRECT; + op_flags = REQ_SYNC | REQ_IDLE; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) op_flags |= REQ_FUA; } else { @@ -96,7 +94,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) cookie = submit_bio(bio); - blk_poll(bdev_get_queue(req->ns->bdev), cookie); + blk_mq_poll(bdev_get_queue(req->ns->bdev), cookie); } static void nvmet_execute_flush(struct nvmet_req *req) @@ -109,7 +107,7 @@ static void nvmet_execute_flush(struct nvmet_req *req) bio->bi_bdev = req->ns->bdev; bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; submit_bio(bio); } @@ -172,6 +170,32 @@ static void nvmet_execute_dsm(struct nvmet_req *req) } } +static void nvmet_execute_write_zeroes(struct nvmet_req *req) +{ + struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; + struct bio *bio = NULL; + u16 status = NVME_SC_SUCCESS; + sector_t sector; + sector_t nr_sector; + + sector = le64_to_cpu(write_zeroes->slba) << + (req->ns->blksize_shift - 9); + nr_sector = (((sector_t)le32_to_cpu(write_zeroes->length)) << + (req->ns->blksize_shift - 9)) + 1; + + if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, + GFP_KERNEL, &bio, true)) + status = NVME_SC_INTERNAL | NVME_SC_DNR; + + if (bio) { + bio->bi_private = req; + bio->bi_end_io = nvmet_bio_done; + submit_bio(bio); + } else { + nvmet_req_complete(req, status); + } +} + int nvmet_parse_io_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; @@ -209,6 +233,9 @@ int nvmet_parse_io_cmd(struct nvmet_req *req) req->data_len = le32_to_cpu(cmd->dsm.nr + 1) * sizeof(struct nvme_dsm_range); return 0; + case nvme_cmd_write_zeroes: + req->execute = nvmet_execute_write_zeroes; + return 0; default: pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index d5df77d686b2..9aaa70071ae5 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -36,6 +36,7 @@ (NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS) struct nvme_loop_iod { + struct nvme_request nvme_req; struct nvme_command cmd; struct nvme_completion rsp; struct nvmet_req req; @@ -112,10 +113,10 @@ static void nvme_loop_complete_rq(struct request *req) blk_mq_end_request(req, error); } -static void nvme_loop_queue_response(struct nvmet_req *nvme_req) +static void nvme_loop_queue_response(struct nvmet_req *req) { struct nvme_loop_iod *iod = - container_of(nvme_req, struct nvme_loop_iod, req); + container_of(req, struct nvme_loop_iod, req); struct nvme_completion *cqe = &iod->rsp; /* @@ -126,13 +127,13 @@ static void nvme_loop_queue_response(struct nvmet_req *nvme_req) */ if (unlikely(nvme_loop_queue_idx(iod->queue) == 0 && cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) { - nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe); + nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe->status, + &cqe->result); } else { - struct request *req = blk_mq_rq_from_pdu(iod); + struct request *rq = blk_mq_rq_from_pdu(iod); - if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) - memcpy(req->special, cqe, sizeof(*cqe)); - blk_mq_complete_request(req, le16_to_cpu(cqe->status) >> 1); + iod->nvme_req.result = cqe->result; + blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); } } @@ -168,7 +169,7 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, int ret; ret = nvme_setup_cmd(ns, req, &iod->cmd); - if (ret) + if (ret != BLK_MQ_RQ_QUEUE_OK) return ret; iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; @@ -178,26 +179,25 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, nvme_cleanup_cmd(req); blk_mq_start_request(req); nvme_loop_queue_response(&iod->req); - return 0; + return BLK_MQ_RQ_QUEUE_OK; } if (blk_rq_bytes(req)) { iod->sg_table.sgl = iod->first_sgl; ret = sg_alloc_table_chained(&iod->sg_table, - req->nr_phys_segments, iod->sg_table.sgl); + blk_rq_nr_phys_segments(req), + iod->sg_table.sgl); if (ret) return BLK_MQ_RQ_QUEUE_BUSY; iod->req.sg = iod->sg_table.sgl; iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); - BUG_ON(iod->req.sg_cnt > req->nr_phys_segments); } - iod->cmd.common.command_id = req->tag; blk_mq_start_request(req); schedule_work(&iod->work); - return 0; + return BLK_MQ_RQ_QUEUE_OK; } static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 76b6eedccaf9..23d5eb1c944f 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -47,6 +47,7 @@ struct nvmet_ns { loff_t size; u8 nguid[16]; + bool enabled; struct nvmet_subsys *subsys; const char *device_path; @@ -61,11 +62,6 @@ static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) return container_of(to_config_group(item), struct nvmet_ns, group); } -static inline bool nvmet_ns_enabled(struct nvmet_ns *ns) -{ - return !list_empty_careful(&ns->dev_link); -} - struct nvmet_cq { u16 qid; u16 size; @@ -238,7 +234,7 @@ static inline void nvmet_set_status(struct nvmet_req *req, u16 status) static inline void nvmet_set_result(struct nvmet_req *req, u32 result) { - req->rsp->result = cpu_to_le32(result); + req->rsp->result.u32 = cpu_to_le32(result); } /* diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 005ef5d17a19..3fbcdb7a583c 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1045,8 +1045,10 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, } ret = nvmet_sq_init(&queue->nvme_sq); - if (ret) + if (ret) { + ret = NVME_RDMA_CM_NO_RSC; goto out_free_queue; + } ret = nvmet_rdma_parse_cm_connect_req(&event->param.conn, queue); if (ret) @@ -1116,6 +1118,7 @@ out_destroy_sq: out_free_queue: kfree(queue); out_reject: + pr_debug("rejecting connect request with status code %d\n", ret); nvmet_rdma_cm_reject(cm_id, ret); return NULL; } @@ -1129,7 +1132,8 @@ static void nvmet_rdma_qp_event(struct ib_event *event, void *priv) rdma_notify(queue->cm_id, event->event); break; default: - pr_err("received unrecognized IB QP event %d\n", event->event); + pr_err("received IB QP event: %s (%d)\n", + ib_event_msg(event->event), event->event); break; } } |