diff options
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/core.c | 94 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 28 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 10 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 223 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 49 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 48 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 223 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 154 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 31 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 21 | ||||
-rw-r--r-- | drivers/nvme/target/discovery.c | 19 | ||||
-rw-r--r-- | drivers/nvme/target/fabrics-cmd.c | 36 | ||||
-rw-r--r-- | drivers/nvme/target/fc.c | 274 | ||||
-rw-r--r-- | drivers/nvme/target/fcloop.c | 197 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd.c | 24 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 92 | ||||
-rw-r--r-- | drivers/nvme/target/nvmet.h | 11 | ||||
-rw-r--r-- | drivers/nvme/target/rdma.c | 47 |
18 files changed, 1061 insertions, 520 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index eeb409c287b8..bf6729b1d8bf 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -49,10 +49,9 @@ unsigned char shutdown_timeout = 5; module_param(shutdown_timeout, byte, 0644); MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); -unsigned int nvme_max_retries = 5; -module_param_named(max_retries, nvme_max_retries, uint, 0644); +static u8 nvme_max_retries = 5; +module_param_named(max_retries, nvme_max_retries, byte, 0644); MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); -EXPORT_SYMBOL_GPL(nvme_max_retries); static int nvme_char_major; module_param(nvme_char_major, int, 0); @@ -67,6 +66,57 @@ static DEFINE_SPINLOCK(dev_list_lock); static struct class *nvme_class; +static int nvme_error_status(struct request *req) +{ + switch (nvme_req(req)->status & 0x7ff) { + case NVME_SC_SUCCESS: + return 0; + case NVME_SC_CAP_EXCEEDED: + return -ENOSPC; + default: + return -EIO; + + /* + * XXX: these errors are a nasty side-band protocol to + * drivers/md/dm-mpath.c:noretry_error() that aren't documented + * anywhere.. + */ + case NVME_SC_CMD_SEQ_ERROR: + return -EILSEQ; + case NVME_SC_ONCS_NOT_SUPPORTED: + return -EOPNOTSUPP; + case NVME_SC_WRITE_FAULT: + case NVME_SC_READ_ERROR: + case NVME_SC_UNWRITTEN_BLOCK: + return -ENODATA; + } +} + +static inline bool nvme_req_needs_retry(struct request *req) +{ + if (blk_noretry_request(req)) + return false; + if (nvme_req(req)->status & NVME_SC_DNR) + return false; + if (jiffies - req->start_time >= req->timeout) + return false; + if (nvme_req(req)->retries >= nvme_max_retries) + return false; + return true; +} + +void nvme_complete_rq(struct request *req) +{ + if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) { + nvme_req(req)->retries++; + blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q)); + return; + } + + blk_mq_end_request(req, nvme_error_status(req)); +} +EXPORT_SYMBOL_GPL(nvme_complete_rq); + void nvme_cancel_request(struct request *req, void *data, bool reserved) { int status; @@ -80,7 +130,9 @@ void nvme_cancel_request(struct request *req, void *data, bool reserved) status = NVME_SC_ABORT_REQ; if (blk_queue_dying(req->q)) status |= NVME_SC_DNR; - blk_mq_complete_request(req, status); + nvme_req(req)->status = status; + blk_mq_complete_request(req); + } EXPORT_SYMBOL_GPL(nvme_cancel_request); @@ -205,12 +257,6 @@ fail: return NULL; } -void nvme_requeue_req(struct request *req) -{ - blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q)); -} -EXPORT_SYMBOL_GPL(nvme_requeue_req); - struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid) { @@ -327,6 +373,12 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, { int ret = BLK_MQ_RQ_QUEUE_OK; + if (!(req->rq_flags & RQF_DONTPREP)) { + nvme_req(req)->retries = 0; + nvme_req(req)->flags = 0; + req->rq_flags |= RQF_DONTPREP; + } + switch (req_op(req)) { case REQ_OP_DRV_IN: case REQ_OP_DRV_OUT: @@ -335,6 +387,8 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, case REQ_OP_FLUSH: nvme_setup_flush(ns, cmd); break; + case REQ_OP_WRITE_ZEROES: + /* currently only aliased to deallocate for a few ctrls: */ case REQ_OP_DISCARD: ret = nvme_setup_discard(ns, req, cmd); break; @@ -378,7 +432,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, blk_execute_rq(req->q, NULL, req, at_head); if (result) *result = nvme_req(req)->result; - ret = req->errors; + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else + ret = nvme_req(req)->status; out: blk_mq_free_request(req); return ret; @@ -463,7 +520,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, } submit: blk_execute_rq(req->q, disk, req, 0); - ret = req->errors; + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else + ret = nvme_req(req)->status; if (result) *result = le32_to_cpu(nvme_req(req)->result.u32); if (meta && !ret && !write) { @@ -900,16 +960,14 @@ static void nvme_config_discard(struct nvme_ns *ns) BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES) - ns->queue->limits.discard_zeroes_data = 1; - else - ns->queue->limits.discard_zeroes_data = 0; - ns->queue->limits.discard_alignment = logical_block_size; ns->queue->limits.discard_granularity = logical_block_size; blk_queue_max_discard_sectors(ns->queue, UINT_MAX); blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); + + if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) + blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX); } static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) @@ -2393,7 +2451,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl) mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_freeze_queue_start(ns->queue); + blk_freeze_queue_start(ns->queue); mutex_unlock(&ctrl->namespaces_mutex); } EXPORT_SYMBOL_GPL(nvme_start_freeze); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5b7386f69f4d..990e6fb32a63 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -471,6 +471,16 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) } EXPORT_SYMBOL_GPL(nvmf_connect_io_queue); +bool nvmf_should_reconnect(struct nvme_ctrl *ctrl) +{ + if (ctrl->opts->max_reconnects != -1 && + ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(nvmf_should_reconnect); + /** * nvmf_register_transport() - NVMe Fabrics Library registration function. * @ops: Transport ops instance to be registered to the @@ -533,6 +543,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_QUEUE_SIZE, "queue_size=%d" }, { NVMF_OPT_NR_IO_QUEUES, "nr_io_queues=%d" }, { NVMF_OPT_RECONNECT_DELAY, "reconnect_delay=%d" }, + { NVMF_OPT_CTRL_LOSS_TMO, "ctrl_loss_tmo=%d" }, { NVMF_OPT_KATO, "keep_alive_tmo=%d" }, { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, @@ -546,6 +557,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, char *options, *o, *p; int token, ret = 0; size_t nqnlen = 0; + int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO; /* Set defaults */ opts->queue_size = NVMF_DEF_QUEUE_SIZE; @@ -655,6 +667,16 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->kato = token; break; + case NVMF_OPT_CTRL_LOSS_TMO: + if (match_int(args, &token)) { + ret = -EINVAL; + goto out; + } + + if (token < 0) + pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n"); + ctrl_loss_tmo = token; + break; case NVMF_OPT_HOSTNQN: if (opts->host) { pr_err("hostnqn already user-assigned: %s\n", @@ -710,6 +732,12 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } } + if (ctrl_loss_tmo < 0) + opts->max_reconnects = -1; + else + opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, + opts->reconnect_delay); + if (!opts->host) { kref_get(&nvmf_default_host->ref); opts->host = nvmf_default_host; diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 156018182ce4..f5a9c1fb186f 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -21,6 +21,8 @@ #define NVMF_MAX_QUEUE_SIZE 1024 #define NVMF_DEF_QUEUE_SIZE 128 #define NVMF_DEF_RECONNECT_DELAY 10 +/* default to 600 seconds of reconnect attempts before giving up */ +#define NVMF_DEF_CTRL_LOSS_TMO 600 /* * Define a host as seen by the target. We allocate one at boot, but also @@ -53,6 +55,7 @@ enum { NVMF_OPT_HOSTNQN = 1 << 8, NVMF_OPT_RECONNECT_DELAY = 1 << 9, NVMF_OPT_HOST_TRADDR = 1 << 10, + NVMF_OPT_CTRL_LOSS_TMO = 1 << 11, }; /** @@ -77,6 +80,10 @@ enum { * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN. * @kato: Keep-alive timeout. * @host: Virtual NVMe host, contains the NQN and Host ID. + * @nr_reconnects: number of reconnect attempted since the last ctrl failure + * @max_reconnects: maximum number of allowed reconnect attempts before removing + * the controller, (-1) means reconnect forever, zero means remove + * immediately; */ struct nvmf_ctrl_options { unsigned mask; @@ -91,6 +98,8 @@ struct nvmf_ctrl_options { bool discovery_nqn; unsigned int kato; struct nvmf_host *host; + int nr_reconnects; + int max_reconnects; }; /* @@ -133,5 +142,6 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); +bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index d996ca73d3be..ecc1048de837 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -61,16 +61,23 @@ struct nvme_fc_queue { unsigned long flags; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ +enum nvme_fcop_flags { + FCOP_FLAGS_TERMIO = (1 << 0), + FCOP_FLAGS_RELEASED = (1 << 1), + FCOP_FLAGS_COMPLETE = (1 << 2), +}; + struct nvmefc_ls_req_op { struct nvmefc_ls_req ls_req; - struct nvme_fc_ctrl *ctrl; + struct nvme_fc_rport *rport; struct nvme_fc_queue *queue; struct request *rq; + u32 flags; int ls_error; struct completion ls_done; - struct list_head lsreq_list; /* ctrl->ls_req_list */ + struct list_head lsreq_list; /* rport->ls_req_list */ bool req_queued; }; @@ -120,6 +127,9 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; + struct list_head ls_req_list; + struct device *dev; /* physical device for dma */ + struct nvme_fc_lport *lport; spinlock_t lock; struct kref ref; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ @@ -144,7 +154,6 @@ struct nvme_fc_ctrl { u64 cap; struct list_head ctrl_list; /* rport->ctrl_list */ - struct list_head ls_req_list; struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; @@ -419,9 +428,12 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, INIT_LIST_HEAD(&newrec->endp_list); INIT_LIST_HEAD(&newrec->ctrl_list); + INIT_LIST_HEAD(&newrec->ls_req_list); kref_init(&newrec->ref); spin_lock_init(&newrec->lock); newrec->remoteport.localport = &lport->localport; + newrec->dev = lport->dev; + newrec->lport = lport; newrec->remoteport.private = &newrec[1]; newrec->remoteport.port_role = pinfo->port_role; newrec->remoteport.node_name = pinfo->node_name; @@ -444,7 +456,6 @@ out_kfree_rport: out_reghost_failed: *portptr = NULL; return ret; - } EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); @@ -487,6 +498,30 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport) return kref_get_unless_zero(&rport->ref); } +static int +nvme_fc_abort_lsops(struct nvme_fc_rport *rport) +{ + struct nvmefc_ls_req_op *lsop; + unsigned long flags; + +restart: + spin_lock_irqsave(&rport->lock, flags); + + list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) { + if (!(lsop->flags & FCOP_FLAGS_TERMIO)) { + lsop->flags |= FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&rport->lock, flags); + rport->lport->ops->ls_abort(&rport->lport->localport, + &rport->remoteport, + &lsop->ls_req); + goto restart; + } + } + spin_unlock_irqrestore(&rport->lock, flags); + + return 0; +} + /** * nvme_fc_unregister_remoteport - transport entry point called by an * LLDD to deregister/remove a previously @@ -522,6 +557,8 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) spin_unlock_irqrestore(&rport->lock, flags); + nvme_fc_abort_lsops(rport); + nvme_fc_rport_put(rport); return 0; } @@ -624,16 +661,16 @@ static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); static void -__nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl, - struct nvmefc_ls_req_op *lsop) +__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) { + struct nvme_fc_rport *rport = lsop->rport; struct nvmefc_ls_req *lsreq = &lsop->ls_req; unsigned long flags; - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&rport->lock, flags); if (!lsop->req_queued) { - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); return; } @@ -641,56 +678,71 @@ __nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl, lsop->req_queued = false; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); - fc_dma_unmap_single(ctrl->dev, lsreq->rqstdma, + fc_dma_unmap_single(rport->dev, lsreq->rqstdma, (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); - nvme_fc_ctrl_put(ctrl); + nvme_fc_rport_put(rport); } static int -__nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, +__nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop, void (*done)(struct nvmefc_ls_req *req, int status)) { struct nvmefc_ls_req *lsreq = &lsop->ls_req; unsigned long flags; - int ret; + int ret = 0; - if (!nvme_fc_ctrl_get(ctrl)) + if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return -ECONNREFUSED; + + if (!nvme_fc_rport_get(rport)) return -ESHUTDOWN; lsreq->done = done; - lsop->ctrl = ctrl; + lsop->rport = rport; lsop->req_queued = false; INIT_LIST_HEAD(&lsop->lsreq_list); init_completion(&lsop->ls_done); - lsreq->rqstdma = fc_dma_map_single(ctrl->dev, lsreq->rqstaddr, + lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr, lsreq->rqstlen + lsreq->rsplen, DMA_BIDIRECTIONAL); - if (fc_dma_mapping_error(ctrl->dev, lsreq->rqstdma)) { - nvme_fc_ctrl_put(ctrl); - dev_err(ctrl->dev, - "els request command failed EFAULT.\n"); - return -EFAULT; + if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) { + ret = -EFAULT; + goto out_putrport; } lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&rport->lock, flags); - list_add_tail(&lsop->lsreq_list, &ctrl->ls_req_list); + list_add_tail(&lsop->lsreq_list, &rport->ls_req_list); lsop->req_queued = true; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); - ret = ctrl->lport->ops->ls_req(&ctrl->lport->localport, - &ctrl->rport->remoteport, lsreq); + ret = rport->lport->ops->ls_req(&rport->lport->localport, + &rport->remoteport, lsreq); if (ret) - lsop->ls_error = ret; + goto out_unlink; + + return 0; + +out_unlink: + lsop->ls_error = ret; + spin_lock_irqsave(&rport->lock, flags); + lsop->req_queued = false; + list_del(&lsop->lsreq_list); + spin_unlock_irqrestore(&rport->lock, flags); + fc_dma_unmap_single(rport->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + DMA_BIDIRECTIONAL); +out_putrport: + nvme_fc_rport_put(rport); return ret; } @@ -705,15 +757,15 @@ nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) } static int -nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) +nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop) { struct nvmefc_ls_req *lsreq = &lsop->ls_req; struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; int ret; - ret = __nvme_fc_send_ls_req(ctrl, lsop, nvme_fc_send_ls_req_done); + ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done); - if (!ret) + if (!ret) { /* * No timeout/not interruptible as we need the struct * to exist until the lldd calls us back. Thus mandate @@ -722,14 +774,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) */ wait_for_completion(&lsop->ls_done); - __nvme_fc_finish_ls_req(ctrl, lsop); + __nvme_fc_finish_ls_req(lsop); - if (ret) { - dev_err(ctrl->dev, - "ls request command failed (%d).\n", ret); - return ret; + ret = lsop->ls_error; } + if (ret) + return ret; + /* ACC or RJT payload ? */ if (rjt->w0.ls_cmd == FCNVME_LS_RJT) return -ENXIO; @@ -737,19 +789,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) return 0; } -static void -nvme_fc_send_ls_req_async(struct nvme_fc_ctrl *ctrl, +static int +nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop, void (*done)(struct nvmefc_ls_req *req, int status)) { - int ret; - - ret = __nvme_fc_send_ls_req(ctrl, lsop, done); - /* don't wait for completion */ - if (ret) - done(&lsop->ls_req, ret); + return __nvme_fc_send_ls_req(rport, lsop, done); } /* Validation Error indexes into the string table below */ @@ -839,7 +886,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, lsreq->rsplen = sizeof(*assoc_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - ret = nvme_fc_send_ls_req(ctrl, lsop); + ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) goto out_free_buffer; @@ -848,11 +895,12 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, /* validate the ACC response */ if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) fcret = VERR_LSACC; - if (assoc_acc->hdr.desc_list_len != + else if (assoc_acc->hdr.desc_list_len != fcnvme_lsdesc_len( sizeof(struct fcnvme_ls_cr_assoc_acc))) fcret = VERR_CR_ASSOC_ACC_LEN; - if (assoc_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) + else if (assoc_acc->hdr.rqst.desc_tag != + cpu_to_be32(FCNVME_LSDESC_RQST)) fcret = VERR_LSDESC_RQST; else if (assoc_acc->hdr.rqst.desc_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) @@ -946,7 +994,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, lsreq->rsplen = sizeof(*conn_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - ret = nvme_fc_send_ls_req(ctrl, lsop); + ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) goto out_free_buffer; @@ -955,10 +1003,10 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, /* validate the ACC response */ if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) fcret = VERR_LSACC; - if (conn_acc->hdr.desc_list_len != + else if (conn_acc->hdr.desc_list_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc))) fcret = VERR_CR_CONN_ACC_LEN; - if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) + else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) fcret = VERR_LSDESC_RQST; else if (conn_acc->hdr.rqst.desc_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) @@ -997,14 +1045,8 @@ static void nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) { struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); - struct nvme_fc_ctrl *ctrl = lsop->ctrl; - __nvme_fc_finish_ls_req(ctrl, lsop); - - if (status) - dev_err(ctrl->dev, - "disconnect assoc ls request command failed (%d).\n", - status); + __nvme_fc_finish_ls_req(lsop); /* fc-nvme iniator doesn't care about success or failure of cmd */ @@ -1035,6 +1077,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) struct fcnvme_ls_disconnect_acc *discon_acc; struct nvmefc_ls_req_op *lsop; struct nvmefc_ls_req *lsreq; + int ret; lsop = kzalloc((sizeof(*lsop) + ctrl->lport->ops->lsrqst_priv_sz + @@ -1077,7 +1120,10 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) lsreq->rsplen = sizeof(*discon_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - nvme_fc_send_ls_req_async(ctrl, lsop, nvme_fc_disconnect_assoc_done); + ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, + nvme_fc_disconnect_assoc_done); + if (ret) + kfree(lsop); /* only meaningful part to terminating the association */ ctrl->association_id = 0; @@ -1146,7 +1192,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) struct nvme_fc_ctrl *ctrl = op->ctrl; struct nvme_fc_queue *queue = op->queue; struct nvme_completion *cqe = &op->rsp_iu.cqe; - u16 status; + __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); + union nvme_result result; /* * WARNING: @@ -1181,9 +1228,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) sizeof(op->rsp_iu), DMA_FROM_DEVICE); if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) - status = NVME_SC_ABORT_REQ | NVME_SC_DNR; - else - status = freq->status; + status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); + else if (freq->status) + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); /* * For the linux implementation, if we have an unsuccesful @@ -1211,10 +1258,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) */ if (freq->transferred_length != be32_to_cpu(op->cmd_iu.data_len)) { - status = -EIO; + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } - op->nreq.result.u64 = 0; + result.u64 = 0; break; case sizeof(struct nvme_fc_ersp_iu): @@ -1226,28 +1273,28 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) (freq->rcv_rsplen / 4) || be32_to_cpu(op->rsp_iu.xfrd_len) != freq->transferred_length || + op->rsp_iu.status_code || op->rqno != le16_to_cpu(cqe->command_id))) { - status = -EIO; + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } - op->nreq.result = cqe->result; - status = le16_to_cpu(cqe->status) >> 1; + result = cqe->result; + status = cqe->status; break; default: - status = -EIO; + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } done: if (!queue->qnum && op->rqno >= AEN_CMDID_BASE) { - nvme_complete_async_event(&queue->ctrl->ctrl, status, - &op->nreq.result); + nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); nvme_fc_ctrl_put(ctrl); return; } - blk_mq_complete_request(rq, status); + nvme_end_request(rq, status, result); } static int @@ -1761,7 +1808,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, op->fcp_req.io_dir = io_dir; op->fcp_req.transferred_length = 0; op->fcp_req.rcv_rsplen = 0; - op->fcp_req.status = 0; + op->fcp_req.status = NVME_SC_SUCCESS; op->fcp_req.sqid = cpu_to_le16(queue->qnum); /* @@ -1923,32 +1970,18 @@ nvme_fc_complete_rq(struct request *rq) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; - int error = 0, state; + int state; state = atomic_xchg(&op->state, FCPOP_STATE_IDLE); nvme_cleanup_cmd(rq); - nvme_fc_unmap_data(ctrl, rq, op); - - if (unlikely(rq->errors)) { - if (nvme_req_needs_retry(rq, rq->errors)) { - nvme_requeue_req(rq); - return; - } - - if (blk_rq_is_passthrough(rq)) - error = rq->errors; - else - error = nvme_error_status(rq->errors); - } - + nvme_complete_rq(rq); nvme_fc_ctrl_put(ctrl); - blk_mq_end_request(rq, error); } -static struct blk_mq_ops nvme_fc_mq_ops = { +static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, .complete = nvme_fc_complete_rq, .init_request = nvme_fc_init_request, @@ -1959,7 +1992,7 @@ static struct blk_mq_ops nvme_fc_mq_ops = { .timeout = nvme_fc_timeout, }; -static struct blk_mq_ops nvme_fc_admin_mq_ops = { +static const struct blk_mq_ops nvme_fc_admin_mq_ops = { .queue_rq = nvme_fc_queue_rq, .complete = nvme_fc_complete_rq, .init_request = nvme_fc_init_admin_request, @@ -2314,7 +2347,6 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->ctrl_list); - INIT_LIST_HEAD(&ctrl->ls_req_list); ctrl->lport = lport; ctrl->rport = rport; ctrl->dev = lport->dev; @@ -2546,11 +2578,20 @@ static struct nvmf_transport_ops nvme_fc_transport = { static int __init nvme_fc_init_module(void) { + int ret; + nvme_fc_wq = create_workqueue("nvme_fc_wq"); if (!nvme_fc_wq) return -ENOMEM; - return nvmf_register_transport(&nvme_fc_transport); + ret = nvmf_register_transport(&nvme_fc_transport); + if (ret) + goto err; + + return 0; +err: + destroy_workqueue(nvme_fc_wq); + return ret; } static void __exit nvme_fc_exit_module(void) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 21cac8523bd8..de61a4a03d78 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -241,9 +241,9 @@ static inline void _nvme_nvm_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); - BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128); + BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096); - BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512); + BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); } static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) @@ -324,7 +324,7 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, - sizeof(struct nvme_nvm_addr_format)); + sizeof(struct nvm_addr_format)); ret = init_grps(nvm_id, nvme_nvm_id); out: @@ -484,7 +484,7 @@ static void nvme_nvm_end_io(struct request *rq, int error) struct nvm_rq *rqd = rq->end_io_data; rqd->ppa_status = nvme_req(rq)->result.u64; - rqd->error = error; + rqd->error = nvme_req(rq)->status; nvm_end_io(rqd); kfree(nvme_req(rq)->cmd); @@ -510,12 +510,12 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) } rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; - rq->ioprio = bio_prio(bio); - if (bio_has_data(bio)) - rq->nr_phys_segments = bio_phys_segments(q, bio); - - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; + if (bio) { + blk_init_request_from_bio(rq, bio); + } else { + rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); + rq->__data_len = 0; + } nvme_nvm_rqtocmd(rq, rqd, ns, cmd); @@ -526,21 +526,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) return 0; } -static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd) -{ - struct request_queue *q = dev->q; - struct nvme_ns *ns = q->queuedata; - struct nvme_nvm_command c = {}; - - c.erase.opcode = NVM_OP_ERASE; - c.erase.nsid = cpu_to_le32(ns->ns_id); - c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa); - c.erase.length = cpu_to_le16(rqd->nr_ppas - 1); - c.erase.control = cpu_to_le16(rqd->flags); - - return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0); -} - static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name) { struct nvme_ns *ns = nvmdev->q->queuedata; @@ -576,7 +561,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .set_bb_tbl = nvme_nvm_set_bb_tbl, .submit_io = nvme_nvm_submit_io, - .erase_block = nvme_nvm_erase_block, .create_dma_pool = nvme_nvm_create_dma_pool, .destroy_dma_pool = nvme_nvm_destroy_dma_pool, @@ -611,7 +595,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q, __le64 *metadata = NULL; dma_addr_t metadata_dma; DECLARE_COMPLETION_ONSTACK(wait); - int ret; + int ret = 0; rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0, NVME_QID_ANY); @@ -681,9 +665,12 @@ submit: wait_for_completion_io(&wait); - ret = nvme_error_status(rq->errors); + if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else if (nvme_req(rq)->status & 0x7ff) + ret = -EIO; if (result) - *result = rq->errors & 0x7ff; + *result = nvme_req(rq)->status & 0x7ff; if (status) *status = le64_to_cpu(nvme_req(rq)->result.u64); @@ -766,7 +753,7 @@ static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3); /* cdw11-12 */ c.ph_rw.length = cpu_to_le16(vcmd.nppas); - c.ph_rw.control = cpu_to_le32(vcmd.control); + c.ph_rw.control = cpu_to_le16(vcmd.control); c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13); c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14); c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15); @@ -809,6 +796,8 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) struct request_queue *q = ns->queue; struct nvm_dev *dev; + _nvme_nvm_check_size(); + dev = nvm_alloc_dev(node); if (!dev) return -ENOMEM; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ab2d6ec7eb5c..29c708ca9621 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -21,16 +21,6 @@ #include <linux/lightnvm.h> #include <linux/sed-opal.h> -enum { - /* - * Driver internal status code for commands that were cancelled due - * to timeouts or controller shutdown. The value is negative so - * that it a) doesn't overlap with the unsigned hardware error codes, - * and b) can easily be tested for. - */ - NVME_SC_CANCELLED = -EINTR, -}; - extern unsigned char nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -43,8 +33,6 @@ extern unsigned char shutdown_timeout; #define NVME_DEFAULT_KATO 5 #define NVME_KATO_GRACE 10 -extern unsigned int nvme_max_retries; - enum { NVME_NS_LBA = 0, NVME_NS_LIGHTNVM = 1, @@ -68,10 +56,10 @@ enum nvme_quirks { NVME_QUIRK_IDENTIFY_CNS = (1 << 1), /* - * The controller deterministically returns O's on reads to discarded - * logical blocks. + * The controller deterministically returns O's on reads to + * logical blocks that deallocate was called on. */ - NVME_QUIRK_DISCARD_ZEROES = (1 << 2), + NVME_QUIRK_DEALLOCATE_ZEROES = (1 << 2), /* * The controller needs a delay before starts checking the device @@ -97,6 +85,13 @@ enum nvme_quirks { struct nvme_request { struct nvme_command *cmd; union nvme_result result; + u8 retries; + u8 flags; + u16 status; +}; + +enum { + NVME_REQ_CANCELLED = (1 << 0), }; static inline struct nvme_request *nvme_req(struct request *req) @@ -254,25 +249,17 @@ static inline void nvme_cleanup_cmd(struct request *req) } } -static inline int nvme_error_status(u16 status) +static inline void nvme_end_request(struct request *req, __le16 status, + union nvme_result result) { - switch (status & 0x7ff) { - case NVME_SC_SUCCESS: - return 0; - case NVME_SC_CAP_EXCEEDED: - return -ENOSPC; - default: - return -EIO; - } -} + struct nvme_request *rq = nvme_req(req); -static inline bool nvme_req_needs_retry(struct request *req, u16 status) -{ - return !(status & NVME_SC_DNR || blk_noretry_request(req)) && - (jiffies - req->start_time) < req->timeout && - req->retries < nvme_max_retries; + rq->status = le16_to_cpu(status) >> 1; + rq->result = result; + blk_mq_complete_request(req); } +void nvme_complete_rq(struct request *req); void nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); @@ -307,7 +294,6 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid); -void nvme_requeue_req(struct request *req); int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd); int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5d309535abbd..c8541c3dcd19 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -104,8 +104,22 @@ struct nvme_dev { u32 cmbloc; struct nvme_ctrl ctrl; struct completion ioq_wait; + u32 *dbbuf_dbs; + dma_addr_t dbbuf_dbs_dma_addr; + u32 *dbbuf_eis; + dma_addr_t dbbuf_eis_dma_addr; }; +static inline unsigned int sq_idx(unsigned int qid, u32 stride) +{ + return qid * 2 * stride; +} + +static inline unsigned int cq_idx(unsigned int qid, u32 stride) +{ + return (qid * 2 + 1) * stride; +} + static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) { return container_of(ctrl, struct nvme_dev, ctrl); @@ -134,6 +148,10 @@ struct nvme_queue { u16 qid; u8 cq_phase; u8 cqe_seen; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; }; /* @@ -172,6 +190,112 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); + BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); +} + +static inline unsigned int nvme_dbbuf_size(u32 stride) +{ + return ((num_possible_cpus() + 1) * 8 * stride); +} + +static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) +{ + unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + + if (dev->dbbuf_dbs) + return 0; + + dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size, + &dev->dbbuf_dbs_dma_addr, + GFP_KERNEL); + if (!dev->dbbuf_dbs) + return -ENOMEM; + dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size, + &dev->dbbuf_eis_dma_addr, + GFP_KERNEL); + if (!dev->dbbuf_eis) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; + return -ENOMEM; + } + + return 0; +} + +static void nvme_dbbuf_dma_free(struct nvme_dev *dev) +{ + unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + + if (dev->dbbuf_dbs) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; + } + if (dev->dbbuf_eis) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_eis, dev->dbbuf_eis_dma_addr); + dev->dbbuf_eis = NULL; + } +} + +static void nvme_dbbuf_init(struct nvme_dev *dev, + struct nvme_queue *nvmeq, int qid) +{ + if (!dev->dbbuf_dbs || !qid) + return; + + nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)]; +} + +static void nvme_dbbuf_set(struct nvme_dev *dev) +{ + struct nvme_command c; + + if (!dev->dbbuf_dbs) + return; + + memset(&c, 0, sizeof(c)); + c.dbbuf.opcode = nvme_admin_dbbuf; + c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr); + c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr); + + if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) { + dev_warn(dev->dev, "unable to set dbbuf\n"); + /* Free memory and continue on */ + nvme_dbbuf_dma_free(dev); + } +} + +static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old) +{ + return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old); +} + +/* Update dbbuf and return true if an MMIO is required */ +static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, + volatile u32 *dbbuf_ei) +{ + if (dbbuf_db) { + u16 old_value; + + /* + * Ensure that the queue is written before updating + * the doorbell in memory + */ + wmb(); + + old_value = *dbbuf_db; + *dbbuf_db = value; + + if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) + return false; + } + + return true; } /* @@ -298,7 +422,9 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq, if (++tail == nvmeq->q_depth) tail = 0; - writel(tail, nvmeq->q_db); + if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db, + nvmeq->dbbuf_sq_ei)) + writel(tail, nvmeq->q_db); nvmeq->sq_tail = tail; } @@ -327,10 +453,6 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) iod->nents = 0; iod->length = size; - if (!(rq->rq_flags & RQF_DONTPREP)) { - rq->retries = 0; - rq->rq_flags |= RQF_DONTPREP; - } return BLK_MQ_RQ_QUEUE_OK; } @@ -629,34 +751,12 @@ out_free_cmd: return ret; } -static void nvme_complete_rq(struct request *req) +static void nvme_pci_complete_rq(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_dev *dev = iod->nvmeq->dev; - int error = 0; - - nvme_unmap_data(dev, req); - - if (unlikely(req->errors)) { - if (nvme_req_needs_retry(req, req->errors)) { - req->retries++; - nvme_requeue_req(req); - return; - } - - if (blk_rq_is_passthrough(req)) - error = req->errors; - else - error = nvme_error_status(req->errors); - } - - if (unlikely(iod->aborted)) { - dev_warn(dev->ctrl.device, - "completing aborted command with status: %04x\n", - req->errors); - } - blk_mq_end_request(req, error); + nvme_unmap_data(iod->nvmeq->dev, req); + nvme_complete_rq(req); } /* We read the CQE phase first to check if the rest of the entry is valid */ @@ -706,15 +806,16 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) } req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); - nvme_req(req)->result = cqe.result; - blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); + nvme_end_request(req, cqe.status, cqe.result); } if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) return; if (likely(nvmeq->cq_vector >= 0)) - writel(head, nvmeq->q_db + nvmeq->dev->db_stride); + if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db, + nvmeq->dbbuf_cq_ei)) + writel(head, nvmeq->q_db + nvmeq->dev->db_stride); nvmeq->cq_head = head; nvmeq->cq_phase = phase; @@ -746,10 +847,8 @@ static irqreturn_t nvme_irq_check(int irq, void *data) return IRQ_NONE; } -static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) { - struct nvme_queue *nvmeq = hctx->driver_data; - if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { spin_lock_irq(&nvmeq->q_lock); __nvme_process_cq(nvmeq, &tag); @@ -762,6 +861,13 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) return 0; } +static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + return __nvme_poll(nvmeq, tag); +} + static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx) { struct nvme_dev *dev = to_nvme_dev(ctrl); @@ -813,7 +919,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq) { struct nvme_command c; - int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; + int flags = NVME_QUEUE_PHYS_CONTIG; /* * Note: we (ab)use the fact the the prp fields survive if no data @@ -844,9 +950,9 @@ static void abort_endio(struct request *req, int error) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = iod->nvmeq; - u16 status = req->errors; - dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status); + dev_warn(nvmeq->dev->ctrl.device, + "Abort status: 0x%x", nvme_req(req)->status); atomic_inc(&nvmeq->dev->ctrl.abort_limit); blk_mq_free_request(req); } @@ -860,6 +966,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) struct nvme_command cmd; /* + * Did we miss an interrupt? + */ + if (__nvme_poll(nvmeq, req->tag)) { + dev_warn(dev->ctrl.device, + "I/O %d QID %d timeout, completion polled\n", + req->tag, nvmeq->qid); + return BLK_EH_HANDLED; + } + + /* * Shutdown immediately if controller times out while starting. The * reset work will see the pci device disabled when it gets the forced * cancellation error. All outstanding requests are completed on @@ -870,7 +986,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) "I/O %d QID %d timeout, disable controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); - req->errors = NVME_SC_CANCELLED; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; return BLK_EH_HANDLED; } @@ -890,7 +1006,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * Mark the request as handled, since the inline shutdown * forces all outstanding requests to complete. */ - req->errors = NVME_SC_CANCELLED; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; return BLK_EH_HANDLED; } @@ -1098,6 +1214,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); + nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; spin_unlock_irq(&nvmeq->q_lock); } @@ -1130,18 +1247,18 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) return result; } -static struct blk_mq_ops nvme_mq_admin_ops = { +static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, - .complete = nvme_complete_rq, + .complete = nvme_pci_complete_rq, .init_hctx = nvme_admin_init_hctx, .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_admin_init_request, .timeout = nvme_timeout, }; -static struct blk_mq_ops nvme_mq_ops = { +static const struct blk_mq_ops nvme_mq_ops = { .queue_rq = nvme_queue_rq, - .complete = nvme_complete_rq, + .complete = nvme_pci_complete_rq, .init_hctx = nvme_init_hctx, .init_request = nvme_init_request, .map_queues = nvme_pci_map_queues, @@ -1570,6 +1687,8 @@ static int nvme_dev_add(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->tagset)) return 0; dev->ctrl.tagset = &dev->tagset; + + nvme_dbbuf_set(dev); } else { blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); @@ -1756,6 +1875,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); + nvme_dbbuf_dma_free(dev); put_device(dev->dev); if (dev->tagset.tags) blk_mq_free_tag_set(&dev->tagset); @@ -1823,6 +1943,13 @@ static void nvme_reset_work(struct work_struct *work) dev->ctrl.opal_dev = NULL; } + if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) { + result = nvme_dbbuf_dma_alloc(dev); + if (result) + dev_warn(dev->dev, + "unable to allocate dma for dbbuf\n"); + } + result = nvme_setup_io_queues(dev); if (result) goto out; @@ -2159,13 +2286,13 @@ static const struct pci_error_handlers nvme_err_handler = { static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0953), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a53), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a54), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 16f84eb0b95e..29cf88ac3f61 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -34,7 +34,7 @@ #include "fabrics.h" -#define NVME_RDMA_CONNECT_TIMEOUT_MS 1000 /* 1 second */ +#define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */ #define NVME_RDMA_MAX_SEGMENT_SIZE 0xffffff /* 24-bit SGL field */ @@ -118,7 +118,6 @@ struct nvme_rdma_ctrl { struct nvme_rdma_qe async_event_sqe; - int reconnect_delay; struct delayed_work reconnect_work; struct list_head list; @@ -129,14 +128,8 @@ struct nvme_rdma_ctrl { u64 cap; u32 max_fr_pages; - union { - struct sockaddr addr; - struct sockaddr_in addr_in; - }; - union { - struct sockaddr src_addr; - struct sockaddr_in src_addr_in; - }; + struct sockaddr_storage addr; + struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; }; @@ -569,11 +562,12 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, return PTR_ERR(queue->cm_id); } - queue->cm_error = -ETIMEDOUT; if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) - src_addr = &ctrl->src_addr; + src_addr = (struct sockaddr *)&ctrl->src_addr; - ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr, + queue->cm_error = -ETIMEDOUT; + ret = rdma_resolve_addr(queue->cm_id, src_addr, + (struct sockaddr *)&ctrl->addr, NVME_RDMA_CONNECT_TIMEOUT_MS); if (ret) { dev_info(ctrl->ctrl.device, @@ -712,6 +706,26 @@ free_ctrl: kfree(ctrl); } +static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) +{ + /* If we are resetting/deleting then do nothing */ + if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { + WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || + ctrl->ctrl.state == NVME_CTRL_LIVE); + return; + } + + if (nvmf_should_reconnect(&ctrl->ctrl)) { + dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n", + ctrl->ctrl.opts->reconnect_delay); + queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, + ctrl->ctrl.opts->reconnect_delay * HZ); + } else { + dev_info(ctrl->ctrl.device, "Removing controller...\n"); + queue_work(nvme_rdma_wq, &ctrl->delete_work); + } +} + static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), @@ -719,6 +733,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) bool changed; int ret; + ++ctrl->ctrl.opts->nr_reconnects; + if (ctrl->queue_count > 1) { nvme_rdma_free_io_queues(ctrl); @@ -763,6 +779,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); + ctrl->ctrl.opts->nr_reconnects = 0; if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); @@ -777,13 +794,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) stop_admin_q: blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); requeue: - /* Make sure we are not resetting/deleting */ - if (ctrl->ctrl.state == NVME_CTRL_RECONNECTING) { - dev_info(ctrl->ctrl.device, - "Failed reconnect attempt, requeueing...\n"); - queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, - ctrl->reconnect_delay * HZ); - } + dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", + ctrl->ctrl.opts->nr_reconnects); + nvme_rdma_reconnect_or_remove(ctrl); } static void nvme_rdma_error_recovery_work(struct work_struct *work) @@ -810,11 +823,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); - dev_info(ctrl->ctrl.device, "reconnecting in %d seconds\n", - ctrl->reconnect_delay); - - queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, - ctrl->reconnect_delay * HZ); + nvme_rdma_reconnect_or_remove(ctrl); } static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) @@ -1169,8 +1178,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, wc->ex.invalidate_rkey == req->mr->rkey) req->mr->need_inval = false; - req->req.result = cqe->result; - blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); + nvme_end_request(rq, cqe->status, cqe->result); return ret; } @@ -1407,7 +1415,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved) nvme_rdma_error_recovery(req->queue->ctrl); /* fail with DNR on cmd timeout */ - rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR; + nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; return BLK_EH_HANDLED; } @@ -1509,27 +1517,12 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) static void nvme_rdma_complete_rq(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_rdma_queue *queue = req->queue; - int error = 0; - - nvme_rdma_unmap_data(queue, rq); - if (unlikely(rq->errors)) { - if (nvme_req_needs_retry(rq, rq->errors)) { - nvme_requeue_req(rq); - return; - } - - if (blk_rq_is_passthrough(rq)) - error = rq->errors; - else - error = nvme_error_status(rq->errors); - } - - blk_mq_end_request(rq, error); + nvme_rdma_unmap_data(req->queue, rq); + nvme_complete_rq(rq); } -static struct blk_mq_ops nvme_rdma_mq_ops = { +static const struct blk_mq_ops nvme_rdma_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, .init_request = nvme_rdma_init_request, @@ -1540,7 +1533,7 @@ static struct blk_mq_ops nvme_rdma_mq_ops = { .timeout = nvme_rdma_timeout, }; -static struct blk_mq_ops nvme_rdma_admin_mq_ops = { +static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, .init_request = nvme_rdma_init_admin_request, @@ -1857,27 +1850,13 @@ out_free_io_queues: return ret; } -static int nvme_rdma_parse_ipaddr(struct sockaddr_in *in_addr, char *p) -{ - u8 *addr = (u8 *)&in_addr->sin_addr.s_addr; - size_t buflen = strlen(p); - - /* XXX: handle IPv6 addresses */ - - if (buflen > INET_ADDRSTRLEN) - return -EINVAL; - if (in4_pton(p, buflen, addr, '\0', NULL) == 0) - return -EINVAL; - in_addr->sin_family = AF_INET; - return 0; -} - static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { struct nvme_rdma_ctrl *ctrl; int ret; bool changed; + char *port; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) @@ -1885,40 +1864,33 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - ret = nvme_rdma_parse_ipaddr(&ctrl->addr_in, opts->traddr); + if (opts->mask & NVMF_OPT_TRSVCID) + port = opts->trsvcid; + else + port = __stringify(NVME_RDMA_IP_PORT); + + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, + opts->traddr, port, &ctrl->addr); if (ret) { - pr_err("malformed IP address passed: %s\n", opts->traddr); + pr_err("malformed address passed: %s:%s\n", opts->traddr, port); goto out_free_ctrl; } if (opts->mask & NVMF_OPT_HOST_TRADDR) { - ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in, - opts->host_traddr); + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, + opts->host_traddr, NULL, &ctrl->src_addr); if (ret) { - pr_err("malformed src IP address passed: %s\n", + pr_err("malformed src address passed: %s\n", opts->host_traddr); goto out_free_ctrl; } } - if (opts->mask & NVMF_OPT_TRSVCID) { - u16 port; - - ret = kstrtou16(opts->trsvcid, 0, &port); - if (ret) - goto out_free_ctrl; - - ctrl->addr_in.sin_port = cpu_to_be16(port); - } else { - ctrl->addr_in.sin_port = cpu_to_be16(NVME_RDMA_IP_PORT); - } - ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, 0 /* no quirks, we're perfect! */); if (ret) goto out_free_ctrl; - ctrl->reconnect_delay = opts->reconnect_delay; INIT_DELAYED_WORK(&ctrl->reconnect_work, nvme_rdma_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); @@ -1977,7 +1949,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); kref_get(&ctrl->ctrl.kref); @@ -2013,7 +1985,7 @@ static struct nvmf_transport_ops nvme_rdma_transport = { .name = "rdma", .required_opts = NVMF_OPT_TRADDR, .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | - NVMF_OPT_HOST_TRADDR, + NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO, .create_ctrl = nvme_rdma_create_ctrl, }; @@ -2055,12 +2027,20 @@ static int __init nvme_rdma_init_module(void) return -ENOMEM; ret = ib_register_client(&nvme_rdma_ib_client); - if (ret) { - destroy_workqueue(nvme_rdma_wq); - return ret; - } + if (ret) + goto err_destroy_wq; + + ret = nvmf_register_transport(&nvme_rdma_transport); + if (ret) + goto err_unreg_client; + + return 0; - return nvmf_register_transport(&nvme_rdma_transport); +err_unreg_client: + ib_unregister_client(&nvme_rdma_ib_client); +err_destroy_wq: + destroy_workqueue(nvme_rdma_wq); + return ret; } static void __exit nvme_rdma_cleanup_module(void) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 76450b0c55f1..ff1f97006322 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -121,7 +121,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) } switch (req->cmd->get_log_page.lid) { - case 0x01: + case NVME_LOG_ERROR: /* * We currently never set the More bit in the status field, * so all error log entries are invalid and can be zeroed out. @@ -129,7 +129,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) * mandatory log page. */ break; - case 0x02: + case NVME_LOG_SMART: /* * XXX: fill out actual smart log * @@ -149,7 +149,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) goto err; } break; - case 0x03: + case NVME_LOG_FW_SLOT: /* * We only support a single firmware slot which always is * active, so we can zero out the whole firmware slot log and @@ -480,31 +480,25 @@ static void nvmet_execute_keep_alive(struct nvmet_req *req) nvmet_req_complete(req, 0); } -int nvmet_parse_admin_cmd(struct nvmet_req *req) +u16 nvmet_parse_admin_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; + u16 ret; req->ns = NULL; - if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { - pr_err("nvmet: got admin cmd %d while CC.EN == 0\n", - cmd->common.opcode); - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } - if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got admin cmd %d while CSTS.RDY == 0\n", - cmd->common.opcode); - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } + ret = nvmet_check_ctrl_status(req, cmd); + if (unlikely(ret)) + return ret; switch (cmd->common.opcode) { case nvme_admin_get_log_page: req->data_len = nvmet_get_log_page_len(cmd); switch (cmd->get_log_page.lid) { - case 0x01: - case 0x02: - case 0x03: + case NVME_LOG_ERROR: + case NVME_LOG_SMART: + case NVME_LOG_FW_SLOT: req->execute = nvmet_execute_get_log_page; return 0; } @@ -545,6 +539,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req) return 0; } - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, + req->sq->qid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 798653b329b2..cf90713043da 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -273,8 +273,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns) ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, NULL); if (IS_ERR(ns->bdev)) { - pr_err("nvmet: failed to open block device %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->bdev)); + pr_err("failed to open block device %s: (%ld)\n", + ns->device_path, PTR_ERR(ns->bdev)); ret = PTR_ERR(ns->bdev); ns->bdev = NULL; goto out_unlock; @@ -661,6 +661,23 @@ out: return status; } +u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) +{ + if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { + pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n", + cmd->common.opcode, req->sq->qid); + return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + } + + if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { + pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n", + cmd->common.opcode, req->sq->qid); + req->ns = NULL; + return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + } + return 0; +} + static bool __nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn) { diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index af8aabf05335..1aaf597e81fc 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -159,15 +159,15 @@ out: nvmet_req_complete(req, status); } -int nvmet_parse_discovery_cmd(struct nvmet_req *req) +u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; req->ns = NULL; if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got cmd %d while not ready\n", - cmd->common.opcode); + pr_err("got cmd %d while not ready\n", + cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } @@ -180,8 +180,8 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) req->execute = nvmet_execute_get_disc_log_page; return 0; default: - pr_err("nvmet: unsupported get_log_page lid %d\n", - cmd->get_log_page.lid); + pr_err("unsupported get_log_page lid %d\n", + cmd->get_log_page.lid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } case nvme_admin_identify: @@ -192,17 +192,16 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) nvmet_execute_identify_disc_ctrl; return 0; default: - pr_err("nvmet: unsupported identify cns %d\n", - cmd->identify.cns); + pr_err("unsupported identify cns %d\n", + cmd->identify.cns); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } default: - pr_err("nvmet: unsupported cmd %d\n", - cmd->common.opcode); + pr_err("unsupported cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 8bd022af3df6..3cc17269504b 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -73,7 +73,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) nvmet_req_complete(req, status); } -int nvmet_parse_fabrics_cmd(struct nvmet_req *req) +u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; @@ -122,7 +122,15 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl = NULL; u16 status = 0; - d = kmap(sg_page(req->sg)) + req->sg->offset; + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + status = NVME_SC_INTERNAL; + goto complete; + } + + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); + if (status) + goto out; /* zero out initial completion result, assign values as needed */ req->rsp->result.u32 = 0; @@ -143,7 +151,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) } status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req, - le32_to_cpu(c->kato), &ctrl); + le32_to_cpu(c->kato), &ctrl); if (status) goto out; @@ -158,7 +166,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); out: - kunmap(sg_page(req->sg)); + kfree(d); +complete: nvmet_req_complete(req, status); } @@ -170,7 +179,15 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 status = 0; - d = kmap(sg_page(req->sg)) + req->sg->offset; + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + status = NVME_SC_INTERNAL; + goto complete; + } + + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); + if (status) + goto out; /* zero out initial completion result, assign values as needed */ req->rsp->result.u32 = 0; @@ -183,8 +200,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) } status = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, - le16_to_cpu(d->cntlid), - req, &ctrl); + le16_to_cpu(d->cntlid), + req, &ctrl); if (status) goto out; @@ -205,7 +222,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) pr_info("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); out: - kunmap(sg_page(req->sg)); + kfree(d); +complete: nvmet_req_complete(req, status); return; @@ -214,7 +232,7 @@ out_ctrl_put: goto out; } -int nvmet_parse_connect_cmd(struct nvmet_req *req) +u16 nvmet_parse_connect_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 8f483ee7868c..074bd3743b5f 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -82,10 +82,13 @@ struct nvmet_fc_fcp_iod { enum nvmet_fcp_datadir io_dir; bool active; bool abort; + bool aborted; + bool writedataactive; spinlock_t flock; struct nvmet_req req; struct work_struct work; + struct work_struct done_work; struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_queue *queue; @@ -213,6 +216,7 @@ static DEFINE_IDA(nvmet_fc_tgtport_cnt); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work); +static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work); static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc); static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); @@ -414,9 +418,13 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, for (i = 0; i < queue->sqsize; fod++, i++) { INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work); + INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work); fod->tgtport = tgtport; fod->queue = queue; fod->active = false; + fod->abort = false; + fod->aborted = false; + fod->fcpreq = NULL; list_add_tail(&fod->fcp_list, &queue->fod_list); spin_lock_init(&fod->flock); @@ -463,7 +471,6 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) if (fod) { list_del(&fod->fcp_list); fod->active = true; - fod->abort = false; /* * no queue reference is taken, as it was taken by the * queue lookup just prior to the allocation. The iod @@ -479,17 +486,30 @@ static void nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, struct nvmet_fc_fcp_iod *fod) { + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct nvmet_fc_tgtport *tgtport = fod->tgtport; unsigned long flags; + fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, + sizeof(fod->rspiubuf), DMA_TO_DEVICE); + + fcpreq->nvmet_fc_private = NULL; + spin_lock_irqsave(&queue->qlock, flags); list_add_tail(&fod->fcp_list, &fod->queue->fod_list); fod->active = false; + fod->abort = false; + fod->aborted = false; + fod->writedataactive = false; + fod->fcpreq = NULL; spin_unlock_irqrestore(&queue->qlock, flags); /* * release the reference taken at queue lookup and fod allocation */ nvmet_fc_tgt_q_put(queue); + + tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); } static int @@ -616,32 +636,12 @@ nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue) static void -nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport, - struct nvmefc_tgt_fcp_req *fcpreq) -{ - int ret; - - fcpreq->op = NVMET_FCOP_ABORT; - fcpreq->offset = 0; - fcpreq->timeout = 0; - fcpreq->transfer_length = 0; - fcpreq->transferred_length = 0; - fcpreq->fcp_error = 0; - fcpreq->sg_cnt = 0; - - ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fcpreq); - if (ret) - /* should never reach here !! */ - WARN_ON(1); -} - - -static void nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) { + struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport; struct nvmet_fc_fcp_iod *fod = queue->fod; unsigned long flags; - int i; + int i, writedataactive; bool disconnect; disconnect = atomic_xchg(&queue->connected, 0); @@ -652,7 +652,20 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) if (fod->active) { spin_lock(&fod->flock); fod->abort = true; + writedataactive = fod->writedataactive; spin_unlock(&fod->flock); + /* + * only call lldd abort routine if waiting for + * writedata. other outstanding ops should finish + * on their own. + */ + if (writedataactive) { + spin_lock(&fod->flock); + fod->aborted = true; + spin_unlock(&fod->flock); + tgtport->ops->fcp_abort( + &tgtport->fc_target_port, fod->fcpreq); + } } } spin_unlock_irqrestore(&queue->qlock, flags); @@ -846,7 +859,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, int ret, idx; if (!template->xmt_ls_rsp || !template->fcp_op || - !template->targetport_delete || + !template->fcp_abort || + !template->fcp_req_release || !template->targetport_delete || !template->max_hw_queues || !template->max_sgl_segments || !template->max_dif_sgl_segments || !template->dma_boundary) { ret = -EINVAL; @@ -1189,8 +1203,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, validation_errors[ret]); iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, - ELS_RJT_LOGIC, - ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1281,8 +1295,9 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? - ELS_RJT_PROT : ELS_RJT_LOGIC, - ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_INV_ASSOC : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1369,8 +1384,12 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, validation_errors[ret]); iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, - (ret == 8) ? ELS_RJT_PROT : ELS_RJT_LOGIC, - ELS_EXPL_NONE, 0); + (ret == VERR_NO_ASSOC) ? + FCNVME_RJT_RC_INV_ASSOC : + (ret == VERR_NO_CONN) ? + FCNVME_RJT_RC_INV_CONN : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1479,7 +1498,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, default: iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf, NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, - ELS_RJT_INVAL, ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } nvmet_fc_xmt_ls_rsp(tgtport, iod); @@ -1619,6 +1638,8 @@ nvmet_fc_free_tgt_pgs(struct nvmet_fc_fcp_iod *fod) for_each_sg(fod->data_sg, sg, fod->data_sg_cnt, count) __free_page(sg_page(sg)); kfree(fod->data_sg); + fod->data_sg = NULL; + fod->data_sg_cnt = 0; } @@ -1704,6 +1725,26 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, static void nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq); static void +nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod) +{ + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + + /* data no longer needed */ + nvmet_fc_free_tgt_pgs(fod); + + /* + * if an ABTS was received or we issued the fcp_abort early + * don't call abort routine again. + */ + /* no need to take lock - lock was taken earlier to get here */ + if (!fod->aborted) + tgtport->ops->fcp_abort(&tgtport->fc_target_port, fcpreq); + + nvmet_fc_free_fcp_iod(fod->queue, fod); +} + +static void nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod) { @@ -1716,7 +1757,7 @@ nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport, ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fod->fcpreq); if (ret) - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); } static void @@ -1725,6 +1766,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct scatterlist *sg, *datasg; + unsigned long flags; u32 tlen, sg_off; int ret; @@ -1789,10 +1831,13 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, */ fod->abort = true; - if (op == NVMET_FCOP_WRITEDATA) + if (op == NVMET_FCOP_WRITEDATA) { + spin_lock_irqsave(&fod->flock, flags); + fod->writedataactive = false; + spin_unlock_irqrestore(&fod->flock, flags); nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); - else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { + } else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { fcpreq->fcp_error = ret; fcpreq->transferred_length = 0; nvmet_fc_xmt_fcp_op_done(fod->fcpreq); @@ -1800,32 +1845,54 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, } } +static inline bool +__nvmet_fc_fod_op_abort(struct nvmet_fc_fcp_iod *fod, bool abort) +{ + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct nvmet_fc_tgtport *tgtport = fod->tgtport; + + /* if in the middle of an io and we need to tear down */ + if (abort) { + if (fcpreq->op == NVMET_FCOP_WRITEDATA) { + nvmet_req_complete(&fod->req, + NVME_SC_FC_TRANSPORT_ERROR); + return true; + } + + nvmet_fc_abort_op(tgtport, fod); + return true; + } + + return false; +} + +/* + * actual done handler for FCP operations when completed by the lldd + */ static void -nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) +nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) { - struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct nvmet_fc_tgtport *tgtport = fod->tgtport; unsigned long flags; bool abort; spin_lock_irqsave(&fod->flock, flags); abort = fod->abort; + fod->writedataactive = false; spin_unlock_irqrestore(&fod->flock, flags); - /* if in the middle of an io and we need to tear down */ - if (abort && fcpreq->op != NVMET_FCOP_ABORT) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_req_complete(&fod->req, fcpreq->fcp_error); - return; - } - switch (fcpreq->op) { case NVMET_FCOP_WRITEDATA: + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { + spin_lock(&fod->flock); + fod->abort = true; + spin_unlock(&fod->flock); + nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); return; @@ -1833,6 +1900,10 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) fod->offset += fcpreq->transferred_length; if (fod->offset != fod->total_length) { + spin_lock_irqsave(&fod->flock, flags); + fod->writedataactive = true; + spin_unlock_irqrestore(&fod->flock, flags); + /* transfer the next chunk */ nvmet_fc_transfer_fcp_data(tgtport, fod, NVMET_FCOP_WRITEDATA); @@ -1847,12 +1918,11 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); return; } @@ -1861,8 +1931,6 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) if (fcpreq->op == NVMET_FCOP_READDATA_RSP) { /* data no longer needed */ nvmet_fc_free_tgt_pgs(fod); - fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, - sizeof(fod->rspiubuf), DMA_TO_DEVICE); nvmet_fc_free_fcp_iod(fod->queue, fod); return; } @@ -1885,19 +1953,38 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) break; case NVMET_FCOP_RSP: - case NVMET_FCOP_ABORT: - fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, - sizeof(fod->rspiubuf), DMA_TO_DEVICE); + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; nvmet_fc_free_fcp_iod(fod->queue, fod); break; default: - nvmet_fc_free_tgt_pgs(fod); - nvmet_fc_abort_op(tgtport, fod->fcpreq); break; } } +static void +nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work) +{ + struct nvmet_fc_fcp_iod *fod = + container_of(work, struct nvmet_fc_fcp_iod, done_work); + + nvmet_fc_fod_op_done(fod); +} + +static void +nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmet_fc_tgt_queue *queue = fod->queue; + + if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR) + /* context switch so completion is not in ISR context */ + queue_work_on(queue->cpu, queue->work_q, &fod->done_work); + else + nvmet_fc_fod_op_done(fod); +} + /* * actual completion handler after execution by the nvmet layer */ @@ -1919,10 +2006,7 @@ __nvmet_fc_fcp_nvme_cmd_done(struct nvmet_fc_tgtport *tgtport, fod->queue->sqhd = cqe->sq_head; if (abort) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); return; } @@ -2018,8 +2102,8 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, &fod->queue->nvme_cq, &fod->queue->nvme_sq, &nvmet_fc_tgt_fcp_ops); - if (!ret) { /* bad SQE content */ - nvmet_fc_abort_op(tgtport, fod->fcpreq); + if (!ret) { /* bad SQE content or invalid ctrl state */ + nvmet_fc_abort_op(tgtport, fod); return; } @@ -2059,7 +2143,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, return; transport_error: - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); } /* @@ -2089,7 +2173,7 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) * If this routine returns error, the lldd should abort the exchange. * * @target_port: pointer to the (registered) target port the FCP CMD IU - * was receive on. + * was received on. * @fcpreq: pointer to a fcpreq request structure to be used to reference * the exchange corresponding to the FCP Exchange. * @cmdiubuf: pointer to the buffer containing the FCP CMD IU @@ -2112,7 +2196,6 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, (be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4))) return -EIO; - queue = nvmet_fc_find_target_queue(tgtport, be64_to_cpu(cmdiu->connection_id)); if (!queue) @@ -2142,12 +2225,68 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); - queue_work_on(queue->cpu, queue->work_q, &fod->work); + if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) + queue_work_on(queue->cpu, queue->work_q, &fod->work); + else + nvmet_fc_handle_fcp_rqst(tgtport, fod); return 0; } EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req); +/** + * nvmet_fc_rcv_fcp_abort - transport entry point called by an LLDD + * upon the reception of an ABTS for a FCP command + * + * Notify the transport that an ABTS has been received for a FCP command + * that had been given to the transport via nvmet_fc_rcv_fcp_req(). The + * LLDD believes the command is still being worked on + * (template_ops->fcp_req_release() has not been called). + * + * The transport will wait for any outstanding work (an op to the LLDD, + * which the lldd should complete with error due to the ABTS; or the + * completion from the nvmet layer of the nvme command), then will + * stop processing and call the nvmet_fc_rcv_fcp_req() callback to + * return the i/o context to the LLDD. The LLDD may send the BA_ACC + * to the ABTS either after return from this function (assuming any + * outstanding op work has been terminated) or upon the callback being + * called. + * + * @target_port: pointer to the (registered) target port the FCP CMD IU + * was received on. + * @fcpreq: pointer to the fcpreq request structure that corresponds + * to the exchange that received the ABTS. + */ +void +nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *target_port, + struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmet_fc_tgt_queue *queue; + unsigned long flags; + + if (!fod || fod->fcpreq != fcpreq) + /* job appears to have already completed, ignore abort */ + return; + + queue = fod->queue; + + spin_lock_irqsave(&queue->qlock, flags); + if (fod->active) { + /* + * mark as abort. The abort handler, invoked upon completion + * of any work, will detect the aborted status and do the + * callback. + */ + spin_lock(&fod->flock); + fod->abort = true; + fod->aborted = true; + spin_unlock(&fod->flock); + } + spin_unlock_irqrestore(&queue->qlock, flags); +} +EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_abort); + enum { FCT_TRADDR_ERR = 0, FCT_TRADDR_WWNN = 1 << 0, @@ -2177,7 +2316,7 @@ nvmet_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf) if (!options) return -ENOMEM; - while ((p = strsep(&o, ",\n")) != NULL) { + while ((p = strsep(&o, ":\n")) != NULL) { if (!*p) continue; @@ -2238,6 +2377,7 @@ nvmet_fc_add_port(struct nvmet_port *port) if (!tgtport->port) { tgtport->port = port; port->priv = tgtport; + nvmet_fc_tgtport_get(tgtport); ret = 0; } else ret = -EALREADY; diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 4e8e6a22bce1..aaa3dbe22bd5 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -246,11 +246,19 @@ struct fcloop_lsreq { struct fcloop_fcpreq { struct fcloop_tport *tport; struct nvmefc_fcp_req *fcpreq; + spinlock_t reqlock; u16 status; + bool active; + bool aborted; struct work_struct work; struct nvmefc_tgt_fcp_req tgt_fcp_req; }; +struct fcloop_ini_fcpreq { + struct nvmefc_fcp_req *fcpreq; + struct fcloop_fcpreq *tfcp_req; + struct work_struct iniwork; +}; static inline struct fcloop_lsreq * tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq) @@ -341,7 +349,21 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, } /* - * FCP IO operation done. call back up initiator "done" flows. + * FCP IO operation done by initiator abort. + * call back up initiator "done" flows. + */ +static void +fcloop_tgt_fcprqst_ini_done_work(struct work_struct *work) +{ + struct fcloop_ini_fcpreq *inireq = + container_of(work, struct fcloop_ini_fcpreq, iniwork); + + inireq->fcpreq->done(inireq->fcpreq); +} + +/* + * FCP IO operation done by target completion. + * call back up initiator "done" flows. */ static void fcloop_tgt_fcprqst_done_work(struct work_struct *work) @@ -349,12 +371,18 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, work); struct fcloop_tport *tport = tfcp_req->tport; - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; - if (tport->remoteport) { + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + spin_unlock(&tfcp_req->reqlock); + + if (tport->remoteport && fcpreq) { fcpreq->status = tfcp_req->status; fcpreq->done(fcpreq); } + + kfree(tfcp_req); } @@ -364,20 +392,25 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { - struct fcloop_fcpreq *tfcp_req = fcpreq->private; struct fcloop_rport *rport = remoteport->private; + struct fcloop_ini_fcpreq *inireq = fcpreq->private; + struct fcloop_fcpreq *tfcp_req; int ret = 0; - INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); + if (!rport->targetport) + return -ECONNREFUSED; - if (!rport->targetport) { - tfcp_req->status = NVME_SC_FC_TRANSPORT_ERROR; - schedule_work(&tfcp_req->work); - return ret; - } + tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_KERNEL); + if (!tfcp_req) + return -ENOMEM; + inireq->fcpreq = fcpreq; + inireq->tfcp_req = tfcp_req; + INIT_WORK(&inireq->iniwork, fcloop_tgt_fcprqst_ini_done_work); tfcp_req->fcpreq = fcpreq; tfcp_req->tport = rport->targetport->private; + spin_lock_init(&tfcp_req->reqlock); + INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req, fcpreq->cmdaddr, fcpreq->cmdlen); @@ -444,63 +477,129 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *tgt_fcpreq) { struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; u32 rsplen = 0, xfrlen = 0; - int fcp_err = 0; + int fcp_err = 0, active, aborted; u8 op = tgt_fcpreq->op; + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + active = tfcp_req->active; + aborted = tfcp_req->aborted; + tfcp_req->active = true; + spin_unlock(&tfcp_req->reqlock); + + if (unlikely(active)) + /* illegal - call while i/o active */ + return -EALREADY; + + if (unlikely(aborted)) { + /* target transport has aborted i/o prior */ + spin_lock(&tfcp_req->reqlock); + tfcp_req->active = false; + spin_unlock(&tfcp_req->reqlock); + tgt_fcpreq->transferred_length = 0; + tgt_fcpreq->fcp_error = -ECANCELED; + tgt_fcpreq->done(tgt_fcpreq); + return 0; + } + + /* + * if fcpreq is NULL, the I/O has been aborted (from + * initiator side). For the target side, act as if all is well + * but don't actually move data. + */ + switch (op) { case NVMET_FCOP_WRITEDATA: xfrlen = tgt_fcpreq->transfer_length; - fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, - tgt_fcpreq->offset, xfrlen); - fcpreq->transferred_length += xfrlen; + if (fcpreq) { + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, + fcpreq->first_sgl, tgt_fcpreq->offset, + xfrlen); + fcpreq->transferred_length += xfrlen; + } break; case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: xfrlen = tgt_fcpreq->transfer_length; - fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, - tgt_fcpreq->offset, xfrlen); - fcpreq->transferred_length += xfrlen; + if (fcpreq) { + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, + fcpreq->first_sgl, tgt_fcpreq->offset, + xfrlen); + fcpreq->transferred_length += xfrlen; + } if (op == NVMET_FCOP_READDATA) break; /* Fall-Thru to RSP handling */ case NVMET_FCOP_RSP: - rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ? - fcpreq->rsplen : tgt_fcpreq->rsplen); - memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen); - if (rsplen < tgt_fcpreq->rsplen) - fcp_err = -E2BIG; - fcpreq->rcv_rsplen = rsplen; - fcpreq->status = 0; + if (fcpreq) { + rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ? + fcpreq->rsplen : tgt_fcpreq->rsplen); + memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen); + if (rsplen < tgt_fcpreq->rsplen) + fcp_err = -E2BIG; + fcpreq->rcv_rsplen = rsplen; + fcpreq->status = 0; + } tfcp_req->status = 0; break; - case NVMET_FCOP_ABORT: - tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; - break; - default: fcp_err = -EINVAL; break; } + spin_lock(&tfcp_req->reqlock); + tfcp_req->active = false; + spin_unlock(&tfcp_req->reqlock); + tgt_fcpreq->transferred_length = xfrlen; tgt_fcpreq->fcp_error = fcp_err; tgt_fcpreq->done(tgt_fcpreq); - if ((!fcp_err) && (op == NVMET_FCOP_RSP || - op == NVMET_FCOP_READDATA_RSP || - op == NVMET_FCOP_ABORT)) - schedule_work(&tfcp_req->work); - return 0; } static void +fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + int active; + + /* + * mark aborted only in case there were 2 threads in transport + * (one doing io, other doing abort) and only kills ops posted + * after the abort request + */ + spin_lock(&tfcp_req->reqlock); + active = tfcp_req->active; + tfcp_req->aborted = true; + spin_unlock(&tfcp_req->reqlock); + + tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; + + /* + * nothing more to do. If io wasn't active, the transport should + * immediately call the req_release. If it was active, the op + * will complete, and the lldd should call req_release. + */ +} + +static void +fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + + schedule_work(&tfcp_req->work); +} + +static void fcloop_ls_abort(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, struct nvmefc_ls_req *lsreq) @@ -513,6 +612,27 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { + struct fcloop_rport *rport = remoteport->private; + struct fcloop_ini_fcpreq *inireq = fcpreq->private; + struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req; + + if (!tfcp_req) + /* abort has already been called */ + return; + + if (rport->targetport) + nvmet_fc_rcv_fcp_abort(rport->targetport, + &tfcp_req->tgt_fcp_req); + + /* break initiator/target relationship for io */ + spin_lock(&tfcp_req->reqlock); + inireq->tfcp_req = NULL; + tfcp_req->fcpreq = NULL; + spin_unlock(&tfcp_req->reqlock); + + /* post the aborted io completion */ + fcpreq->status = -ECANCELED; + schedule_work(&inireq->iniwork); } static void @@ -563,20 +683,23 @@ struct nvme_fc_port_template fctemplate = { .local_priv_sz = sizeof(struct fcloop_lport), .remote_priv_sz = sizeof(struct fcloop_rport), .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), - .fcprqst_priv_sz = sizeof(struct fcloop_fcpreq), + .fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq), }; struct nvmet_fc_target_template tgttemplate = { .targetport_delete = fcloop_targetport_delete, .xmt_ls_rsp = fcloop_xmt_ls_rsp, .fcp_op = fcloop_fcp_op, + .fcp_abort = fcloop_tgt_fcp_abort, + .fcp_req_release = fcloop_fcp_req_release, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, .dma_boundary = FCLOOP_DMABOUND_4G, /* optional features */ - .target_features = NVMET_FCTGTFEAT_READDATA_RSP | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED, + .target_features = NVMET_FCTGTFEAT_CMD_IN_ISR | + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | + NVMET_FCTGTFEAT_OPDONE_IN_ISR, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), }; diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 6b0baa9caab9..c77940d80fc8 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -184,7 +184,7 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req) (req->ns->blksize_shift - 9)) + 1; if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, - GFP_KERNEL, &bio, true)) + GFP_KERNEL, &bio, 0)) status = NVME_SC_INTERNAL | NVME_SC_DNR; if (bio) { @@ -196,26 +196,19 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req) } } -int nvmet_parse_io_cmd(struct nvmet_req *req) +u16 nvmet_parse_io_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; + u16 ret; - if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { - pr_err("nvmet: got io cmd %d while CC.EN == 0\n", - cmd->common.opcode); + ret = nvmet_check_ctrl_status(req, cmd); + if (unlikely(ret)) { req->ns = NULL; - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } - - if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got io cmd %d while CSTS.RDY == 0\n", - cmd->common.opcode); - req->ns = NULL; - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + return ret; } req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); - if (!req->ns) + if (unlikely(!req->ns)) return NVME_SC_INVALID_NS | NVME_SC_DNR; switch (cmd->common.opcode) { @@ -237,7 +230,8 @@ int nvmet_parse_io_cmd(struct nvmet_req *req) req->execute = nvmet_execute_write_zeroes; return 0; default: - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, + req->sq->qid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index c7b0b6a52708..304f1c87c160 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -13,12 +13,10 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/scatterlist.h> -#include <linux/delay.h> #include <linux/blk-mq.h> #include <linux/nvme.h> #include <linux/module.h> #include <linux/parser.h> -#include <linux/t10-pi.h> #include "nvmet.h" #include "../host/nvme.h" #include "../host/fabrics.h" @@ -93,31 +91,26 @@ static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue) static void nvme_loop_complete_rq(struct request *req) { struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); - int error = 0; nvme_cleanup_cmd(req); sg_free_table_chained(&iod->sg_table, true); + nvme_complete_rq(req); +} - if (unlikely(req->errors)) { - if (nvme_req_needs_retry(req, req->errors)) { - nvme_requeue_req(req); - return; - } - - if (blk_rq_is_passthrough(req)) - error = req->errors; - else - error = nvme_error_status(req->errors); - } +static struct blk_mq_tags *nvme_loop_tagset(struct nvme_loop_queue *queue) +{ + u32 queue_idx = nvme_loop_queue_idx(queue); - blk_mq_end_request(req, error); + if (queue_idx == 0) + return queue->ctrl->admin_tag_set.tags[queue_idx]; + return queue->ctrl->tag_set.tags[queue_idx - 1]; } static void nvme_loop_queue_response(struct nvmet_req *req) { - struct nvme_loop_iod *iod = - container_of(req, struct nvme_loop_iod, req); - struct nvme_completion *cqe = &iod->rsp; + struct nvme_loop_queue *queue = + container_of(req->sq, struct nvme_loop_queue, nvme_sq); + struct nvme_completion *cqe = req->rsp; /* * AEN requests are special as they don't time out and can @@ -125,15 +118,22 @@ static void nvme_loop_queue_response(struct nvmet_req *req) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. */ - if (unlikely(nvme_loop_queue_idx(iod->queue) == 0 && + if (unlikely(nvme_loop_queue_idx(queue) == 0 && cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) { - nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe->status, + nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); } else { - struct request *rq = blk_mq_rq_from_pdu(iod); + struct request *rq; + + rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id); + if (!rq) { + dev_err(queue->ctrl->ctrl.device, + "tag 0x%x on queue %d not found\n", + cqe->command_id, nvme_loop_queue_idx(queue)); + return; + } - iod->nvme_req.result = cqe->result; - blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); + nvme_end_request(rq, cqe->status, cqe->result); } } @@ -154,7 +154,7 @@ nvme_loop_timeout(struct request *rq, bool reserved) schedule_work(&iod->queue->ctrl->reset_work); /* fail with DNR on admin cmd timeout */ - rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR; + nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; return BLK_EH_HANDLED; } @@ -268,7 +268,7 @@ static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static struct blk_mq_ops nvme_loop_mq_ops = { +static const struct blk_mq_ops nvme_loop_mq_ops = { .queue_rq = nvme_loop_queue_rq, .complete = nvme_loop_complete_rq, .init_request = nvme_loop_init_request, @@ -276,7 +276,7 @@ static struct blk_mq_ops nvme_loop_mq_ops = { .timeout = nvme_loop_timeout, }; -static struct blk_mq_ops nvme_loop_admin_mq_ops = { +static const struct blk_mq_ops nvme_loop_admin_mq_ops = { .queue_rq = nvme_loop_queue_rq, .complete = nvme_loop_complete_rq, .init_request = nvme_loop_init_admin_request, @@ -349,6 +349,19 @@ out_destroy_queues: return ret; } +static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) +{ + int i, ret; + + for (i = 1; i < ctrl->queue_count; i++) { + ret = nvmf_connect_io_queue(&ctrl->ctrl, i); + if (ret) + return ret; + } + + return 0; +} + static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) { int error; @@ -490,7 +503,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) struct nvme_loop_ctrl *ctrl = container_of(work, struct nvme_loop_ctrl, reset_work); bool changed; - int i, ret; + int ret; nvme_loop_shutdown_ctrl(ctrl); @@ -502,11 +515,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) if (ret) goto out_destroy_admin; - for (i = 1; i < ctrl->queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) - goto out_destroy_io; - } + ret = nvme_loop_connect_io_queues(ctrl); + if (ret) + goto out_destroy_io; changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); @@ -559,7 +570,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) { - int ret, i; + int ret; ret = nvme_loop_init_io_queues(ctrl); if (ret) @@ -588,11 +599,9 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) goto out_free_tagset; } - for (i = 1; i < ctrl->queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) - goto out_cleanup_connect_q; - } + ret = nvme_loop_connect_io_queues(ctrl); + if (ret) + goto out_cleanup_connect_q; return 0; @@ -736,7 +745,12 @@ static int __init nvme_loop_init_module(void) ret = nvmet_register_transport(&nvme_loop_ops); if (ret) return ret; - return nvmf_register_transport(&nvme_loop_transport); + + ret = nvmf_register_transport(&nvme_loop_transport); + if (ret) + nvmet_unregister_transport(&nvme_loop_ops); + + return ret; } static void __exit nvme_loop_cleanup_module(void) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index f7ff15f17ca9..7cb77ba5993b 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -253,11 +253,11 @@ struct nvmet_async_event { u8 log_page; }; -int nvmet_parse_connect_cmd(struct nvmet_req *req); -int nvmet_parse_io_cmd(struct nvmet_req *req); -int nvmet_parse_admin_cmd(struct nvmet_req *req); -int nvmet_parse_discovery_cmd(struct nvmet_req *req); -int nvmet_parse_fabrics_cmd(struct nvmet_req *req); +u16 nvmet_parse_connect_cmd(struct nvmet_req *req); +u16 nvmet_parse_io_cmd(struct nvmet_req *req); +u16 nvmet_parse_admin_cmd(struct nvmet_req *req); +u16 nvmet_parse_discovery_cmd(struct nvmet_req *req); +u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops); @@ -278,6 +278,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, struct nvmet_req *req, struct nvmet_ctrl **ret); void nvmet_ctrl_put(struct nvmet_ctrl *ctrl); +u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd); struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, enum nvme_subsys_type type); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ecc4fe862561..99c69018a35f 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1199,6 +1199,11 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, } queue->port = cm_id->context; + if (queue->host_qid == 0) { + /* Let inflight controller teardown complete */ + flush_scheduled_work(); + } + ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) goto release_queue; @@ -1427,12 +1432,16 @@ restart: static int nvmet_rdma_add_port(struct nvmet_port *port) { struct rdma_cm_id *cm_id; - struct sockaddr_in addr_in; - u16 port_in; + struct sockaddr_storage addr = { }; + __kernel_sa_family_t af; int ret; switch (port->disc_addr.adrfam) { case NVMF_ADDR_FAMILY_IP4: + af = AF_INET; + break; + case NVMF_ADDR_FAMILY_IP6: + af = AF_INET6; break; default: pr_err("address family %d not supported\n", @@ -1440,13 +1449,13 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return -EINVAL; } - ret = kstrtou16(port->disc_addr.trsvcid, 0, &port_in); - if (ret) + ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, + port->disc_addr.trsvcid, &addr); + if (ret) { + pr_err("malformed ip/port passed: %s:%s\n", + port->disc_addr.traddr, port->disc_addr.trsvcid); return ret; - - addr_in.sin_family = AF_INET; - addr_in.sin_addr.s_addr = in_aton(port->disc_addr.traddr); - addr_in.sin_port = htons(port_in); + } cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, RDMA_PS_TCP, IB_QPT_RC); @@ -1455,20 +1464,32 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return PTR_ERR(cm_id); } - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr_in); + /* + * Allow both IPv4 and IPv6 sockets to bind a single port + * at the same time. + */ + ret = rdma_set_afonly(cm_id, 1); + if (ret) { + pr_err("rdma_set_afonly failed (%d)\n", ret); + goto out_destroy_id; + } + + ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr); if (ret) { - pr_err("binding CM ID to %pISpc failed (%d)\n", &addr_in, ret); + pr_err("binding CM ID to %pISpcs failed (%d)\n", + (struct sockaddr *)&addr, ret); goto out_destroy_id; } ret = rdma_listen(cm_id, 128); if (ret) { - pr_err("listening to %pISpc failed (%d)\n", &addr_in, ret); + pr_err("listening to %pISpcs failed (%d)\n", + (struct sockaddr *)&addr, ret); goto out_destroy_id; } - pr_info("enabling port %d (%pISpc)\n", - le16_to_cpu(port->disc_addr.portid), &addr_in); + pr_info("enabling port %d (%pISpcs)\n", + le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr); port->priv = cm_id; return 0; |