From 72e6329f86c714785ac195d293cb19dd24507880 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:47 -0700 Subject: nvme-fc and nvmet-fc: revise LLDD api for LS reception and LS request The current LLDD api has: nvme-fc: contains api for transport to do LS requests (and aborts of them). However, there is no interface for reception of LS's and sending responses for them. nvmet-fc: contains api for transport to do reception of LS's and sending of responses for them. However, there is no interface for doing LS requests. Revise the api's so that both nvme-fc and nvmet-fc can send LS's, as well as receiving LS's and sending their responses. Change name of the rcv_ls_req struct to better reflect generic use as a context to used to send an ls rsp. Specifically: nvmefc_tgt_ls_req -> nvmefc_ls_rsp nvmefc_tgt_ls_req.nvmet_fc_private -> nvmefc_ls_rsp.nvme_fc_private Change nvmet_fc_rcv_ls_req() calling sequence to provide handle that can be used by transport in later LS request sequences for an association. nvme-fc nvmet_fc nvme_fcloop: Revise to adapt to changed names in api header. Change calling sequence to nvmet_fc_rcv_ls_req() for hosthandle. Add stubs for new interfaces: host/fc.c: nvme_fc_rcv_ls_req() target/fc.c: nvmet_fc_invalidate_host() lpfc: Revise to adapt code to changed names in api header. Change calling sequence to nvmet_fc_rcv_ls_req() for hosthandle. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 35 ++++++++++++++++++++ drivers/nvme/target/fc.c | 77 ++++++++++++++++++++++++++++++++------------ drivers/nvme/target/fcloop.c | 20 ++++++------ 3 files changed, 102 insertions(+), 30 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7dfc4a2ecf1e..8012099fc3ee 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1464,6 +1464,41 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) kfree(lsop); } +/** + * nvme_fc_rcv_ls_req - transport entry point called by an LLDD + * upon the reception of a NVME LS request. + * + * The nvme-fc layer will copy payload to an internal structure for + * processing. As such, upon completion of the routine, the LLDD may + * immediately free/reuse the LS request buffer passed in the call. + * + * If this routine returns error, the LLDD should abort the exchange. + * + * @remoteport: pointer to the (registered) remote port that the LS + * was received from. The remoteport is associated with + * a specific localport. + * @lsrsp: pointer to a nvmefc_ls_rsp response structure to be + * used to reference the exchange corresponding to the LS + * when issuing an ls response. 
+ * @lsreqbuf: pointer to the buffer containing the LS Request + * @lsreqbuf_len: length, in bytes, of the received LS request + */ +int +nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, + struct nvmefc_ls_rsp *lsrsp, + void *lsreqbuf, u32 lsreqbuf_len) +{ + struct nvme_fc_rport *rport = remoteport_to_rport(portptr); + struct nvme_fc_lport *lport = rport->lport; + + /* validate there's a routine to transmit a response */ + if (!lport->ops->xmt_ls_rsp) + return(-EINVAL); + + return 0; +} +EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req); + /* *********************** NVME Ctrl Routines **************************** */ diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index a8ceb7721640..aac7869a70bb 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -28,7 +28,7 @@ struct nvmet_fc_tgtport; struct nvmet_fc_tgt_assoc; struct nvmet_fc_ls_iod { - struct nvmefc_tgt_ls_req *lsreq; + struct nvmefc_ls_rsp *lsrsp; struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */ struct list_head ls_list; /* tgtport->ls_list */ @@ -1146,6 +1146,42 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) spin_unlock_irqrestore(&tgtport->lock, flags); } +/** + * nvmet_fc_invalidate_host - transport entry point called by an LLDD + * to remove references to a hosthandle for LS's. + * + * The nvmet-fc layer ensures that any references to the hosthandle + * on the targetport are forgotten (set to NULL). The LLDD will + * typically call this when a login with a remote host port has been + * lost, thus LS's for the remote host port are no longer possible. + * + * If an LS request is outstanding to the targetport/hosthandle (or + * issued concurrently with the call to invalidate the host), the + * LLDD is responsible for terminating/aborting the LS and completing + * the LS request. It is recommended that these terminations/aborts + * occur after calling to invalidate the host handle to avoid additional + * retries by the nvmet-fc transport. The nvmet-fc transport may + * continue to reference host handle while it cleans up outstanding + * NVME associations. The nvmet-fc transport will call the + * ops->host_release() callback to notify the LLDD that all references + * are complete and the related host handle can be recovered. + * Note: if there are no references, the callback may be called before + * the invalidate host call returns. + * + * @target_port: pointer to the (registered) target port that a prior + * LS was received on and which supplied the transport the + * hosthandle. + * @hosthandle: the handle (pointer) that represents the host port + * that no longer has connectivity and that LS's should + * no longer be directed to. 
+ */ +void +nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, + void *hosthandle) +{ +} +EXPORT_SYMBOL_GPL(nvmet_fc_invalidate_host); + /* * nvmet layer has called to terminate an association */ @@ -1371,7 +1407,7 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Create Association LS failed: %s\n", validation_errors[ret]); - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); @@ -1384,7 +1420,7 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, /* format a response */ - iod->lsreq->rsplen = sizeof(*acc); + iod->lsrsp->rsplen = sizeof(*acc); nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( @@ -1462,7 +1498,7 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Create Connection LS failed: %s\n", validation_errors[ret]); - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? FCNVME_RJT_RC_INV_ASSOC : @@ -1477,7 +1513,7 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, /* format a response */ - iod->lsreq->rsplen = sizeof(*acc); + iod->lsrsp->rsplen = sizeof(*acc); nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)), @@ -1542,7 +1578,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Disconnect LS failed: %s\n", validation_errors[ret]); - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? FCNVME_RJT_RC_INV_ASSOC : @@ -1555,7 +1591,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, /* format a response */ - iod->lsreq->rsplen = sizeof(*acc); + iod->lsrsp->rsplen = sizeof(*acc); nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( @@ -1577,9 +1613,9 @@ static void nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req); static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops; static void -nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq) +nvmet_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) { - struct nvmet_fc_ls_iod *iod = lsreq->nvmet_fc_private; + struct nvmet_fc_ls_iod *iod = lsrsp->nvme_fc_private; struct nvmet_fc_tgtport *tgtport = iod->tgtport; fc_dma_sync_single_for_cpu(tgtport->dev, iod->rspdma, @@ -1597,9 +1633,9 @@ nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport, fc_dma_sync_single_for_device(tgtport->dev, iod->rspdma, NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); - ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsreq); + ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsrsp); if (ret) - nvmet_fc_xmt_ls_rsp_done(iod->lsreq); + nvmet_fc_xmt_ls_rsp_done(iod->lsrsp); } /* @@ -1612,12 +1648,12 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)iod->rqstbuf; - iod->lsreq->nvmet_fc_private = iod; - iod->lsreq->rspbuf = iod->rspbuf; - iod->lsreq->rspdma = iod->rspdma; - iod->lsreq->done = nvmet_fc_xmt_ls_rsp_done; + iod->lsrsp->nvme_fc_private = iod; + iod->lsrsp->rspbuf = iod->rspbuf; + iod->lsrsp->rspdma = iod->rspdma; + iod->lsrsp->done = nvmet_fc_xmt_ls_rsp_done; /* Be preventative. 
handlers will later set to valid length */ - iod->lsreq->rsplen = 0; + iod->lsrsp->rsplen = 0; iod->assoc = NULL; @@ -1640,7 +1676,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, nvmet_fc_ls_disconnect(tgtport, iod); break; default: - iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf, + iod->lsrsp->rsplen = nvmet_fc_format_rjt(iod->rspbuf, NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } @@ -1674,14 +1710,15 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work) * * @target_port: pointer to the (registered) target port the LS was * received on. - * @lsreq: pointer to a lsreq request structure to be used to reference + * @lsrsp: pointer to a lsrsp structure to be used to reference * the exchange corresponding to the LS. * @lsreqbuf: pointer to the buffer containing the LS Request * @lsreqbuf_len: length, in bytes, of the received LS request */ int nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, - struct nvmefc_tgt_ls_req *lsreq, + void *hosthandle, + struct nvmefc_ls_rsp *lsrsp, void *lsreqbuf, u32 lsreqbuf_len) { struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); @@ -1699,7 +1736,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, return -ENOENT; } - iod->lsreq = lsreq; + iod->lsrsp = lsrsp; iod->fcpreq = NULL; memcpy(iod->rqstbuf, lsreqbuf, lsreqbuf_len); iod->rqstdatalen = lsreqbuf_len; diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index f69ce66e2d44..c11805a155e8 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -228,7 +228,7 @@ struct fcloop_nport { struct fcloop_lsreq { struct nvmefc_ls_req *lsreq; - struct nvmefc_tgt_ls_req tgt_ls_req; + struct nvmefc_ls_rsp ls_rsp; int status; struct list_head ls_list; /* fcloop_rport->ls_list */ }; @@ -267,9 +267,9 @@ struct fcloop_ini_fcpreq { }; static inline struct fcloop_lsreq * -tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq) +ls_rsp_to_lsreq(struct nvmefc_ls_rsp *lsrsp) { - return container_of(tgt_lsreq, struct fcloop_lsreq, tgt_ls_req); + return container_of(lsrsp, struct fcloop_lsreq, ls_rsp); } static inline struct fcloop_fcpreq * @@ -344,7 +344,7 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, } tls_req->status = 0; - ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req, + ret = nvmet_fc_rcv_ls_req(rport->targetport, NULL, &tls_req->ls_rsp, lsreq->rqstaddr, lsreq->rqstlen); return ret; @@ -352,19 +352,19 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, static int fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, - struct nvmefc_tgt_ls_req *tgt_lsreq) + struct nvmefc_ls_rsp *lsrsp) { - struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq); + struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp); struct nvmefc_ls_req *lsreq = tls_req->lsreq; struct fcloop_tport *tport = targetport->private; struct nvme_fc_remote_port *remoteport = tport->remoteport; struct fcloop_rport *rport; - memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf, - ((lsreq->rsplen < tgt_lsreq->rsplen) ? - lsreq->rsplen : tgt_lsreq->rsplen)); + memcpy(lsreq->rspaddr, lsrsp->rspbuf, + ((lsreq->rsplen < lsrsp->rsplen) ? 
+ lsreq->rsplen : lsrsp->rsplen)); - tgt_lsreq->done(tgt_lsreq); + lsrsp->done(lsrsp); if (remoteport) { rport = remoteport->private; -- cgit v1.2.3 From ca19bcd086331ec2fa182ad8cd589014beb931be Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:48 -0700 Subject: nvme-fc nvmet-fc: refactor for common LS definitions Routines in the target will want to be used in the host as well. Error definitions should now shared as both sides will process requests and responses to requests. Moved common declarations to new fc.h header kept in the host subdirectory. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 36 +------------ drivers/nvme/host/fc.h | 133 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/target/fc.c | 115 ++++------------------------------------ 3 files changed, 143 insertions(+), 141 deletions(-) create mode 100644 drivers/nvme/host/fc.h (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 8012099fc3ee..83f9b2ac7c55 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -14,6 +14,7 @@ #include "fabrics.h" #include #include +#include "fc.h" #include /* *************************** Data Structures/Defines ****************** */ @@ -1140,41 +1141,6 @@ nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, return __nvme_fc_send_ls_req(rport, lsop, done); } -/* Validation Error indexes into the string table below */ -enum { - VERR_NO_ERROR = 0, - VERR_LSACC = 1, - VERR_LSDESC_RQST = 2, - VERR_LSDESC_RQST_LEN = 3, - VERR_ASSOC_ID = 4, - VERR_ASSOC_ID_LEN = 5, - VERR_CONN_ID = 6, - VERR_CONN_ID_LEN = 7, - VERR_CR_ASSOC = 8, - VERR_CR_ASSOC_ACC_LEN = 9, - VERR_CR_CONN = 10, - VERR_CR_CONN_ACC_LEN = 11, - VERR_DISCONN = 12, - VERR_DISCONN_ACC_LEN = 13, -}; - -static char *validation_errors[] = { - "OK", - "Not LS_ACC", - "Not LSDESC_RQST", - "Bad LSDESC_RQST Length", - "Not Association ID", - "Bad Association ID Length", - "Not Connection ID", - "Bad Connection ID Length", - "Not CR_ASSOC Rqst", - "Bad CR_ASSOC ACC Length", - "Not CR_CONN Rqst", - "Bad CR_CONN ACC Length", - "Not Disconnect Rqst", - "Bad Disconnect ACC Length", -}; - static int nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) diff --git a/drivers/nvme/host/fc.h b/drivers/nvme/host/fc.h new file mode 100644 index 000000000000..d2861cdd58ee --- /dev/null +++ b/drivers/nvme/host/fc.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016, Avago Technologies + */ + +#ifndef _NVME_FC_TRANSPORT_H +#define _NVME_FC_TRANSPORT_H 1 + + +/* + * Common definitions between the nvme_fc (host) transport and + * nvmet_fc (target) transport implementation. 
+ */ + +/* + * ****************** FC-NVME LS HANDLING ****************** + */ + +static inline void +nvme_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) +{ + struct fcnvme_ls_acc_hdr *acc = buf; + + acc->w0.ls_cmd = ls_cmd; + acc->desc_list_len = desc_len; + acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST); + acc->rqst.desc_len = + fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)); + acc->rqst.w0.ls_cmd = rqst_ls_cmd; +} + +static inline int +nvme_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd, + u8 reason, u8 explanation, u8 vendor) +{ + struct fcnvme_ls_rjt *rjt = buf; + + nvme_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST, + fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)), + ls_cmd); + rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT); + rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt)); + rjt->rjt.reason_code = reason; + rjt->rjt.reason_explanation = explanation; + rjt->rjt.vendor = vendor; + + return sizeof(struct fcnvme_ls_rjt); +} + +/* Validation Error indexes into the string table below */ +enum { + VERR_NO_ERROR = 0, + VERR_CR_ASSOC_LEN = 1, + VERR_CR_ASSOC_RQST_LEN = 2, + VERR_CR_ASSOC_CMD = 3, + VERR_CR_ASSOC_CMD_LEN = 4, + VERR_ERSP_RATIO = 5, + VERR_ASSOC_ALLOC_FAIL = 6, + VERR_QUEUE_ALLOC_FAIL = 7, + VERR_CR_CONN_LEN = 8, + VERR_CR_CONN_RQST_LEN = 9, + VERR_ASSOC_ID = 10, + VERR_ASSOC_ID_LEN = 11, + VERR_NO_ASSOC = 12, + VERR_CONN_ID = 13, + VERR_CONN_ID_LEN = 14, + VERR_INVAL_CONN = 15, + VERR_CR_CONN_CMD = 16, + VERR_CR_CONN_CMD_LEN = 17, + VERR_DISCONN_LEN = 18, + VERR_DISCONN_RQST_LEN = 19, + VERR_DISCONN_CMD = 20, + VERR_DISCONN_CMD_LEN = 21, + VERR_DISCONN_SCOPE = 22, + VERR_RS_LEN = 23, + VERR_RS_RQST_LEN = 24, + VERR_RS_CMD = 25, + VERR_RS_CMD_LEN = 26, + VERR_RS_RCTL = 27, + VERR_RS_RO = 28, + VERR_LSACC = 29, + VERR_LSDESC_RQST = 30, + VERR_LSDESC_RQST_LEN = 31, + VERR_CR_ASSOC = 32, + VERR_CR_ASSOC_ACC_LEN = 33, + VERR_CR_CONN = 34, + VERR_CR_CONN_ACC_LEN = 35, + VERR_DISCONN = 36, + VERR_DISCONN_ACC_LEN = 37, +}; + +static char *validation_errors[] = { + "OK", + "Bad CR_ASSOC Length", + "Bad CR_ASSOC Rqst Length", + "Not CR_ASSOC Cmd", + "Bad CR_ASSOC Cmd Length", + "Bad Ersp Ratio", + "Association Allocation Failed", + "Queue Allocation Failed", + "Bad CR_CONN Length", + "Bad CR_CONN Rqst Length", + "Not Association ID", + "Bad Association ID Length", + "No Association", + "Not Connection ID", + "Bad Connection ID Length", + "Invalid Connection ID", + "Not CR_CONN Cmd", + "Bad CR_CONN Cmd Length", + "Bad DISCONN Length", + "Bad DISCONN Rqst Length", + "Not DISCONN Cmd", + "Bad DISCONN Cmd Length", + "Bad Disconnect Scope", + "Bad RS Length", + "Bad RS Rqst Length", + "Not RS Cmd", + "Bad RS Cmd Length", + "Bad RS R_CTL", + "Bad RS Relative Offset", + "Not LS_ACC", + "Not LSDESC_RQST", + "Bad LSDESC_RQST Length", + "Not CR_ASSOC Rqst", + "Bad CR_ASSOC ACC Length", + "Not CR_CONN Rqst", + "Bad CR_CONN ACC Length", + "Not Disconnect Rqst", + "Bad Disconnect ACC Length", +}; + +#endif /* _NVME_FC_TRANSPORT_H */ diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index aac7869a70bb..1f3118a3b0a3 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -14,6 +14,7 @@ #include "nvmet.h" #include #include +#include "../host/fc.h" /* *************************** Data Structures/Defines ****************** */ @@ -1257,102 +1258,6 @@ EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport); /* *********************** FC-NVME LS Handling **************************** */ -static void 
-nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) -{ - struct fcnvme_ls_acc_hdr *acc = buf; - - acc->w0.ls_cmd = ls_cmd; - acc->desc_list_len = desc_len; - acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST); - acc->rqst.desc_len = - fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)); - acc->rqst.w0.ls_cmd = rqst_ls_cmd; -} - -static int -nvmet_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd, - u8 reason, u8 explanation, u8 vendor) -{ - struct fcnvme_ls_rjt *rjt = buf; - - nvmet_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST, - fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)), - ls_cmd); - rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT); - rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt)); - rjt->rjt.reason_code = reason; - rjt->rjt.reason_explanation = explanation; - rjt->rjt.vendor = vendor; - - return sizeof(struct fcnvme_ls_rjt); -} - -/* Validation Error indexes into the string table below */ -enum { - VERR_NO_ERROR = 0, - VERR_CR_ASSOC_LEN = 1, - VERR_CR_ASSOC_RQST_LEN = 2, - VERR_CR_ASSOC_CMD = 3, - VERR_CR_ASSOC_CMD_LEN = 4, - VERR_ERSP_RATIO = 5, - VERR_ASSOC_ALLOC_FAIL = 6, - VERR_QUEUE_ALLOC_FAIL = 7, - VERR_CR_CONN_LEN = 8, - VERR_CR_CONN_RQST_LEN = 9, - VERR_ASSOC_ID = 10, - VERR_ASSOC_ID_LEN = 11, - VERR_NO_ASSOC = 12, - VERR_CONN_ID = 13, - VERR_CONN_ID_LEN = 14, - VERR_NO_CONN = 15, - VERR_CR_CONN_CMD = 16, - VERR_CR_CONN_CMD_LEN = 17, - VERR_DISCONN_LEN = 18, - VERR_DISCONN_RQST_LEN = 19, - VERR_DISCONN_CMD = 20, - VERR_DISCONN_CMD_LEN = 21, - VERR_DISCONN_SCOPE = 22, - VERR_RS_LEN = 23, - VERR_RS_RQST_LEN = 24, - VERR_RS_CMD = 25, - VERR_RS_CMD_LEN = 26, - VERR_RS_RCTL = 27, - VERR_RS_RO = 28, -}; - -static char *validation_errors[] = { - "OK", - "Bad CR_ASSOC Length", - "Bad CR_ASSOC Rqst Length", - "Not CR_ASSOC Cmd", - "Bad CR_ASSOC Cmd Length", - "Bad Ersp Ratio", - "Association Allocation Failed", - "Queue Allocation Failed", - "Bad CR_CONN Length", - "Bad CR_CONN Rqst Length", - "Not Association ID", - "Bad Association ID Length", - "No Association", - "Not Connection ID", - "Bad Connection ID Length", - "No Connection", - "Not CR_CONN Cmd", - "Bad CR_CONN Cmd Length", - "Bad DISCONN Length", - "Bad DISCONN Rqst Length", - "Not DISCONN Cmd", - "Bad DISCONN Cmd Length", - "Bad Disconnect Scope", - "Bad RS Length", - "Bad RS Rqst Length", - "Not RS Cmd", - "Bad RS Cmd Length", - "Bad RS R_CTL", - "Bad RS Relative Offset", -}; - static void nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) @@ -1407,7 +1312,7 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Create Association LS failed: %s\n", validation_errors[ret]); - iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); @@ -1422,7 +1327,7 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, iod->lsrsp->rsplen = sizeof(*acc); - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( sizeof(struct fcnvme_ls_cr_assoc_acc)), FCNVME_LS_CREATE_ASSOCIATION); @@ -1498,7 +1403,7 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Create Connection LS failed: %s\n", validation_errors[ret]); - iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? 
FCNVME_RJT_RC_INV_ASSOC : @@ -1515,7 +1420,7 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, iod->lsrsp->rsplen = sizeof(*acc); - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)), FCNVME_LS_CREATE_CONNECTION); acc->connectid.desc_tag = cpu_to_be32(FCNVME_LSDESC_CONN_ID); @@ -1578,13 +1483,11 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Disconnect LS failed: %s\n", validation_errors[ret]); - iod->lsrsp->rsplen = nvmet_fc_format_rjt(acc, + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? FCNVME_RJT_RC_INV_ASSOC : - (ret == VERR_NO_CONN) ? - FCNVME_RJT_RC_INV_CONN : - FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); return; } @@ -1593,7 +1496,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, iod->lsrsp->rsplen = sizeof(*acc); - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); @@ -1676,7 +1579,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, nvmet_fc_ls_disconnect(tgtport, iod); break; default: - iod->lsrsp->rsplen = nvmet_fc_format_rjt(iod->rspbuf, + iod->lsrsp->rsplen = nvme_fc_format_rjt(iod->rspbuf, NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } -- cgit v1.2.3 From 3b8281b02bdc6fc7bed6f20af6fd7933a86b94e2 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:49 -0700 Subject: nvmet-fc: Better size LS buffers Current code uses NVME_FC_MAX_LS_BUFFER_SIZE (2KB) when allocating buffers for LS requests and responses. This is considerable overkill for what is actually defined. Rework code to have unions for all possible requests and responses and size based on the unions. Remove NVME_FC_MAX_LS_BUFFER_SIZE. 
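For illustration, a condensed sketch of the new scheme (the full union definitions are in the diff below): each buffer is now sized by a union of every LS the transport can see, and a single allocation carries both the request and response buffers:

    union nvmefc_ls_requests {
        struct fcnvme_ls_cr_assoc_rqst         rq_cr_assoc;
        struct fcnvme_ls_cr_conn_rqst          rq_cr_conn;
        struct fcnvme_ls_disconnect_assoc_rqst rq_dis_assoc;
        struct fcnvme_ls_disconnect_conn_rqst  rq_dis_conn;
    } __aligned(128);

    /* one allocation, exactly sized; response buffer follows the request */
    iod->rqstbuf = kzalloc(sizeof(union nvmefc_ls_requests) +
                           sizeof(union nvmefc_ls_responses), GFP_KERNEL);
    iod->rspbuf  = (union nvmefc_ls_responses *)&iod->rqstbuf[1];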
Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.h | 15 ++++++++++++++ drivers/nvme/target/fc.c | 53 +++++++++++++++++++++--------------------------- 2 files changed, 38 insertions(+), 30 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.h b/drivers/nvme/host/fc.h index d2861cdd58ee..08fa88381d45 100644 --- a/drivers/nvme/host/fc.h +++ b/drivers/nvme/host/fc.h @@ -16,6 +16,21 @@ * ****************** FC-NVME LS HANDLING ****************** */ +union nvmefc_ls_requests { + struct fcnvme_ls_cr_assoc_rqst rq_cr_assoc; + struct fcnvme_ls_cr_conn_rqst rq_cr_conn; + struct fcnvme_ls_disconnect_assoc_rqst rq_dis_assoc; + struct fcnvme_ls_disconnect_conn_rqst rq_dis_conn; +} __aligned(128); /* alignment for other things alloc'd with */ + +union nvmefc_ls_responses { + struct fcnvme_ls_rjt rsp_rjt; + struct fcnvme_ls_cr_assoc_acc rsp_cr_assoc; + struct fcnvme_ls_cr_conn_acc rsp_cr_conn; + struct fcnvme_ls_disconnect_assoc_acc rsp_dis_assoc; + struct fcnvme_ls_disconnect_conn_acc rsp_dis_conn; +} __aligned(128); /* alignment for other things alloc'd with */ + static inline void nvme_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) { diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1f3118a3b0a3..66de6bd8f4fd 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -22,9 +22,6 @@ #define NVMET_LS_CTX_COUNT 256 -/* for this implementation, assume small single frame rqst/rsp */ -#define NVME_FC_MAX_LS_BUFFER_SIZE 2048 - struct nvmet_fc_tgtport; struct nvmet_fc_tgt_assoc; @@ -37,8 +34,8 @@ struct nvmet_fc_ls_iod { struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_assoc *assoc; - u8 *rqstbuf; - u8 *rspbuf; + union nvmefc_ls_requests *rqstbuf; + union nvmefc_ls_responses *rspbuf; u16 rqstdatalen; dma_addr_t rspdma; @@ -340,15 +337,16 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport) iod->tgtport = tgtport; list_add_tail(&iod->ls_list, &tgtport->ls_list); - iod->rqstbuf = kcalloc(2, NVME_FC_MAX_LS_BUFFER_SIZE, - GFP_KERNEL); + iod->rqstbuf = kzalloc(sizeof(union nvmefc_ls_requests) + + sizeof(union nvmefc_ls_responses), + GFP_KERNEL); if (!iod->rqstbuf) goto out_fail; - iod->rspbuf = iod->rqstbuf + NVME_FC_MAX_LS_BUFFER_SIZE; + iod->rspbuf = (union nvmefc_ls_responses *)&iod->rqstbuf[1]; iod->rspdma = fc_dma_map_single(tgtport->dev, iod->rspbuf, - NVME_FC_MAX_LS_BUFFER_SIZE, + sizeof(*iod->rspbuf), DMA_TO_DEVICE); if (fc_dma_mapping_error(tgtport->dev, iod->rspdma)) goto out_fail; @@ -361,7 +359,7 @@ out_fail: list_del(&iod->ls_list); for (iod--, i--; i >= 0; iod--, i--) { fc_dma_unmap_single(tgtport->dev, iod->rspdma, - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + sizeof(*iod->rspbuf), DMA_TO_DEVICE); kfree(iod->rqstbuf); list_del(&iod->ls_list); } @@ -379,7 +377,7 @@ nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport) for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) { fc_dma_unmap_single(tgtport->dev, - iod->rspdma, NVME_FC_MAX_LS_BUFFER_SIZE, + iod->rspdma, sizeof(*iod->rspbuf), DMA_TO_DEVICE); kfree(iod->rqstbuf); list_del(&iod->ls_list); @@ -1262,10 +1260,8 @@ static void nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { - struct fcnvme_ls_cr_assoc_rqst *rqst = - (struct fcnvme_ls_cr_assoc_rqst *)iod->rqstbuf; - struct fcnvme_ls_cr_assoc_acc *acc = - (struct fcnvme_ls_cr_assoc_acc *)iod->rspbuf; + struct fcnvme_ls_cr_assoc_rqst *rqst = 
&iod->rqstbuf->rq_cr_assoc; + struct fcnvme_ls_cr_assoc_acc *acc = &iod->rspbuf->rsp_cr_assoc; struct nvmet_fc_tgt_queue *queue; int ret = 0; @@ -1313,7 +1309,7 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, "Create Association LS failed: %s\n", validation_errors[ret]); iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + sizeof(*acc), rqst->w0.ls_cmd, FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); return; @@ -1348,10 +1344,8 @@ static void nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { - struct fcnvme_ls_cr_conn_rqst *rqst = - (struct fcnvme_ls_cr_conn_rqst *)iod->rqstbuf; - struct fcnvme_ls_cr_conn_acc *acc = - (struct fcnvme_ls_cr_conn_acc *)iod->rspbuf; + struct fcnvme_ls_cr_conn_rqst *rqst = &iod->rqstbuf->rq_cr_conn; + struct fcnvme_ls_cr_conn_acc *acc = &iod->rspbuf->rsp_cr_conn; struct nvmet_fc_tgt_queue *queue; int ret = 0; @@ -1404,7 +1398,7 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, "Create Connection LS failed: %s\n", validation_errors[ret]); iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + sizeof(*acc), rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? FCNVME_RJT_RC_INV_ASSOC : FCNVME_RJT_RC_LOGIC, @@ -1437,9 +1431,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { struct fcnvme_ls_disconnect_assoc_rqst *rqst = - (struct fcnvme_ls_disconnect_assoc_rqst *)iod->rqstbuf; + &iod->rqstbuf->rq_dis_assoc; struct fcnvme_ls_disconnect_assoc_acc *acc = - (struct fcnvme_ls_disconnect_assoc_acc *)iod->rspbuf; + &iod->rspbuf->rsp_dis_assoc; struct nvmet_fc_tgt_assoc *assoc; int ret = 0; @@ -1484,7 +1478,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, "Disconnect LS failed: %s\n", validation_errors[ret]); iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + sizeof(*acc), rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? 
FCNVME_RJT_RC_INV_ASSOC : FCNVME_RJT_RC_LOGIC, @@ -1522,7 +1516,7 @@ nvmet_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) struct nvmet_fc_tgtport *tgtport = iod->tgtport; fc_dma_sync_single_for_cpu(tgtport->dev, iod->rspdma, - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + sizeof(*iod->rspbuf), DMA_TO_DEVICE); nvmet_fc_free_ls_iod(tgtport, iod); nvmet_fc_tgtport_put(tgtport); } @@ -1534,7 +1528,7 @@ nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport, int ret; fc_dma_sync_single_for_device(tgtport->dev, iod->rspdma, - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + sizeof(*iod->rspbuf), DMA_TO_DEVICE); ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsrsp); if (ret) @@ -1548,8 +1542,7 @@ static void nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { - struct fcnvme_ls_rqst_w0 *w0 = - (struct fcnvme_ls_rqst_w0 *)iod->rqstbuf; + struct fcnvme_ls_rqst_w0 *w0 = &iod->rqstbuf->rq_cr_assoc.w0; iod->lsrsp->nvme_fc_private = iod; iod->lsrsp->rspbuf = iod->rspbuf; @@ -1580,7 +1573,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, break; default: iod->lsrsp->rsplen = nvme_fc_format_rjt(iod->rspbuf, - NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, + sizeof(*iod->rspbuf), w0->ls_cmd, FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } @@ -1627,7 +1620,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); struct nvmet_fc_ls_iod *iod; - if (lsreqbuf_len > NVME_FC_MAX_LS_BUFFER_SIZE) + if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) return -E2BIG; if (!nvmet_fc_tgtport_get(tgtport)) -- cgit v1.2.3 From f56bf76f79f3dc15f17433dda1b567d34f18e699 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:50 -0700 Subject: nvme-fc: Ensure private pointers are NULL if no data Ensure that when allocations are done, and the lldd options indicate no private data is needed, that private pointers will be set to NULL (catches driver error that forgot to set private data size). Slightly reorg the allocations so that private data follows allocations for LS request/response buffers. Ensures better alignments for the buffers as well as the private pointer. 
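A condensed sketch of the reordered layout (illustrative; see the diff below for the real code):

    lsop = kzalloc(sizeof(*lsop) +
                   sizeof(*rqst) + sizeof(*acc) +
                   lport->ops->lsrqst_priv_sz, GFP_KERNEL);

    rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1];
    acc  = (struct fcnvme_ls_cr_assoc_acc *)&rqst[1];
    /* private data trails the buffers; NULL when the LLDD asked for none */
    lsreq->private = lport->ops->lsrqst_priv_sz ? (void *)&acc[1] : NULL;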
Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 81 ++++++++++++++++++++++++++++++------------------ drivers/nvme/target/fc.c | 5 ++- 2 files changed, 54 insertions(+), 32 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 83f9b2ac7c55..bf80b941d739 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -395,7 +395,10 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, newrec->ops = template; newrec->dev = dev; ida_init(&newrec->endp_cnt); - newrec->localport.private = &newrec[1]; + if (template->local_priv_sz) + newrec->localport.private = &newrec[1]; + else + newrec->localport.private = NULL; newrec->localport.node_name = pinfo->node_name; newrec->localport.port_name = pinfo->port_name; newrec->localport.port_role = pinfo->port_role; @@ -704,7 +707,10 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, newrec->remoteport.localport = &lport->localport; newrec->dev = lport->dev; newrec->lport = lport; - newrec->remoteport.private = &newrec[1]; + if (lport->ops->remote_priv_sz) + newrec->remoteport.private = &newrec[1]; + else + newrec->remoteport.private = NULL; newrec->remoteport.port_role = pinfo->port_role; newrec->remoteport.node_name = pinfo->node_name; newrec->remoteport.port_name = pinfo->port_name; @@ -1152,18 +1158,23 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, int ret, fcret = 0; lsop = kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); + sizeof(*assoc_rqst) + sizeof(*assoc_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Create Association failed: ENOMEM\n", + ctrl->cnum); ret = -ENOMEM; goto out_no_memory; } - lsreq = &lsop->ls_req; - lsreq->private = (void *)&lsop[1]; - assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1]; assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1]; + lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = &assoc_acc[1]; + else + lsreq->private = NULL; assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; assoc_rqst->desc_list_len = @@ -1261,18 +1272,23 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, int ret, fcret = 0; lsop = kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); + sizeof(*conn_rqst) + sizeof(*conn_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Create Connection failed: ENOMEM\n", + ctrl->cnum); ret = -ENOMEM; goto out_no_memory; } - lsreq = &lsop->ls_req; - lsreq->private = (void *)&lsop[1]; - conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1]; conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; + lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = (void *)&conn_acc[1]; + else + lsreq->private = NULL; conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; conn_rqst->desc_list_len = cpu_to_be32( @@ -1386,19 +1402,23 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) int ret; lsop = 
kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*discon_rqst) + sizeof(*discon_acc)), - GFP_KERNEL); - if (!lsop) - /* couldn't sent it... too bad */ + sizeof(*discon_rqst) + sizeof(*discon_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); + if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Disconnect Association " + "failed: ENOMEM\n", + ctrl->cnum); return; + } - lsreq = &lsop->ls_req; - - lsreq->private = (void *)&lsop[1]; - discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1]; discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; + lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = (void *)&discon_acc[1]; + else + lsreq->private = NULL; discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; discon_rqst->desc_list_len = cpu_to_be32( @@ -1784,15 +1804,17 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op; struct nvme_fc_cmd_iu *cmdiu; struct nvme_command *sqe; - void *private; + void *private = NULL; int i, ret; aen_op = ctrl->aen_ops; for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { - private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, + if (ctrl->lport->ops->fcprqst_priv_sz) { + private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, GFP_KERNEL); - if (!private) - return -ENOMEM; + if (!private) + return -ENOMEM; + } cmdiu = &aen_op->cmd_iu; sqe = &cmdiu->sqe; @@ -1823,9 +1845,6 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) aen_op = ctrl->aen_ops; for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { - if (!aen_op->fcp_req.private) - continue; - __nvme_fc_exit_request(ctrl, aen_op); kfree(aen_op->fcp_req.private); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 66de6bd8f4fd..66a60a218994 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1047,7 +1047,10 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, newrec->fc_target_port.node_name = pinfo->node_name; newrec->fc_target_port.port_name = pinfo->port_name; - newrec->fc_target_port.private = &newrec[1]; + if (template->target_priv_sz) + newrec->fc_target_port.private = &newrec[1]; + else + newrec->fc_target_port.private = NULL; newrec->fc_target_port.port_id = pinfo->port_id; newrec->fc_target_port.port_num = idx; INIT_LIST_HEAD(&newrec->tgt_list); -- cgit v1.2.3 From eb4ee8f125157926cf36a3c275b04825f1bf8cfa Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:51 -0700 Subject: nvme-fc: convert assoc_active flag to bit op Convert the assoc_active boolean flag to a bitop on the flags field. The bit ops will provide atomicity. To make this change, the flags field was converted to a long type, which also affects the FCCTRL_TERMIO flag. Both FCCTRL_TERMIO and now ASSOC_ACTIVE flags are set/cleared by bit operations. 
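The resulting usage pattern, condensed for illustration:

    #define ASSOC_ACTIVE  0    /* bit positions in ctrl->flags (unsigned long) */
    #define FCCTRL_TERMIO 1

    if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags))
        return 1;                       /* association already active */
    ...
    if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
        return;                         /* no association to delete */
    ...
    if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
        ctrl->iocnt++;                  /* was: ctrl->flags & FCCTRL_TERMIO */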
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index bf80b941d739..0ac246603063 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -127,9 +127,9 @@ struct nvme_fc_rport { unsigned long dev_loss_end; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ -enum nvme_fcctrl_flags { - FCCTRL_TERMIO = (1 << 0), -}; +/* fc_ctrl flags values - specified as bit positions */ +#define ASSOC_ACTIVE 0 +#define FCCTRL_TERMIO 1 struct nvme_fc_ctrl { spinlock_t lock; @@ -140,7 +140,6 @@ struct nvme_fc_ctrl { u32 cnum; bool ioq_live; - bool assoc_active; atomic_t err_work_active; u64 association_id; @@ -153,7 +152,7 @@ struct nvme_fc_ctrl { struct work_struct err_work; struct kref ref; - u32 flags; + unsigned long flags; u32 iocnt; wait_queue_head_t ioabort_wait; @@ -1521,7 +1520,7 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); if (opstate != FCPOP_STATE_ACTIVE) atomic_set(&op->state, opstate); - else if (ctrl->flags & FCCTRL_TERMIO) + else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) ctrl->iocnt++; spin_unlock_irqrestore(&ctrl->lock, flags); @@ -1558,7 +1557,7 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, if (opstate == FCPOP_STATE_ABORTED) { spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->flags & FCCTRL_TERMIO) { + if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) { if (!--ctrl->iocnt) wake_up(&ctrl->ioabort_wait); } @@ -2386,16 +2385,9 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); struct nvme_fc_fcp_op *aen_op; - unsigned long flags; - bool terminating = false; blk_status_t ret; - spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->flags & FCCTRL_TERMIO) - terminating = true; - spin_unlock_irqrestore(&ctrl->lock, flags); - - if (terminating) + if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) return; aen_op = &ctrl->aen_ops[0]; @@ -2604,10 +2596,9 @@ nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl) struct nvme_fc_rport *rport = ctrl->rport; u32 cnt; - if (ctrl->assoc_active) + if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags)) return 1; - ctrl->assoc_active = true; cnt = atomic_inc_return(&rport->act_ctrl_cnt); if (cnt == 1) nvme_fc_rport_active_on_lport(rport); @@ -2622,7 +2613,7 @@ nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl) struct nvme_fc_lport *lport = rport->lport; u32 cnt; - /* ctrl->assoc_active=false will be set independently */ + /* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */ cnt = atomic_dec_return(&rport->act_ctrl_cnt); if (cnt == 0) { @@ -2764,7 +2755,7 @@ out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); out_free_queue: nvme_fc_free_queue(&ctrl->queues[0]); - ctrl->assoc_active = false; + clear_bit(ASSOC_ACTIVE, &ctrl->flags); nvme_fc_ctlr_inactive_on_rport(ctrl); return ret; @@ -2781,12 +2772,11 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { unsigned long flags; - if (!ctrl->assoc_active) + if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags)) return; - ctrl->assoc_active = false; spin_lock_irqsave(&ctrl->lock, flags); - ctrl->flags |= FCCTRL_TERMIO; + set_bit(FCCTRL_TERMIO, &ctrl->flags); ctrl->iocnt = 0; spin_unlock_irqrestore(&ctrl->lock, flags); @@ -2837,7 +2827,7 @@ 
nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) /* wait for all io that had to be aborted */ spin_lock_irq(&ctrl->lock); wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); - ctrl->flags &= ~FCCTRL_TERMIO; + clear_bit(FCCTRL_TERMIO, &ctrl->flags); spin_unlock_irq(&ctrl->lock); nvme_fc_term_aen_ops(ctrl); @@ -3109,7 +3099,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->dev = lport->dev; ctrl->cnum = idx; ctrl->ioq_live = false; - ctrl->assoc_active = false; atomic_set(&ctrl->err_work_active, 0); init_waitqueue_head(&ctrl->ioabort_wait); -- cgit v1.2.3 From fd5a5f2213048b012bc7e19e832e9ae0ec1a2c4a Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:52 -0700 Subject: nvme-fc: Update header and host for common definitions for LS handling Given that both host and target now generate and receive LS's create a single table definition for LS names. Each tranport half will have a local version of the table. As Create Association LS is issued by both sides, and received by both sides, create common routines to format the LS and to validate the LS. Convert the host side transport to use the new common Create Association LS formatting routine. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 25 ++-------------- drivers/nvme/host/fc.h | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 23 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0ac246603063..c069ab056202 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1419,29 +1419,8 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) else lsreq->private = NULL; - discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; - discon_rqst->desc_list_len = cpu_to_be32( - sizeof(struct fcnvme_lsdesc_assoc_id) + - sizeof(struct fcnvme_lsdesc_disconn_cmd)); - - discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); - discon_rqst->associd.desc_len = - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_assoc_id)); - - discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); - - discon_rqst->discon_cmd.desc_tag = cpu_to_be32( - FCNVME_LSDESC_DISCONN_CMD); - discon_rqst->discon_cmd.desc_len = - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_disconn_cmd)); - - lsreq->rqstaddr = discon_rqst; - lsreq->rqstlen = sizeof(*discon_rqst); - lsreq->rspaddr = discon_acc; - lsreq->rsplen = sizeof(*discon_acc); - lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; + nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc, + ctrl->association_id); ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, nvme_fc_disconnect_assoc_done); diff --git a/drivers/nvme/host/fc.h b/drivers/nvme/host/fc.h index 08fa88381d45..05ce566f2caf 100644 --- a/drivers/nvme/host/fc.h +++ b/drivers/nvme/host/fc.h @@ -17,6 +17,7 @@ */ union nvmefc_ls_requests { + struct fcnvme_ls_rqst_w0 w0; struct fcnvme_ls_cr_assoc_rqst rq_cr_assoc; struct fcnvme_ls_cr_conn_rqst rq_cr_conn; struct fcnvme_ls_disconnect_assoc_rqst rq_dis_assoc; @@ -145,4 +146,82 @@ static char *validation_errors[] = { "Bad Disconnect ACC Length", }; +#define NVME_FC_LAST_LS_CMD_VALUE FCNVME_LS_DISCONNECT_CONN + +static char *nvmefc_ls_names[] = { + "Reserved (0)", + "RJT (1)", + "ACC (2)", + "Create Association", + "Create Connection", + "Disconnect Association", + "Disconnect Connection", +}; + +static 
inline void +nvmefc_fmt_lsreq_discon_assoc(struct nvmefc_ls_req *lsreq, + struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst, + struct fcnvme_ls_disconnect_assoc_acc *discon_acc, + u64 association_id) +{ + lsreq->rqstaddr = discon_rqst; + lsreq->rqstlen = sizeof(*discon_rqst); + lsreq->rspaddr = discon_acc; + lsreq->rsplen = sizeof(*discon_acc); + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; + + discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; + discon_rqst->desc_list_len = cpu_to_be32( + sizeof(struct fcnvme_lsdesc_assoc_id) + + sizeof(struct fcnvme_lsdesc_disconn_cmd)); + + discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); + discon_rqst->associd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id)); + + discon_rqst->associd.association_id = cpu_to_be64(association_id); + + discon_rqst->discon_cmd.desc_tag = cpu_to_be32( + FCNVME_LSDESC_DISCONN_CMD); + discon_rqst->discon_cmd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_disconn_cmd)); +} + +static inline int +nvmefc_vldt_lsreq_discon_assoc(u32 rqstlen, + struct fcnvme_ls_disconnect_assoc_rqst *rqst) +{ + int ret = 0; + + if (rqstlen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst)) + ret = VERR_DISCONN_LEN; + else if (rqst->desc_list_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_ls_disconnect_assoc_rqst))) + ret = VERR_DISCONN_RQST_LEN; + else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) + ret = VERR_ASSOC_ID; + else if (rqst->associd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id))) + ret = VERR_ASSOC_ID_LEN; + else if (rqst->discon_cmd.desc_tag != + cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD)) + ret = VERR_DISCONN_CMD; + else if (rqst->discon_cmd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_disconn_cmd))) + ret = VERR_DISCONN_CMD_LEN; + /* + * As the standard changed on the LS, check if old format and scope + * something other than Association (e.g. 0). + */ + else if (rqst->discon_cmd.rsvd8[0]) + ret = VERR_DISCONN_SCOPE; + + return ret; +} + #endif /* _NVME_FC_TRANSPORT_H */ -- cgit v1.2.3 From ec3b0e3cc393dee1ad3f4bd1026f2c1f8b1c1ffb Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:53 -0700 Subject: nvmet-fc: Update target for common definitions for LS handling Given that both host and target now generate and receive LS's create a single table definition for LS names. Each tranport half will have a local version of the table. Convert the target side transport to use the new common Create Association LS validation routine. 
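The conversion boils down to (illustrative):

    ret = nvmefc_vldt_lsreq_discon_assoc(iod->rqstdatalen, rqst);
    if (!ret) {
        /* match an active association */
        assoc = nvmet_fc_find_target_assoc(tgtport,
                be64_to_cpu(rqst->associd.association_id));
        ...
    }

replacing the open-coded length/tag validation checks removed below.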
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 66a60a218994..5739df7edc59 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1442,32 +1442,8 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, memset(acc, 0, sizeof(*acc)); - if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst)) - ret = VERR_DISCONN_LEN; - else if (rqst->desc_list_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_ls_disconnect_assoc_rqst))) - ret = VERR_DISCONN_RQST_LEN; - else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) - ret = VERR_ASSOC_ID; - else if (rqst->associd.desc_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_assoc_id))) - ret = VERR_ASSOC_ID_LEN; - else if (rqst->discon_cmd.desc_tag != - cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD)) - ret = VERR_DISCONN_CMD; - else if (rqst->discon_cmd.desc_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_disconn_cmd))) - ret = VERR_DISCONN_CMD_LEN; - /* - * As the standard changed on the LS, check if old format and scope - * something other than Association (e.g. 0). - */ - else if (rqst->discon_cmd.rsvd8[0]) - ret = VERR_DISCONN_SCOPE; - else { + ret = nvmefc_vldt_lsreq_discon_assoc(iod->rqstdatalen, rqst); + if (!ret) { /* match an active association */ assoc = nvmet_fc_find_target_assoc(tgtport, be64_to_cpu(rqst->associd.association_id)); -- cgit v1.2.3 From 14fd1e98afafc0027a6a86ea1962e31dceafb400 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:54 -0700 Subject: nvme-fc: Add Disconnect Association Rcv support The nvme-fc host transport did not support the reception of a FC-NVME LS. Reception is necessary to implement full compliance with FC-NVME-2. Populate the LS receive handler, and specifically the handling of a Disconnect Association LS. The response to the LS, if it matched a controller, must be sent after the aborts for any I/O on any connection have been sent. 
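At a high level, the added receive path (condensed from the diff below):

    nvme_fc_rcv_ls_req()                 /* LLDD entry: validate, copy payload,
                                            queue op on rport->ls_rcv_list */
        schedule_work(&rport->lsrcv_work);

    nvme_fc_handle_ls_rqst_work()        /* per-rport worker */
        nvme_fc_handle_ls_rqst()
            nvme_fc_ls_disconnect_assoc()   /* match ctrl, defer the response */

    nvme_fc_delete_association()         /* once all I/O has been ABTS'd */
        nvme_fc_xmt_ls_rsp(disls);          /* now send the deferred LS rsp */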
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 363 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 359 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index c069ab056202..1921d2195541 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -62,6 +62,17 @@ struct nvmefc_ls_req_op { bool req_queued; }; +struct nvmefc_ls_rcv_op { + struct nvme_fc_rport *rport; + struct nvmefc_ls_rsp *lsrsp; + union nvmefc_ls_requests *rqstbuf; + union nvmefc_ls_responses *rspbuf; + u16 rqstdatalen; + bool handled; + dma_addr_t rspdma; + struct list_head lsrcv_list; /* rport->ls_rcv_list */ +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + enum nvme_fcpop_state { FCPOP_STATE_UNINIT = 0, FCPOP_STATE_IDLE = 1, @@ -118,6 +129,7 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; struct list_head ls_req_list; + struct list_head ls_rcv_list; struct list_head disc_list; struct device *dev; /* physical device for dma */ struct nvme_fc_lport *lport; @@ -125,6 +137,7 @@ struct nvme_fc_rport { struct kref ref; atomic_t act_ctrl_cnt; unsigned long dev_loss_end; + struct work_struct lsrcv_work; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ /* fc_ctrl flags values - specified as bit positions */ @@ -142,6 +155,7 @@ struct nvme_fc_ctrl { bool ioq_live; atomic_t err_work_active; u64 association_id; + struct nvmefc_ls_rcv_op *rcv_disconn; struct list_head ctrl_list; /* rport->ctrl_list */ @@ -219,6 +233,9 @@ static struct device *fc_udev_device; static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, struct nvme_fc_queue *, unsigned int); +static void nvme_fc_handle_ls_rqst_work(struct work_struct *work); + + static void nvme_fc_free_lport(struct kref *ref) { @@ -704,6 +721,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, atomic_set(&newrec->act_ctrl_cnt, 0); spin_lock_init(&newrec->lock); newrec->remoteport.localport = &lport->localport; + INIT_LIST_HEAD(&newrec->ls_rcv_list); newrec->dev = lport->dev; newrec->lport = lport; if (lport->ops->remote_priv_sz) @@ -717,6 +735,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; newrec->remoteport.port_num = idx; __nvme_fc_set_dev_loss_tmo(newrec, pinfo); + INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work); spin_lock_irqsave(&nvme_fc_lock, flags); list_add_tail(&newrec->endp_list, &lport->endp_list); @@ -1006,6 +1025,7 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); +static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); static void __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) @@ -1154,6 +1174,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req *lsreq; struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; struct fcnvme_ls_cr_assoc_acc *assoc_acc; + unsigned long flags; int ret, fcret = 0; lsop = kzalloc((sizeof(*lsop) + @@ -1243,11 +1264,13 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, "q %d Create Association LS failed: %s\n", queue->qnum, validation_errors[fcret]); } else { + spin_lock_irqsave(&ctrl->lock, flags); ctrl->association_id = be64_to_cpu(assoc_acc->associd.association_id); 
queue->connection_id = be64_to_cpu(assoc_acc->connectid.connection_id); set_bit(NVME_FC_Q_CONNECTED, &queue->flags); + spin_unlock_irqrestore(&ctrl->lock, flags); } out_free_buffer: @@ -1428,6 +1451,247 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) kfree(lsop); } +static void +nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) +{ + struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private; + struct nvme_fc_rport *rport = lsop->rport; + struct nvme_fc_lport *lport = rport->lport; + unsigned long flags; + + spin_lock_irqsave(&rport->lock, flags); + list_del(&lsop->lsrcv_list); + spin_unlock_irqrestore(&rport->lock, flags); + + fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + fc_dma_unmap_single(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + + kfree(lsop); + + nvme_fc_rport_put(rport); +} + +static void +nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop) +{ + struct nvme_fc_rport *rport = lsop->rport; + struct nvme_fc_lport *lport = rport->lport; + struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0; + int ret; + + fc_dma_sync_single_for_device(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + + ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport, + lsop->lsrsp); + if (ret) { + dev_warn(lport->dev, + "LLDD rejected LS RSP xmt: LS %d status %d\n", + w0->ls_cmd, ret); + nvme_fc_xmt_ls_rsp_done(lsop->lsrsp); + return; + } +} + +static struct nvme_fc_ctrl * +nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport, + struct nvmefc_ls_rcv_op *lsop) +{ + struct fcnvme_ls_disconnect_assoc_rqst *rqst = + &lsop->rqstbuf->rq_dis_assoc; + struct nvme_fc_ctrl *ctrl, *ret = NULL; + struct nvmefc_ls_rcv_op *oldls = NULL; + u64 association_id = be64_to_cpu(rqst->associd.association_id); + unsigned long flags; + + spin_lock_irqsave(&rport->lock, flags); + + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { + if (!nvme_fc_ctrl_get(ctrl)) + continue; + spin_lock(&ctrl->lock); + if (association_id == ctrl->association_id) { + oldls = ctrl->rcv_disconn; + ctrl->rcv_disconn = lsop; + ret = ctrl; + } + spin_unlock(&ctrl->lock); + if (ret) + /* leave the ctrl get reference */ + break; + nvme_fc_ctrl_put(ctrl); + } + + spin_unlock_irqrestore(&rport->lock, flags); + + /* transmit a response for anything that was pending */ + if (oldls) { + dev_info(rport->lport->dev, + "NVME-FC{%d}: Multiple Disconnect Association " + "LS's received\n", ctrl->cnum); + /* overwrite good response with bogus failure */ + oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf, + sizeof(*oldls->rspbuf), + rqst->w0.ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, 0); + nvme_fc_xmt_ls_rsp(oldls); + } + + return ret; +} + +/* + * returns true to mean LS handled and ls_rsp can be sent + * returns false to defer ls_rsp xmt (will be done as part of + * association termination) + */ +static bool +nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop) +{ + struct nvme_fc_rport *rport = lsop->rport; + struct fcnvme_ls_disconnect_assoc_rqst *rqst = + &lsop->rqstbuf->rq_dis_assoc; + struct fcnvme_ls_disconnect_assoc_acc *acc = + &lsop->rspbuf->rsp_dis_assoc; + struct nvme_fc_ctrl *ctrl = NULL; + int ret = 0; + + memset(acc, 0, sizeof(*acc)); + + ret = nvmefc_vldt_lsreq_discon_assoc(lsop->rqstdatalen, rqst); + if (!ret) { + /* match an active association */ + ctrl = nvme_fc_match_disconn_ls(rport, lsop); + if (!ctrl) + ret = VERR_NO_ASSOC; + } + + if (ret) { + dev_info(rport->lport->dev, + "Disconnect LS failed: %s\n", + 
validation_errors[ret]); + lsop->lsrsp->rsplen = nvme_fc_format_rjt(acc, + sizeof(*acc), rqst->w0.ls_cmd, + (ret == VERR_NO_ASSOC) ? + FCNVME_RJT_RC_INV_ASSOC : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); + return true; + } + + /* format an ACCept response */ + + lsop->lsrsp->rsplen = sizeof(*acc); + + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + fcnvme_lsdesc_len( + sizeof(struct fcnvme_ls_disconnect_assoc_acc)), + FCNVME_LS_DISCONNECT_ASSOC); + + /* + * the transmit of the response will occur after the exchanges + * for the association have been ABTS'd by + * nvme_fc_delete_association(). + */ + + /* fail the association */ + nvme_fc_error_recovery(ctrl, "Disconnect Association LS received"); + + /* release the reference taken by nvme_fc_match_disconn_ls() */ + nvme_fc_ctrl_put(ctrl); + + return false; +} + +/* + * Actual Processing routine for received FC-NVME LS Requests from the LLD + * returns true if a response should be sent afterward, false if rsp will + * be sent asynchronously. + */ +static bool +nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop) +{ + struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0; + bool ret = true; + + lsop->lsrsp->nvme_fc_private = lsop; + lsop->lsrsp->rspbuf = lsop->rspbuf; + lsop->lsrsp->rspdma = lsop->rspdma; + lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done; + /* Be preventative. handlers will later set to valid length */ + lsop->lsrsp->rsplen = 0; + + /* + * handlers: + * parse request input, execute the request, and format the + * LS response + */ + switch (w0->ls_cmd) { + case FCNVME_LS_DISCONNECT_ASSOC: + ret = nvme_fc_ls_disconnect_assoc(lsop); + break; + case FCNVME_LS_DISCONNECT_CONN: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0); + break; + case FCNVME_LS_CREATE_ASSOCIATION: + case FCNVME_LS_CREATE_CONNECTION: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); + break; + default: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); + break; + } + + return(ret); +} + +static void +nvme_fc_handle_ls_rqst_work(struct work_struct *work) +{ + struct nvme_fc_rport *rport = + container_of(work, struct nvme_fc_rport, lsrcv_work); + struct fcnvme_ls_rqst_w0 *w0; + struct nvmefc_ls_rcv_op *lsop; + unsigned long flags; + bool sendrsp; + +restart: + sendrsp = true; + spin_lock_irqsave(&rport->lock, flags); + list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) { + if (lsop->handled) + continue; + + lsop->handled = true; + if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&rport->lock, flags); + sendrsp = nvme_fc_handle_ls_rqst(lsop); + } else { + spin_unlock_irqrestore(&rport->lock, flags); + w0 = &lsop->rqstbuf->w0; + lsop->lsrsp->rsplen = nvme_fc_format_rjt( + lsop->rspbuf, + sizeof(*lsop->rspbuf), + w0->ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, 0); + } + if (sendrsp) + nvme_fc_xmt_ls_rsp(lsop); + goto restart; + } + spin_unlock_irqrestore(&rport->lock, flags); +} + /** * nvme_fc_rcv_ls_req - transport entry point called by an LLDD * upon the reception of a NVME LS request. 
@@ -1454,20 +1718,92 @@ nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, { struct nvme_fc_rport *rport = remoteport_to_rport(portptr); struct nvme_fc_lport *lport = rport->lport; + struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf; + struct nvmefc_ls_rcv_op *lsop; + unsigned long flags; + int ret; + + nvme_fc_rport_get(rport); /* validate there's a routine to transmit a response */ - if (!lport->ops->xmt_ls_rsp) - return(-EINVAL); + if (!lport->ops->xmt_ls_rsp) { + dev_info(lport->dev, + "RCV %s LS failed: no LLDD xmt_ls_rsp\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -EINVAL; + goto out_put; + } + + if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) { + dev_info(lport->dev, + "RCV %s LS failed: payload too large\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -E2BIG; + goto out_put; + } + + lsop = kzalloc(sizeof(*lsop) + + sizeof(union nvmefc_ls_requests) + + sizeof(union nvmefc_ls_responses), + GFP_KERNEL); + if (!lsop) { + dev_info(lport->dev, + "RCV %s LS failed: No memory\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -ENOMEM; + goto out_put; + } + lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1]; + lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1]; + + lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf, + sizeof(*lsop->rspbuf), + DMA_TO_DEVICE); + if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) { + dev_info(lport->dev, + "RCV %s LS failed: DMA mapping failure\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -EFAULT; + goto out_free; + } + + lsop->rport = rport; + lsop->lsrsp = lsrsp; + + memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len); + lsop->rqstdatalen = lsreqbuf_len; + + spin_lock_irqsave(&rport->lock, flags); + if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&rport->lock, flags); + ret = -ENOTCONN; + goto out_unmap; + } + list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list); + spin_unlock_irqrestore(&rport->lock, flags); + + schedule_work(&rport->lsrcv_work); return 0; + +out_unmap: + fc_dma_unmap_single(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); +out_free: + kfree(lsop); +out_put: + nvme_fc_rport_put(rport); + return ret; } EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req); /* *********************** NVME Ctrl Routines **************************** */ -static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); - static void __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) @@ -2612,6 +2948,8 @@ static int nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + struct nvmefc_ls_rcv_op *disls = NULL; + unsigned long flags; int ret; bool changed; @@ -2729,7 +3067,13 @@ out_term_aen_ops: out_disconnect_admin_queue: /* send a Disconnect(association) LS to fc-nvme target */ nvme_fc_xmt_disconnect_assoc(ctrl); + spin_lock_irqsave(&ctrl->lock, flags); ctrl->association_id = 0; + disls = ctrl->rcv_disconn; + ctrl->rcv_disconn = NULL; + spin_unlock_irqrestore(&ctrl->lock, flags); + if (disls) + nvme_fc_xmt_ls_rsp(disls); out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); out_free_queue: @@ -2749,6 +3093,7 @@ out_free_queue: static void nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { + struct nvmefc_ls_rcv_op *disls = NULL; unsigned long flags; if 
(!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags)) @@ -2820,7 +3165,17 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) if (ctrl->association_id) nvme_fc_xmt_disconnect_assoc(ctrl); + spin_lock_irqsave(&ctrl->lock, flags); ctrl->association_id = 0; + disls = ctrl->rcv_disconn; + ctrl->rcv_disconn = NULL; + spin_unlock_irqrestore(&ctrl->lock, flags); + if (disls) + /* + * if a Disconnect Request was waiting for a response, send + * now that all ABTS's have been issued (and are complete). + */ + nvme_fc_xmt_ls_rsp(disls); if (ctrl->ctrl.tagset) { nvme_fc_delete_hw_io_queues(ctrl); -- cgit v1.2.3 From a5c2b4f633cf06df62d24b0ef11f824e8da646a5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:55 -0700 Subject: nvmet-fc: add LS failure messages Add LS reception failure messages. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 5739df7edc59..a91c443c9098 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1598,15 +1598,31 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, { struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); struct nvmet_fc_ls_iod *iod; - - if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) + struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf; + + if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) { + dev_info(tgtport->dev, + "RCV %s LS failed: payload too large (%d)\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : "", + lsreqbuf_len); return -E2BIG; + } - if (!nvmet_fc_tgtport_get(tgtport)) + if (!nvmet_fc_tgtport_get(tgtport)) { + dev_info(tgtport->dev, + "RCV %s LS failed: target deleting\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); return -ESHUTDOWN; + } iod = nvmet_fc_alloc_ls_iod(tgtport); if (!iod) { + dev_info(tgtport->dev, + "RCV %s LS failed: context allocation failed\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); nvmet_fc_tgtport_put(tgtport); return -ENOENT; } -- cgit v1.2.3 From 0dfb992e0ec2e7b9e5ccf92d2261aaa6b5cc57a8 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:56 -0700 Subject: nvmet-fc: perform small cleanups on unneeded checks Code review turned up a couple of items that can be cleaned up: - In nvmet_fc_delete_target_queue(), the routine unlocks, then checks and relocks. Reorganize to avoid the unlock/relock. - In nvmet_fc_delete_target_queue(), there's a check on the disconnect state that is unnecessary as the routine validates the state before starting any action.
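To illustrate the first item, the reorganized code holds the lock across both the check and the flag update instead of dropping and re-taking it. A minimal userspace sketch of the pattern, with a hypothetical io_desc standing in for the driver's fod and a pthread mutex in place of the spinlock:

#include <pthread.h>
#include <stdbool.h>

/* hypothetical miniature of the driver's per-I/O descriptor (fod) */
struct io_desc {
    pthread_mutex_t flock;
    bool abort;
    bool aborted;
    bool writedataactive;
};

/*
 * Decide whether the LLDD abort is needed and mark the descriptor in
 * one critical section; the prior code dropped the lock between the
 * check and the update and then had to re-take it.
 */
static bool mark_for_abort(struct io_desc *fod)
{
    bool need_lldd_abort = false;

    pthread_mutex_lock(&fod->flock);
    fod->abort = true;
    if (fod->writedataactive) {
        fod->aborted = true;
        need_lldd_abort = true;
    }
    pthread_mutex_unlock(&fod->flock);
    return need_lldd_abort;
}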
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index a91c443c9098..01488fc35d46 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -677,7 +677,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) struct nvmet_fc_fcp_iod *fod = queue->fod; struct nvmet_fc_defer_fcp_req *deferfcp, *tempptr; unsigned long flags; - int i, writedataactive; + int i; bool disconnect; disconnect = atomic_xchg(&queue->connected, 0); @@ -688,20 +688,18 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) if (fod->active) { spin_lock(&fod->flock); fod->abort = true; - writedataactive = fod->writedataactive; - spin_unlock(&fod->flock); /* * only call lldd abort routine if waiting for * writedata. other outstanding ops should finish * on their own. */ - if (writedataactive) { - spin_lock(&fod->flock); + if (fod->writedataactive) { fod->aborted = true; spin_unlock(&fod->flock); tgtport->ops->fcp_abort( &tgtport->fc_target_port, fod->fcpreq); - } + } else + spin_unlock(&fod->flock); } } @@ -741,8 +739,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) flush_workqueue(queue->work_q); - if (disconnect) - nvmet_sq_destroy(&queue->nvme_sq); + nvmet_sq_destroy(&queue->nvme_sq); nvmet_fc_tgt_q_put(queue); } -- cgit v1.2.3 From 58ab8ff9dca2142ba16ea02f7db9ba06eebbc37c Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:57 -0700 Subject: nvmet-fc: track hostport handle for associations In preparation for sending LS requests for an association that terminates, save and track the hosthandle that is part of the LS's that are received to create associations. Support consists of: - Create a hostport structure that will be 1:1 mapped to a host port handle. The hostport structure is specific to a targetport. - Whenever an association is created, create a host port for the hosthandle the Create Association LS was received from. There will be only 1 hostport structure created, with all associations that have the same hosthandle sharing the hostport structure. - When the association is terminated, the hostport reference will be removed. After the last association for the host port is removed, the hostport will be deleted. - Add support for the new nvmet_fc_invalidate_host() interface. In the past, the LLDD didn't notify loss of connectivity to host ports - the LLDD would simply reject new requests and wait for the kato timeout to kill the association. Now, when host port connectivity is lost, the LLDD can notify the transport. The transport will initiate the termination of all associations for that host port. When the last association has been terminated and the hosthandle will no longer be referenced, the new host_release callback will be made to the LLDD. - For compatibility with prior behavior which didn't report the hosthandle: the LLDD must set hosthandle to NULL. In these cases, no LS request will be made, and no host_release callbacks will be made either.
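Schematically, the hosthandle-to-hostport mapping added by this patch behaves like the lookup-or-create helper below. This is a simplified, single-threaded userspace sketch with hypothetical names and a plain integer in place of the kref; the driver does the lookup under tgtport->lock and re-checks the list after allocating:

#include <stdlib.h>

/* hypothetical miniature of nvmet_fc_hostport */
struct hostport {
    void *hosthandle;
    int refs;                /* stands in for the kref */
    int invalid;
    struct hostport *next;
};

/* return an existing hostport for the handle (adding a reference) or create one */
static struct hostport *hostport_get_or_alloc(struct hostport **list, void *hosthandle)
{
    struct hostport *h;

    for (h = *list; h; h = h->next) {
        if (h->hosthandle == hosthandle && !h->invalid) {
            h->refs++;       /* associations with the same handle share one hostport */
            return h;
        }
    }

    h = calloc(1, sizeof(*h));
    if (!h)
        return NULL;
    h->hosthandle = hosthandle;
    h->refs = 1;
    h->next = *list;
    *list = h;
    return h;
}

When the last reference is dropped and the hostport was marked invalid, the driver makes the host_release callback before freeing.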
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 177 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 170 insertions(+), 7 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 01488fc35d46..3ccf27c328b2 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -33,6 +33,7 @@ struct nvmet_fc_ls_iod { struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_assoc *assoc; + void *hosthandle; union nvmefc_ls_requests *rqstbuf; union nvmefc_ls_responses *rspbuf; @@ -81,7 +82,6 @@ struct nvmet_fc_fcp_iod { }; struct nvmet_fc_tgtport { - struct nvmet_fc_target_port fc_target_port; struct list_head tgt_list; /* nvmet_fc_target_list */ @@ -93,6 +93,7 @@ struct nvmet_fc_tgtport { struct list_head ls_list; struct list_head ls_busylist; struct list_head assoc_list; + struct list_head host_list; struct ida assoc_cnt; struct nvmet_fc_port_entry *pe; struct kref ref; @@ -134,14 +135,24 @@ struct nvmet_fc_tgt_queue { struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */ } __aligned(sizeof(unsigned long long)); +struct nvmet_fc_hostport { + struct nvmet_fc_tgtport *tgtport; + void *hosthandle; + struct list_head host_list; + struct kref ref; + u8 invalid; +}; + struct nvmet_fc_tgt_assoc { u64 association_id; u32 a_id; struct nvmet_fc_tgtport *tgtport; + struct nvmet_fc_hostport *hostport; struct list_head a_list; struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; + atomic_t del_work_active; }; @@ -773,6 +784,102 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, return NULL; } +static void +nvmet_fc_hostport_free(struct kref *ref) +{ + struct nvmet_fc_hostport *hostport = + container_of(ref, struct nvmet_fc_hostport, ref); + struct nvmet_fc_tgtport *tgtport = hostport->tgtport; + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + list_del(&hostport->host_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + if (tgtport->ops->host_release && hostport->invalid) + tgtport->ops->host_release(hostport->hosthandle); + kfree(hostport); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_hostport_put(struct nvmet_fc_hostport *hostport) +{ + kref_put(&hostport->ref, nvmet_fc_hostport_free); +} + +static int +nvmet_fc_hostport_get(struct nvmet_fc_hostport *hostport) +{ + return kref_get_unless_zero(&hostport->ref); +} + +static void +nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport) +{ + /* if LLDD not implemented, leave as NULL */ + if (!hostport->hosthandle) + return; + + nvmet_fc_hostport_put(hostport); +} + +static struct nvmet_fc_hostport * +nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) +{ + struct nvmet_fc_hostport *newhost, *host, *match = NULL; + unsigned long flags; + + /* if LLDD not implemented, leave as NULL */ + if (!hosthandle) + return NULL; + + /* take reference for what will be the newly allocated hostport */ + if (!nvmet_fc_tgtport_get(tgtport)) + return ERR_PTR(-EINVAL); + + newhost = kzalloc(sizeof(*newhost), GFP_KERNEL); + if (!newhost) { + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry(host, &tgtport->host_list, host_list) { + if (host->hosthandle == hosthandle && !host->invalid) { + if (nvmet_fc_hostport_get(host)) { + match = host; + break; + } + } + } + spin_unlock_irqrestore(&tgtport->lock, flags); + /* no allocation - release reference */ + 
nvmet_fc_tgtport_put(tgtport); + return (match) ? match : ERR_PTR(-ENOMEM); + } + + newhost->tgtport = tgtport; + newhost->hosthandle = hosthandle; + INIT_LIST_HEAD(&newhost->host_list); + kref_init(&newhost->ref); + + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry(host, &tgtport->host_list, host_list) { + if (host->hosthandle == hosthandle && !host->invalid) { + if (nvmet_fc_hostport_get(host)) { + match = host; + break; + } + } + } + if (match) { + kfree(newhost); + newhost = NULL; + /* releasing allocation - release reference */ + nvmet_fc_tgtport_put(tgtport); + } else + list_add_tail(&newhost->host_list, &tgtport->host_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + + return (match) ? match : newhost; +} + static void nvmet_fc_delete_assoc(struct work_struct *work) { @@ -780,11 +887,12 @@ nvmet_fc_delete_assoc(struct work_struct *work) container_of(work, struct nvmet_fc_tgt_assoc, del_work); nvmet_fc_delete_target_assoc(assoc); + atomic_set(&assoc->del_work_active, 0); nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * -nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) +nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) { struct nvmet_fc_tgt_assoc *assoc, *tmpassoc; unsigned long flags; @@ -801,13 +909,18 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) goto out_free_assoc; if (!nvmet_fc_tgtport_get(tgtport)) - goto out_ida_put; + goto out_ida; + + assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle); + if (IS_ERR(assoc->hostport)) + goto out_put; assoc->tgtport = tgtport; assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + atomic_set(&assoc->del_work_active, 0); while (needrandom) { get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); @@ -829,7 +942,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) return assoc; -out_ida_put: +out_put: + nvmet_fc_tgtport_put(tgtport); +out_ida: ida_simple_remove(&tgtport->assoc_cnt, idx); out_free_assoc: kfree(assoc); @@ -844,6 +959,7 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; unsigned long flags; + nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); list_del(&assoc->a_list); spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1057,6 +1173,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, INIT_LIST_HEAD(&newrec->ls_list); INIT_LIST_HEAD(&newrec->ls_busylist); INIT_LIST_HEAD(&newrec->assoc_list); + INIT_LIST_HEAD(&newrec->host_list); kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; @@ -1133,14 +1250,21 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) { struct nvmet_fc_tgt_assoc *assoc, *next; unsigned long flags; + int ret; spin_lock_irqsave(&tgtport->lock, flags); list_for_each_entry_safe(assoc, next, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!schedule_work(&assoc->del_work)) + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); + if (ret == 0) { + if (!schedule_work(&assoc->del_work)) + nvmet_fc_tgt_a_put(assoc); + } else { + /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); + } } spin_unlock_irqrestore(&tgtport->lock, flags); } @@ -1178,6 +1302,36 @@ void nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, void *hosthandle) { + struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + struct 
nvmet_fc_tgt_assoc *assoc, *next; + unsigned long flags; + bool noassoc = true; + int ret; + + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry_safe(assoc, next, + &tgtport->assoc_list, a_list) { + if (!assoc->hostport || + assoc->hostport->hosthandle != hosthandle) + continue; + if (!nvmet_fc_tgt_a_get(assoc)) + continue; + assoc->hostport->invalid = 1; + noassoc = false; + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); + if (ret == 0) { + if (!schedule_work(&assoc->del_work)) + nvmet_fc_tgt_a_put(assoc); + } else { + /* already deleting - release local reference */ + nvmet_fc_tgt_a_put(assoc); + } + } + spin_unlock_irqrestore(&tgtport->lock, flags); + + /* if there's nothing to wait for - call the callback */ + if (noassoc && tgtport->ops->host_release) + tgtport->ops->host_release(hosthandle); } EXPORT_SYMBOL_GPL(nvmet_fc_invalidate_host); @@ -1192,6 +1346,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) struct nvmet_fc_tgt_queue *queue; unsigned long flags; bool found_ctrl = false; + int ret; /* this is a bit ugly, but don't want to make locks layered */ spin_lock_irqsave(&nvmet_fc_tgtlock, flags); @@ -1215,8 +1370,14 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!schedule_work(&assoc->del_work)) + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); + if (ret == 0) { + if (!schedule_work(&assoc->del_work)) + nvmet_fc_tgt_a_put(assoc); + } else { + /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); + } return; } @@ -1293,7 +1454,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, else { /* new association w/ admin queue */ - iod->assoc = nvmet_fc_alloc_target_assoc(tgtport); + iod->assoc = nvmet_fc_alloc_target_assoc( + tgtport, iod->hosthandle); if (!iod->assoc) ret = VERR_ASSOC_ALLOC_FAIL; else { @@ -1628,6 +1790,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, iod->fcpreq = NULL; memcpy(iod->rqstbuf, lsreqbuf, lsreqbuf_len); iod->rqstdatalen = lsreqbuf_len; + iod->hosthandle = hosthandle; schedule_work(&iod->work); -- cgit v1.2.3 From 349c694ee71ce0dfe4b9ccfac76ef5c1efb476cf Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:58 -0700 Subject: nvmet-fc: rename ls_list to ls_rcv_list In preparation to add ls request support, rename the current ls_list, which is RCV LS request only, to ls_rcv_list. 
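The list being renamed is the free pool of LS reception contexts; contexts move between it and ls_busylist as LS's arrive and complete. Roughly, as a hypothetical userspace sketch with singly linked lists where the driver uses list_head under tgtport->lock:

/* hypothetical miniature of nvmet_fc_ls_iod */
struct ls_iod {
    struct ls_iod *next;
    /* request/response buffers would live here */
};

struct tgtport_pools {
    struct ls_iod *ls_rcv_list;      /* free contexts; formerly "ls_list" */
    struct ls_iod *ls_busylist;      /* contexts handling a received LS */
};

/* take a free context for a newly received LS; returns a null pointer if exhausted */
static struct ls_iod *alloc_ls_iod(struct tgtport_pools *p)
{
    struct ls_iod *iod = p->ls_rcv_list;

    if (iod) {
        p->ls_rcv_list = iod->next;
        iod->next = p->ls_busylist;
        p->ls_busylist = iod;
    }
    return iod;
}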
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 3ccf27c328b2..6a5af99f19ba 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -29,7 +29,7 @@ struct nvmet_fc_ls_iod { struct nvmefc_ls_rsp *lsrsp; struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */ - struct list_head ls_list; /* tgtport->ls_list */ + struct list_head ls_rcv_list; /* tgtport->ls_rcv_list */ struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_assoc *assoc; @@ -90,7 +90,7 @@ struct nvmet_fc_tgtport { struct nvmet_fc_ls_iod *iod; spinlock_t lock; - struct list_head ls_list; + struct list_head ls_rcv_list; struct list_head ls_busylist; struct list_head assoc_list; struct list_head host_list; @@ -346,7 +346,7 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport) for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) { INIT_WORK(&iod->work, nvmet_fc_handle_ls_rqst_work); iod->tgtport = tgtport; - list_add_tail(&iod->ls_list, &tgtport->ls_list); + list_add_tail(&iod->ls_rcv_list, &tgtport->ls_rcv_list); iod->rqstbuf = kzalloc(sizeof(union nvmefc_ls_requests) + sizeof(union nvmefc_ls_responses), @@ -367,12 +367,12 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport) out_fail: kfree(iod->rqstbuf); - list_del(&iod->ls_list); + list_del(&iod->ls_rcv_list); for (iod--, i--; i >= 0; iod--, i--) { fc_dma_unmap_single(tgtport->dev, iod->rspdma, sizeof(*iod->rspbuf), DMA_TO_DEVICE); kfree(iod->rqstbuf); - list_del(&iod->ls_list); + list_del(&iod->ls_rcv_list); } kfree(iod); @@ -391,7 +391,7 @@ nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport) iod->rspdma, sizeof(*iod->rspbuf), DMA_TO_DEVICE); kfree(iod->rqstbuf); - list_del(&iod->ls_list); + list_del(&iod->ls_rcv_list); } kfree(tgtport->iod); } @@ -403,10 +403,10 @@ nvmet_fc_alloc_ls_iod(struct nvmet_fc_tgtport *tgtport) unsigned long flags; spin_lock_irqsave(&tgtport->lock, flags); - iod = list_first_entry_or_null(&tgtport->ls_list, - struct nvmet_fc_ls_iod, ls_list); + iod = list_first_entry_or_null(&tgtport->ls_rcv_list, + struct nvmet_fc_ls_iod, ls_rcv_list); if (iod) - list_move_tail(&iod->ls_list, &tgtport->ls_busylist); + list_move_tail(&iod->ls_rcv_list, &tgtport->ls_busylist); spin_unlock_irqrestore(&tgtport->lock, flags); return iod; } @@ -419,7 +419,7 @@ nvmet_fc_free_ls_iod(struct nvmet_fc_tgtport *tgtport, unsigned long flags; spin_lock_irqsave(&tgtport->lock, flags); - list_move(&iod->ls_list, &tgtport->ls_list); + list_move(&iod->ls_rcv_list, &tgtport->ls_rcv_list); spin_unlock_irqrestore(&tgtport->lock, flags); } @@ -1170,7 +1170,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, newrec->dev = dev; newrec->ops = template; spin_lock_init(&newrec->lock); - INIT_LIST_HEAD(&newrec->ls_list); + INIT_LIST_HEAD(&newrec->ls_rcv_list); INIT_LIST_HEAD(&newrec->ls_busylist); INIT_LIST_HEAD(&newrec->assoc_list); INIT_LIST_HEAD(&newrec->host_list); -- cgit v1.2.3 From 47bf3241064498878ffed10a69131be9154201eb Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:49:59 -0700 Subject: nvmet-fc: Add Disconnect Association Xmt support As part of FC-NVME-2 (and the amendment to FC-NVME), the target is to send a Disconnect LS after an association is terminated and any exchanges for the association have been ABTS'd.
The target is also not to send the response to any Disconnect Association LS, received to initiate the association termination or received while the association is terminating, until the Disconnect LS has been transmitted. Add support for sending the Disconnect Association LS after all I/O's complete (which is certainly after the exchanges have been ABTS'd). Utilizes the new LLDD api to send LS requests. There is no need to track the Disconnect LS response or to retry after timeout. All spec requirements will have been met by waiting for i/o completion to initiate the transmission. Add support for tracking the reception of a Disconnect Association LS and deferring the response transmission until after the Disconnect Association LS has been transmitted. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 296 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 286 insertions(+), 10 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 6a5af99f19ba..02d9751bb7ee 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -25,7 +25,7 @@ struct nvmet_fc_tgtport; struct nvmet_fc_tgt_assoc; -struct nvmet_fc_ls_iod { +struct nvmet_fc_ls_iod { /* for an LS RQST RCV */ struct nvmefc_ls_rsp *lsrsp; struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */ @@ -45,6 +45,18 @@ struct nvmet_fc_ls_iod { struct work_struct work; } __aligned(sizeof(unsigned long long)); +struct nvmet_fc_ls_req_op { /* for an LS RQST XMT */ + struct nvmefc_ls_req ls_req; + + struct nvmet_fc_tgtport *tgtport; + void *hosthandle; + + int ls_error; + struct list_head lsreq_list; /* tgtport->ls_req_list */ + bool req_queued; +}; + + /* desired maximum for a single sequence - if sg list allows it */ #define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024) @@ -91,6 +103,7 @@ struct nvmet_fc_tgtport { struct nvmet_fc_ls_iod *iod; spinlock_t lock; struct list_head ls_rcv_list; + struct list_head ls_req_list; struct list_head ls_busylist; struct list_head assoc_list; struct list_head host_list; @@ -146,8 +159,10 @@ struct nvmet_fc_hostport { struct nvmet_fc_tgt_assoc { u64 association_id; u32 a_id; + atomic_t terminating; struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_hostport *hostport; + struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; @@ -236,6 +251,8 @@ static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc); +static void nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_iod *iod); /* *********************** FC-NVME DMA Handling **************************** */ @@ -327,6 +344,188 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, } +/* ********************** FC-NVME LS XMT Handling ************************* */ + + +static void +__nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) +{ + struct nvmet_fc_tgtport *tgtport = lsop->tgtport; + struct nvmefc_ls_req *lsreq = &lsop->ls_req; + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + + if (!lsop->req_queued) { + spin_unlock_irqrestore(&tgtport->lock, flags); + return; + } + + list_del(&lsop->lsreq_list); + + lsop->req_queued = false; + + spin_unlock_irqrestore(&tgtport->lock, flags); + + fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma, + (lsreq->rqstlen
+ lsreq->rsplen), + DMA_BIDIRECTIONAL); + + nvmet_fc_tgtport_put(tgtport); +} + +static int +__nvmet_fc_send_ls_req(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_req_op *lsop, + void (*done)(struct nvmefc_ls_req *req, int status)) +{ + struct nvmefc_ls_req *lsreq = &lsop->ls_req; + unsigned long flags; + int ret = 0; + + if (!tgtport->ops->ls_req) + return -EOPNOTSUPP; + + if (!nvmet_fc_tgtport_get(tgtport)) + return -ESHUTDOWN; + + lsreq->done = done; + lsop->req_queued = false; + INIT_LIST_HEAD(&lsop->lsreq_list); + + lsreq->rqstdma = fc_dma_map_single(tgtport->dev, lsreq->rqstaddr, + lsreq->rqstlen + lsreq->rsplen, + DMA_BIDIRECTIONAL); + if (fc_dma_mapping_error(tgtport->dev, lsreq->rqstdma)) { + ret = -EFAULT; + goto out_puttgtport; + } + lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; + + spin_lock_irqsave(&tgtport->lock, flags); + + list_add_tail(&lsop->lsreq_list, &tgtport->ls_req_list); + + lsop->req_queued = true; + + spin_unlock_irqrestore(&tgtport->lock, flags); + + ret = tgtport->ops->ls_req(&tgtport->fc_target_port, lsop->hosthandle, + lsreq); + if (ret) + goto out_unlink; + + return 0; + +out_unlink: + lsop->ls_error = ret; + spin_lock_irqsave(&tgtport->lock, flags); + lsop->req_queued = false; + list_del(&lsop->lsreq_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + DMA_BIDIRECTIONAL); +out_puttgtport: + nvmet_fc_tgtport_put(tgtport); + + return ret; +} + +static int +nvmet_fc_send_ls_req_async(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_req_op *lsop, + void (*done)(struct nvmefc_ls_req *req, int status)) +{ + /* don't wait for completion */ + + return __nvmet_fc_send_ls_req(tgtport, lsop, done); +} + +static void +nvmet_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) +{ + struct nvmet_fc_ls_req_op *lsop = + container_of(lsreq, struct nvmet_fc_ls_req_op, ls_req); + + __nvmet_fc_finish_ls_req(lsop); + + /* fc-nvme target doesn't care about success or failure of cmd */ + + kfree(lsop); +} + +/* + * This routine sends a FC-NVME LS to disconnect (aka terminate) + * the FC-NVME Association. Terminating the association also + * terminates the FC-NVME connections (per queue, both admin and io + * queues) that are part of the association. E.g. things are torn + * down, and the related FC-NVME Association ID and Connection IDs + * become invalid. + * + * The behavior of the fc-nvme target is such that it's + * understanding of the association and connections will implicitly + * be torn down. The action is implicit as it may be due to a loss of + * connectivity with the fc-nvme host, so the target may never get a + * response even if it tried. As such, the action of this routine + * is to asynchronously send the LS, ignore any results of the LS, and + * continue on with terminating the association. If the fc-nvme host + * is present and receives the LS, it too can tear down. + */ +static void +nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; + struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst; + struct fcnvme_ls_disconnect_assoc_acc *discon_acc; + struct nvmet_fc_ls_req_op *lsop; + struct nvmefc_ls_req *lsreq; + int ret; + + /* + * If ls_req is NULL or no hosthandle, it's an older lldd and no + * message is normal. Otherwise, send unless the hostport has + * already been invalidated by the lldd. 
+ */ + if (!tgtport->ops->ls_req || !assoc->hostport || + assoc->hostport->invalid) + return; + + lsop = kzalloc((sizeof(*lsop) + + sizeof(*discon_rqst) + sizeof(*discon_acc) + + tgtport->ops->lsrqst_priv_sz), GFP_KERNEL); + if (!lsop) { + dev_info(tgtport->dev, + "{%d:%d} send Disconnect Association failed: ENOMEM\n", + tgtport->fc_target_port.port_num, assoc->a_id); + return; + } + + discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1]; + discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; + lsreq = &lsop->ls_req; + if (tgtport->ops->lsrqst_priv_sz) + lsreq->private = (void *)&discon_acc[1]; + else + lsreq->private = NULL; + + lsop->tgtport = tgtport; + lsop->hosthandle = assoc->hostport->hosthandle; + + nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc, + assoc->association_id); + + ret = nvmet_fc_send_ls_req_async(tgtport, lsop, + nvmet_fc_disconnect_assoc_done); + if (ret) { + dev_info(tgtport->dev, + "{%d:%d} XMT Disconnect Association failed: %d\n", + tgtport->fc_target_port.port_num, assoc->a_id, ret); + kfree(lsop); + } +} + + /* *********************** FC-NVME Port Management ************************ */ @@ -693,6 +892,10 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) disconnect = atomic_xchg(&queue->connected, 0); + /* if not connected, nothing to do */ + if (!disconnect) + return; + spin_lock_irqsave(&queue->qlock, flags); /* abort outstanding io's */ for (i = 0; i < queue->sqsize; fod++, i++) { @@ -921,6 +1124,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) kref_init(&assoc->ref); INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); atomic_set(&assoc->del_work_active, 0); + atomic_set(&assoc->terminating, 0); while (needrandom) { get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); @@ -957,13 +1161,24 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgt_assoc *assoc = container_of(ref, struct nvmet_fc_tgt_assoc, ref); struct nvmet_fc_tgtport *tgtport = assoc->tgtport; + struct nvmet_fc_ls_iod *oldls; unsigned long flags; + /* Send Disconnect now that all i/o has completed */ + nvmet_fc_xmt_disconnect_assoc(assoc); + nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); list_del(&assoc->a_list); + oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); + /* if pending Rcv Disconnect Association LS, send rsp now */ + if (oldls) + nvmet_fc_xmt_ls_rsp(tgtport, oldls); ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id); + dev_info(tgtport->dev, + "{%d:%d} Association freed\n", + tgtport->fc_target_port.port_num, assoc->a_id); kfree(assoc); nvmet_fc_tgtport_put(tgtport); } @@ -986,7 +1201,13 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_tgt_queue *queue; unsigned long flags; - int i; + int i, terminating; + + terminating = atomic_xchg(&assoc->terminating, 1); + + /* if already terminating, do nothing */ + if (terminating) + return; spin_lock_irqsave(&tgtport->lock, flags); for (i = NVMET_NR_QUEUES; i >= 0; i--) { @@ -1002,6 +1223,10 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) } spin_unlock_irqrestore(&tgtport->lock, flags); + dev_info(tgtport->dev, + "{%d:%d} Association deleted\n", + tgtport->fc_target_port.port_num, assoc->a_id); + nvmet_fc_tgt_a_put(assoc); } @@ -1171,6 +1396,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, newrec->ops = template; spin_lock_init(&newrec->lock); 
INIT_LIST_HEAD(&newrec->ls_rcv_list); + INIT_LIST_HEAD(&newrec->ls_req_list); INIT_LIST_HEAD(&newrec->ls_busylist); INIT_LIST_HEAD(&newrec->assoc_list); INIT_LIST_HEAD(&newrec->host_list); @@ -1407,6 +1633,13 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + /* + * should terminate LS's as well. However, LS's will be generated + * at the tail end of association termination, so they likely don't + * exist yet. And even if they did, it's worthwhile to just let + * them finish and targetport ref counting will clean things up. + */ + nvmet_fc_tgtport_put(tgtport); return 0; @@ -1414,7 +1647,7 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport); -/* *********************** FC-NVME LS Handling **************************** */ +/* ********************** FC-NVME LS RCV Handling ************************* */ static void @@ -1481,6 +1714,10 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, atomic_set(&queue->connected, 1); queue->sqhd = 0; /* best place to init value */ + dev_info(tgtport->dev, + "{%d:%d} Association created\n", + tgtport->fc_target_port.port_num, iod->assoc->a_id); + /* format a response */ iod->lsrsp->rsplen = sizeof(*acc); @@ -1588,7 +1825,11 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, be16_to_cpu(rqst->connect_cmd.qid))); } -static void +/* + * Returns true if the LS response is to be transmit + * Returns false if the LS response is to be delayed + */ +static int nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { @@ -1597,13 +1838,15 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, struct fcnvme_ls_disconnect_assoc_acc *acc = &iod->rspbuf->rsp_dis_assoc; struct nvmet_fc_tgt_assoc *assoc; + struct nvmet_fc_ls_iod *oldls = NULL; + unsigned long flags; int ret = 0; memset(acc, 0, sizeof(*acc)); ret = nvmefc_vldt_lsreq_discon_assoc(iod->rqstdatalen, rqst); if (!ret) { - /* match an active association */ + /* match an active association - takes an assoc ref if !NULL */ assoc = nvmet_fc_find_target_assoc(tgtport, be64_to_cpu(rqst->associd.association_id)); iod->assoc = assoc; @@ -1621,7 +1864,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, FCNVME_RJT_RC_INV_ASSOC : FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); - return; + return true; } /* format a response */ @@ -1634,9 +1877,40 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, FCNVME_LS_DISCONNECT_ASSOC); /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(iod->assoc); + nvmet_fc_tgt_a_put(assoc); + + /* + * The rules for LS response says the response cannot + * go back until ABTS's have been sent for all outstanding + * I/O and a Disconnect Association LS has been sent. + * So... save off the Disconnect LS to send the response + * later. If there was a prior LS already saved, replace + * it with the newer one and send a can't perform reject + * on the older one. 
+ */ + spin_lock_irqsave(&tgtport->lock, flags); + oldls = assoc->rcv_disconn; + assoc->rcv_disconn = iod; + spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(iod->assoc); + nvmet_fc_delete_target_assoc(assoc); + + if (oldls) { + dev_info(tgtport->dev, + "{%d:%d} Multiple Disconnect Association LS's " + "received\n", + tgtport->fc_target_port.port_num, assoc->a_id); + /* overwrite good response with bogus failure */ + oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf, + sizeof(*iod->rspbuf), + /* ok to use rqst, LS is same */ + rqst->w0.ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, 0); + nvmet_fc_xmt_ls_rsp(tgtport, oldls); + } + + return false; } @@ -1681,6 +1955,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { struct fcnvme_ls_rqst_w0 *w0 = &iod->rqstbuf->rq_cr_assoc.w0; + bool sendrsp = true; iod->lsrsp->nvme_fc_private = iod; iod->lsrsp->rspbuf = iod->rspbuf; @@ -1707,7 +1982,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, break; case FCNVME_LS_DISCONNECT_ASSOC: /* Terminate a Queue/Connection or the Association */ - nvmet_fc_ls_disconnect(tgtport, iod); + sendrsp = nvmet_fc_ls_disconnect(tgtport, iod); break; default: iod->lsrsp->rsplen = nvme_fc_format_rjt(iod->rspbuf, @@ -1715,7 +1990,8 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } - nvmet_fc_xmt_ls_rsp(tgtport, iod); + if (sendrsp) + nvmet_fc_xmt_ls_rsp(tgtport, iod); } /* -- cgit v1.2.3 From ea39765843faf5f4426ffda000b0ca02217a1eeb Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:00 -0700 Subject: nvme-fcloop: refactor to enable target to host LS Currently nvme-fcloop only sends LS's from host to target. Slightly rework data structures and routine names to reflect this path. This allows a straightforward conversion to be used by LS's from target to host.
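For orientation, the host-to-target path that these renames label can be compressed into a small sketch. This is a hypothetical userspace stand-in, not the driver: the real fcloop passes an nvmefc_ls_req into nvmet_fc_rcv_ls_req() and completes it back through a workqueue:

#include <stdio.h>

/* hypothetical stand-in for fcloop_lsreq: one in-flight LS exchange */
struct ls_exchange {
    char rqst[64];
    char rsp[64];
    void (*done)(struct ls_exchange *ex, int status);   /* host's lsreq->done */
};

/* target side: consume the request, format a response, complete (H2T rcv + rsp) */
static void target_rcv_ls(struct ls_exchange *ex)
{
    snprintf(ex->rsp, sizeof(ex->rsp), "ACC for %s", ex->rqst);
    ex->done(ex, 0);
}

/* host side: issue an LS over the loopback (the h2t ls_req entry point) */
static void host_send_ls(struct ls_exchange *ex, const char *cmd,
                         void (*done)(struct ls_exchange *, int))
{
    snprintf(ex->rqst, sizeof(ex->rqst), "%s", cmd);
    ex->done = done;
    target_rcv_ls(ex);
}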
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fcloop.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c11805a155e8..fac7dbe572db 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -226,9 +226,15 @@ struct fcloop_nport { u32 port_id; }; +enum { + H2T = 0, + T2H = 1, +}; + struct fcloop_lsreq { struct nvmefc_ls_req *lsreq; struct nvmefc_ls_rsp ls_rsp; + int lsdir; /* H2T or T2H */ int status; struct list_head ls_list; /* fcloop_rport->ls_list */ }; @@ -323,7 +329,7 @@ fcloop_rport_lsrqst_work(struct work_struct *work) } static int -fcloop_ls_req(struct nvme_fc_local_port *localport, +fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, struct nvmefc_ls_req *lsreq) { @@ -331,6 +337,7 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, struct fcloop_rport *rport = remoteport->private; int ret = 0; + tls_req->lsdir = H2T; tls_req->lsreq = lsreq; INIT_LIST_HEAD(&tls_req->ls_list); @@ -351,7 +358,7 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, } static int -fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, +fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, struct nvmefc_ls_rsp *lsrsp) { struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp); @@ -762,7 +769,7 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, } static void -fcloop_ls_abort(struct nvme_fc_local_port *localport, +fcloop_h2t_ls_abort(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, struct nvmefc_ls_req *lsreq) { @@ -879,9 +886,9 @@ static struct nvme_fc_port_template fctemplate = { .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, .delete_queue = fcloop_delete_queue, - .ls_req = fcloop_ls_req, + .ls_req = fcloop_h2t_ls_req, .fcp_io = fcloop_fcp_req, - .ls_abort = fcloop_ls_abort, + .ls_abort = fcloop_h2t_ls_abort, .fcp_abort = fcloop_fcp_abort, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, @@ -896,7 +903,7 @@ static struct nvme_fc_port_template fctemplate = { static struct nvmet_fc_target_template tgttemplate = { .targetport_delete = fcloop_targetport_delete, - .xmt_ls_rsp = fcloop_xmt_ls_rsp, + .xmt_ls_rsp = fcloop_h2t_xmt_ls_rsp, .fcp_op = fcloop_fcp_op, .fcp_abort = fcloop_tgt_fcp_abort, .fcp_req_release = fcloop_fcp_req_release, -- cgit v1.2.3 From 437c0b824dbd05dbdab772ed1e0f69ffec76119d Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 31 Mar 2020 09:50:01 -0700 Subject: nvme-fcloop: add target to host LS request support Add support for performing LS requests from target to host. This includes sending the request from the targetport, reception by the host, and the host sending the LS rsp.
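The completion side of the new path drains a list under a lock, dropping the lock around each callback since the callee may free the entry. A minimal userspace miniature of the fcloop_tport_lsrqst_work() pattern (hypothetical types, pthread mutex in place of the spinlock):

#include <pthread.h>
#include <stddef.h>

struct ls_done_item {
    struct ls_done_item *next;
    void (*done)(struct ls_done_item *item, int status);
    int status;
};

struct tport_sketch {
    pthread_mutex_t lock;
    struct ls_done_item *ls_list;    /* pending LS completions */
};

/* drain pending completions; the lock is dropped around each callback */
static void tport_lsrqst_work(struct tport_sketch *t)
{
    struct ls_done_item *item;

    pthread_mutex_lock(&t->lock);
    while ((item = t->ls_list) != NULL) {
        t->ls_list = item->next;
        pthread_mutex_unlock(&t->lock);

        /* callee may free item; do not reference it afterwards */
        item->done(item, item->status);

        pthread_mutex_lock(&t->lock);
    }
    pthread_mutex_unlock(&t->lock);
}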
Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fcloop.c | 130 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 118 insertions(+), 12 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index fac7dbe572db..2ff1d1334a03 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -208,10 +208,13 @@ struct fcloop_rport { }; struct fcloop_tport { - struct nvmet_fc_target_port *targetport; - struct nvme_fc_remote_port *remoteport; - struct fcloop_nport *nport; - struct fcloop_lport *lport; + struct nvmet_fc_target_port *targetport; + struct nvme_fc_remote_port *remoteport; + struct fcloop_nport *nport; + struct fcloop_lport *lport; + spinlock_t lock; + struct list_head ls_list; + struct work_struct ls_work; }; struct fcloop_nport { @@ -226,11 +229,6 @@ struct fcloop_nport { u32 port_id; }; -enum { - H2T = 0, - T2H = 1, -}; - struct fcloop_lsreq { struct nvmefc_ls_req *lsreq; struct nvmefc_ls_rsp ls_rsp; @@ -337,7 +335,6 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, struct fcloop_rport *rport = remoteport->private; int ret = 0; - tls_req->lsdir = H2T; tls_req->lsreq = lsreq; INIT_LIST_HEAD(&tls_req->ls_list); @@ -351,8 +348,9 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, } tls_req->status = 0; - ret = nvmet_fc_rcv_ls_req(rport->targetport, NULL, &tls_req->ls_rsp, - lsreq->rqstaddr, lsreq->rqstlen); + ret = nvmet_fc_rcv_ls_req(rport->targetport, rport, + &tls_req->ls_rsp, + lsreq->rqstaddr, lsreq->rqstlen); return ret; } @@ -384,6 +382,99 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, return 0; } +static void +fcloop_tport_lsrqst_work(struct work_struct *work) +{ + struct fcloop_tport *tport = + container_of(work, struct fcloop_tport, ls_work); + struct fcloop_lsreq *tls_req; + + spin_lock(&tport->lock); + for (;;) { + tls_req = list_first_entry_or_null(&tport->ls_list, + struct fcloop_lsreq, ls_list); + if (!tls_req) + break; + + list_del(&tls_req->ls_list); + spin_unlock(&tport->lock); + + tls_req->lsreq->done(tls_req->lsreq, tls_req->status); + /* + * callee may free memory containing tls_req. + * do not reference lsreq after this. + */ + + spin_lock(&tport->lock); + } + spin_unlock(&tport->lock); +} + +static int +fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, + struct nvmefc_ls_req *lsreq) +{ + struct fcloop_lsreq *tls_req = lsreq->private; + struct fcloop_tport *tport = targetport->private; + int ret = 0; + + /* + * hosthandle should be the dst.rport value. 
+ * hosthandle ignored as fcloop currently is + * 1:1 tgtport vs remoteport + */ + tls_req->lsreq = lsreq; + INIT_LIST_HEAD(&tls_req->ls_list); + + if (!tport->remoteport) { + tls_req->status = -ECONNREFUSED; + spin_lock(&tport->lock); + list_add_tail(&tport->ls_list, &tls_req->ls_list); + spin_unlock(&tport->lock); + schedule_work(&tport->ls_work); + return ret; + } + + tls_req->status = 0; + ret = nvme_fc_rcv_ls_req(tport->remoteport, &tls_req->ls_rsp, + lsreq->rqstaddr, lsreq->rqstlen); + + return ret; +} + +static int +fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, + struct nvme_fc_remote_port *remoteport, + struct nvmefc_ls_rsp *lsrsp) +{ + struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp); + struct nvmefc_ls_req *lsreq = tls_req->lsreq; + struct fcloop_rport *rport = remoteport->private; + struct nvmet_fc_target_port *targetport = rport->targetport; + struct fcloop_tport *tport; + + memcpy(lsreq->rspaddr, lsrsp->rspbuf, + ((lsreq->rsplen < lsrsp->rsplen) ? + lsreq->rsplen : lsrsp->rsplen)); + lsrsp->done(lsrsp); + + if (targetport) { + tport = targetport->private; + spin_lock(&tport->lock); + list_add_tail(&tport->ls_list, &tls_req->ls_list); + spin_unlock(&tport->lock); + schedule_work(&tport->ls_work); + } + + return 0; +} + +static void +fcloop_t2h_host_release(void *hosthandle) +{ + /* host handle ignored for now */ +} + /* * Simulate reception of RSCN and converting it to a initiator transport * call to rescan a remote port. @@ -775,6 +866,12 @@ fcloop_h2t_ls_abort(struct nvme_fc_local_port *localport, { } +static void +fcloop_t2h_ls_abort(struct nvmet_fc_target_port *targetport, + void *hosthandle, struct nvmefc_ls_req *lsreq) +{ +} + static void fcloop_fcp_abort(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, @@ -874,6 +971,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) { struct fcloop_tport *tport = targetport->private; + flush_work(&tport->ls_work); fcloop_nport_put(tport->nport); } @@ -890,6 +988,7 @@ static struct nvme_fc_port_template fctemplate = { .fcp_io = fcloop_fcp_req, .ls_abort = fcloop_h2t_ls_abort, .fcp_abort = fcloop_fcp_abort, + .xmt_ls_rsp = fcloop_t2h_xmt_ls_rsp, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, @@ -908,6 +1007,9 @@ static struct nvmet_fc_target_template tgttemplate = { .fcp_abort = fcloop_tgt_fcp_abort, .fcp_req_release = fcloop_fcp_req_release, .discovery_event = fcloop_tgt_discovery_evt, + .ls_req = fcloop_t2h_ls_req, + .ls_abort = fcloop_t2h_ls_abort, + .host_release = fcloop_t2h_host_release, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, @@ -916,6 +1018,7 @@ static struct nvmet_fc_target_template tgttemplate = { .target_features = 0, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), + .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), }; static ssize_t @@ -1265,6 +1368,9 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr, tport->nport = nport; tport->lport = nport->lport; nport->tport = tport; + spin_lock_init(&tport->lock); + INIT_WORK(&tport->ls_work, fcloop_tport_lsrqst_work); + INIT_LIST_HEAD(&tport->ls_list); return count; } -- cgit v1.2.3 From 71fb90eb71d76089115aedf942533bcb3fd5c7f9 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 3 Apr 2020 09:24:01 -0700 Subject: nvme: provide num dword helper Various nvme commands use a zeroes based number of 
dwords field. Create a helper function to convert byte lengths to this format. Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 4 ++-- drivers/nvme/host/nvme.h | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f3c037f5a9ba..6da941b6c67c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -530,7 +530,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl, c.directive.opcode = nvme_admin_directive_recv; c.directive.nsid = cpu_to_le32(nsid); - c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1); + c.directive.numd = cpu_to_le32(nvme_bytes_to_numd(sizeof(*s))); c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM; c.directive.dtype = NVME_DIR_STREAMS; @@ -2746,7 +2746,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, void *log, size_t size, u64 offset) { struct nvme_command c = { }; - unsigned long dwlen = size / 4 - 1; + u32 dwlen = nvme_bytes_to_numd(size); c.get_log_page.opcode = nvme_admin_get_log_page; c.get_log_page.nsid = cpu_to_le32(nsid); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2e04a36296d9..36f44b79bb3b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -449,6 +449,14 @@ static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba) return lba << (ns->lba_shift - SECTOR_SHIFT); } +/* + * Convert byte length to nvme's 0-based num dwords + */ +static inline u32 nvme_bytes_to_numd(size_t len) +{ + return (len >> 2) - 1; +} + static inline void nvme_end_request(struct request *req, __le16 status, union nvme_result result) { -- cgit v1.2.3 From 03f8cebc127fa285874074ec314b76b1333f6c43 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 3 Apr 2020 09:24:09 -0700 Subject: nvme: remove unused parameter nvme_alloc_ns_head() doesn't use the 'struct nvme_id_ns' parameter. Remove it, and update the caller accordingly. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6da941b6c67c..3d0c7d90ad25 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3410,8 +3410,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, - unsigned nsid, struct nvme_id_ns *id, - struct nvme_ns_ids *ids) + unsigned nsid, struct nvme_ns_ids *ids) { struct nvme_ns_head *head; size_t size = sizeof(*head); @@ -3482,7 +3481,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, if (is_shared) head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { - head = nvme_alloc_ns_head(ctrl, nsid, id, &ids); + head = nvme_alloc_ns_head(ctrl, nsid, &ids); if (IS_ERR(head)) { ret = PTR_ERR(head); goto out_unlock; -- cgit v1.2.3 From b0012dd397155438c61b0c1b52ceec1f1366b3cc Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 19 Apr 2017 11:56:57 +0300 Subject: nvmet-rdma: use SRQ per completion vector In order to save resource allocation and utilize the completion locality in a better way (compared to SRQ per device that exists today), allocate Shared Receive Queues (SRQs) per completion vector. Associate each created QP/CQ with an appropriate SRQ according to the queue index.
This association will reduce the lock contention in the fast path (compared to the SRQ per device solution) and increase the locality in memory buffers. Add a new module parameter for SRQ size to adjust it according to the expected load. The user should make sure the size is >= 256 to avoid lack of resources. Also reduce the debug level of the "last WQE reached" event that is raised when a QP is using an SRQ during the destruction process, to relieve the log. Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/rdma.c | 178 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 136 insertions(+), 42 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index fd47de0e4e4e..7a90b10359bb 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -34,6 +34,8 @@ /* Assume mpsmin == device_page_size == 4KB */ #define NVMET_RDMA_MAX_MDTS 8 +struct nvmet_rdma_srq; + struct nvmet_rdma_cmd { struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1]; struct ib_cqe cqe; @@ -41,6 +43,7 @@ struct nvmet_rdma_cmd { struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE]; struct nvme_command *nvme_cmd; struct nvmet_rdma_queue *queue; + struct nvmet_rdma_srq *nsrq; }; enum { @@ -83,6 +86,7 @@ struct nvmet_rdma_queue { struct ib_cq *cq; atomic_t sq_wr_avail; struct nvmet_rdma_device *dev; + struct nvmet_rdma_srq *nsrq; spinlock_t state_lock; enum nvmet_rdma_queue_state state; struct nvmet_cq nvme_cq; @@ -100,6 +104,7 @@ struct nvmet_rdma_queue { int idx; int host_qid; + int comp_vector; int recv_queue_size; int send_queue_size; @@ -113,11 +118,17 @@ struct nvmet_rdma_port { struct delayed_work repair_work; }; +struct nvmet_rdma_srq { + struct ib_srq *srq; + struct nvmet_rdma_cmd *cmds; + struct nvmet_rdma_device *ndev; +}; + struct nvmet_rdma_device { struct ib_device *device; struct ib_pd *pd; - struct ib_srq *srq; - struct nvmet_rdma_cmd *srq_cmds; + struct nvmet_rdma_srq **srqs; + int srq_count; size_t srq_size; struct kref ref; struct list_head entry; @@ -129,6 +140,16 @@ static bool nvmet_rdma_use_srq; module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); MODULE_PARM_DESC(use_srq, "Use shared receive queue."); +static int srq_size_set(const char *val, const struct kernel_param *kp); +static const struct kernel_param_ops srq_size_ops = { + .set = srq_size_set, + .get = param_get_int, +}; + +static int nvmet_rdma_srq_size = 1024; +module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644); +MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should >= 256 (default: 1024)"); + static DEFINE_IDA(nvmet_rdma_queue_ida); static LIST_HEAD(nvmet_rdma_queue_list); static DEFINE_MUTEX(nvmet_rdma_queue_mutex); @@ -149,6 +170,17 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, static const struct nvmet_fabrics_ops nvmet_rdma_ops; +static int srq_size_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (ret != 0 || n < 256) + return -EINVAL; + + return param_set_int(val, kp); +} + static int num_pages(int len) { return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT); @@ -466,8 +498,8 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, cmd->sge[0].addr, cmd->sge[0].length, DMA_FROM_DEVICE); - if (ndev->srq) - ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL); + if (cmd->nsrq) + ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL); else ret =
ib_post_recv(cmd->queue->qp, &cmd->wr, NULL); @@ -845,23 +877,40 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) nvmet_rdma_handle_command(queue, rsp); } -static void nvmet_rdma_destroy_srq(struct nvmet_rdma_device *ndev) +static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq) +{ + nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size, + false); + ib_destroy_srq(nsrq->srq); + + kfree(nsrq); +} + +static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev) { - if (!ndev->srq) + int i; + + if (!ndev->srqs) return; - nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false); - ib_destroy_srq(ndev->srq); + for (i = 0; i < ndev->srq_count; i++) + nvmet_rdma_destroy_srq(ndev->srqs[i]); + + kfree(ndev->srqs); } -static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) +static struct nvmet_rdma_srq * +nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) { struct ib_srq_init_attr srq_attr = { NULL, }; + size_t srq_size = ndev->srq_size; + struct nvmet_rdma_srq *nsrq; struct ib_srq *srq; - size_t srq_size; int ret, i; - srq_size = 4095; /* XXX: tune */ + nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL); + if (!nsrq) + return ERR_PTR(-ENOMEM); srq_attr.attr.max_wr = srq_size; srq_attr.attr.max_sge = 1 + ndev->inline_page_count; @@ -869,35 +918,73 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) srq_attr.srq_type = IB_SRQT_BASIC; srq = ib_create_srq(ndev->pd, &srq_attr); if (IS_ERR(srq)) { - /* - * If SRQs aren't supported we just go ahead and use normal - * non-shared receive queues. - */ - pr_info("SRQ requested but not supported.\n"); - return 0; + ret = PTR_ERR(srq); + goto out_free; } - ndev->srq_cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false); - if (IS_ERR(ndev->srq_cmds)) { - ret = PTR_ERR(ndev->srq_cmds); + nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false); + if (IS_ERR(nsrq->cmds)) { + ret = PTR_ERR(nsrq->cmds); goto out_destroy_srq; } - ndev->srq = srq; - ndev->srq_size = srq_size; + nsrq->srq = srq; + nsrq->ndev = ndev; for (i = 0; i < srq_size; i++) { - ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]); + nsrq->cmds[i].nsrq = nsrq; + ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]); if (ret) goto out_free_cmds; } - return 0; + return nsrq; out_free_cmds: - nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false); + nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false); out_destroy_srq: ib_destroy_srq(srq); +out_free: + kfree(nsrq); + return ERR_PTR(ret); +} + +static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev) +{ + int i, ret; + + if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) { + /* + * If SRQs aren't supported we just go ahead and use normal + * non-shared receive queues. 
+ */ + pr_info("SRQ requested but not supported.\n"); + return 0; + } + + ndev->srq_size = min(ndev->device->attrs.max_srq_wr, + nvmet_rdma_srq_size); + ndev->srq_count = min(ndev->device->num_comp_vectors, + ndev->device->attrs.max_srq); + + ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL); + if (!ndev->srqs) + return -ENOMEM; + + for (i = 0; i < ndev->srq_count; i++) { + ndev->srqs[i] = nvmet_rdma_init_srq(ndev); + if (IS_ERR(ndev->srqs[i])) { + ret = PTR_ERR(ndev->srqs[i]); + goto err_srq; + } + } + + return 0; + +err_srq: + while (--i >= 0) + nvmet_rdma_destroy_srq(ndev->srqs[i]); + kfree(ndev->srqs); return ret; } @@ -910,7 +997,7 @@ static void nvmet_rdma_free_dev(struct kref *ref) list_del(&ndev->entry); mutex_unlock(&device_list_mutex); - nvmet_rdma_destroy_srq(ndev); + nvmet_rdma_destroy_srqs(ndev); ib_dealloc_pd(ndev->pd); kfree(ndev); @@ -957,7 +1044,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) goto out_free_dev; if (nvmet_rdma_use_srq) { - ret = nvmet_rdma_init_srq(ndev); + ret = nvmet_rdma_init_srqs(ndev); if (ret) goto out_free_pd; } @@ -981,14 +1068,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) { struct ib_qp_init_attr qp_attr; struct nvmet_rdma_device *ndev = queue->dev; - int comp_vector, nr_cqe, ret, i, factor; - - /* - * Spread the io queues across completion vectors, - * but still keep all admin queues on vector 0. - */ - comp_vector = !queue->host_qid ? 0 : - queue->idx % ndev->device->num_comp_vectors; + int nr_cqe, ret, i, factor; /* * Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND. @@ -996,7 +1076,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size; queue->cq = ib_alloc_cq(ndev->device, queue, - nr_cqe + 1, comp_vector, + nr_cqe + 1, queue->comp_vector, IB_POLL_WORKQUEUE); if (IS_ERR(queue->cq)) { ret = PTR_ERR(queue->cq); @@ -1020,8 +1100,8 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, ndev->device->attrs.max_send_sge); - if (ndev->srq) { - qp_attr.srq = ndev->srq; + if (queue->nsrq) { + qp_attr.srq = queue->nsrq->srq; } else { /* +1 for drain */ qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size; @@ -1041,7 +1121,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) __func__, queue->cq->cqe, qp_attr.cap.max_send_sge, qp_attr.cap.max_send_wr, queue->cm_id); - if (!ndev->srq) { + if (!queue->nsrq) { for (i = 0; i < queue->recv_queue_size; i++) { queue->cmds[i].queue = queue; ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]); @@ -1076,7 +1156,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) nvmet_sq_destroy(&queue->nvme_sq); nvmet_rdma_destroy_queue_ib(queue); - if (!queue->dev->srq) { + if (!queue->nsrq) { nvmet_rdma_free_cmds(queue->dev, queue->cmds, queue->recv_queue_size, !queue->host_qid); @@ -1188,13 +1268,23 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, goto out_destroy_sq; } + /* + * Spread the io queues across completion vectors, + * but still keep all admin queues on vector 0. + */ + queue->comp_vector = !queue->host_qid ? 
0 : + queue->idx % ndev->device->num_comp_vectors; + + ret = nvmet_rdma_alloc_rsps(queue); if (ret) { ret = NVME_RDMA_CM_NO_RSC; goto out_ida_remove; } - if (!ndev->srq) { + if (ndev->srqs) { + queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count]; + } else { queue->cmds = nvmet_rdma_alloc_cmds(ndev, queue->recv_queue_size, !queue->host_qid); @@ -1215,7 +1305,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, return queue; out_free_cmds: - if (!ndev->srq) { + if (!queue->nsrq) { nvmet_rdma_free_cmds(queue->dev, queue->cmds, queue->recv_queue_size, !queue->host_qid); @@ -1241,6 +1331,10 @@ static void nvmet_rdma_qp_event(struct ib_event *event, void *priv) case IB_EVENT_COMM_EST: rdma_notify(queue->cm_id, event->event); break; + case IB_EVENT_QP_LAST_WQE_REACHED: + pr_debug("received last WQE reached event for queue=0x%p\n", + queue); + break; default: pr_err("received IB QP event: %s (%d)\n", ib_event_msg(event->event), event->event); -- cgit v1.2.3 From e4fcc72c1a420bdbe425530dd19724214ceb44ec Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 6 Apr 2020 16:55:34 -0700 Subject: nvmet-fc: slight cleanup for kbuild test warnings The kbuild test robot flagged the following 3 issues: Case 1) >> drivers/nvme/target/fc.c:1201:37: warning: Either the condition >> '!assoc' is redundant or there is possible null pointer dereference: >> assoc. [nullPointerRedundantCheck] >> struct nvmet_fc_tgtport *tgtport = assoc->tgtport; ^ >> drivers/nvme/target/fc.c:1853:7: note: Assuming that condition '!assoc' >> is not redundant >> if (!assoc) ^ >> drivers/nvme/target/fc.c:1850:37: note: Assignment >> 'assoc=nvmet_fc_find_target_assoc(tgtport,be64_to_cpu( >> rqst->associd.association_id))', assigned value is 0 >> assoc = nvmet_fc_find_target_assoc(tgtport, ^ >> drivers/nvme/target/fc.c:1896:31: note: Calling function >> 'nvmet_fc_delete_target_assoc', 1st argument 'assoc' value is 0 >> nvmet_fc_delete_target_assoc(assoc); ^ The tool isn't smart enough to see that line 1854 sets a ret value which thereafter causes the routine to exit. This occurs before any of the assoc references, so it is not an issue. There are 2 more reports of this same failure. To quiet the tool, rework the if test that does the exit to also reference assoc. No change in logic otherwise. Case 2) drivers/nvme/target/fc.c:1202:29: warning: The scope of the variable 'queue' can be reduced. [variableScope] struct nvmet_fc_tgt_queue *queue; ^ The tool is requesting the variable be declared within the code block that utilizes it. Ignoring this report as the existing code style is fine. Case 3) drivers/nvme/target/fc.c:1137:16: warning: Variable 'needrandom' is assigned a value that is never used. [unreadVariable] needrandom = true; ^ Another parsing issue with the tool. Given that braces were not used with the list_for_each_entry() loop, it inadvertently thinks the break exited the outer while loop rather than the inner for loop. This is not an error. But, braces were added to the inner list_for_each_entry() to quiet the tool, as it is also better coding style.
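For illustration, the Case 3 loop as it now reads (a reduced fragment, not the full function). list_for_each_entry() expands to a for-loop header, so the break below only ever exits the inner loop; the added braces make that scoping explicit even to token-matching tools:

	needrandom = false;
	list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) {
		if (ran == tmpassoc->association_id) {
			needrandom = true;	/* id collision, try again */
			break;			/* leaves the inner loop only */
		}
	}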
-- james Signed-off-by: James Smart Reported-by: kbuild test robot CC: kbuild test robot CC: Christoph Hellwig Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 02d9751bb7ee..27fd3b5aa621 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1132,11 +1132,12 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) spin_lock_irqsave(&tgtport->lock, flags); needrandom = false; - list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) + list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) { if (ran == tmpassoc->association_id) { needrandom = true; break; } + } if (!needrandom) { assoc->association_id = ran; list_add_tail(&assoc->a_list, &tgtport->assoc_list); @@ -1837,7 +1838,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, &iod->rqstbuf->rq_dis_assoc; struct fcnvme_ls_disconnect_assoc_acc *acc = &iod->rspbuf->rsp_dis_assoc; - struct nvmet_fc_tgt_assoc *assoc; + struct nvmet_fc_tgt_assoc *assoc = NULL; struct nvmet_fc_ls_iod *oldls = NULL; unsigned long flags; int ret = 0; @@ -1854,7 +1855,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, ret = VERR_NO_ASSOC; } - if (ret) { + if (ret || !assoc) { dev_err(tgtport->dev, "Disconnect LS failed: %s\n", validation_errors[ret]); -- cgit v1.2.3 From b9a5c3d4c34d8bd9fd75f7f28d18a57cb68da237 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 4 Apr 2020 10:11:28 +0200 Subject: nvme: refine the Qemu Identify CNS quirk Add a helper to check if we can use Identify CNS values > 1, and refine the Qemu quirk to not apply to reported versions larger than 1.1, as the Qemu implementation had been fixed by then. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3d0c7d90ad25..a9af83918afe 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1027,6 +1027,19 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_stop_keep_alive); +/* + * In NVMe 1.0 the CNS field was just a binary controller or namespace + * flag, thus sending any new CNS opcodes has a big chance of not working. + * Qemu unfortunately had that bug after reporting a 1.1 version compliance + * (but not for any later version). 
+ */ +static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl) +{ + if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS) + return ctrl->vs < NVME_VS(1, 2, 0); + return ctrl->vs < NVME_VS(1, 1, 0); +} + static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) { struct nvme_command c = { }; @@ -3814,8 +3827,7 @@ static void nvme_scan_work(struct work_struct *work) mutex_lock(&ctrl->scan_lock); nn = le32_to_cpu(id->nn); - if (ctrl->vs >= NVME_VS(1, 1, 0) && - !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { + if (!nvme_ctrl_limited_cns(ctrl)) { if (!nvme_scan_ns_list(ctrl, nn)) goto out_free_id; } -- cgit v1.2.3 From 25dcaa9292afc0689749099a5ba56fdb264eda7a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 4 Apr 2020 10:16:03 +0200 Subject: nvme: clean up nvme_scan_work Move the check for the supported CNS value into nvme_scan_ns_list, and limit the lifetime of the identify controller allocation. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a9af83918afe..025a8a6d81c7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3738,6 +3738,9 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024); int ret = 0; + if (nvme_ctrl_limited_cns(ctrl)) + return -EOPNOTSUPP; + ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); if (!ns_list) return -ENOMEM; @@ -3824,17 +3827,14 @@ static void nvme_scan_work(struct work_struct *work) if (nvme_identify_ctrl(ctrl, &id)) return; + nn = le32_to_cpu(id->nn); + kfree(id); mutex_lock(&ctrl->scan_lock); - nn = le32_to_cpu(id->nn); - if (!nvme_ctrl_limited_cns(ctrl)) { - if (!nvme_scan_ns_list(ctrl, nn)) - goto out_free_id; - } - nvme_scan_ns_sequential(ctrl, nn); -out_free_id: + if (nvme_scan_ns_list(ctrl, nn) != 0) + nvme_scan_ns_sequential(ctrl, nn); mutex_unlock(&ctrl->scan_lock); - kfree(id); + down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); up_write(&ctrl->namespaces_rwsem); -- cgit v1.2.3 From 4450ba3bbb47d5fe852b730ab6773d2c18cfbfd6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 4 Apr 2020 10:30:32 +0200 Subject: nvme: factor out a nvme_ns_remove_by_nsid helper Factor out a piece of deeply indented and logically separate code from nvme_scan_ns_list into a new helper.
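The resulting helper, as it appears in the diff below, annotated to show the reference pairing:

	static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid)
	{
		struct nvme_ns *ns = nvme_find_get_ns(ctrl, nsid); /* takes a reference */

		if (ns) {
			nvme_ns_remove(ns);
			nvme_put_ns(ns);	/* drop the lookup reference */
		}
	}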
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 025a8a6d81c7..42102b81d540 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3699,6 +3699,16 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_put_ns(ns); } +static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) +{ + struct nvme_ns *ns = nvme_find_get_ns(ctrl, nsid); + + if (ns) { + nvme_ns_remove(ns); + nvme_put_ns(ns); + } +} + static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; @@ -3732,7 +3742,6 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) { - struct nvme_ns *ns; __le32 *ns_list; unsigned i, j, nsid, prev = 0; unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024); @@ -3757,13 +3766,8 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) nvme_validate_ns(ctrl, nsid); - while (++prev < nsid) { - ns = nvme_find_get_ns(ctrl, prev); - if (ns) { - nvme_ns_remove(ns); - nvme_put_ns(ns); - } - } + while (++prev < nsid) + nvme_ns_remove_by_nsid(ctrl, prev); } nn -= j; } -- cgit v1.2.3 From 4005f28d25cbd3d4f85529a31e46a0b78e293082 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 4 Apr 2020 10:31:35 +0200 Subject: nvme: avoid an Identify Controller command for each namespace scan The namespace lists are 0-terminated, so we don't really need the NN value except for the legacy sequential scan. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 42102b81d540..15b63426a113 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3740,12 +3740,11 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, } -static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) +static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) { __le32 *ns_list; - unsigned i, j, nsid, prev = 0; - unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024); - int ret = 0; + u32 prev = 0; + int ret = 0, i; if (nvme_ctrl_limited_cns(ctrl)) return -EOPNOTSUPP; @@ -3754,22 +3753,20 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) if (!ns_list) return -ENOMEM; - for (i = 0; i < num_lists; i++) { + for (;;) { ret = nvme_identify_ns_list(ctrl, prev, ns_list); if (ret) goto free; - for (j = 0; j < min(nn, 1024U); j++) { - nsid = le32_to_cpu(ns_list[j]); - if (!nsid) - goto out; + for (i = 0; i < 1024; i++) { + u32 nsid = le32_to_cpu(ns_list[i]); + if (!nsid) /* end of the list?
*/ + goto out; nvme_validate_ns(ctrl, nsid); - while (++prev < nsid) nvme_ns_remove_by_nsid(ctrl, prev); } - nn -= j; } out: nvme_remove_invalid_namespaces(ctrl, prev); @@ -3778,9 +3775,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) return ret; } -static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn) +static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl) { - unsigned i; + struct nvme_id_ctrl *id; + u32 nn, i; + + if (nvme_identify_ctrl(ctrl, &id)) + return; + nn = le32_to_cpu(id->nn); + kfree(id); for (i = 1; i <= nn; i++) nvme_validate_ns(ctrl, i); @@ -3817,8 +3820,6 @@ static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, scan_work); - struct nvme_id_ctrl *id; - unsigned nn; /* No tagset on a live ctrl means IO queues could not created */ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) @@ -3829,14 +3830,9 @@ static void nvme_scan_work(struct work_struct *work) nvme_clear_changed_ns_log(ctrl); } - if (nvme_identify_ctrl(ctrl, &id)) - return; - nn = le32_to_cpu(id->nn); - kfree(id); - mutex_lock(&ctrl->scan_lock); - if (nvme_scan_ns_list(ctrl, nn) != 0) - nvme_scan_ns_sequential(ctrl, nn); + if (nvme_scan_ns_list(ctrl) != 0) + nvme_scan_ns_sequential(ctrl); mutex_unlock(&ctrl->scan_lock); down_write(&ctrl->namespaces_rwsem); -- cgit v1.2.3 From aec459b484b8356d6abe862428787d98ffb1cbde Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 4 Apr 2020 10:34:21 +0200 Subject: nvme: remove the magic 1024 constant in nvme_scan_ns_list Replace it with a value derived from the identify data and nsid sizes. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 15b63426a113..d0f65322156c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3742,6 +3742,7 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) { + const int nr_entries = NVME_IDENTIFY_DATA_SIZE / sizeof(__le32); __le32 *ns_list; u32 prev = 0; int ret = 0, i; @@ -3758,7 +3759,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) if (ret) goto free; - for (i = 0; i < 1024; i++) { + for (i = 0; i < nr_entries; i++) { u32 nsid = le32_to_cpu(ns_list[i]); if (!nsid) /* end of the list? */ -- cgit v1.2.3 From d567572906d986dedb78b37f111c44eba033f3ef Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:08:59 -0700 Subject: nvme: unlink head after removing last namespace The driver had been unlinking the namespace head from the subsystem's list only after the last reference was released, and outside of the list's subsys->lock protection. There is no reason to track an empty head, so unlink the entry from the subsystem's list when the last namespace using that head is removed and with the mutex lock protecting the list update. The next namespace to attach reusing the previous NSID will allocate a new head rather than find the old head with mismatched identifiers. 
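Condensed from the hunks below, the removal path now reads (a sketch, not the complete function):

	mutex_lock(&ns->ctrl->subsys->lock);
	list_del_rcu(&ns->siblings);
	if (list_empty(&ns->head->list))		/* last namespace on this head */
		list_del_init(&ns->head->entry);	/* unlink under subsys->lock */
	mutex_unlock(&ns->ctrl->subsys->lock);
	synchronize_rcu();	/* guarantee ns is no longer visible in head->list */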
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d0f65322156c..15b9e60be204 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -433,7 +433,6 @@ static void nvme_free_ns_head(struct kref *ref) nvme_mpath_remove_disk(head); ida_simple_remove(&head->subsys->ns_ida, head->instance); - list_del_init(&head->entry); cleanup_srcu_struct(&head->srcu); nvme_put_subsystem(head->subsys); kfree(head); @@ -3414,7 +3413,6 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, list_for_each_entry(h, &subsys->nsheads, entry) { if (nvme_ns_ids_valid(&new->ids) && - !list_empty(&h->list) && nvme_ns_ids_equal(&new->ids, &h->ids)) return -EINVAL; } @@ -3660,6 +3658,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) + list_del_init(&ns->head->entry); mutex_unlock(&ctrl->subsys->lock); nvme_put_ns_head(ns->head); out_free_id: @@ -3679,7 +3679,10 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) + list_del_init(&ns->head->entry); mutex_unlock(&ns->ctrl->subsys->lock); + synchronize_rcu(); /* guarantee not available in head->list */ nvme_mpath_clear_current_path(ns); synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */ -- cgit v1.2.3 From ac262508daa88fb12c5dc53cf30bde163f9f26c9 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:00 -0700 Subject: nvme: release namespace head reference on error If a namespace identification does not match the subsystem's head for that NSID, release the reference that was taken when the matching head was initially found. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 15b9e60be204..f2dc89dd2e1c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3503,6 +3503,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, "IDs don't match for shared namespace %d\n", nsid); ret = -EINVAL; + nvme_put_ns_head(head); goto out_unlock; } } -- cgit v1.2.3 From 9ad1927a3bc2735996ccc74e31b68a41ae9a3d33 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:01 -0700 Subject: nvme: always search for namespace head Even if a namespace reports it is not capable of sharing, search the subsystem for a matching namespace head. If found, the driver should reject that namespace since it's coming from an invalid configuration. 
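A condensed sketch of the lookup after this change (the full hunk follows below):

	head = nvme_find_ns_head(ctrl->subsys, nsid);
	if (!head) {
		head = nvme_alloc_ns_head(ctrl, nsid, &ids);
	} else if (!is_shared) {
		/* an existing head means another controller already attached
		 * this NSID; an unshared namespace must not reuse it */
		dev_err(ctrl->device, "Duplicate unshared namespace %d\n", nsid);
		ret = -EINVAL;
	}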
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f2dc89dd2e1c..6093c8baf809 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3489,8 +3489,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, goto out; mutex_lock(&ctrl->subsys->lock); - if (is_shared) - head = nvme_find_ns_head(ctrl->subsys, nsid); + head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { head = nvme_alloc_ns_head(ctrl, nsid, &ids); if (IS_ERR(head)) { @@ -3498,6 +3497,14 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, goto out_unlock; } } else { + if (!is_shared) { + dev_err(ctrl->device, + "Duplicate unshared namespace %d\n", + nsid); + ret = -EINVAL; + nvme_put_ns_head(head); + goto out_unlock; + } if (!nvme_ns_ids_equal(&head->ids, &ids)) { dev_err(ctrl->device, "IDs don't match for shared namespace %d\n", -- cgit v1.2.3 From 0c284db7f23571f6428c44ca714f13a1fc5f70df Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:02 -0700 Subject: nvme: check namespace head shared property Reject a new shared namespace if a duplicate unshared namespace exists. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 3 ++- drivers/nvme/host/nvme.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6093c8baf809..6437f2273785 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3496,8 +3496,9 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, ret = PTR_ERR(head); goto out_unlock; } + head->shared = is_shared; } else { - if (!is_shared) { + if (!is_shared || !head->shared) { dev_err(ctrl->device, "Duplicate unshared namespace %d\n", nsid); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 36f44b79bb3b..6222439a0776 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -352,6 +352,7 @@ struct nvme_ns_head { struct nvme_ns_ids ids; struct list_head entry; struct kref ref; + bool shared; int instance; #ifdef CONFIG_NVME_MULTIPATH struct gendisk *disk; -- cgit v1.2.3 From b2ce4d90690bd29ce5b554e203cd03682dd59697 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:04 -0700 Subject: nvme-multipath: set bdi capabilities once The queues' backing device info capabilities don't change with each namespace revalidation. Set it only when each path's request_queue is initially added to a multipath queue. 
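Condensed from the multipath.c hunk below; the capability now propagates from the path's queue to the multipath disk's queue once, at add time:

	if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
		struct backing_dev_info *info =
			ns->head->disk->queue->backing_dev_info;

		info->capabilities |= BDI_CAP_STABLE_WRITES;
	}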
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 7 ------- drivers/nvme/host/multipath.c | 8 ++++++++ 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6437f2273785..0624393d95e2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1909,13 +1909,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->head->disk) { nvme_update_disk_info(ns->head->disk, ns, id); blk_queue_stack_limits(ns->head->disk->queue, ns->queue); - if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { - struct backing_dev_info *info = - ns->head->disk->queue->backing_dev_info; - - info->capabilities |= BDI_CAP_STABLE_WRITES; - } - revalidate_disk(ns->head->disk); } #endif diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 54603bd3e02d..9f2844935fdf 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -3,6 +3,7 @@ * Copyright (c) 2017-2018 Christoph Hellwig. */ +#include #include #include #include "nvme.h" @@ -666,6 +667,13 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) nvme_mpath_set_live(ns); mutex_unlock(&ns->head->lock); } + + if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { + struct backing_dev_info *info = + ns->head->disk->queue->backing_dev_info; + + info->capabilities |= BDI_CAP_STABLE_WRITES; + } } void nvme_mpath_remove_disk(struct nvme_ns_head *head) -- cgit v1.2.3 From b2b2de7c5a0127882848f9c5e9379e84d2e02041 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:05 -0700 Subject: nvme: revalidate after verifying identifiers If the namespace identifiers have changed, skip updating the disk information, as that will register parameters from a mismatched namespace. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0624393d95e2..b9e6e6de58cf 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1936,7 +1936,6 @@ static int nvme_revalidate_disk(struct gendisk *disk) goto free_id; } - __nvme_revalidate_disk(disk, id); ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); if (ret) goto free_id; @@ -1945,8 +1944,10 @@ static int nvme_revalidate_disk(struct gendisk *disk) dev_err(ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); ret = -ENODEV; + goto free_id; } + __nvme_revalidate_disk(disk, id); free_id: kfree(id); out: -- cgit v1.2.3 From 38adf94e166e3cb4eb89683458ca578051e8218d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:06 -0700 Subject: nvme: consolidate chunk_sectors settings Move the quirked chunk_sectors setting to the same location as noiob so one place registers this setting. And since the noiob value is only used locally, remove the member from struct nvme_ns. 
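Condensed from the hunk below; both sources of the I/O boundary now feed a single local value:

	u32 iob;

	if ((ns->ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
	    is_power_of_2(ns->ctrl->max_hw_sectors))
		iob = ns->ctrl->max_hw_sectors;		/* quirked stripe size */
	else
		iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));

	if (iob)
		blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));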
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 22 ++++++++++------------ drivers/nvme/host/nvme.h | 1 - 2 files changed, 10 insertions(+), 13 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b9e6e6de58cf..2279557d7361 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1725,12 +1725,6 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) } #endif /* CONFIG_BLK_DEV_INTEGRITY */ -static void nvme_set_chunk_size(struct nvme_ns *ns) -{ - u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob); - blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); -} - static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -1885,6 +1879,7 @@ static void nvme_update_disk_info(struct gendisk *disk, static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; + u32 iob; /* * If identify namespace failed, use default 512 byte block size so @@ -1893,7 +1888,13 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; if (ns->lba_shift == 0) ns->lba_shift = 9; - ns->noiob = le16_to_cpu(id->noiob); + + if ((ns->ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && + is_power_of_2(ns->ctrl->max_hw_sectors)) + iob = ns->ctrl->max_hw_sectors; + else + iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); + ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); /* the PI implementation requires metadata equal t10 pi tuple size */ @@ -1902,8 +1903,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) else ns->pi_type = 0; - if (ns->noiob) - nvme_set_chunk_size(ns); + if (iob) + blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); nvme_update_disk_info(disk, ns, id); #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { @@ -2255,9 +2256,6 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && - is_power_of_2(ctrl->max_hw_sectors)) - blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); blk_queue_virt_boundary(q, ctrl->page_size - 1); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6222439a0776..f3ab17778349 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -389,7 +389,6 @@ struct nvme_ns { #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 #define NVME_NS_ANA_PENDING 2 - u16 noiob; struct nvme_fault_inject fault_inject; -- cgit v1.2.3 From bc1af009a8ed99aac86ee44a711c4a406ed74263 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:07 -0700 Subject: nvme: revalidate namespace stream parameters The stream parameters are based on the currently formatted logical block size. Recheck these parameters on namespace revalidation so the registered constraints will be accurate. 
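A worked example with hypothetical values makes the dependency concrete:

	/* Hypothetical namespace formatted with 4 KiB blocks (lba_shift = 12),
	 * reporting sws = 16 and sgs = 8. The registered limits become:
	 *
	 *   io_min = bs * sws       = 4096 * 16     =  64 KiB
	 *   io_opt = bs * sws * sgs = 4096 * 16 * 8 = 512 KiB
	 *
	 * Reformatting to 512-byte blocks changes bs, so both limits must be
	 * recomputed when the namespace is revalidated.
	 */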
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 54 ++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2279557d7361..86ca33b3873f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1810,6 +1810,32 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0; } +static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) +{ + struct streams_directive_params s; + int ret; + + if (!ctrl->nr_streams) + return 0; + + ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id); + if (ret) + return ret; + + ns->sws = le32_to_cpu(s.sws); + ns->sgs = le16_to_cpu(s.sgs); + + if (ns->sws) { + unsigned int bs = 1 << ns->lba_shift; + + blk_queue_io_min(ns->queue, bs * ns->sws); + if (ns->sgs) + blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs); + } + + return 0; +} + static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { @@ -1824,6 +1850,7 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); + nvme_setup_streams_ns(ns->ctrl, ns); if (id->nabo == 0) { /* * Bit 1 indicates whether NAWUPF is defined for this namespace @@ -3546,32 +3573,6 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) return ret; } -static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) -{ - struct streams_directive_params s; - int ret; - - if (!ctrl->nr_streams) - return 0; - - ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id); - if (ret) - return ret; - - ns->sws = le32_to_cpu(s.sws); - ns->sgs = le16_to_cpu(s.sgs); - - if (ns->sws) { - unsigned int bs = 1 << ns->lba_shift; - - blk_queue_io_min(ns->queue, bs * ns->sws); - if (ns->sgs) - blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs); - } - - return 0; -} - static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; @@ -3615,7 +3616,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ret = nvme_init_ns_head(ns, nsid, id); if (ret) goto out_free_id; - nvme_setup_streams_ns(ctrl, ns); nvme_set_disk_name(disk_name, ns, ctrl, &flags); disk = alloc_disk_node(0, node); -- cgit v1.2.3 From 31fdad7be18992606078caed6ff71741fa76310a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:08 -0700 Subject: nvme: consolidate io settings The stream parameters indicating optimal io settings were just getting overwritten later. Rearrange the settings so the streams parameters can be preserved if provided.
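Condensed from the hunk below; the limits start at the logical block size, the streams parameters may raise them, and namespace-reported granularities take precedence when the namespace provides them:

	atomic_bs = phys_bs = io_opt = bs;		/* defaults */
	nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt);

	if (id->nsfeat & (1 << 4)) {
		phys_bs = bs * (1 + le16_to_cpu(id->npwg));	/* NPWG */
		io_opt = bs * (1 + le16_to_cpu(id->nows));	/* NOWS */
	}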
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 86ca33b3873f..0231f61f37d0 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1810,7 +1810,8 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0; } -static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) +static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + u32 *phys_bs, u32 *io_opt) { struct streams_directive_params s; int ret; @@ -1826,11 +1827,9 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) ns->sgs = le16_to_cpu(s.sgs); if (ns->sws) { - unsigned int bs = 1 << ns->lba_shift; - - blk_queue_io_min(ns->queue, bs * ns->sws); + *phys_bs = ns->sws * (1 << ns->lba_shift); if (ns->sgs) - blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs); + *io_opt = *phys_bs * ns->sgs; } return 0; @@ -1850,7 +1849,8 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); - nvme_setup_streams_ns(ns->ctrl, ns); + atomic_bs = phys_bs = io_opt = bs; + nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt); if (id->nabo == 0) { /* * Bit 1 indicates whether NAWUPF is defined for this namespace @@ -1861,16 +1861,13 @@ static void nvme_update_disk_info(struct gendisk *disk, atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; - } else { - atomic_bs = bs; } - phys_bs = bs; - io_opt = bs; + if (id->nsfeat & (1 << 4)) { /* NPWG = Namespace Preferred Write Granularity */ - phys_bs *= 1 + le16_to_cpu(id->npwg); + phys_bs = bs * (1 + le16_to_cpu(id->npwg)); /* NOWS = Namespace Optimal Write Size */ - io_opt *= 1 + le16_to_cpu(id->nows); + io_opt = bs * (1 + le16_to_cpu(id->nows)); } blk_queue_logical_block_size(disk->queue, bs); -- cgit v1.2.3 From e8cd1ff11d58a21242ea2b85450298f3681768a1 Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Sun, 19 Apr 2020 16:48:50 -0700 Subject: nvmet: add ns revalidation support Add support for detecting capacity changes on nvmet blockdev and file backed namespaces. This allows for emulating and testing online resizing of nvme devices and filesystems on top. Signed-off-by: Anthony Iliopoulos [chaitanya: Fix comments posted on V1] Signed-off-by: Chaitanya Kulkarni [hch: reuse code a bit more] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/admin-cmd.c | 5 +++++ drivers/nvme/target/io-cmd-bdev.c | 5 +++++ drivers/nvme/target/io-cmd-file.c | 17 +++++++++++++---- drivers/nvme/target/nvmet.h | 2 ++ 4 files changed, 25 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 9d6f75cfa77c..4c79aa804887 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -486,6 +486,11 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) if (!ns) goto done; + if (ns->bdev) + nvmet_bdev_ns_revalidate(ns); + else + nvmet_file_ns_revalidate(ns); + /* * nuse = ncap = nsze isn't always true, but we have no way to find * that out from the underlying device. 
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index ea0e596be15d..0427e040e3dd 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -75,6 +75,11 @@ void nvmet_bdev_ns_disable(struct nvmet_ns *ns) } } +void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns) +{ + ns->size = i_size_read(ns->bdev->bd_inode); +} + static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) { u16 status = NVME_SC_SUCCESS; diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index cd5670b83118..f0bd08d86ac0 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -13,6 +13,18 @@ #define NVMET_MAX_MPOOL_BVEC 16 #define NVMET_MIN_MPOOL_OBJ 16 +int nvmet_file_ns_revalidate(struct nvmet_ns *ns) +{ + struct kstat stat; + int ret; + + ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE, + AT_STATX_FORCE_SYNC); + if (!ret) + ns->size = stat.size; + return ret; +} + void nvmet_file_ns_disable(struct nvmet_ns *ns) { if (ns->file) { @@ -30,7 +42,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns) int nvmet_file_ns_enable(struct nvmet_ns *ns) { int flags = O_RDWR | O_LARGEFILE; - struct kstat stat; int ret; if (!ns->buffered_io) @@ -43,12 +54,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) return PTR_ERR(ns->file); } - ret = vfs_getattr(&ns->file->f_path, - &stat, STATX_SIZE, AT_STATX_FORCE_SYNC); + ret = nvmet_file_ns_revalidate(ns); if (ret) goto err; - ns->size = stat.size; /* * i_blkbits can be greater than the universally accepted upper bound, * so make sure we export a sane namespace lba_shift. diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 421dff3ea143..3d981eb6e100 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -498,6 +498,8 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns); u16 nvmet_bdev_flush(struct nvmet_req *req); u16 nvmet_file_flush(struct nvmet_req *req); void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid); +void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); +int nvmet_file_ns_revalidate(struct nvmet_ns *ns); static inline u32 nvmet_rw_len(struct nvmet_req *req) { -- cgit v1.2.3 From 3add1d93d9919b6de94aa47900d4904adffbc976 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Apr 2020 23:30:57 +0200 Subject: nvme-fc: avoid gcc-10 zero-length-bounds warning When CONFIG_ARCH_NO_SG_CHAIN is set, op->sgl[0] cannot be dereferenced, as gcc-10 now points out: drivers/nvme/host/fc.c: In function 'nvme_fc_init_request': drivers/nvme/host/fc.c:1774:29: warning: array subscript 0 is outside the bounds of an interior zero-length array 'struct scatterlist[0]' [-Wzero-length-bounds] 1774 | op->op.fcp_req.first_sgl = &op->sgl[0]; | ^~~~~~~~~~~ drivers/nvme/host/fc.c:98:21: note: while referencing 'sgl' 98 | struct scatterlist sgl[NVME_INLINE_SG_CNT]; | ^~~ I don't know if this is a legitimate warning or a false-positive. If this is just a false alarm, the warning is easily suppressed by interpreting the array as a pointer. 
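The change itself is a one-liner; the array-to-pointer decay yields the same address without the subscript expression the warning keys on:

	/* gcc-10 flags the subscript on the zero-length interior array: */
	op->op.fcp_req.first_sgl = &op->sgl[0];
	/* same address via array decay, no subscript, no warning: */
	op->op.fcp_req.first_sgl = op->sgl;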
Fixes: b1ae1a238900 ("nvme-fc: Avoid preallocating big SGL for data") Signed-off-by: Arnd Bergmann Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1921d2195541..0b3ab3355e25 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2106,7 +2106,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); if (res) return res; - op->op.fcp_req.first_sgl = &op->sgl[0]; + op->op.fcp_req.first_sgl = op->sgl; op->op.fcp_req.private = &op->priv[0]; nvme_req(rq)->ctrl = &ctrl->ctrl; return res; -- cgit v1.2.3 From 6623c5b3dfa5513190d729a8516db7a5163ec7de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Apr 2020 09:59:08 +0200 Subject: nvme: clean up error handling in nvme_init_ns_head Use a common label for putting the nshead if needed and only convert nvme status codes for the one case where it actually is needed. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0231f61f37d0..35e279261c7d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3501,8 +3501,11 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, int ret = 0; ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); - if (ret) - goto out; + if (ret) { + if (ret < 0) + return ret; + return blk_status_to_errno(nvme_error_status(ret)); + } mutex_lock(&ctrl->subsys->lock); head = nvme_find_ns_head(ctrl->subsys, nsid); @@ -3514,32 +3517,29 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, } head->shared = is_shared; } else { + ret = -EINVAL; if (!is_shared || !head->shared) { dev_err(ctrl->device, - "Duplicate unshared namespace %d\n", - nsid); - ret = -EINVAL; - nvme_put_ns_head(head); - goto out_unlock; + "Duplicate unshared namespace %d\n", nsid); + goto out_put_ns_head; } if (!nvme_ns_ids_equal(&head->ids, &ids)) { dev_err(ctrl->device, "IDs don't match for shared namespace %d\n", nsid); - ret = -EINVAL; - nvme_put_ns_head(head); - goto out_unlock; + goto out_put_ns_head; } } list_add_tail(&ns->siblings, &head->list); ns->head = head; + mutex_unlock(&ctrl->subsys->lock); + return 0; +out_put_ns_head: + nvme_put_ns_head(head); out_unlock: mutex_unlock(&ctrl->subsys->lock); -out: - if (ret > 0) - ret = blk_status_to_errno(nvme_error_status(ret)); return ret; } -- cgit v1.2.3 From b04df85d9a05d7ee4b3f93c1a22312b41a949ec1 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 30 Apr 2020 05:31:23 +0900 Subject: nvme: flush scan work on passthrough commands If a passthrough command causes the namespace inventory or capabilities to change, flush the scan work that handles these changes so the driver synchronizes with the user command's effects before returning the result to user space. 
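Condensed from the hunk below:

	if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
		nvme_queue_scan(ctrl);
		flush_work(&ctrl->scan_work);	/* don't return to user space
						   until the rescan completed */
	}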
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 35e279261c7d..4a124a28118f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1404,8 +1404,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) } if (effects & NVME_CMD_EFFECTS_CCC) nvme_init_identify(ctrl); - if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) + if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { nvme_queue_scan(ctrl); + flush_work(&ctrl->scan_work); + } } static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, -- cgit v1.2.3 From 74943d45eef4db64b1e5c9f7ad1d018576e113c5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 28 Apr 2020 07:21:56 -0700 Subject: nvme-pci: remove volatile cqes The completion queue entry is not volatile once the phase is confirmed. Remove the volatile keywords and check the phase using the appropriate READ_ONCE() accessor, allowing the compiler to optimize the remaining completion path. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e13c370de830..e95c7465c7bd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -166,7 +166,7 @@ struct nvme_queue { void *sq_cmds; /* only used for poll queues: */ spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; - volatile struct nvme_completion *cqes; + struct nvme_completion *cqes; dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; u32 __iomem *q_db; @@ -922,8 +922,9 @@ static void nvme_pci_complete_rq(struct request *req) /* We read the CQE phase first to check if the rest of the entry is valid */ static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq) { - return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == - nvmeq->cq_phase; + struct nvme_completion *hcqe = &nvmeq->cqes[nvmeq->cq_head]; + + return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == nvmeq->cq_phase; } static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) @@ -944,7 +945,7 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { - volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; + struct nvme_completion *cqe = &nvmeq->cqes[idx]; struct request *req; if (unlikely(cqe->command_id >= nvmeq->q_depth)) { -- cgit v1.2.3 From 54b2fcee1db041a83b52b51752dade6090cf952f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 27 Apr 2020 11:54:46 -0700 Subject: nvme-pci: remove last_sq_tail The nvme driver does not have enough tags to wrap the queue, and blk-mq will no longer call commit_rqs() when there are no new submissions to notify. 
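Condensed from the hunks below; the helper loses its wrap tracking and is now called either per command or once per batch from ->commit_rqs():

	static inline void nvme_write_sq_db(struct nvme_queue *nvmeq)
	{
		if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
				nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
			writel(nvmeq->sq_tail, nvmeq->q_db);
	}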
Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e95c7465c7bd..b945e9a89883 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -173,7 +173,6 @@ struct nvme_queue { u16 q_depth; u16 cq_vector; u16 sq_tail; - u16 last_sq_tail; u16 cq_head; u16 qid; u8 cq_phase; @@ -446,24 +445,11 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) return 0; } -/* - * Write sq tail if we are asked to, or if the next command would wrap. - */ -static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq) { - if (!write_sq) { - u16 next_tail = nvmeq->sq_tail + 1; - - if (next_tail == nvmeq->q_depth) - next_tail = 0; - if (next_tail != nvmeq->last_sq_tail) - return; - } - if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) writel(nvmeq->sq_tail, nvmeq->q_db); - nvmeq->last_sq_tail = nvmeq->sq_tail; } /** @@ -480,7 +466,8 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, cmd, sizeof(*cmd)); if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; - nvme_write_sq_db(nvmeq, write_sq); + if (write_sq) + nvme_write_sq_db(nvmeq); spin_unlock(&nvmeq->sq_lock); } @@ -489,8 +476,7 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) struct nvme_queue *nvmeq = hctx->driver_data; spin_lock(&nvmeq->sq_lock); - if (nvmeq->sq_tail != nvmeq->last_sq_tail) - nvme_write_sq_db(nvmeq, true); + nvme_write_sq_db(nvmeq); spin_unlock(&nvmeq->sq_lock); } @@ -1494,7 +1480,6 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) struct nvme_dev *dev = nvmeq->dev; nvmeq->sq_tail = 0; - nvmeq->last_sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; -- cgit v1.2.3 From 2a5bcfdd41d68559567cec3c124a75e093506cc1 Mon Sep 17 00:00:00 2001 From: Weiping Zhang Date: Sat, 2 May 2020 15:29:41 +0800 Subject: nvme-pci: align io queue count with allocated nvme_queue in nvme_probe Since commit 147b27e4bd08 ("nvme-pci: allocate device queues storage space at probe"), nvme_alloc_queue does not allocate the nvme queues itself anymore. If the write/poll_queues module parameters are changed at runtime to values larger than the number of allocated queues in nvme_probe, nvme_alloc_queue will access unallocated memory. Add a new nr_allocated_queues member to struct nvme_dev to record how many queues were allocated in nvme_probe to avoid using more than the allocated queues after a reset following a change to the write/poll_queues module parameters. Also add nr_write_queues and nr_poll_queues members to allow refreshing the number of write and poll queues based on a change to the module parameters when resetting the controller.
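Condensed from the hunks below; probe sizes the queue array once from the boot-time parameters, and every later reset re-samples the module parameters but clamps to that allocation:

	/* at probe time */
	dev->nr_write_queues = write_queues;
	dev->nr_poll_queues = poll_queues;
	dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1;	/* + admin */

	/* at reset time, in nvme_setup_io_queues() */
	dev->nr_write_queues = write_queues;	/* re-sample module params */
	dev->nr_poll_queues = poll_queues;
	nr_io_queues = min(nvme_max_io_queues(dev),
			   dev->nr_allocated_queues - 1);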
Fixes: 147b27e4bd08 ("nvme-pci: allocate device queues storage space at probe") Signed-off-by: Weiping Zhang Reviewed-by: Keith Busch Reviewed-by: Max Gurtovoy [hch: add nvme_max_io_queues, update the commit message] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 57 ++++++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 24 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b945e9a89883..b0978ac554d5 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -128,6 +128,9 @@ struct nvme_dev { dma_addr_t host_mem_descs_dma; struct nvme_host_mem_buf_desc *host_mem_descs; void **host_mem_desc_bufs; + unsigned int nr_allocated_queues; + unsigned int nr_write_queues; + unsigned int nr_poll_queues; }; static int io_queue_depth_set(const char *val, const struct kernel_param *kp) @@ -208,25 +211,14 @@ struct nvme_iod { struct scatterlist *sg; }; -static unsigned int max_io_queues(void) +static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) { - return num_possible_cpus() + write_queues + poll_queues; -} - -static unsigned int max_queue_count(void) -{ - /* IO queues + admin queue */ - return 1 + max_io_queues(); -} - -static inline unsigned int nvme_dbbuf_size(u32 stride) -{ - return (max_queue_count() * 8 * stride); + return dev->nr_allocated_queues * 8 * dev->db_stride; } static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) { - unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + unsigned int mem_size = nvme_dbbuf_size(dev); if (dev->dbbuf_dbs) return 0; @@ -251,7 +243,7 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) static void nvme_dbbuf_dma_free(struct nvme_dev *dev) { - unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + unsigned int mem_size = nvme_dbbuf_size(dev); if (dev->dbbuf_dbs) { dma_free_coherent(dev->dev, mem_size, @@ -1981,7 +1973,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs) { struct nvme_dev *dev = affd->priv; - unsigned int nr_read_queues; + unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues; /* * If there is no interupt available for queues, ensure that @@ -1997,12 +1989,12 @@ static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs) if (!nrirqs) { nrirqs = 1; nr_read_queues = 0; - } else if (nrirqs == 1 || !write_queues) { + } else if (nrirqs == 1 || !nr_write_queues) { nr_read_queues = 0; - } else if (write_queues >= nrirqs) { + } else if (nr_write_queues >= nrirqs) { nr_read_queues = 1; } else { - nr_read_queues = nrirqs - write_queues; + nr_read_queues = nrirqs - nr_write_queues; } dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues; @@ -2026,7 +2018,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) * Poll queues don't need interrupts, but we need at least one IO * queue left over for non-polled IO. 
*/ - this_p_queues = poll_queues; + this_p_queues = dev->nr_poll_queues; if (this_p_queues >= nr_io_queues) { this_p_queues = nr_io_queues - 1; irq_queues = 1; @@ -2056,14 +2048,25 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) __nvme_disable_io_queues(dev, nvme_admin_delete_cq); } +static unsigned int nvme_max_io_queues(struct nvme_dev *dev) +{ + return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues; +} + static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = &dev->queues[0]; struct pci_dev *pdev = to_pci_dev(dev->dev); - int result, nr_io_queues; + unsigned int nr_io_queues; unsigned long size; + int result; - nr_io_queues = max_io_queues(); + /* + * Sample the module parameters once at reset time so that we have + * stable values to work with. + */ + dev->nr_write_queues = write_queues; + dev->nr_poll_queues = poll_queues; /* * If tags are shared with admin queue (Apple bug), then @@ -2071,6 +2074,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) */ if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) nr_io_queues = 1; + else + nr_io_queues = min(nvme_max_io_queues(dev), + dev->nr_allocated_queues - 1); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) @@ -2745,8 +2751,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!dev) return -ENOMEM; - dev->queues = kcalloc_node(max_queue_count(), sizeof(struct nvme_queue), - GFP_KERNEL, node); + dev->nr_write_queues = write_queues; + dev->nr_poll_queues = poll_queues; + dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1; + dev->queues = kcalloc_node(dev->nr_allocated_queues, + sizeof(struct nvme_queue), GFP_KERNEL, node); if (!dev->queues) goto free; -- cgit v1.2.3 From 386e5e6e1aa90b479fcf0467935922df8524393d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 30 Apr 2020 13:59:32 -0700 Subject: nvme-tcp: use bh_lock in data_ready data_ready may be invoked from send context or from softirq, so need bh locking for that. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index c15a92163c1f..4862fa962011 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -794,11 +794,11 @@ static void nvme_tcp_data_ready(struct sock *sk) { struct nvme_tcp_queue *queue; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; if (likely(queue && queue->rd_enabled)) queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void nvme_tcp_write_space(struct sock *sk) -- cgit v1.2.3 From 72e5d757c62029664c0287d14519ec4451901b5e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 1 May 2020 14:25:44 -0700 Subject: nvme-tcp: avoid scheduling io_work if we are already polling When the user runs polled I/O, we shouldn't have to trigger the workqueue to generate the receive work upon the .data_ready upcall. This prevents a redundant context switch when the application is already polling for completions. 
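Condensed from the hunk below; the poll loop brackets its receive work with the flag that data_ready tests:

	set_bit(NVME_TCP_Q_POLLING, &queue->flags);	/* data_ready now skips
							   queue_work_on() */
	if (sk_can_busy_loop(sk) &&
	    skb_queue_empty_lockless(&sk->sk_receive_queue))
		sk_busy_loop(sk, true);
	nvme_tcp_try_recv(queue);
	clear_bit(NVME_TCP_Q_POLLING, &queue->flags);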
Proposed-by: Mark Wunderlich Signed-off-by: Mark Wunderlich Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/tcp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4862fa962011..b28f91d0f083 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -60,6 +60,7 @@ struct nvme_tcp_request { enum nvme_tcp_queue_flags { NVME_TCP_Q_ALLOCATED = 0, NVME_TCP_Q_LIVE = 1, + NVME_TCP_Q_POLLING = 2, }; enum nvme_tcp_recv_state { @@ -796,7 +797,8 @@ static void nvme_tcp_data_ready(struct sock *sk) read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; - if (likely(queue && queue->rd_enabled)) + if (likely(queue && queue->rd_enabled) && + !test_bit(NVME_TCP_Q_POLLING, &queue->flags)) queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); read_unlock_bh(&sk->sk_callback_lock); } @@ -2302,9 +2304,11 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) return 0; + set_bit(NVME_TCP_Q_POLLING, &queue->flags); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) sk_busy_loop(sk, true); nvme_tcp_try_recv(queue); + clear_bit(NVME_TCP_Q_POLLING, &queue->flags); return queue->nr_cqe; } -- cgit v1.2.3 From db5ad6b7f8cdd6d78efef62a1557461d0cbaee54 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 1 May 2020 14:25:45 -0700 Subject: nvme-tcp: try to send request in queue_rq context Today, nvme-tcp automatically schedules a send request to a workqueue context, which is 1 more than we'd need in case the socket buffer is wide open. However, because we have async send activity (as a result of r2t, or write_space callbacks), we need to synchronize sends from possibly multiple contexts (ideally all running on the same cpu though). Thus, we only try to send directly from queue_rq in cases: 1. the send_list is empty 2. we can send it synchronously (i.e. not from the RX path) 3. we run on the same cpu as the queue->io_cpu to avoid contention on the send operation. 
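A condensed sketch of the resulting queuing logic (hypothetical example_* names; the actual change is in the diff below). The trylock keeps the direct path from ever contending with the io_work context:

static void example_queue_request(struct example_queue *queue,
		struct example_request *req, bool sync)
{
	bool empty;

	spin_lock(&queue->lock);
	empty = list_empty(&queue->send_list) && !queue->request;
	list_add_tail(&req->entry, &queue->send_list);
	spin_unlock(&queue->lock);

	/*
	 * Send inline only if nothing is queued ahead of us, the caller
	 * may block (sync), and we already run on the queue's CPU;
	 * otherwise defer to the queue's io_work.
	 */
	if (sync && empty && queue->io_cpu == smp_processor_id() &&
	    mutex_trylock(&queue->send_mutex)) {
		example_try_send(queue);
		mutex_unlock(&queue->send_mutex);
	} else {
		queue_work_on(queue->io_cpu, example_wq, &queue->io_work);
	}
}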
Proposed-by: Mark Wunderlich Signed-off-by: Mark Wunderlich Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/tcp.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b28f91d0f083..c79e248b9f43 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -76,6 +76,7 @@ struct nvme_tcp_queue { int io_cpu; spinlock_t lock; + struct mutex send_mutex; struct list_head send_list; /* recv state */ @@ -132,6 +133,7 @@ static DEFINE_MUTEX(nvme_tcp_ctrl_mutex); static struct workqueue_struct *nvme_tcp_wq; static struct blk_mq_ops nvme_tcp_mq_ops; static struct blk_mq_ops nvme_tcp_admin_mq_ops; +static int nvme_tcp_try_send(struct nvme_tcp_queue *queue); static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) { @@ -258,15 +260,29 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req, } } -static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req) +static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, + bool sync) { struct nvme_tcp_queue *queue = req->queue; + bool empty; spin_lock(&queue->lock); + empty = list_empty(&queue->send_list) && !queue->request; list_add_tail(&req->entry, &queue->send_list); spin_unlock(&queue->lock); - queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); + /* + * if we're the first on the send_list and we can try to send + * directly, otherwise queue io_work. Also, only do that if we + * are on the same cpu, so we don't introduce contention. + */ + if (queue->io_cpu == smp_processor_id() && + sync && empty && mutex_trylock(&queue->send_mutex)) { + nvme_tcp_try_send(queue); + mutex_unlock(&queue->send_mutex); + } else { + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); + } } static inline struct nvme_tcp_request * @@ -579,7 +595,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, req->state = NVME_TCP_SEND_H2C_PDU; req->offset = 0; - nvme_tcp_queue_request(req); + nvme_tcp_queue_request(req, false); return 0; } @@ -1065,11 +1081,14 @@ static void nvme_tcp_io_work(struct work_struct *w) bool pending = false; int result; - result = nvme_tcp_try_send(queue); - if (result > 0) - pending = true; - else if (unlikely(result < 0)) - break; + if (mutex_trylock(&queue->send_mutex)) { + result = nvme_tcp_try_send(queue); + mutex_unlock(&queue->send_mutex); + if (result > 0) + pending = true; + else if (unlikely(result < 0)) + break; + } result = nvme_tcp_try_recv(queue); if (result > 0) @@ -1321,6 +1340,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, queue->ctrl = ctrl; INIT_LIST_HEAD(&queue->send_list); spin_lock_init(&queue->lock); + mutex_init(&queue->send_mutex); INIT_WORK(&queue->io_work, nvme_tcp_io_work); queue->queue_size = queue_size; @@ -1545,6 +1565,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; set->reserved_tags = 2; /* connect + keep-alive */ set->numa_node = NUMA_NO_NODE; + set->flags = BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); set->driver_data = ctrl; set->nr_hw_queues = 1; @@ -1556,7 +1577,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, set->queue_depth = nctrl->sqsize + 1; set->reserved_tags = 1; /* fabric connect */ set->numa_node = NUMA_NO_NODE; - set->flags = BLK_MQ_F_SHOULD_MERGE; + set->flags = 
BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); set->driver_data = ctrl; set->nr_hw_queues = nctrl->queue_count - 1; @@ -2115,7 +2136,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) ctrl->async_req.curr_bio = NULL; ctrl->async_req.data_len = 0; - nvme_tcp_queue_request(&ctrl->async_req); + nvme_tcp_queue_request(&ctrl->async_req, true); } static enum blk_eh_timer_return @@ -2246,7 +2267,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); - nvme_tcp_queue_request(req); + nvme_tcp_queue_request(req, true); return BLK_STS_OK; } -- cgit v1.2.3 From 7890b9701b792a4d75b4adef4abe325383ccfca4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 29 Mar 2020 19:41:38 +0200 Subject: nvme-multipath: stop using ->queuedata nvme-multipath already uses the gendisk private data, so there is no need to also set up the request_queue queuedata, which was used in only one place. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/multipath.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 9f2844935fdf..5f5537d0a024 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -294,7 +294,7 @@ static bool nvme_available_path(struct nvme_ns_head *head) static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, struct bio *bio) { - struct nvme_ns_head *head = q->queuedata; + struct nvme_ns_head *head = bio->bi_disk->private_data; struct device *dev = disk_to_dev(head->disk); struct nvme_ns *ns; blk_qc_t ret = BLK_QC_T_NONE; @@ -378,7 +378,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node); if (!q) goto out; - q->queuedata = head; blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(q, 512); -- cgit v1.2.3 From 45e2f3c2d2f5f39b47745cba41e9e2b0c58d7d94 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:43 -0700 Subject: nvmet: add generic type-name mapping This patch adds a generic type-to-name mapping structure. It replaces nvmet_transport_name with the new generic mapping structure nvmet_transport.
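The idea reduces every value-to-string attribute to a table walk. A minimal sketch of the show direction, with a made-up table rather than the full nvmet one:

struct example_type_name_map {
	u8 type;
	const char *name;
};

static const struct example_type_name_map example_map[] = {
	{ 1, "rdma" },
	{ 2, "fc" },
	{ 3, "tcp" },
};

static ssize_t example_show(u8 type, char *page)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(example_map); i++) {
		if (example_map[i].type == type)
			return sprintf(page, "%s\n", example_map[i].name);
	}
	return sprintf(page, "\n");
}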
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 58cabd7b6fc5..143f3d02f334 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -20,10 +20,12 @@ static const struct config_item_type nvmet_subsys_type; static LIST_HEAD(nvmet_ports_list); struct list_head *nvmet_ports = &nvmet_ports_list; -static const struct nvmet_transport_name { +struct nvmet_type_name_map { u8 type; const char *name; -} nvmet_transport_names[] = { +}; + +static struct nvmet_type_name_map nvmet_transport[] = { { NVMF_TRTYPE_RDMA, "rdma" }, { NVMF_TRTYPE_FC, "fc" }, { NVMF_TRTYPE_TCP, "tcp" }, @@ -254,10 +256,9 @@ static ssize_t nvmet_addr_trtype_show(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); int i; - for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) { - if (port->disc_addr.trtype != nvmet_transport_names[i].type) - continue; - return sprintf(page, "%s\n", nvmet_transport_names[i].name); + for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { + if (port->disc_addr.trtype == nvmet_transport[i].type) + return sprintf(page, "%s\n", nvmet_transport[i].name); } return sprintf(page, "\n"); @@ -282,8 +283,8 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item, return -EACCES; } - for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) { - if (sysfs_streq(page, nvmet_transport_names[i].name)) + for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { + if (sysfs_streq(page, nvmet_transport[i].name)) goto found; } @@ -291,7 +292,7 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item, return -EINVAL; found: memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); - port->disc_addr.trtype = nvmet_transport_names[i].type; + port->disc_addr.trtype = nvmet_transport[i].type; if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) nvmet_port_init_tsas_rdma(port); return count; -- cgit v1.2.3 From 7e764179c86784594d54764556c4d3973645e6a5 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:44 -0700 Subject: nvmet: use type-name map for address family Right now nvmet_addr_adrfam_[store|show]() uses a switch and an if/else ladder for the address-family-to-string mapping and its reverse, which also repeats the strings in the show and store functions. With the addition of the generic nvmet_type_name_map structure we can now get rid of the switch, the if/else ladder, and the string duplication. Also, we add a blank line before the found label in nvmet_addr_trtype_store(), which keeps the goto label code consistent with nvmet_allowed_hosts_drop_link(), nvmet_port_subsys_drop_link() and nvmet_ana_group_ana_state_store().
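The store direction is the mirror image, using sysfs_streq() and the goto-found convention mentioned above. A sketch (example_map as in the earlier sketch; note that the adrfam loops in the actual patch start at index 1, skipping the "pcie" entry):

static ssize_t example_store(const char *page, u8 *out)
{
	int i;

	for (i = 1; i < ARRAY_SIZE(example_map); i++) {
		if (sysfs_streq(page, example_map[i].name))
			goto found;
	}

	pr_err("Invalid value '%s'\n", page);
	return -EINVAL;

found:
	*out = example_map[i].type;
	return 0;
}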
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 51 ++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 24 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 143f3d02f334..8a5d99e25192 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -32,31 +32,36 @@ static struct nvmet_type_name_map nvmet_transport[] = { { NVMF_TRTYPE_LOOP, "loop" }, }; +static const struct nvmet_type_name_map nvmet_addr_family[] = { + { NVMF_ADDR_FAMILY_PCI, "pcie" }, + { NVMF_ADDR_FAMILY_IP4, "ipv4" }, + { NVMF_ADDR_FAMILY_IP6, "ipv6" }, + { NVMF_ADDR_FAMILY_IB, "ib" }, + { NVMF_ADDR_FAMILY_FC, "fc" }, +}; + /* * nvmet_port Generic ConfigFS definitions. * Used in any place in the ConfigFS tree that refers to an address. */ -static ssize_t nvmet_addr_adrfam_show(struct config_item *item, - char *page) +static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page) { - switch (to_nvmet_port(item)->disc_addr.adrfam) { - case NVMF_ADDR_FAMILY_IP4: - return sprintf(page, "ipv4\n"); - case NVMF_ADDR_FAMILY_IP6: - return sprintf(page, "ipv6\n"); - case NVMF_ADDR_FAMILY_IB: - return sprintf(page, "ib\n"); - case NVMF_ADDR_FAMILY_FC: - return sprintf(page, "fc\n"); - default: - return sprintf(page, "\n"); + u8 adrfam = to_nvmet_port(item)->disc_addr.adrfam; + int i; + + for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { + if (nvmet_addr_family[i].type == adrfam) + return sprintf(page, "%s\n", nvmet_addr_family[i].name); } + + return sprintf(page, "\n"); } static ssize_t nvmet_addr_adrfam_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); + int i; if (port->enabled) { pr_err("Cannot modify address while enabled\n"); @@ -64,19 +69,16 @@ static ssize_t nvmet_addr_adrfam_store(struct config_item *item, return -EACCES; } - if (sysfs_streq(page, "ipv4")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP4; - } else if (sysfs_streq(page, "ipv6")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP6; - } else if (sysfs_streq(page, "ib")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IB; - } else if (sysfs_streq(page, "fc")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_FC; - } else { - pr_err("Invalid value '%s' for adrfam\n", page); - return -EINVAL; + for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { + if (sysfs_streq(page, nvmet_addr_family[i].name)) + goto found; } + pr_err("Invalid value '%s' for adrfam\n", page); + return -EINVAL; + +found: + port->disc_addr.adrfam = i; return count; } @@ -290,6 +292,7 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item, pr_err("Invalid value '%s' for trtype\n", page); return -EINVAL; + found: memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); port->disc_addr.trtype = nvmet_transport[i].type; -- cgit v1.2.3 From 84b8d0d7aa159652dc191d58c4d353b6c9173c54 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:45 -0700 Subject: nvmet: use type-name map for ana states Now that we have a generic type to name map for configfs, get rid of the nvmet_ana_state_names structure and replace it with newly added nvmet_type_name_map. 
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 8a5d99e25192..78eb822a20d5 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1153,10 +1153,7 @@ static const struct config_item_type nvmet_referrals_type = { .ct_group_ops = &nvmet_referral_group_ops, }; -static struct { - enum nvme_ana_state state; - const char *name; -} nvmet_ana_state_names[] = { +struct nvmet_type_name_map nvmet_ana_state[] = { { NVME_ANA_OPTIMIZED, "optimized" }, { NVME_ANA_NONOPTIMIZED, "non-optimized" }, { NVME_ANA_INACCESSIBLE, "inaccessible" }, @@ -1171,10 +1168,9 @@ static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item, enum nvme_ana_state state = grp->port->ana_state[grp->grpid]; int i; - for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) { - if (state != nvmet_ana_state_names[i].state) - continue; - return sprintf(page, "%s\n", nvmet_ana_state_names[i].name); + for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) { + if (state == nvmet_ana_state[i].type) + return sprintf(page, "%s\n", nvmet_ana_state[i].name); } return sprintf(page, "\n"); @@ -1184,10 +1180,11 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ana_group *grp = to_ana_group(item); + enum nvme_ana_state *ana_state = grp->port->ana_state; int i; - for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) { - if (sysfs_streq(page, nvmet_ana_state_names[i].name)) + for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) { + if (sysfs_streq(page, nvmet_ana_state[i].name)) goto found; } @@ -1196,10 +1193,9 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item, found: down_write(&nvmet_ana_sem); - grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state; + ana_state[grp->grpid] = (enum nvme_ana_state) nvmet_ana_state[i].type; nvmet_ana_chgcnt++; up_write(&nvmet_ana_sem); - nvmet_port_send_ana_event(grp->port); return count; } -- cgit v1.2.3 From 87628e2851008d5b0e53859f6062988c6d66d7f5 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:46 -0700 Subject: nvmet: use type-name map for address treq Currently nvmet_addr_treq_[store|show]() uses a switch and an if/else ladder for the transport-requirements-to-string mapping and its reverse. With the addition of the generic nvmet_type_name_map structure we can get rid of the switch, the if/else ladder, and the string duplication.
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 48 +++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 22 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 78eb822a20d5..9c808f4185a0 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -147,20 +147,24 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item, CONFIGFS_ATTR(nvmet_, addr_traddr); -static ssize_t nvmet_addr_treq_show(struct config_item *item, - char *page) +static const struct nvmet_type_name_map nvmet_addr_treq[] = { + { NVMF_TREQ_NOT_SPECIFIED, "not specified" }, + { NVMF_TREQ_REQUIRED, "required" }, + { NVMF_TREQ_NOT_REQUIRED, "not required" }, +}; + +static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page) { - switch (to_nvmet_port(item)->disc_addr.treq & - NVME_TREQ_SECURE_CHANNEL_MASK) { - case NVMF_TREQ_NOT_SPECIFIED: - return sprintf(page, "not specified\n"); - case NVMF_TREQ_REQUIRED: - return sprintf(page, "required\n"); - case NVMF_TREQ_NOT_REQUIRED: - return sprintf(page, "not required\n"); - default: - return sprintf(page, "\n"); + u8 treq = to_nvmet_port(item)->disc_addr.treq & + NVME_TREQ_SECURE_CHANNEL_MASK; + int i; + + for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { + if (treq == nvmet_addr_treq[i].type) + return sprintf(page, "%s\n", nvmet_addr_treq[i].name); } + + return sprintf(page, "\n"); } static ssize_t nvmet_addr_treq_store(struct config_item *item, @@ -168,6 +172,7 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, { struct nvmet_port *port = to_nvmet_port(item); u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK; + int i; if (port->enabled) { pr_err("Cannot modify address while enabled\n"); @@ -175,18 +180,17 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, return -EACCES; } - if (sysfs_streq(page, "not specified")) { - treq |= NVMF_TREQ_NOT_SPECIFIED; - } else if (sysfs_streq(page, "required")) { - treq |= NVMF_TREQ_REQUIRED; - } else if (sysfs_streq(page, "not required")) { - treq |= NVMF_TREQ_NOT_REQUIRED; - } else { - pr_err("Invalid value '%s' for treq\n", page); - return -EINVAL; + for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { + if (sysfs_streq(page, nvmet_addr_treq[i].name)) + goto found; } - port->disc_addr.treq = treq; + pr_err("Invalid value '%s' for treq\n", page); + return -EINVAL; + +found: + treq |= nvmet_addr_treq[i].type; + port->disc_addr.treq = treq; return count; } -- cgit v1.2.3 From 3ecb5faa07c7fd33a3ce1a8340841aa368df7d43 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:47 -0700 Subject: nvmet: centralize port enable access for configfs The configfs attributes which may only be set while the port is disabled, such as addr_[addrfam|portid|traddr|treq|trsvcid|inline_data_size|trtype], have repetitive checks and generic error message printing. This patch creates a centralized helper to check the port state and print an error message, taking the caller as a parameter. This makes the error message easy to parse for the user, removes the duplicated code, and makes the helper available for future such scenarios.
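Sketched, the helper and a typical caller look like this (hypothetical example_* names; the real helper is in the diff below). Passing __func__ makes the rejected attribute's store function show up in the log:

static bool example_is_port_enabled(struct example_port *p, const char *caller)
{
	if (p->enabled)
		pr_err("Disable port '%u' before changing attribute in %s\n",
		       le16_to_cpu(p->disc_addr.portid), caller);
	return p->enabled;
}

static ssize_t example_attr_store(struct example_port *port,
		const char *page, size_t count)
{
	if (example_is_port_enabled(port, __func__))
		return -EACCES;
	/* ... parse and apply the new value ... */
	return count;
}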
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 44 +++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 9c808f4185a0..20779587eefe 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -40,6 +40,14 @@ static const struct nvmet_type_name_map nvmet_addr_family[] = { { NVMF_ADDR_FAMILY_FC, "fc" }, }; +static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller) +{ + if (p->enabled) + pr_err("Disable port '%u' before changing attribute in %s\n", + le16_to_cpu(p->disc_addr.portid), caller); + return p->enabled; +} + /* * nvmet_port Generic ConfigFS definitions. * Used in any place in the ConfigFS tree that refers to an address. @@ -63,11 +71,8 @@ static ssize_t nvmet_addr_adrfam_store(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); int i; - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { if (sysfs_streq(page, nvmet_addr_family[i].name)) @@ -104,11 +109,9 @@ static ssize_t nvmet_addr_portid_store(struct config_item *item, return -EINVAL; } - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } + port->disc_addr.portid = cpu_to_le16(portid); return count; } @@ -134,11 +137,8 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item, return -EINVAL; } - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } if (sscanf(page, "%s\n", port->disc_addr.traddr) != 1) return -EINVAL; @@ -174,11 +174,8 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK; int i; - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { if (sysfs_streq(page, nvmet_addr_treq[i].name)) @@ -214,11 +211,8 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item, pr_err("Invalid value '%s' for trsvcid\n", page); return -EINVAL; } - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } if (sscanf(page, "%s\n", port->disc_addr.trsvcid) != 1) return -EINVAL; @@ -241,11 +235,8 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); int ret; - if (port->enabled) { - pr_err("Cannot modify inline_data_size while port enabled\n"); - pr_err("Disable the port before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } ret = kstrtoint(page, 0, &port->inline_data_size); if (ret) { pr_err("Invalid value '%s' for inline_data_size\n", page); @@ -283,11 +274,8 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); int i; - if 
(port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { if (sysfs_streq(page, nvmet_transport[i].name)) -- cgit v1.2.3 From d02abd198633a4c40411b9a5994111452720470d Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 4 May 2020 01:56:48 -0700 Subject: nvmet: align addrfam list to spec With reference to the NVMeOF Specification (page 44, Figure 38), the discovery log page entry provides an address family field. We do set the transport type field, but the adrfam field is not set when using the loop transport, and loop also has no support in nvme-cli. So reading the discovery log page with a loop transport leads to confusing output. As per the spec, adrfam value 254 is reserved for Intra Host Transport (i.e. loopback), so we add the required macro in the protocol header file, set the default port disc addr entry's adrfam to NVMF_ADDR_FAMILY_MAX, and update the nvmet_addr_family configfs array for the show/store attributes. Without this patch, setting adrfam to (ipv4/ipv6/ib/fc/loop/" ") we get the following output for the nvme discover command from nvme-cli, which is confusing. trtype: loop adrfam: ipv4 trtype: loop adrfam: ipv6 trtype: loop adrfam: infiniband trtype: loop adrfam: fibre-channel trtype: loop # ${CFGFS_HOME}/nvmet/ports/1/addr_adrfam = loop adrfam: pci # <----- pci for loop trtype: loop # ${CFGFS_HOME}/nvmet/ports/1/addr_adrfam = " " adrfam: pci # <----- pci for unrecognized This patch fixes the above output :- trtype: loop adrfam: ipv4 trtype: loop adrfam: ipv6 trtype: loop adrfam: infiniband trtype: loop adrfam: fibre-channel trtype: loop # ${CFGFS_HOME}/nvmet/ports/1/addr_adrfam = loop adrfam: loop # <----- loop for loop trtype: loop # ${CFGFS_HOME}/config/nvmet/ports/adrfam = " " adrfam: unrecognized # <----- unrecognized when invalid value Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 14 ++++++++------ include/linux/nvme.h | 2 ++ 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 20779587eefe..ae8fb4489a10 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -33,11 +33,12 @@ static struct nvmet_type_name_map nvmet_transport[] = { }; static const struct nvmet_type_name_map nvmet_addr_family[] = { - { NVMF_ADDR_FAMILY_PCI, "pcie" }, - { NVMF_ADDR_FAMILY_IP4, "ipv4" }, - { NVMF_ADDR_FAMILY_IP6, "ipv6" }, - { NVMF_ADDR_FAMILY_IB, "ib" }, - { NVMF_ADDR_FAMILY_FC, "fc" }, + { NVMF_ADDR_FAMILY_PCI, "pcie" }, + { NVMF_ADDR_FAMILY_IP4, "ipv4" }, + { NVMF_ADDR_FAMILY_IP6, "ipv6" }, + { NVMF_ADDR_FAMILY_IB, "ib" }, + { NVMF_ADDR_FAMILY_FC, "fc" }, + { NVMF_ADDR_FAMILY_LOOP, "loop" }, }; static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller) @@ -83,7 +84,7 @@ static ssize_t nvmet_addr_adrfam_store(struct config_item *item, return -EINVAL; found: - port->disc_addr.adrfam = i; + port->disc_addr.adrfam = nvmet_addr_family[i].type; return count; } @@ -1338,6 +1339,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, port->inline_data_size = -1; /* < 0 == let the transport choose */ port->disc_addr.portid = cpu_to_le16(portid); + port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX; port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW; config_group_init_type_name(&port->group,
name, &nvmet_port_type); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3d5189f46cb1..2d978d0cbde6 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -38,6 +38,8 @@ enum { NVMF_ADDR_FAMILY_IP6 = 2, /* IP6 */ NVMF_ADDR_FAMILY_IB = 3, /* InfiniBand */ NVMF_ADDR_FAMILY_FC = 4, /* Fibre Channel */ + NVMF_ADDR_FAMILY_LOOP = 254, /* Reserved for host usage */ + NVMF_ADDR_FAMILY_MAX, }; /* Transport Type codes for Discovery Log Page entry TRTYPE field */ -- cgit v1.2.3 From 92decf118f1da4c866515f80387f9cf4d48611d6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 3 Apr 2020 10:53:46 -0700 Subject: nvme: define constants for identification values Improve code readability by defining the specification's constants that the driver is using when decoding identification payloads. Signed-off-by: Keith Busch Reviewed-by: Bart van Assche Reviewed-by: Chaitanya Kulkarni Acked-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 10 +++++----- drivers/nvme/host/multipath.c | 5 +++-- include/linux/nvme.h | 6 ++++++ 3 files changed, 14 insertions(+), 7 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4a124a28118f..805d289e6cd9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1859,13 +1859,13 @@ static void nvme_update_disk_info(struct gendisk *disk, * and whether it should be used instead of AWUPF. If NAWUPF == * 0 then AWUPF must be used instead. */ - if (id->nsfeat & (1 << 1) && id->nawupf) + if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; } - if (id->nsfeat & (1 << 4)) { + if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { /* NPWG = Namespace Preferred Write Granularity */ phys_bs = bs * (1 + le16_to_cpu(id->npwg)); /* NOWS = Namespace Optimal Write Size */ @@ -1894,7 +1894,7 @@ static void nvme_update_disk_info(struct gendisk *disk, nvme_config_discard(disk, ns); nvme_config_write_zeroes(disk, ns); - if (id->nsattr & (1 << 0)) + if (id->nsattr & NVME_NS_ATTR_RO) set_disk_ro(disk, true); else set_disk_ro(disk, false); @@ -2685,7 +2685,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, return false; } - if ((id->cmic & (1 << 1)) || + if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || (ctrl->opts && ctrl->opts->discovery_nqn)) continue; @@ -3497,7 +3497,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, struct nvme_id_ns *id) { struct nvme_ctrl *ctrl = ns->ctrl; - bool is_shared = id->nmic & (1 << 0); + bool is_shared = id->nmic & NVME_NS_NMIC_SHARED; struct nvme_ns_head *head = NULL; struct nvme_ns_ids ids; int ret = 0; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5f5537d0a024..da78e499947a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -372,7 +372,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) * We also do this for private namespaces as the namespace sharing data could * change after a rescan. 
*/ - if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) + if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath) return 0; q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node); @@ -694,7 +694,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) int error; /* check if multipath is enabled and we have the capability */ - if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3))) + if (!multipath || !ctrl->subsys || + !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)) return 0; ctrl->anacap = id->anacap; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 2d978d0cbde6..b235a48eac8c 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -301,6 +301,8 @@ struct nvme_id_ctrl { }; enum { + NVME_CTRL_CMIC_MULTI_CTRL = 1 << 1, + NVME_CTRL_CMIC_ANA = 1 << 3, NVME_CTRL_ONCS_COMPARE = 1 << 0, NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, NVME_CTRL_ONCS_DSM = 1 << 2, @@ -396,8 +398,12 @@ enum { enum { NVME_NS_FEAT_THIN = 1 << 0, + NVME_NS_FEAT_ATOMICS = 1 << 1, + NVME_NS_FEAT_IO_OPT = 1 << 4, + NVME_NS_ATTR_RO = 1 << 0, NVME_NS_FLBAS_LBA_MASK = 0xf, NVME_NS_FLBAS_META_EXT = 0x10, + NVME_NS_NMIC_SHARED = 1 << 0, NVME_LBAF_RP_BEST = 0, NVME_LBAF_RP_BETTER = 1, NVME_LBAF_RP_GOOD = 2, -- cgit v1.2.3 From 09bb8986c99cd3395e96635df9e712f5da45bc07 Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Fri, 8 May 2020 19:59:06 +0800 Subject: nvmet: replace kstrndup() with kmemdup_nul() It is more efficient to use kmemdup_nul() if the size is known exactly. The doc in kernel: "Note: Use kmemdup_nul() instead if the size is known exactly." Signed-off-by: Chen Zhou Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index ae8fb4489a10..19bd5e1c681c 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -324,7 +324,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item, kfree(ns->device_path); ret = -ENOMEM; - ns->device_path = kstrndup(page, len, GFP_KERNEL); + ns->device_path = kmemdup_nul(page, len, GFP_KERNEL); if (!ns->device_path) goto out_unlock; @@ -960,7 +960,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, return -EINVAL; } - new_model_number = kstrndup(page, len, GFP_KERNEL); + new_model_number = kmemdup_nul(page, len, GFP_KERNEL); if (!new_model_number) return -ENOMEM; -- cgit v1.2.3 From 7425596945d7f8bf79f42b2a9c8463d16e24fc34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 May 2020 18:19:13 +0200 Subject: nvmet: mark nvmet_ana_state static Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 19bd5e1c681c..24eb4cf53b4f 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1146,7 +1146,7 @@ static const struct config_item_type nvmet_referrals_type = { .ct_group_ops = &nvmet_referral_group_ops, }; -struct nvmet_type_name_map nvmet_ana_state[] = { +static struct nvmet_type_name_map nvmet_ana_state[] = { { NVME_ANA_OPTIMIZED, "optimized" }, { NVME_ANA_NONOPTIMIZED, "non-optimized" }, { NVME_ANA_INACCESSIBLE, "inaccessible" }, -- cgit v1.2.3 From 5bb052d7aad1f4063631f2aa05452c700139f0f3 Mon Sep 17 00:00:00 2001 From: Sagi 
Grimberg Date: Mon, 4 May 2020 22:20:01 -0700 Subject: nvme-tcp: set MSG_SENDPAGE_NOTLAST with MSG_MORE when we have more to send We can signal the stack that this is not the last page coming and the stack can build a larger tso segment, so go ahead and use it. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index c79e248b9f43..7c7c1886642f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -885,7 +885,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) if (last && !queue->data_digest) flags |= MSG_EOR; else - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; /* can't zcopy slab pages */ if (unlikely(PageSlab(page))) { @@ -924,11 +924,16 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_cmd_pdu *pdu = req->pdu; bool inline_data = nvme_tcp_has_inline_data(req); - int flags = MSG_DONTWAIT | (inline_data ? MSG_MORE : MSG_EOR); u8 hdgst = nvme_tcp_hdgst_len(queue); int len = sizeof(*pdu) + hdgst - req->offset; + int flags = MSG_DONTWAIT; int ret; + if (inline_data) + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; + else + flags |= MSG_EOR; + if (queue->hdr_digest && !req->offset) nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); @@ -967,7 +972,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) ret = kernel_sendpage(queue->sock, virt_to_page(pdu), offset_in_page(pdu) + req->offset, len, - MSG_DONTWAIT | MSG_MORE); + MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); if (unlikely(ret <= 0)) return ret; -- cgit v1.2.3 From 4eea804364628c30facd99c30499b2b01c6272c6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 4 May 2020 22:20:02 -0700 Subject: nvmet-tcp: set MSG_SENDPAGE_NOTLAST with MSG_MORE when we have more to send We can signal the stack that this is not the last page coming and the stack can build a larger tso segment, so go ahead and use it. 
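The flag selection in both the host and target variants of this change follows the same shape; a simplified sketch (fragment tracking and digest handling reduced to a single boolean):

static int example_send_fragment(struct socket *sock, struct page *page,
		int offset, size_t len, bool last_fragment)
{
	int flags = MSG_DONTWAIT;

	/*
	 * Tell the stack more data follows so it can build a larger TSO
	 * segment; only the very last fragment gets MSG_EOR.
	 */
	if (last_fragment)
		flags |= MSG_EOR;
	else
		flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;

	return kernel_sendpage(sock, page, offset, len, flags);
}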
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f0da04e960f4..c08aec62115e 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -510,7 +510,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu), offset_in_page(cmd->data_pdu) + cmd->offset, - left, MSG_DONTWAIT | MSG_MORE); + left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); if (ret <= 0) return ret; @@ -538,7 +538,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch) if ((!last_in_batch && cmd->queue->send_list_len) || cmd->wbytes_done + left < cmd->req.transfer_len || queue->data_digest || !queue->nvme_sq.sqhd_disabled) - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset, left, flags); @@ -585,7 +585,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, int ret; if (!last_in_batch && cmd->queue->send_list_len) - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; else flags |= MSG_EOR; @@ -614,7 +614,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) int ret; if (!last_in_batch && cmd->queue->send_list_len) - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; else flags |= MSG_EOR; -- cgit v1.2.3 From f381ab1f26aa33f14412cb9b7cb7f947b88a55b2 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 12 May 2020 18:01:43 -0700 Subject: nvmet-tcp: set MSG_EOR if we send last payload in the batch when trying to send the pdu data digest, we should set this flag. Reported-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c08aec62115e..f8dd69f3b657 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -644,6 +644,8 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) if (!last_in_batch && cmd->queue->send_list_len) msg.msg_flags |= MSG_MORE; + else + msg.msg_flags |= MSG_EOR; ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (unlikely(ret <= 0)) -- cgit v1.2.3 From 0236d3437909ff888e5c79228e2d5a851651c4c6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 18 May 2020 10:47:48 -0700 Subject: nvmet-tcp: move send/recv error handling in the send/recv methods instead of call-sites Have routines handle errors and just bail out of the poll loop. This simplifies the code and will help as we may enhance the poll loop logic and these are somewhat in the way. 
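Sketched, the consolidation moves the -EPIPE/-ECONNRESET special case into one helper and lets the budgeted send/recv loops bail out through it (hypothetical example_* names):

static void example_socket_error(struct example_queue *queue, int status)
{
	/* peer-initiated teardown is expected; anything else is fatal */
	if (status == -EPIPE || status == -ECONNRESET)
		kernel_sock_shutdown(queue->sock, SHUT_RDWR);
	else
		example_fatal_error(queue);
}

static int example_try_send(struct example_queue *queue, int budget,
		int *sends)
{
	int i, ret = 0;

	for (i = 0; i < budget; i++) {
		ret = example_try_send_one(queue, i == budget - 1);
		if (unlikely(ret < 0)) {
			example_socket_error(queue, ret);
			break;
		} else if (ret == 0) {
			break;
		}
		(*sends)++;
	}
	return ret;
}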
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f8dd69f3b657..6f557db0320d 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -325,6 +325,14 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) kernel_sock_shutdown(queue->sock, SHUT_RDWR); } +static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) +{ + if (status == -EPIPE || status == -ECONNRESET) + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + else + nvmet_tcp_fatal_error(queue); +} + static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) { struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl; @@ -718,11 +726,15 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue, for (i = 0; i < budget; i++) { ret = nvmet_tcp_try_send_one(queue, i == budget - 1); - if (ret <= 0) + if (unlikely(ret < 0)) { + nvmet_tcp_socket_error(queue, ret); + goto done; + } else if (ret == 0) { break; + } (*sends)++; } - +done: return ret; } @@ -1159,11 +1171,15 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue, for (i = 0; i < budget; i++) { ret = nvmet_tcp_try_recv_one(queue); - if (ret <= 0) + if (unlikely(ret < 0)) { + nvmet_tcp_socket_error(queue, ret); + goto done; + } else if (ret == 0) { break; + } (*recvs)++; } - +done: return ret; } @@ -1188,27 +1204,16 @@ static void nvmet_tcp_io_work(struct work_struct *w) pending = false; ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops); - if (ret > 0) { + if (ret > 0) pending = true; - } else if (ret < 0) { - if (ret == -EPIPE || ret == -ECONNRESET) - kernel_sock_shutdown(queue->sock, SHUT_RDWR); - else - nvmet_tcp_fatal_error(queue); + else if (ret < 0) return; - } ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops); - if (ret > 0) { - /* transmitted message/data */ + if (ret > 0) pending = true; - } else if (ret < 0) { - if (ret == -EPIPE || ret == -ECONNRESET) - kernel_sock_shutdown(queue->sock, SHUT_RDWR); - else - nvmet_tcp_fatal_error(queue); + else if (ret < 0) return; - } } while (pending && ops < NVMET_TCP_IO_WORK_BUDGET); -- cgit v1.2.3 From 9c9e76d5792b121f10c3b8ddbb639617e49197f7 Mon Sep 17 00:00:00 2001 From: Weiping Zhang Date: Sat, 9 May 2020 14:22:08 +0800 Subject: nvme-pci: make sure write/poll_queues are less than or equal to the cpu count Check the write_queues and poll_queues module parameters before using them, to catch too-large values.
Reproducer: modprobe -r nvme modprobe nvme write_queues=`nproc` echo $((`nproc`+1)) > /sys/module/nvme/parameters/write_queues echo 1 > /sys/block/nvme0n1/device/reset_controller [ 657.069000] ------------[ cut here ]------------ [ 657.069022] WARNING: CPU: 10 PID: 1163 at kernel/irq/affinity.c:390 irq_create_affinity_masks+0x47c/0x4a0 [ 657.069056] dm_region_hash dm_log dm_mod [ 657.069059] CPU: 10 PID: 1163 Comm: kworker/u193:9 Kdump: loaded Tainted: G W 5.6.0+ #8 [ 657.069060] Hardware name: Inspur SA5212M5/YZMB-00882-104, BIOS 4.0.9 08/27/2019 [ 657.069064] Workqueue: nvme-reset-wq nvme_reset_work [nvme] [ 657.069066] RIP: 0010:irq_create_affinity_masks+0x47c/0x4a0 [ 657.069067] Code: fe ff ff 48 c7 c0 b0 89 14 95 48 89 46 20 e9 e9 fb ff ff 31 c0 e9 90 fc ff ff 0f 0b 48 c7 44 24 08 00 00 00 00 e9 e9 fc ff ff <0f> 0b e9 87 fe ff ff 48 8b 7c 24 28 e8 33 a0 80 00 e9 b6 fc ff ff [ 657.069068] RSP: 0018:ffffb505ce1ffc78 EFLAGS: 00010202 [ 657.069069] RAX: 0000000000000060 RBX: ffff9b97921fe5c0 RCX: 0000000000000000 [ 657.069069] RDX: ffff9b67bad80000 RSI: 00000000ffffffa0 RDI: 0000000000000000 [ 657.069070] RBP: 0000000000000000 R08: 0000000000000000 R09: ffff9b97921fe718 [ 657.069070] R10: ffff9b97921fe710 R11: 0000000000000001 R12: 0000000000000064 [ 657.069070] R13: 0000000000000060 R14: 0000000000000000 R15: 0000000000000001 [ 657.069071] FS: 0000000000000000(0000) GS:ffff9b67c0880000(0000) knlGS:0000000000000000 [ 657.069072] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 657.069072] CR2: 0000559eac6fc238 CR3: 000000057860a002 CR4: 00000000007606e0 [ 657.069073] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 657.069073] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 657.069073] PKRU: 55555554 [ 657.069074] Call Trace: [ 657.069080] __pci_enable_msix_range+0x233/0x5a0 [ 657.069085] ? kernfs_put+0xec/0x190 [ 657.069086] pci_alloc_irq_vectors_affinity+0xbb/0x130 [ 657.069089] nvme_reset_work+0x6e6/0xeab [nvme] [ 657.069093] ? __switch_to_asm+0x34/0x70 [ 657.069094] ? __switch_to_asm+0x40/0x70 [ 657.069095] ? nvme_irq_check+0x30/0x30 [nvme] [ 657.069098] process_one_work+0x1a7/0x370 [ 657.069101] worker_thread+0x1c9/0x380 [ 657.069102] ? max_active_store+0x80/0x80 [ 657.069103] kthread+0x112/0x130 [ 657.069104] ? 
__kthread_parkme+0x70/0x70 [ 657.069105] ret_from_fork+0x35/0x40 [ 657.069106] ---[ end trace f4f06b7d24513d06 ]--- [ 657.077110] nvme nvme0: 95/1/0 default/read/poll queues Signed-off-by: Weiping Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b0978ac554d5..6cc4630ddd6d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -68,14 +68,30 @@ static int io_queue_depth = 1024; module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); +static int io_queue_count_set(const char *val, const struct kernel_param *kp) +{ + unsigned int n; + int ret; + + ret = kstrtouint(val, 10, &n); + if (ret != 0 || n > num_possible_cpus()) + return -EINVAL; + return param_set_uint(val, kp); +} + +static const struct kernel_param_ops io_queue_count_ops = { + .set = io_queue_count_set, + .get = param_get_uint, +}; + static unsigned int write_queues; -module_param(write_queues, uint, 0644); +module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644); MODULE_PARM_DESC(write_queues, "Number of queues to use for writes. If not set, reads and writes " "will share a queue set."); static unsigned int poll_queues; -module_param(poll_queues, uint, 0644); +module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644); MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); struct nvme_dev; @@ -3118,8 +3134,6 @@ static int __init nvme_init(void) BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); - write_queues = min(write_queues, num_possible_cpus()); - poll_queues = min(poll_queues, num_possible_cpus()); return pci_register_driver(&nvme_driver); } -- cgit v1.2.3 From 614fc1c0d980423a131bfb5c93d8d53e5272f587 Mon Sep 17 00:00:00 2001 From: Martin George Date: Tue, 12 May 2020 22:17:04 +0530 Subject: nvme-fc: print proper nvme-fc devloss_tmo value The nvme-fc devloss_tmo is computed as the min of either the ctrl_loss_tmo (max_retries * reconnect_delay) or the remote port's devloss_tmo. But what gets printed as the nvme-fc devloss_tmo in nvme_fc_reconnect_or_delete() is always the remote port's devloss_tmo value. So correct this by printing the min value instead. Signed-off-by: Martin George Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0b3ab3355e25..8aabc056c754 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3246,7 +3246,9 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: dev_loss_tmo (%d) expired " "while waiting for remoteport connectivity.\n", - ctrl->cnum, portptr->dev_loss_tmo); + ctrl->cnum, min_t(int, portptr->dev_loss_tmo, + (ctrl->ctrl.opts->max_reconnects * + ctrl->ctrl.opts->reconnect_delay))); WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); } } -- cgit v1.2.3 From 84e4c204b6a0e81f56bd7d1254123390ef0498c8 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Wed, 13 May 2020 16:18:13 +0800 Subject: nvme: disable streams when get stream params failed Disable streams again if getting the stream params fails. 
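For the write_queues/poll_queues fix above, the general pattern is a kernel_param_ops .set hook that validates the value before storing it, so an out-of-range write to /sys/module/.../parameters fails immediately instead of tripping up a later reset. A sketch with a hypothetical parameter name:

#include <linux/module.h>
#include <linux/moduleparam.h>

static unsigned int example_queues;

static int example_queue_count_set(const char *val,
		const struct kernel_param *kp)
{
	unsigned int n;
	int ret;

	/* reject values larger than the number of possible CPUs */
	ret = kstrtouint(val, 10, &n);
	if (ret != 0 || n > num_possible_cpus())
		return -EINVAL;
	return param_set_uint(val, kp);
}

static const struct kernel_param_ops example_queue_count_ops = {
	.set = example_queue_count_set,
	.get = param_get_uint,
};

module_param_cb(example_queues, &example_queue_count_ops, &example_queues,
		0644);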
Signed-off-by: Wu Bo Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 805d289e6cd9..43a6336d6264 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -552,19 +552,22 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl) ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL); if (ret) - return ret; + goto out_disable_stream; ctrl->nssa = le16_to_cpu(s.nssa); if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) { dev_info(ctrl->device, "too few streams (%u) available\n", ctrl->nssa); - nvme_disable_streams(ctrl); - return 0; + goto out_disable_stream; } ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1); dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams); return 0; + +out_disable_stream: + nvme_disable_streams(ctrl); + return ret; } /* -- cgit v1.2.3 From 68ab60ca2d6bd8e6b1ecc2857a174c1c2b9451e9 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 14 May 2020 14:56:26 +0900 Subject: nvme: fix io_opt limit setting Currently, a namespace io_opt queue limit is set by default to the physical sector size of the namespace and to the optimal write size (NOWS) when the namespace reports optimal IO sizes. This causes problems with block limits stacking in blk_stack_limits() when a namespace block device is combined with HDDs, which generally do not report any optimal transfer size (io_opt limit is 0). The code: /* Optimal I/O a multiple of the physical block size? */ if (t->io_opt & (t->physical_block_size - 1)) { t->io_opt = 0; t->misaligned = 1; ret = -1; } in blk_stack_limits() results in an error return for this function when the combined devices have different but compatible physical sector sizes (e.g. 512B sector SSD with 4KB sector disks). Fix this by not setting the optimal IO size queue limit if the namespace does not report an optimal write size value. Signed-off-by: Damien Le Moal Reviewed-by: Bart van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 43a6336d6264..4c194dcabd12 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1845,7 +1845,7 @@ static void nvme_update_disk_info(struct gendisk *disk, { sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); unsigned short bs = 1 << ns->lba_shift; - u32 atomic_bs, phys_bs, io_opt; + u32 atomic_bs, phys_bs, io_opt = 0; if (ns->lba_shift > PAGE_SHIFT) { /* unsupported block size, set capacity to 0 later */ @@ -1854,7 +1854,7 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); - atomic_bs = phys_bs = io_opt = bs; + atomic_bs = phys_bs = bs; nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt); if (id->nabo == 0) { /* -- cgit v1.2.3 From f1e71d75f04792721d411170168e68019ccb7de3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R.
Silva" Date: Thu, 7 May 2020 14:04:52 -0500 Subject: nvme: replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/lightnvm.c | 2 +- include/linux/nvme.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 8aabc056c754..cb0007592c12 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -108,7 +108,7 @@ struct nvme_fc_fcp_op { struct nvme_fcp_op_w_sgl { struct nvme_fc_fcp_op op; struct scatterlist sgl[NVME_INLINE_SG_CNT]; - uint8_t priv[0]; + uint8_t priv[]; }; struct nvme_fc_lport { diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index ec46693f6b64..3002bf972c6b 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -171,7 +171,7 @@ struct nvme_nvm_bb_tbl { __le32 tdresv; __le32 thresv; __le32 rsvd2[8]; - __u8 blk[0]; + __u8 blk[]; }; struct nvme_nvm_id20_addrf { diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b235a48eac8c..e2993e6a9d7c 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1185,7 +1185,7 @@ struct nvmf_disc_rsp_page_hdr { __le64 numrec; __le16 recfmt; __u8 resv14[1006]; - struct nvmf_disc_rsp_page_entry entries[0]; + struct nvmf_disc_rsp_page_entry entries[]; }; enum { -- cgit v1.2.3 From ec0862ac5aa03961fd2027f861689beccd91c5d1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 May 2020 15:06:59 +0300 Subject: nvme: delete an unnecessary declaration The nvme_put_ctrl() is implemented earlier as an inline function so this declaration isn't required. 
Signed-off-by: Dan Carpenter Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f3ab17778349..86f152e777bc 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -497,7 +497,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl); void nvme_stop_ctrl(struct nvme_ctrl *ctrl); -void nvme_put_ctrl(struct nvme_ctrl *ctrl); int nvme_init_identify(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); -- cgit v1.2.3 From 696ece751366e7a02a81fa0228125fe25a47969d Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 19 May 2020 01:06:30 -0700 Subject: nvmet: add async event tracing support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a new tracepoint for the target to trace async events. This is helpful in debugging and in comparing host- and target-side async events, especially when the host is connected to different targets on different machines, now that we rely on userspace components to generate AENs. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 1 + drivers/nvme/target/trace.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b685f99d56a1..7ce3d3b0f5d6 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -151,6 +151,7 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) kfree(aen); mutex_unlock(&ctrl->lock); + trace_nvmet_async_event(ctrl, req->cqe->result.u32); nvmet_req_complete(req, status); } } diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h index e645caa882dd..0458046d6501 100644 --- a/drivers/nvme/target/trace.h +++ b/drivers/nvme/target/trace.h @@ -130,6 +130,34 @@ TRACE_EVENT(nvmet_req_complete, ); +#define aer_name(aer) { aer, #aer } + +TRACE_EVENT(nvmet_async_event, + TP_PROTO(struct nvmet_ctrl *ctrl, __le32 result), + TP_ARGS(ctrl, result), + TP_STRUCT__entry( + __field(int, ctrl_id) + __field(u32, result) + ), + TP_fast_assign( + __entry->ctrl_id = ctrl->cntlid; + __entry->result = (le32_to_cpu(result) & 0xff00) >> 8; + ), + TP_printk("nvmet%d: NVME_AEN=%#08x [%s]", + __entry->ctrl_id, __entry->result, + __print_symbolic(__entry->result, + aer_name(NVME_AER_NOTICE_NS_CHANGED), + aer_name(NVME_AER_NOTICE_ANA), + aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), + aer_name(NVME_AER_NOTICE_DISC_CHANGED), + aer_name(NVME_AER_ERROR), + aer_name(NVME_AER_SMART), + aer_name(NVME_AER_CSS), + aer_name(NVME_AER_VS)) + ) +); +#undef aer_name + #endif /* _TRACE_NVMET_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 463c5fabb8dfca4941c4c50365bb7749ac5c9916 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 19 May 2020 01:06:27 -0700 Subject: nvmet: add helper to revalidate bdev and file ns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a wrapper helper to indicate a size change in the bdev- or file-backed namespace when revalidating the ns.
This helper is needed to minimize code repetition in the next patch for configfs.c and in the existing admin-cmd.c. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 5 +---- drivers/nvme/target/core.c | 8 ++++++++ drivers/nvme/target/nvmet.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 4c79aa804887..f544a14e8b5c 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -486,10 +486,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) if (!ns) goto done; - if (ns->bdev) - nvmet_bdev_ns_revalidate(ns); - else - nvmet_file_ns_revalidate(ns); + nvmet_ns_revalidate(ns); /* * nuse = ncap = nsze isn't always true, but we have no way to find diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 7ce3d3b0f5d6..ee5865568588 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -515,6 +515,14 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl, ns->nsid); } +void nvmet_ns_revalidate(struct nvmet_ns *ns) +{ + if (ns->bdev) + nvmet_bdev_ns_revalidate(ns); + else + nvmet_file_ns_revalidate(ns); +} + int nvmet_ns_enable(struct nvmet_ns *ns) { struct nvmet_subsys *subsys = ns->subsys; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 3d981eb6e100..93e0c2aa3e71 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -500,6 +500,7 @@ u16 nvmet_file_flush(struct nvmet_req *req); void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid); void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); int nvmet_file_ns_revalidate(struct nvmet_ns *ns); +void nvmet_ns_revalidate(struct nvmet_ns *ns); static inline u32 nvmet_rw_len(struct nvmet_req *req) { -- cgit v1.2.3 From de124f427347b0a1eb9708b1007251fc4c8a222f Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 19 May 2020 01:06:28 -0700 Subject: nvmet: generate AEN for ns revalidate size change The newly added function nvmet_ns_revalidate() updates the ns size in the in-core identify-namespace target data structure when the host issues an id-ns command. This can leave the host with an inconsistency between the namespace size reported in the id-ns command result and the size of the corresponding block device until the host rescans the namespaces explicitly. To avoid this scenario, generate an AEN from nvmet_ns_revalidate() if the old size is not the same as the new one. This allows automatic AEN generation when the host calls the id-ns command, and also allows the target to install userspace rules that trigger nvmet_ns_revalidate() (using the configfs interface with the help of the next patch), resulting in appropriate AEN generation when an underlying namespace size change is detected.
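For clarity, the helper as it reads once this patch is applied on top of the previous one (assembled from the two diffs) is:

	void nvmet_ns_revalidate(struct nvmet_ns *ns)
	{
		loff_t oldsize = ns->size;

		if (ns->bdev)
			nvmet_bdev_ns_revalidate(ns);
		else
			nvmet_file_ns_revalidate(ns);

		/* post a namespace-changed AEN only when the size actually moved */
		if (oldsize != ns->size)
			nvmet_ns_changed(ns->subsys, ns->nsid);
	}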
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index ee5865568588..fb78b165c123 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -517,10 +517,15 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl, void nvmet_ns_revalidate(struct nvmet_ns *ns) { + loff_t oldsize = ns->size; + if (ns->bdev) nvmet_bdev_ns_revalidate(ns); else nvmet_file_ns_revalidate(ns); + + if (oldsize != ns->size) + nvmet_ns_changed(ns->subsys, ns->nsid); } int nvmet_ns_enable(struct nvmet_ns *ns) -- cgit v1.2.3 From 1f357548ec79a34e069716716b71194ce8b53e10 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 19 May 2020 01:06:29 -0700 Subject: nvmet: revalidate-ns & generate AEN from configfs Add a new attribute "revalidate_size" for the namespace which allows user to revalidate and generate the AEN if needed. This attribute is needed so that we can install userspace rules with systemd service based on inotify/fsnotify/uevent. The registered callback for such a service will end up writing to this attribute to generate AEN if needed. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 24eb4cf53b4f..17c529260e12 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -540,6 +540,31 @@ static ssize_t nvmet_ns_buffered_io_store(struct config_item *item, CONFIGFS_ATTR(nvmet_ns_, buffered_io); +static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_ns *ns = to_nvmet_ns(item); + bool val; + + if (strtobool(page, &val)) + return -EINVAL; + + if (!val) + return -EINVAL; + + mutex_lock(&ns->subsys->lock); + if (!ns->enabled) { + pr_err("enable ns before revalidate.\n"); + mutex_unlock(&ns->subsys->lock); + return -EINVAL; + } + nvmet_ns_revalidate(ns); + mutex_unlock(&ns->subsys->lock); + return count; +} + +CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size); + static struct configfs_attribute *nvmet_ns_attrs[] = { &nvmet_ns_attr_device_path, &nvmet_ns_attr_device_nguid, @@ -547,6 +572,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = { &nvmet_ns_attr_ana_grpid, &nvmet_ns_attr_enable, &nvmet_ns_attr_buffered_io, + &nvmet_ns_attr_revalidate_size, #ifdef CONFIG_PCI_P2PDMA &nvmet_ns_attr_p2pmem, #endif -- cgit v1.2.3 From ffc89b1d3ca45669e8d2226f5fd4dde756f7ad17 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 19 May 2020 17:05:49 +0300 Subject: nvme: introduce namespace features flag Replace the specific ext boolean (that implies on extended LBA format) with a feature in the new namespace features flag. This is a preparation for adding more namespace features (such as metadata specific features). Signed-off-by: Max Gurtovoy Reviewed-by: Israel Rukshin Reviewed-by: Martin K. 
Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 +++++--- drivers/nvme/host/lightnvm.c | 5 ++++- drivers/nvme/host/nvme.h | 6 +++++- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4c194dcabd12..fecf484e7b08 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1305,7 +1305,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) meta_len = (io.nblocks + 1) * ns->ms; metadata = nvme_to_user_ptr(io.metadata); - if (ns->ext) { + if (ns->features & NVME_NS_EXT_LBAS) { length += meta_len; meta_len = 0; } else if (meta_len) { @@ -1885,7 +1885,7 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_queue_io_min(disk->queue, phys_bs); blk_queue_io_opt(disk->queue, io_opt); - if (ns->ms && !ns->ext && + if (ns->ms && !(ns->features & NVME_NS_EXT_LBAS) && (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) nvme_init_integrity(disk, ns->ms, ns->pi_type); if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) || @@ -1924,8 +1924,10 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) else iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); + ns->features = 0; ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); + if (ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT)) + ns->features |= NVME_NS_EXT_LBAS; /* the PI implementation requires metadata equal t10 pi tuple size */ if (ns->ms == sizeof(struct t10_pi_tuple)) ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 3002bf972c6b..69608755d415 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -961,7 +961,10 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) geo = &dev->geo; geo->csecs = 1 << ns->lba_shift; geo->sos = ns->ms; - geo->ext = ns->ext; + if (ns->features & NVME_NS_EXT_LBAS) + geo->ext = true; + else + geo->ext = false; geo->mdts = ns->ctrl->max_hw_sectors; dev->q = q; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 86f152e777bc..58ae6ebdc63a 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -364,6 +364,10 @@ struct nvme_ns_head { #endif }; +enum nvme_ns_features { + NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */ +}; + struct nvme_ns { struct list_head list; @@ -383,8 +387,8 @@ struct nvme_ns { u16 ms; u16 sgs; u32 sws; - bool ext; u8 pi_type; + unsigned long features; unsigned long flags; #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 -- cgit v1.2.3 From b29f84857a0f1cb4355363d0307d2b83897e8955 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 19 May 2020 17:05:50 +0300 Subject: nvme: introduce NVME_NS_METADATA_SUPPORTED flag This is a preparation for adding support for metadata in fabric controllers. New flag will imply that NVMe namespace supports getting metadata that was originally generated by host's block layer. Signed-off-by: Max Gurtovoy Reviewed-by: Israel Rukshin Reviewed-by: Martin K. 
Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 41 ++++++++++++++++++++++++++++++++++------- drivers/nvme/host/nvme.h | 1 + 2 files changed, 35 insertions(+), 7 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fecf484e7b08..aa168dd58f31 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1885,13 +1885,27 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_queue_io_min(disk->queue, phys_bs); blk_queue_io_opt(disk->queue, io_opt); - if (ns->ms && !(ns->features & NVME_NS_EXT_LBAS) && - (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - nvme_init_integrity(disk, ns->ms, ns->pi_type); - if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) || - ns->lba_shift > PAGE_SHIFT) + /* + * The block layer can't support LBA sizes larger than the page size + * yet, so catch this early and don't allow block I/O. + */ + if (ns->lba_shift > PAGE_SHIFT) capacity = 0; + /* + * Register a metadata profile for PI, or the plain non-integrity NVMe + * metadata masquerading as Type 0 if supported, otherwise reject block + * I/O to namespaces with metadata except when the namespace supports + * PI, as it can strip/insert in that case. + */ + if (ns->ms) { + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + (ns->features & NVME_NS_METADATA_SUPPORTED)) + nvme_init_integrity(disk, ns->ms, ns->pi_type); + else if (!nvme_ns_has_pi(ns)) + capacity = 0; + } + set_capacity_revalidate_and_notify(disk, capacity, false); nvme_config_discard(disk, ns); @@ -1926,14 +1940,27 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->features = 0; ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); - if (ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT)) - ns->features |= NVME_NS_EXT_LBAS; /* the PI implementation requires metadata equal t10 pi tuple size */ if (ns->ms == sizeof(struct t10_pi_tuple)) ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; else ns->pi_type = 0; + if (ns->ms) { + if (id->flbas & NVME_NS_FLBAS_META_EXT) + ns->features |= NVME_NS_EXT_LBAS; + + /* + * For PCI, Extended logical block will be generated by the + * controller. Non-extended format can be generated by the + * block layer. + */ + if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) { + if (!(ns->features & NVME_NS_EXT_LBAS)) + ns->features |= NVME_NS_METADATA_SUPPORTED; + } + } + if (iob) blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); nvme_update_disk_info(disk, ns, id); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 58ae6ebdc63a..9ed6a3dac537 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -366,6 +366,7 @@ struct nvme_ns_head { enum nvme_ns_features { NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */ + NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */ }; struct nvme_ns { -- cgit v1.2.3 From 4d2ce68835649afebbc5e8816b79426fb04c639f Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 19 May 2020 17:05:51 +0300 Subject: nvme: make nvme_ns_has_pi accessible to transports Move the nvme_ns_has_pi() inline from core.c to the nvme.h header. This allows use by the transports. Signed-off-by: James Smart [maxg: added a comment for nvme_ns_has_pi()] Signed-off-by: Max Gurtovoy Reviewed-by: Israel Rukshin Reviewed-by: Martin K. 
Petersen Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 ------ drivers/nvme/host/nvme.h | 7 +++++++ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index aa168dd58f31..beea013f7b73 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -204,11 +203,6 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) nvme_put_ctrl(ctrl); } -static inline bool nvme_ns_has_pi(struct nvme_ns *ns) -{ - return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); -} - static blk_status_t nvme_error_status(u16 status) { switch (status & 0x7ff) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9ed6a3dac537..c83ff69338d8 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -399,6 +400,12 @@ struct nvme_ns { }; +/* NVMe ns supports metadata actions by the controller (generate/strip) */ +static inline bool nvme_ns_has_pi(struct nvme_ns *ns) +{ + return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); +} + struct nvme_ctrl_ops { const char *name; struct module *module; -- cgit v1.2.3 From 95093350394a394e7c4e778176194b14b76ec5d8 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 19 May 2020 17:05:52 +0300 Subject: nvme: introduce max_integrity_segments ctrl attribute This patch doesn't change any logic, and is needed as a preparation for adding PI support for fabrics drivers that will use an extended LBA format for metadata and will support more than 1 integrity segment. Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. 
Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 11 +++++++---- drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pci.c | 6 ++++++ 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index beea013f7b73..404edceb84cb 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1693,7 +1693,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) +static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, + u32 max_integrity_segments) { struct blk_integrity integrity; @@ -1716,10 +1717,11 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) } integrity.tuple_size = ms; blk_integrity_register(disk, &integrity); - blk_queue_max_integrity_segments(disk->queue, 1); + blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); } #else -static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) +static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, + u32 max_integrity_segments) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -1895,7 +1897,8 @@ static void nvme_update_disk_info(struct gendisk *disk, if (ns->ms) { if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && (ns->features & NVME_NS_METADATA_SUPPORTED)) - nvme_init_integrity(disk, ns->ms, ns->pi_type); + nvme_init_integrity(disk, ns->ms, ns->pi_type, + ns->ctrl->max_integrity_segments); else if (!nvme_ns_has_pi(ns)) capacity = 0; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index c83ff69338d8..5b5ed128fd9e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -229,6 +229,7 @@ struct nvme_ctrl { u32 page_size; u32 max_hw_sectors; u32 max_segments; + u32 max_integrity_segments; u16 crdt[3]; u16 oncs; u16 oacs; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6cc4630ddd6d..b307c06a783d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2565,6 +2565,12 @@ static void nvme_reset_work(struct work_struct *work) goto out; } + /* + * We do not support an SGL for metadata (yet), so we are limited to a + * single integrity segment for the separate metadata pointer. + */ + dev->ctrl.max_integrity_segments = 1; + result = nvme_init_identify(&dev->ctrl); if (result) goto out; -- cgit v1.2.3 From 33cfdc2aa6969829f42640f758357e4b015e9f7d Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 19 May 2020 17:05:53 +0300 Subject: nvme: enforce extended LBA format for fabrics metadata An extended LBA is a larger LBA that is created when metadata associated with the LBA is transferred contiguously with the LBA data (AKA interleaved). The metadata may be either transferred as part of the LBA (creating an extended LBA) or it may be transferred as a separate contiguous buffer of data. According to the NVMeoF spec, a fabrics ctrl supports only an Extended LBA format. Fail revalidation in case we have a spec violation. Also add a flag that will imply on capable transports and controllers as part of a preparation for allowing end-to-end protection information for fabric controllers. 
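Condensed from the __nvme_revalidate_disk() hunks in the diff below, the resulting feature selection is:

	if (ns->ms) {
		if (id->flbas & NVME_NS_FLBAS_META_EXT) {
			ns->features |= NVME_NS_EXT_LBAS;
			if ((ctrl->ops->flags & NVME_F_FABRICS) &&
			    (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) &&
			    ctrl->max_integrity_segments)
				ns->features |= NVME_NS_METADATA_SUPPORTED;
		} else {
			/* fabrics without extended LBAs is a spec violation */
			if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS))
				return -EINVAL;
			if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
				ns->features |= NVME_NS_METADATA_SUPPORTED;
		}
	}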
Suggested-by: Christoph Hellwig Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 404edceb84cb..a3a4dbc59af1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1916,9 +1916,10 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_unfreeze_queue(disk->queue); } -static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) +static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; + struct nvme_ctrl *ctrl = ns->ctrl; u32 iob; /* @@ -1929,9 +1930,9 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->lba_shift == 0) ns->lba_shift = 9; - if ((ns->ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && - is_power_of_2(ns->ctrl->max_hw_sectors)) - iob = ns->ctrl->max_hw_sectors; + if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && + is_power_of_2(ctrl->max_hw_sectors)) + iob = ctrl->max_hw_sectors; else iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); @@ -1944,16 +1945,24 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->pi_type = 0; if (ns->ms) { - if (id->flbas & NVME_NS_FLBAS_META_EXT) - ns->features |= NVME_NS_EXT_LBAS; - /* - * For PCI, Extended logical block will be generated by the - * controller. Non-extended format can be generated by the - * block layer. + * For PCIe only the separate metadata pointer is supported, + * as the block layer supplies metadata in a separate bio_vec + * chain. For Fabrics, only metadata as part of extended data + * LBA is supported on the wire per the Fabrics specification, + * but the HBA/HCA will do the remapping from the separate + * metadata buffers for us. 
*/ - if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) { - if (!(ns->features & NVME_NS_EXT_LBAS)) + if (id->flbas & NVME_NS_FLBAS_META_EXT) { + ns->features |= NVME_NS_EXT_LBAS; + if ((ctrl->ops->flags & NVME_F_FABRICS) && + (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) && + ctrl->max_integrity_segments) + ns->features |= NVME_NS_METADATA_SUPPORTED; + } else { + if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS)) + return -EINVAL; + if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) ns->features |= NVME_NS_METADATA_SUPPORTED; } } @@ -1968,6 +1977,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) revalidate_disk(ns->head->disk); } #endif + return 0; } static int nvme_revalidate_disk(struct gendisk *disk) @@ -2003,7 +2013,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) goto free_id; } - __nvme_revalidate_disk(disk, id); + ret = __nvme_revalidate_disk(disk, id); free_id: kfree(id); out: @@ -3657,7 +3667,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); ns->disk = disk; - __nvme_revalidate_disk(disk, id); + if (__nvme_revalidate_disk(disk, id)) + goto out_free_disk; if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { ret = nvme_nvm_register(ns, disk_name, node); @@ -3684,6 +3695,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) /* prevent double queue cleanup */ ns->disk->queue = NULL; put_disk(ns->disk); + out_free_disk: + del_gendisk(ns->disk); out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); -- cgit v1.2.3 From ba7ca2ae029607c7eb2c18e37e8bc0d2252d3d12 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:05:54 +0300 Subject: nvme: introduce NVME_INLINE_METADATA_SG_CNT SGL size of metadata is usually small. Thus, 1 inline sg should cover most cases. The macro will be used for pre-allocate a single SGL entry for metadata. The preallocation of small inline SGLs depends on SG_CHAIN capability so if the ARCH doesn't support SG_CHAIN, use the runtime allocation for the SGL. This patch is a preparation for adding metadata (T10-PI) over fabric support. Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Martin K. Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 5b5ed128fd9e..fa5c75501049 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -31,8 +31,10 @@ extern unsigned int admin_timeout; #ifdef CONFIG_ARCH_NO_SG_CHAIN #define NVME_INLINE_SG_CNT 0 +#define NVME_INLINE_METADATA_SG_CNT 0 #else #define NVME_INLINE_SG_CNT 2 +#define NVME_INLINE_METADATA_SG_CNT 1 #endif extern struct workqueue_struct *nvme_wq; -- cgit v1.2.3 From 324d9e7814dd9c76bb3aebf2529b02149c340d48 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:05:55 +0300 Subject: nvme-rdma: introduce nvme_rdma_sgl structure Remove first_sgl pointer from struct nvme_rdma_request and use pointer arithmetic instead. The inline scatterlist, if exists, will be located right after the nvme_rdma_request. This patch is needed as a preparation for adding PI support. 
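In short (assembled from the diff below), the data SGL bookkeeping moves into its own structure, and the inline scatterlist, when present, is found by pointer arithmetic right after the request:

	struct nvme_rdma_sgl {
		int		nents;
		struct sg_table	sg_table;
	};

	/* the inline scatterlist, if it exists, lives right after the request */
	req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);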
Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index cac8a930396a..40868749547a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -48,6 +48,11 @@ struct nvme_rdma_qe { u64 dma; }; +struct nvme_rdma_sgl { + int nents; + struct sg_table sg_table; +}; + struct nvme_rdma_queue; struct nvme_rdma_request { struct nvme_request req; @@ -58,12 +63,10 @@ struct nvme_rdma_request { refcount_t ref; struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; u32 num_sge; - int nents; struct ib_reg_wr reg_wr; struct ib_cqe reg_cqe; struct nvme_rdma_queue *queue; - struct sg_table sg_table; - struct scatterlist first_sgl[]; + struct nvme_rdma_sgl data_sgl; }; enum nvme_rdma_queue_flags { @@ -1158,8 +1161,9 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, req->mr = NULL; } - ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq)); - sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT); + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, + rq_dma_dir(rq)); + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); } static int nvme_rdma_set_sg_null(struct nvme_command *c) @@ -1178,7 +1182,7 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue, int count) { struct nvme_sgl_desc *sg = &c->common.dptr.sgl; - struct scatterlist *sgl = req->sg_table.sgl; + struct scatterlist *sgl = req->data_sgl.sg_table.sgl; struct ib_sge *sge = &req->sge[1]; u32 len = 0; int i; @@ -1203,8 +1207,8 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue, { struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; - sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl)); - put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length); + sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl)); + put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length); put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key); sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; return 0; @@ -1225,7 +1229,8 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, * Align the MR to a 4K page size to match the ctrl page size and * the block virtual boundary. 
*/ - nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); + nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL, + SZ_4K); if (unlikely(nr < count)) { ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); req->mr = NULL; @@ -1272,17 +1277,18 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, if (!blk_rq_nr_phys_segments(rq)) return nvme_rdma_set_sg_null(c); - req->sg_table.sgl = req->first_sgl; - ret = sg_alloc_table_chained(&req->sg_table, - blk_rq_nr_phys_segments(rq), req->sg_table.sgl, + req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1); + ret = sg_alloc_table_chained(&req->data_sgl.sg_table, + blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl, NVME_INLINE_SG_CNT); if (ret) return -ENOMEM; - req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl); + req->data_sgl.nents = blk_rq_map_sg(rq->q, rq, + req->data_sgl.sg_table.sgl); - count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents, - rq_dma_dir(rq)); + count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl, + req->data_sgl.nents, rq_dma_dir(rq)); if (unlikely(count <= 0)) { ret = -EIO; goto out_free_table; @@ -1311,9 +1317,10 @@ out: return 0; out_unmap_sg: - ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq)); + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, + rq_dma_dir(rq)); out_free_table: - sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT); + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); return ret; } -- cgit v1.2.3 From 5ec5d3bddc6b912b7de9e3eb6c1f2397faeca2bc Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 19 May 2020 17:05:56 +0300 Subject: nvme-rdma: add metadata/T10-PI support For capable HCAs (e.g. ConnectX-5/ConnectX-6) this will allow end-to-end protection information passthrough and validation for NVMe over RDMA transport. Metadata offload support was implemented over the new RDMA signature verbs API and it is enabled for capable controllers. 
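Condensed from the queue_rq hunk in the diff below (the diff sets req->use_sig_mr via an equivalent if/else), the per-request decision whether to take the signature-MR path is:

	req->use_sig_mr = IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
			  queue->pi_support &&
			  (c->common.opcode == nvme_cmd_write ||
			   c->common.opcode == nvme_cmd_read) &&
			  nvme_ns_has_pi(ns);

On completion, nvme_rdma_check_pi_status() then maps IB signature errors (guard/reftag/apptag) to the corresponding NVMe PI status codes.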
Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 280 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 270 insertions(+), 10 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 40868749547a..f8f856dc0c67 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -34,6 +34,11 @@ #define NVME_RDMA_MAX_INLINE_SEGMENTS 4 +#define NVME_RDMA_DATA_SGL_SIZE \ + (sizeof(struct scatterlist) * NVME_INLINE_SG_CNT) +#define NVME_RDMA_METADATA_SGL_SIZE \ + (sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT) + struct nvme_rdma_device { struct ib_device *dev; struct ib_pd *pd; @@ -67,6 +72,8 @@ struct nvme_rdma_request { struct ib_cqe reg_cqe; struct nvme_rdma_queue *queue; struct nvme_rdma_sgl data_sgl; + struct nvme_rdma_sgl *metadata_sgl; + bool use_sig_mr; }; enum nvme_rdma_queue_flags { @@ -88,6 +95,7 @@ struct nvme_rdma_queue { struct rdma_cm_id *cm_id; int cm_error; struct completion cm_done; + bool pi_support; }; struct nvme_rdma_ctrl { @@ -264,6 +272,8 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) init_attr.qp_type = IB_QPT_RC; init_attr.send_cq = queue->ib_cq; init_attr.recv_cq = queue->ib_cq; + if (queue->pi_support) + init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr); @@ -293,6 +303,12 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, if (!req->sqe.data) return -ENOMEM; + /* metadata nvme_rdma_sgl struct is located after command's data SGL */ + if (queue->pi_support) + req->metadata_sgl = (void *)nvme_req(rq) + + sizeof(struct nvme_rdma_request) + + NVME_RDMA_DATA_SGL_SIZE; + req->queue = queue; return 0; @@ -403,6 +419,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) dev = queue->device; ibdev = dev->dev; + if (queue->pi_support) + ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs); ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); /* @@ -419,10 +437,16 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) nvme_rdma_dev_put(dev); } -static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) +static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support) { - return min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ibdev->attrs.max_fast_reg_page_list_len - 1); + u32 max_page_list_len; + + if (pi_support) + max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len; + else + max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len; + + return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1); } static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) @@ -479,7 +503,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) * misaligned we'll end up using two entries for a single data page, * so one additional entry is required. 
*/ - pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1; + pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1; ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, queue->queue_size, IB_MR_TYPE_MEM_REG, @@ -491,10 +515,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_ring; } + if (queue->pi_support) { + ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs, + queue->queue_size, IB_MR_TYPE_INTEGRITY, + pages_per_mr, pages_per_mr); + if (ret) { + dev_err(queue->ctrl->ctrl.device, + "failed to initialize PI MR pool sized %d for QID %d\n", + queue->queue_size, idx); + goto out_destroy_mr_pool; + } + } + set_bit(NVME_RDMA_Q_TR_READY, &queue->flags); return 0; +out_destroy_mr_pool: + ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); out_destroy_ring: nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, sizeof(struct nvme_completion), DMA_FROM_DEVICE); @@ -516,6 +554,10 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, queue = &ctrl->queues[idx]; queue->ctrl = ctrl; + if (idx && ctrl->ctrl.max_integrity_segments) + queue->pi_support = true; + else + queue->pi_support = false; init_completion(&queue->cm_done); if (idx > 0) @@ -726,7 +768,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->reserved_tags = 2; /* connect + keep-alive */ set->numa_node = nctrl->numa_node; set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); + NVME_RDMA_DATA_SGL_SIZE; set->driver_data = ctrl; set->nr_hw_queues = 1; set->timeout = ADMIN_TIMEOUT; @@ -740,7 +782,10 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); + NVME_RDMA_DATA_SGL_SIZE; + if (nctrl->max_integrity_segments) + set->cmd_size += sizeof(struct nvme_rdma_sgl) + + NVME_RDMA_METADATA_SGL_SIZE; set->driver_data = ctrl; set->nr_hw_queues = nctrl->queue_count - 1; set->timeout = NVME_IO_TIMEOUT; @@ -773,6 +818,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, bool new) { + bool pi_capable = false; int error; error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); @@ -782,7 +828,13 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->device = ctrl->queues[0].device; ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device); - ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); + /* T10-PI support */ + if (ctrl->device->dev->attrs.device_cap_flags & + IB_DEVICE_INTEGRITY_HANDOVER) + pi_capable = true; + + ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev, + pi_capable); /* * Bind the async event SQE DMA mapping to the admin queue lifetime. 
@@ -824,6 +876,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->ctrl.max_segments = ctrl->max_fr_pages; ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9); + if (pi_capable) + ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages; + else + ctrl->ctrl.max_integrity_segments = 0; blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); @@ -1152,12 +1208,23 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; + struct list_head *pool = &queue->qp->rdma_mrs; if (!blk_rq_nr_phys_segments(rq)) return; + if (blk_integrity_rq(rq)) { + ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, rq_dma_dir(rq)); + sg_free_table_chained(&req->metadata_sgl->sg_table, + NVME_INLINE_METADATA_SG_CNT); + } + + if (req->use_sig_mr) + pool = &queue->qp->sig_mrs; + if (req->mr) { - ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); + ib_mr_pool_put(queue->qp, pool, req->mr); req->mr = NULL; } @@ -1261,12 +1328,125 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, return 0; } +static void nvme_rdma_set_sig_domain(struct blk_integrity *bi, + struct nvme_command *cmd, struct ib_sig_domain *domain, + u16 control, u8 pi_type) +{ + domain->sig_type = IB_SIG_TYPE_T10_DIF; + domain->sig.dif.bg_type = IB_T10DIF_CRC; + domain->sig.dif.pi_interval = 1 << bi->interval_exp; + domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag); + if (control & NVME_RW_PRINFO_PRCHK_REF) + domain->sig.dif.ref_remap = true; + + domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag); + domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask); + domain->sig.dif.app_escape = true; + if (pi_type == NVME_NS_DPS_PI_TYPE3) + domain->sig.dif.ref_escape = true; +} + +static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi, + struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs, + u8 pi_type) +{ + u16 control = le16_to_cpu(cmd->rw.control); + + memset(sig_attrs, 0, sizeof(*sig_attrs)); + if (control & NVME_RW_PRINFO_PRACT) { + /* for WRITE_INSERT/READ_STRIP no memory domain */ + sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE; + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, + pi_type); + /* Clear the PRACT bit since HCA will generate/verify the PI */ + control &= ~NVME_RW_PRINFO_PRACT; + cmd->rw.control = cpu_to_le16(control); + } else { + /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, + pi_type); + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, + pi_type); + } +} + +static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask) +{ + *mask = 0; + if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF) + *mask |= IB_SIG_CHECK_REFTAG; + if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD) + *mask |= IB_SIG_CHECK_GUARD; +} + +static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc) +{ + if (unlikely(wc->status != IB_WC_SUCCESS)) + nvme_rdma_wr_error(cq, wc, "SIG"); +} + +static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, + struct nvme_rdma_request *req, struct nvme_command *c, + int count, int pi_count) +{ + struct nvme_rdma_sgl *sgl = &req->data_sgl; + struct ib_reg_wr *wr = &req->reg_wr; + struct request *rq = blk_mq_rq_from_pdu(req); + struct nvme_ns *ns = rq->q->queuedata; + struct bio *bio = rq->bio; + struct nvme_keyed_sgl_desc *sg = 
&c->common.dptr.ksgl; + int nr; + + req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs); + if (WARN_ON_ONCE(!req->mr)) + return -EAGAIN; + + nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL, + req->metadata_sgl->sg_table.sgl, pi_count, NULL, + SZ_4K); + if (unlikely(nr)) + goto mr_put; + + nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c, + req->mr->sig_attrs, ns->pi_type); + nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask); + + ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); + + req->reg_cqe.done = nvme_rdma_sig_done; + memset(wr, 0, sizeof(*wr)); + wr->wr.opcode = IB_WR_REG_MR_INTEGRITY; + wr->wr.wr_cqe = &req->reg_cqe; + wr->wr.num_sge = 0; + wr->wr.send_flags = 0; + wr->mr = req->mr; + wr->key = req->mr->rkey; + wr->access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + + sg->addr = cpu_to_le64(req->mr->iova); + put_unaligned_le24(req->mr->length, sg->length); + put_unaligned_le32(req->mr->rkey, sg->key); + sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; + + return 0; + +mr_put: + ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr); + req->mr = NULL; + if (nr < 0) + return nr; + return -EINVAL; +} + static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, struct request *rq, struct nvme_command *c) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; + int pi_count = 0; int count, ret; req->num_sge = 1; @@ -1294,6 +1474,35 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, goto out_free_table; } + if (blk_integrity_rq(rq)) { + req->metadata_sgl->sg_table.sgl = + (struct scatterlist *)(req->metadata_sgl + 1); + ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table, + blk_rq_count_integrity_sg(rq->q, rq->bio), + req->metadata_sgl->sg_table.sgl, + NVME_INLINE_METADATA_SG_CNT); + if (unlikely(ret)) { + ret = -ENOMEM; + goto out_unmap_sg; + } + + req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q, + rq->bio, req->metadata_sgl->sg_table.sgl); + pi_count = ib_dma_map_sg(ibdev, + req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, + rq_dma_dir(rq)); + if (unlikely(pi_count <= 0)) { + ret = -EIO; + goto out_free_pi_table; + } + } + + if (req->use_sig_mr) { + ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count); + goto out; + } + if (count <= dev->num_inline_segments) { if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && queue->ctrl->use_inline_data && @@ -1312,10 +1521,18 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, ret = nvme_rdma_map_sg_fr(queue, req, c, count); out: if (unlikely(ret)) - goto out_unmap_sg; + goto out_unmap_pi_sg; return 0; +out_unmap_pi_sg: + if (blk_integrity_rq(rq)) + ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, rq_dma_dir(rq)); +out_free_pi_table: + if (blk_integrity_rq(rq)) + sg_free_table_chained(&req->metadata_sgl->sg_table, + NVME_INLINE_METADATA_SG_CNT); out_unmap_sg: ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, rq_dma_dir(rq)); @@ -1768,6 +1985,15 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + queue->pi_support && + (c->common.opcode == nvme_cmd_write || + c->common.opcode == nvme_cmd_read) && + nvme_ns_has_pi(ns)) + req->use_sig_mr = true; + else + req->use_sig_mr = false; + err = nvme_rdma_map_data(queue, rq, c); if (unlikely(err < 0)) { 
dev_err(queue->ctrl->ctrl.device, @@ -1808,12 +2034,46 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) return ib_process_cq_direct(queue->ib_cq, -1); } +static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req) +{ + struct request *rq = blk_mq_rq_from_pdu(req); + struct ib_mr_status mr_status; + int ret; + + ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status); + if (ret) { + pr_err("ib_check_mr_status failed, ret %d\n", ret); + nvme_req(rq)->status = NVME_SC_INVALID_PI; + return; + } + + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + nvme_req(rq)->status = NVME_SC_GUARD_CHECK; + break; + case IB_SIG_BAD_REFTAG: + nvme_req(rq)->status = NVME_SC_REFTAG_CHECK; + break; + case IB_SIG_BAD_APPTAG: + nvme_req(rq)->status = NVME_SC_APPTAG_CHECK; + break; + } + pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n", + mr_status.sig_err.err_type, mr_status.sig_err.expected, + mr_status.sig_err.actual); + } +} + static void nvme_rdma_complete_rq(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; struct ib_device *ibdev = queue->device->dev; + if (req->use_sig_mr) + nvme_rdma_check_pi_status(req); + nvme_rdma_unmap_data(queue, rq); ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command), DMA_TO_DEVICE); @@ -1933,7 +2193,7 @@ out_fail: static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .name = "rdma", .module = THIS_MODULE, - .flags = NVME_F_FABRICS, + .flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, -- cgit v1.2.3 From d2d1c454a4a44010cac627fd63945ff5e7dd3b4c Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:05:57 +0300 Subject: nvmet: add metadata characteristics for a namespace Fill those namespace fields from the block device format for adding metadata (T10-PI) over fabric support with block devices. Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Martin K. 
Petersen Signed-off-by: Christoph Hellwig --- drivers/nvme/target/Kconfig | 1 + drivers/nvme/target/io-cmd-bdev.c | 22 ++++++++++++++++++++++ drivers/nvme/target/nvmet.h | 3 +++ 3 files changed, 26 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index d7f48c0fb311..4474952d64c6 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -4,6 +4,7 @@ config NVME_TARGET tristate "NVMe Target support" depends on BLOCK depends on CONFIGFS_FS + select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY select SGL_ALLOC help This enabled target side support for the NVMe protocol, that is diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 0427e040e3dd..e518bb9bf718 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -47,6 +47,22 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) id->nows = to0based(ql->io_opt / ql->logical_block_size); } +static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns) +{ + struct blk_integrity *bi = bdev_get_integrity(ns->bdev); + + if (bi) { + ns->metadata_size = bi->tuple_size; + if (bi->profile == &t10_pi_type1_crc) + ns->pi_type = NVME_NS_DPS_PI_TYPE1; + else if (bi->profile == &t10_pi_type3_crc) + ns->pi_type = NVME_NS_DPS_PI_TYPE3; + else + /* Unsupported metadata type */ + ns->metadata_size = 0; + } +} + int nvmet_bdev_ns_enable(struct nvmet_ns *ns) { int ret; @@ -64,6 +80,12 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) } ns->size = i_size_read(ns->bdev->bd_inode); ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); + + ns->pi_type = 0; + ns->metadata_size = 0; + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10)) + nvmet_bdev_ns_enable_integrity(ns); + return 0; } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 93e0c2aa3e71..daef06a3aff4 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -19,6 +19,7 @@ #include #include #include +#include #define NVMET_ASYNC_EVENTS 4 #define NVMET_ERROR_LOG_SLOTS 128 @@ -77,6 +78,8 @@ struct nvmet_ns { int use_p2pmem; struct pci_dev *p2p_dev; + int pi_type; + int metadata_size; }; static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) -- cgit v1.2.3 From 26af180c1bd9cdd6f9b96d8df58b51d5900a2978 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:05:58 +0300 Subject: nvmet: rename nvmet_rw_len to nvmet_rw_data_len The function doesn't add the metadata length (only data length is calculated). This is preparation for adding metadata (T10-PI) support. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. 
Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-bdev.c | 2 +- drivers/nvme/target/io-cmd-file.c | 2 +- drivers/nvme/target/nvmet.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index e518bb9bf718..628e81a74444 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -178,7 +178,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector_t sector; int op, i; - if (!nvmet_check_data_len(req, nvmet_rw_len(req))) + if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt) { diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index f0bd08d86ac0..b31717c5d75f 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -241,7 +241,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) { ssize_t nr_bvec = req->sg_cnt; - if (!nvmet_check_data_len(req, nvmet_rw_len(req))) + if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt || !nr_bvec) { diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index daef06a3aff4..8bed8a61345e 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -505,7 +505,7 @@ void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); int nvmet_file_ns_revalidate(struct nvmet_ns *ns); void nvmet_ns_revalidate(struct nvmet_ns *ns); -static inline u32 nvmet_rw_len(struct nvmet_req *req) +static inline u32 nvmet_rw_data_len(struct nvmet_req *req) { return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << req->ns->blksize_shift; -- cgit v1.2.3 From 136cc1ffcf0a3309c59d844cb1a4ddad964ea3d8 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:05:59 +0300 Subject: nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len The function doesn't check only the data length, because the transfer length includes also the metadata length in some cases. This is preparation for adding metadata (T10-PI) support. Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. 
Petersen Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 14 +++++++------- drivers/nvme/target/core.c | 6 +++--- drivers/nvme/target/discovery.c | 8 ++++---- drivers/nvme/target/fabrics-cmd.c | 8 ++++---- drivers/nvme/target/io-cmd-bdev.c | 6 +++--- drivers/nvme/target/io-cmd-file.c | 6 +++--- drivers/nvme/target/nvmet.h | 2 +- 7 files changed, 25 insertions(+), 25 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index f544a14e8b5c..7b6b0395eaca 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -295,7 +295,7 @@ out: static void nvmet_execute_get_log_page(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd))) + if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd))) return; switch (req->cmd->get_log_page.lid) { @@ -627,7 +627,7 @@ out: static void nvmet_execute_identify(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) + if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) return; switch (req->cmd->identify.cns) { @@ -656,7 +656,7 @@ static void nvmet_execute_identify(struct nvmet_req *req) */ static void nvmet_execute_abort(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; nvmet_set_result(req, 1); nvmet_req_complete(req, 0); @@ -745,7 +745,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req) u16 nsqr; u16 ncqr; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { @@ -817,7 +817,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 status = 0; - if (!nvmet_check_data_len(req, nvmet_feat_data_len(req, cdw10))) + if (!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10))) return; switch (cdw10 & 0xff) { @@ -884,7 +884,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; mutex_lock(&ctrl->lock); @@ -903,7 +903,7 @@ void nvmet_execute_keep_alive(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; pr_debug("ctrl %d update keep-alive timer for %d secs\n", diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index fb78b165c123..a4ed70e4afe3 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -950,9 +950,9 @@ void nvmet_req_uninit(struct nvmet_req *req) } EXPORT_SYMBOL_GPL(nvmet_req_uninit); -bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) +bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len) { - if (unlikely(data_len != req->transfer_len)) { + if (unlikely(len != req->transfer_len)) { req->error_loc = offsetof(struct nvme_common_command, dptr); nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); return false; @@ -960,7 +960,7 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) return true; } -EXPORT_SYMBOL_GPL(nvmet_check_data_len); +EXPORT_SYMBOL_GPL(nvmet_check_transfer_len); bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) { diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 0c2274b21e15..40cf0b6e6c9d 100644 --- a/drivers/nvme/target/discovery.c +++ 
b/drivers/nvme/target/discovery.c @@ -171,7 +171,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req) u16 status = 0; void *buffer; - if (!nvmet_check_data_len(req, data_len)) + if (!nvmet_check_transfer_len(req, data_len)) return; if (req->cmd->get_log_page.lid != NVME_LOG_DISC) { @@ -244,7 +244,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) const char model[] = "Linux"; u16 status = 0; - if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) + if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) return; if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) { @@ -298,7 +298,7 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 stat; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { @@ -324,7 +324,7 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 stat = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index feef15c38ec9..52a6f70c9d80 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -12,7 +12,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req) u64 val = le64_to_cpu(req->cmd->prop_set.value); u16 status = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; if (req->cmd->prop_set.attrib & 1) { @@ -41,7 +41,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) u16 status = 0; u64 val = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; if (req->cmd->prop_get.attrib & 1) { @@ -156,7 +156,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl = NULL; u16 status = 0; - if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) + if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; d = kmalloc(sizeof(*d), GFP_KERNEL); @@ -223,7 +223,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 status = 0; - if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) + if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; d = kmalloc(sizeof(*d), GFP_KERNEL); diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 628e81a74444..e065c2430bfa 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -178,7 +178,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector_t sector; int op, i; - if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt) { @@ -239,7 +239,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req) { struct bio *bio = &req->b.inline_bio; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); @@ -331,7 +331,7 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) sector_t nr_sector; int ret; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; sector = le64_to_cpu(write_zeroes->slba) << diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index b31717c5d75f..0abbefd9925e 100644 --- 
a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -241,7 +241,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) { ssize_t nr_bvec = req->sg_cnt; - if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt || !nr_bvec) { @@ -285,7 +285,7 @@ static void nvmet_file_flush_work(struct work_struct *w) static void nvmet_file_execute_flush(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_flush_work); schedule_work(&req->f.work); @@ -375,7 +375,7 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w) static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); schedule_work(&req->f.work); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 8bed8a61345e..29ab0b96bd32 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -387,7 +387,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); void nvmet_req_uninit(struct nvmet_req *req); -bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len); +bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len); bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); void nvmet_req_complete(struct nvmet_req *req, u16 status); int nvmet_req_alloc_sgl(struct nvmet_req *req); -- cgit v1.2.3 From ea52ac1c6605fbd25347fabf46233e260dd92eb2 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:06:01 +0300 Subject: nvmet: add metadata/T10-PI support Expose the namespace metadata format when PI is enabled. The user needs to enable the capability per subsystem and per port. The other metadata properties are taken from the namespace/bdev. Usage example: echo 1 > /config/nvmet/subsystems/${NAME}/attr_pi_enable echo 1 > /config/nvmet/ports/${PORT_NUM}/param_pi_enable Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 26 +++++++++++++++--- drivers/nvme/target/configfs.c | 58 +++++++++++++++++++++++++++++++++++++++ drivers/nvme/target/core.c | 21 +++++++++++--- drivers/nvme/target/fabrics-cmd.c | 7 +++-- drivers/nvme/target/nvmet.h | 19 +++++++++++++ 5 files changed, 121 insertions(+), 10 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 7b6b0395eaca..1db8c0498668 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -341,6 +341,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvme_id_ctrl *id; + u32 cmd_capsule_size; u16 status = 0; id = kzalloc(sizeof(*id), GFP_KERNEL); @@ -433,9 +434,15 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); - /* Max command capsule size is sqe + single page of in-capsule data */ - id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + - req->port->inline_data_size) / 16); + /* + * Max command capsule size is sqe + in-capsule data size. + * Disable in-capsule data for Metadata capable controllers. 
+ */ + cmd_capsule_size = sizeof(struct nvme_command); + if (!ctrl->pi_support) + cmd_capsule_size += req->port->inline_data_size; + id->ioccsz = cpu_to_le32(cmd_capsule_size / 16); + /* Max response capsule size is cqe */ id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16); @@ -465,6 +472,7 @@ out: static void nvmet_execute_identify_ns(struct nvmet_req *req) { + struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmet_ns *ns; struct nvme_id_ns *id; u16 status = 0; @@ -482,7 +490,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) } /* return an all zeroed buffer if we can't find an active namespace */ - ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid); + ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid); if (!ns) goto done; @@ -523,6 +531,16 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) id->lbaf[0].ds = ns->blksize_shift; + if (ctrl->pi_support && nvmet_ns_has_pi(ns)) { + id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST | + NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 | + NVME_NS_DPC_PI_TYPE3; + id->mc = NVME_MC_EXTENDED_LBA; + id->dps = ns->pi_type; + id->flbas = NVME_NS_FLBAS_META_EXT; + id->lbaf[0].ms = cpu_to_le16(ns->metadata_size); + } + if (ns->readonly) id->nsattr |= (1 << 0); nvmet_put_namespace(ns); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 17c529260e12..419e0d4ce79b 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -248,6 +248,36 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, CONFIGFS_ATTR(nvmet_, param_inline_data_size); +#ifdef CONFIG_BLK_DEV_INTEGRITY +static ssize_t nvmet_param_pi_enable_show(struct config_item *item, + char *page) +{ + struct nvmet_port *port = to_nvmet_port(item); + + return snprintf(page, PAGE_SIZE, "%d\n", port->pi_enable); +} + +static ssize_t nvmet_param_pi_enable_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_port *port = to_nvmet_port(item); + bool val; + + if (strtobool(page, &val)) + return -EINVAL; + + if (port->enabled) { + pr_err("Disable port before setting pi_enable value.\n"); + return -EACCES; + } + + port->pi_enable = val; + return count; +} + +CONFIGFS_ATTR(nvmet_, param_pi_enable); +#endif + static ssize_t nvmet_addr_trtype_show(struct config_item *item, char *page) { @@ -1010,6 +1040,28 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, } CONFIGFS_ATTR(nvmet_subsys_, attr_model); +#ifdef CONFIG_BLK_DEV_INTEGRITY +static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->pi_support); +} + +static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + bool pi_enable; + + if (strtobool(page, &pi_enable)) + return -EINVAL; + + subsys->pi_support = pi_enable; + return count; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable); +#endif + static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, &nvmet_subsys_attr_attr_version, @@ -1017,6 +1069,9 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_cntlid_min, &nvmet_subsys_attr_attr_cntlid_max, &nvmet_subsys_attr_attr_model, +#ifdef CONFIG_BLK_DEV_INTEGRITY + &nvmet_subsys_attr_attr_pi_enable, +#endif NULL, }; @@ -1316,6 +1371,9 @@ static struct configfs_attribute *nvmet_port_attrs[] = { 
&nvmet_attr_addr_trsvcid, &nvmet_attr_addr_trtype, &nvmet_attr_param_inline_data_size, +#ifdef CONFIG_BLK_DEV_INTEGRITY + &nvmet_attr_param_pi_enable, +#endif NULL, }; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a4ed70e4afe3..f9172d38aff5 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -323,12 +323,21 @@ int nvmet_enable_port(struct nvmet_port *port) if (!try_module_get(ops->owner)) return -EINVAL; - ret = ops->add_port(port); - if (ret) { - module_put(ops->owner); - return ret; + /* + * If the user requested PI support and the transport isn't pi capable, + * don't enable the port. + */ + if (port->pi_enable && !ops->metadata_support) { + pr_err("T10-PI is not supported by transport type %d\n", + port->disc_addr.trtype); + ret = -EINVAL; + goto out_put; } + ret = ops->add_port(port); + if (ret) + goto out_put; + /* If the transport didn't set inline_data_size, then disable it. */ if (port->inline_data_size < 0) port->inline_data_size = 0; @@ -336,6 +345,10 @@ int nvmet_enable_port(struct nvmet_port *port) port->enabled = true; port->tr_ops = ops; return 0; + +out_put: + module_put(ops->owner); + return ret; } void nvmet_disable_port(struct nvmet_port *port) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 52a6f70c9d80..42bd12b8bf00 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -197,6 +197,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } + ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; + uuid_copy(&ctrl->hostid, &d->hostid); status = nvmet_install_queue(ctrl, req); @@ -205,8 +207,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } - pr_info("creating controller %d for subsystem %s for NQN %s.\n", - ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn); + pr_info("creating controller %d for subsystem %s for NQN %s%s.\n", + ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, + ctrl->pi_support ? 
" T10-PI is enabled" : ""); req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); out: diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 29ab0b96bd32..46b5d2b4ca0a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -145,6 +145,7 @@ struct nvmet_port { bool enabled; int inline_data_size; const struct nvmet_fabrics_ops *tr_ops; + bool pi_enable; }; static inline struct nvmet_port *to_nvmet_port(struct config_item *item) @@ -204,6 +205,7 @@ struct nvmet_ctrl { spinlock_t error_lock; u64 err_counter; struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS]; + bool pi_support; }; struct nvmet_subsys_model { @@ -233,6 +235,7 @@ struct nvmet_subsys { u64 ver; u64 serial; char *subsysnqn; + bool pi_support; struct config_group group; @@ -284,6 +287,7 @@ struct nvmet_fabrics_ops { unsigned int type; unsigned int msdbd; bool has_keyed_sgls : 1; + bool metadata_support : 1; void (*queue_response)(struct nvmet_req *req); int (*add_port)(struct nvmet_port *port); void (*remove_port)(struct nvmet_port *port); @@ -511,6 +515,14 @@ static inline u32 nvmet_rw_data_len(struct nvmet_req *req) req->ns->blksize_shift; } +static inline u32 nvmet_rw_metadata_len(struct nvmet_req *req) +{ + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return 0; + return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) * + req->ns->metadata_size; +} + static inline u32 nvmet_dsm_len(struct nvmet_req *req) { return (le32_to_cpu(req->cmd->dsm.nr) + 1) * @@ -525,4 +537,11 @@ static inline __le16 to0based(u32 a) return cpu_to_le16(max(1U, min(1U << 16, a)) - 1); } +static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns) +{ + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return false; + return ns->pi_type && ns->metadata_size == sizeof(struct t10_pi_tuple); +} + #endif /* _NVMET_H */ -- cgit v1.2.3 From c6e3f13398123a008cd2ee28f93510b113a32791 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:06:02 +0300 Subject: nvmet: add metadata support for block devices Allocate the metadata SGL buffers and set metadata fields for the request. Then create a block IO request for the metadata from the protection SG list. 
Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 95 ++++++++++++++++++++++++++++----------- drivers/nvme/target/io-cmd-bdev.c | 87 ++++++++++++++++++++++++++++++++++- drivers/nvme/target/nvmet.h | 7 ++- drivers/nvme/target/rdma.c | 4 +- 4 files changed, 161 insertions(+), 32 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index f9172d38aff5..edf54d9957b7 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -900,8 +900,11 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->sq = sq; req->ops = ops; req->sg = NULL; + req->metadata_sg = NULL; req->sg_cnt = 0; + req->metadata_sg_cnt = 0; req->transfer_len = 0; + req->metadata_len = 0; req->cqe->status = 0; req->cqe->sq_head = 0; req->ns = NULL; @@ -986,50 +989,90 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) return true; } -int nvmet_req_alloc_sgl(struct nvmet_req *req) +static unsigned int nvmet_data_transfer_len(struct nvmet_req *req) { - struct pci_dev *p2p_dev = NULL; + return req->transfer_len - req->metadata_len; +} - if (IS_ENABLED(CONFIG_PCI_P2PDMA)) { - if (req->sq->ctrl && req->ns) - p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, - req->ns->nsid); +static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req) +{ + req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt, + nvmet_data_transfer_len(req)); + if (!req->sg) + goto out_err; - req->p2p_dev = NULL; - if (req->sq->qid && p2p_dev) { - req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt, - req->transfer_len); - if (req->sg) { - req->p2p_dev = p2p_dev; - return 0; - } - } + if (req->metadata_len) { + req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev, + &req->metadata_sg_cnt, req->metadata_len); + if (!req->metadata_sg) + goto out_free_sg; + } + return 0; +out_free_sg: + pci_p2pmem_free_sgl(req->p2p_dev, req->sg); +out_err: + return -ENOMEM; +} - /* - * If no P2P memory was available we fallback to using - * regular memory - */ +static bool nvmet_req_find_p2p_dev(struct nvmet_req *req) +{ + if (!IS_ENABLED(CONFIG_PCI_P2PDMA)) + return false; + + if (req->sq->ctrl && req->sq->qid && req->ns) { + req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, + req->ns->nsid); + if (req->p2p_dev) + return true; } - req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt); + req->p2p_dev = NULL; + return false; +} + +int nvmet_req_alloc_sgls(struct nvmet_req *req) +{ + if (nvmet_req_find_p2p_dev(req) && !nvmet_req_alloc_p2pmem_sgls(req)) + return 0; + + req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL, + &req->sg_cnt); if (unlikely(!req->sg)) - return -ENOMEM; + goto out; + + if (req->metadata_len) { + req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL, + &req->metadata_sg_cnt); + if (unlikely(!req->metadata_sg)) + goto out_free; + } return 0; +out_free: + sgl_free(req->sg); +out: + return -ENOMEM; } -EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl); +EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls); -void nvmet_req_free_sgl(struct nvmet_req *req) +void nvmet_req_free_sgls(struct nvmet_req *req) { - if (req->p2p_dev) + if (req->p2p_dev) { pci_p2pmem_free_sgl(req->p2p_dev, req->sg); - else + if (req->metadata_sg) + pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg); + } else { sgl_free(req->sg); + if (req->metadata_sg) + sgl_free(req->metadata_sg); + } req->sg = NULL; + req->metadata_sg = NULL; req->sg_cnt = 0; + req->metadata_sg_cnt = 0; } 
-EXPORT_SYMBOL_GPL(nvmet_req_free_sgl); +EXPORT_SYMBOL_GPL(nvmet_req_free_sgls); static inline bool nvmet_cc_en(u32 cc) { diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index e065c2430bfa..07055f7ac398 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -169,6 +169,61 @@ static void nvmet_bio_done(struct bio *bio) bio_put(bio); } +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, + struct sg_mapping_iter *miter) +{ + struct blk_integrity *bi; + struct bio_integrity_payload *bip; + struct block_device *bdev = req->ns->bdev; + int rc; + size_t resid, len; + + bi = bdev_get_integrity(bdev); + if (unlikely(!bi)) { + pr_err("Unable to locate bio_integrity\n"); + return -ENODEV; + } + + bip = bio_integrity_alloc(bio, GFP_NOIO, + min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES)); + if (IS_ERR(bip)) { + pr_err("Unable to allocate bio_integrity_payload\n"); + return PTR_ERR(bip); + } + + bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); + /* virtual start sector must be in integrity interval units */ + bip_set_seed(bip, bio->bi_iter.bi_sector >> + (bi->interval_exp - SECTOR_SHIFT)); + + resid = bip->bip_iter.bi_size; + while (resid > 0 && sg_miter_next(miter)) { + len = min_t(size_t, miter->length, resid); + rc = bio_integrity_add_page(bio, miter->page, len, + offset_in_page(miter->addr)); + if (unlikely(rc != len)) { + pr_err("bio_integrity_add_page() failed; %d\n", rc); + sg_miter_stop(miter); + return -ENOMEM; + } + + resid -= len; + if (len < miter->length) + miter->consumed -= miter->length - len; + } + sg_miter_stop(miter); + + return 0; +} +#else +static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, + struct sg_mapping_iter *miter) +{ + return -EINVAL; +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + static void nvmet_bdev_execute_rw(struct nvmet_req *req) { int sg_cnt = req->sg_cnt; @@ -176,9 +231,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) struct scatterlist *sg; struct blk_plug plug; sector_t sector; - int op, i; + int op, i, rc; + struct sg_mapping_iter prot_miter; + unsigned int iter_flags; + unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len; - if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) + if (!nvmet_check_transfer_len(req, total_len)) return; if (!req->sg_cnt) { @@ -190,8 +248,10 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) op |= REQ_FUA; + iter_flags = SG_MITER_TO_SG; } else { op = REQ_OP_READ; + iter_flags = SG_MITER_FROM_SG; } if (is_pci_p2pdma_page(sg_page(req->sg))) @@ -213,11 +273,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) bio->bi_opf = op; blk_start_plug(&plug); + if (req->metadata_len) + sg_miter_start(&prot_miter, req->metadata_sg, + req->metadata_sg_cnt, iter_flags); + for_each_sg(req->sg, sg, req->sg_cnt, i) { while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) != sg->length) { struct bio *prev = bio; + if (req->metadata_len) { + rc = nvmet_bdev_alloc_bip(req, bio, + &prot_miter); + if (unlikely(rc)) { + bio_io_error(bio); + return; + } + } + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; @@ -231,6 +304,14 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sg_cnt--; } + if (req->metadata_len) { + rc = nvmet_bdev_alloc_bip(req, bio, 
&prot_miter); + if (unlikely(rc)) { + bio_io_error(bio); + return; + } + } + submit_bio(bio); blk_finish_plug(&plug); } @@ -358,6 +439,8 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) case nvme_cmd_read: case nvme_cmd_write: req->execute = nvmet_bdev_execute_rw; + if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns)) + req->metadata_len = nvmet_rw_metadata_len(req); return 0; case nvme_cmd_flush: req->execute = nvmet_bdev_execute_flush; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 46b5d2b4ca0a..809691291e73 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -309,6 +309,7 @@ struct nvmet_req { struct nvmet_cq *cq; struct nvmet_ns *ns; struct scatterlist *sg; + struct scatterlist *metadata_sg; struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC]; union { struct { @@ -322,8 +323,10 @@ struct nvmet_req { } f; }; int sg_cnt; + int metadata_sg_cnt; /* data length as parsed from the SGL descriptor: */ size_t transfer_len; + size_t metadata_len; struct nvmet_port *port; @@ -394,8 +397,8 @@ void nvmet_req_uninit(struct nvmet_req *req); bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len); bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); void nvmet_req_complete(struct nvmet_req *req, u16 status); -int nvmet_req_alloc_sgl(struct nvmet_req *req); -void nvmet_req_free_sgl(struct nvmet_req *req); +int nvmet_req_alloc_sgls(struct nvmet_req *req); +void nvmet_req_free_sgls(struct nvmet_req *req); void nvmet_execute_keep_alive(struct nvmet_req *req); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 7a90b10359bb..178dbffa8c41 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -546,7 +546,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) } if (rsp->req.sg != rsp->cmd->inline_sg) - nvmet_req_free_sgl(&rsp->req); + nvmet_req_free_sgls(&rsp->req); if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list))) nvmet_rdma_process_wr_wait_list(queue); @@ -708,7 +708,7 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, if (!rsp->req.transfer_len) return 0; - ret = nvmet_req_alloc_sgl(&rsp->req); + ret = nvmet_req_alloc_sgls(&rsp->req); if (unlikely(ret < 0)) goto error_out; -- cgit v1.2.3 From b09160c3996c11d62a08f9534c755103a10a89b4 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 May 2020 17:06:03 +0300 Subject: nvmet-rdma: add metadata/T10-PI support For capable HCAs (e.g. ConnectX-5/ConnectX-6) this will allow end-to-end protection information passthrough and validation for NVMe over RDMA transport. Metadata support was implemented over the new RDMA signature verbs API. 
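For reference, a sketch of the 8-byte protection tuple that the signature
MR generates and verifies per 512-byte block (illustrative C only; the
kernel's struct t10_pi_tuple keeps these fields big-endian on the wire):

  #include <stdbool.h>
  #include <stdint.h>

  /* Sketch of a T10-PI tuple; fields are host-endian here for brevity. */
  struct pi_tuple {
  	uint16_t guard_tag;	/* CRC16 over the 512-byte data block */
  	uint16_t app_tag;	/* application tag, opaque to the HCA */
  	uint32_t ref_tag;	/* Type 1/2: low 32 bits of the LBA */
  };

  /*
   * Software model of the REFTAG rule the HCA enforces; a mismatch is
   * what nvmet_rdma_check_pi_status() maps to NVME_SC_REFTAG_CHECK.
   */
  static bool ref_tag_ok(const struct pi_tuple *pi, uint32_t expected_lba)
  {
  	return pi->ref_tag == expected_lba;
  }

  int main(void)
  {
  	struct pi_tuple pi = { .guard_tag = 0, .app_tag = 0, .ref_tag = 42 };

  	return ref_tag_ok(&pi, 42) ? 0 : 1;
  }

When NVME_RW_PRINFO_PRACT is set, only the memory domain is programmed and
the HCA inserts or strips these tuples itself, which is why the patch grows
req->transfer_len by metadata_len in that path.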
Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 234 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 215 insertions(+), 19 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 178dbffa8c41..d5141780592e 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -33,6 +33,7 @@ /* Assume mpsmin == device_page_size == 4KB */ #define NVMET_RDMA_MAX_MDTS 8 +#define NVMET_RDMA_MAX_METADATA_MDTS 5 struct nvmet_rdma_srq; @@ -60,6 +61,7 @@ struct nvmet_rdma_rsp { struct nvmet_rdma_queue *queue; struct ib_cqe read_cqe; + struct ib_cqe write_cqe; struct rdma_rw_ctx rw; struct nvmet_req req; @@ -161,6 +163,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp); static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc); +static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_qp_event(struct ib_event *event, void *priv); static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, @@ -423,6 +426,9 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, /* Data In / RDMA READ */ r->read_cqe.done = nvmet_rdma_read_data_done; + /* Data Out / RDMA WRITE */ + r->write_cqe.done = nvmet_rdma_write_data_done; + return 0; out_free_rsp: @@ -532,6 +538,129 @@ static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue) spin_unlock(&queue->rsp_wr_wait_lock); } +static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr) +{ + struct ib_mr_status mr_status; + int ret; + u16 status = 0; + + ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); + if (ret) { + pr_err("ib_check_mr_status failed, ret %d\n", ret); + return NVME_SC_INVALID_PI; + } + + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + status = NVME_SC_GUARD_CHECK; + break; + case IB_SIG_BAD_REFTAG: + status = NVME_SC_REFTAG_CHECK; + break; + case IB_SIG_BAD_APPTAG: + status = NVME_SC_APPTAG_CHECK; + break; + } + pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n", + mr_status.sig_err.err_type, + mr_status.sig_err.expected, + mr_status.sig_err.actual); + } + + return status; +} + +static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi, + struct nvme_command *cmd, struct ib_sig_domain *domain, + u16 control, u8 pi_type) +{ + domain->sig_type = IB_SIG_TYPE_T10_DIF; + domain->sig.dif.bg_type = IB_T10DIF_CRC; + domain->sig.dif.pi_interval = 1 << bi->interval_exp; + domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag); + if (control & NVME_RW_PRINFO_PRCHK_REF) + domain->sig.dif.ref_remap = true; + + domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag); + domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask); + domain->sig.dif.app_escape = true; + if (pi_type == NVME_NS_DPS_PI_TYPE3) + domain->sig.dif.ref_escape = true; +} + +static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req, + struct ib_sig_attrs *sig_attrs) +{ + struct nvme_command *cmd = req->cmd; + u16 control = le16_to_cpu(cmd->rw.control); + u8 pi_type = req->ns->pi_type; + struct blk_integrity *bi; + + bi = bdev_get_integrity(req->ns->bdev); + + memset(sig_attrs, 0, sizeof(*sig_attrs)); + + if (control & 
NVME_RW_PRINFO_PRACT) { + /* for WRITE_INSERT/READ_STRIP no wire domain */ + sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, + pi_type); + /* Clear the PRACT bit since HCA will generate/verify the PI */ + control &= ~NVME_RW_PRINFO_PRACT; + cmd->rw.control = cpu_to_le16(control); + /* PI is added by the HW */ + req->transfer_len += req->metadata_len; + } else { + /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, + pi_type); + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, + pi_type); + } + + if (control & NVME_RW_PRINFO_PRCHK_REF) + sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG; + if (control & NVME_RW_PRINFO_PRCHK_GUARD) + sig_attrs->check_mask |= IB_SIG_CHECK_GUARD; + if (control & NVME_RW_PRINFO_PRCHK_APP) + sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG; +} + +static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key, + struct ib_sig_attrs *sig_attrs) +{ + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + struct nvmet_req *req = &rsp->req; + int ret; + + if (req->metadata_len) + ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp, + cm_id->port_num, req->sg, req->sg_cnt, + req->metadata_sg, req->metadata_sg_cnt, sig_attrs, + addr, key, nvmet_data_dir(req)); + else + ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, + req->sg, req->sg_cnt, 0, addr, key, + nvmet_data_dir(req)); + + return ret; +} + +static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp) +{ + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + struct nvmet_req *req = &rsp->req; + + if (req->metadata_len) + rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp, + cm_id->port_num, req->sg, req->sg_cnt, + req->metadata_sg, req->metadata_sg_cnt, + nvmet_data_dir(req)); + else + rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num, + req->sg, req->sg_cnt, nvmet_data_dir(req)); +} static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) { @@ -539,11 +668,8 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail); - if (rsp->n_rdma) { - rdma_rw_ctx_destroy(&rsp->rw, queue->qp, - queue->cm_id->port_num, rsp->req.sg, - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); - } + if (rsp->n_rdma) + nvmet_rdma_rw_ctx_destroy(rsp); if (rsp->req.sg != rsp->cmd->inline_sg) nvmet_req_free_sgls(&rsp->req); @@ -598,11 +724,16 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) rsp->send_wr.opcode = IB_WR_SEND; } - if (nvmet_rdma_need_data_out(rsp)) - first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, - cm_id->port_num, NULL, &rsp->send_wr); - else + if (nvmet_rdma_need_data_out(rsp)) { + if (rsp->req.metadata_len) + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, + cm_id->port_num, &rsp->write_cqe, NULL); + else + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, + cm_id->port_num, NULL, &rsp->send_wr); + } else { first_wr = &rsp->send_wr; + } nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd); @@ -621,15 +752,14 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) struct nvmet_rdma_rsp *rsp = container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe); struct nvmet_rdma_queue *queue = cq->cq_context; + u16 status = 0; WARN_ON(rsp->n_rdma <= 0); atomic_add(rsp->n_rdma, &queue->sq_wr_avail); - rdma_rw_ctx_destroy(&rsp->rw, queue->qp, - queue->cm_id->port_num, rsp->req.sg, - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); rsp->n_rdma = 0; if (unlikely(wc->status != IB_WC_SUCCESS)) { 
+ nvmet_rdma_rw_ctx_destroy(rsp); nvmet_req_uninit(&rsp->req); nvmet_rdma_release_rsp(rsp); if (wc->status != IB_WC_WR_FLUSH_ERR) { @@ -640,7 +770,58 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) return; } - rsp->req.execute(&rsp->req); + if (rsp->req.metadata_len) + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); + nvmet_rdma_rw_ctx_destroy(rsp); + + if (unlikely(status)) + nvmet_req_complete(&rsp->req, status); + else + rsp->req.execute(&rsp->req); +} + +static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct nvmet_rdma_rsp *rsp = + container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe); + struct nvmet_rdma_queue *queue = cq->cq_context; + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + u16 status; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return; + + WARN_ON(rsp->n_rdma <= 0); + atomic_add(rsp->n_rdma, &queue->sq_wr_avail); + rsp->n_rdma = 0; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + nvmet_rdma_rw_ctx_destroy(rsp); + nvmet_req_uninit(&rsp->req); + nvmet_rdma_release_rsp(rsp); + if (wc->status != IB_WC_WR_FLUSH_ERR) { + pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n", + wc->wr_cqe, ib_wc_status_msg(wc->status), + wc->status); + nvmet_rdma_error_comp(queue); + } + return; + } + + /* + * Upon RDMA completion check the signature status + * - if succeeded send good NVMe response + * - if failed send bad NVMe response with appropriate error + */ + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); + if (unlikely(status)) + rsp->req.cqe->status = cpu_to_le16(status << 1); + nvmet_rdma_rw_ctx_destroy(rsp); + + if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) { + pr_err("sending cmd response failed\n"); + nvmet_rdma_release_rsp(rsp); + } } static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len, @@ -697,9 +878,9 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp) static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, struct nvme_keyed_sgl_desc *sgl, bool invalidate) { - struct rdma_cm_id *cm_id = rsp->queue->cm_id; u64 addr = le64_to_cpu(sgl->addr); u32 key = get_unaligned_le32(sgl->key); + struct ib_sig_attrs sig_attrs; int ret; rsp->req.transfer_len = get_unaligned_le24(sgl->length); @@ -708,13 +889,14 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, if (!rsp->req.transfer_len) return 0; + if (rsp->req.metadata_len) + nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs); + ret = nvmet_req_alloc_sgls(&rsp->req); if (unlikely(ret < 0)) goto error_out; - ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, - rsp->req.sg, rsp->req.sg_cnt, 0, addr, key, - nvmet_data_dir(&rsp->req)); + ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs); if (unlikely(ret < 0)) goto error_out; rsp->n_rdma += ret; @@ -1108,6 +1290,9 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count; } + if (queue->port->pi_enable && queue->host_qid) + qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; + ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr); if (ret) { pr_err("failed to create_qp ret= %d\n", ret); @@ -1226,6 +1411,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { + struct nvmet_rdma_port *port = cm_id->context; struct nvmet_rdma_queue *queue; int ret; @@ -1252,6 +1438,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work); 
queue->dev = ndev; queue->cm_id = cm_id; + queue->port = port->nport; spin_lock_init(&queue->state_lock); queue->state = NVMET_RDMA_Q_CONNECTING; @@ -1369,7 +1556,6 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id, static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { - struct nvmet_rdma_port *port = cm_id->context; struct nvmet_rdma_device *ndev; struct nvmet_rdma_queue *queue; int ret = -EINVAL; @@ -1385,7 +1571,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ret = -ENOMEM; goto put_device; } - queue->port = port->nport; if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ @@ -1657,6 +1842,14 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) goto out_destroy_id; } + if (port->nport->pi_enable && + !(cm_id->device->attrs.device_cap_flags & + IB_DEVICE_INTEGRITY_HANDOVER)) { + pr_err("T10-PI is not supported for %pISpcs\n", addr); + ret = -EINVAL; + goto out_destroy_id; + } + port->cm_id = cm_id; return 0; @@ -1766,6 +1959,8 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) { + if (ctrl->pi_support) + return NVMET_RDMA_MAX_METADATA_MDTS; return NVMET_RDMA_MAX_MDTS; } @@ -1774,6 +1969,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .type = NVMF_TRTYPE_RDMA, .msdbd = 1, .has_keyed_sgls = 1, + .metadata_support = 1, .add_port = nvmet_rdma_add_port, .remove_port = nvmet_rdma_remove_port, .queue_response = nvmet_rdma_queue_response, -- cgit v1.2.3 From 64f5e9cdd711b030b05062c17b2ecfbce890cf4c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 20 May 2020 12:48:12 -0700 Subject: nvmet: fix memory leak when removing namespaces and controllers concurrently When removing a namespace, we add an NS_CHANGE async event, however if the controller admin queue is removed after the event was added but not yet processed, we won't free the aens, resulting in the below memory leak [1]. Fix that by moving nvmet_async_event_free to the final controller release after it is detached from subsys->ctrls ensuring no async events are added, and modify it to simply remove all pending aens. -- $ cat /sys/kernel/debug/kmemleak unreferenced object 0xffff888c1af2c000 (size 32): comm "nvmetcli", pid 5164, jiffies 4295220864 (age 6829.924s) hex dump (first 32 bytes): 28 01 82 3b 8b 88 ff ff 28 01 82 3b 8b 88 ff ff (..;....(..;.... 02 00 04 65 76 65 6e 74 5f 66 69 6c 65 00 00 00 ...event_file... 
backtrace: [<00000000217ae580>] nvmet_add_async_event+0x57/0x290 [nvmet] [<0000000012aa2ea9>] nvmet_ns_changed+0x206/0x300 [nvmet] [<00000000bb3fd52e>] nvmet_ns_disable+0x367/0x4f0 [nvmet] [<00000000e91ca9ec>] nvmet_ns_free+0x15/0x180 [nvmet] [<00000000a15deb52>] config_item_release+0xf1/0x1c0 [<000000007e148432>] configfs_rmdir+0x555/0x7c0 [<00000000f4506ea6>] vfs_rmdir+0x142/0x3c0 [<0000000000acaaf0>] do_rmdir+0x2b2/0x340 [<0000000034d1aa52>] do_syscall_64+0xa5/0x4d0 [<00000000211f13bc>] entry_SYSCALL_64_after_hwframe+0x6a/0xdf Fixes: a07b4970f464 ("nvmet: add a generic NVMe target") Reported-by: David Milburn Signed-off-by: Sagi Grimberg Tested-by: David Milburn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index edf54d9957b7..48f5123d875b 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -158,14 +158,12 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) { - struct nvmet_req *req; + struct nvmet_async_event *aen, *tmp; mutex_lock(&ctrl->lock); - while (ctrl->nr_async_event_cmds) { - req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; - mutex_unlock(&ctrl->lock); - nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); - mutex_lock(&ctrl->lock); + list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) { + list_del(&aen->entry); + kfree(aen); } mutex_unlock(&ctrl->lock); } @@ -791,10 +789,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) * If this is the admin queue, complete all AERs so that our * queue doesn't have outstanding requests on it. 
 */
-	if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) {
+	if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
 		nvmet_async_events_process(ctrl, status);
-		nvmet_async_events_free(ctrl);
-	}
 	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
 	wait_for_completion(&sq->confirm_done);
 	wait_for_completion(&sq->free_done);
@@ -1427,6 +1423,7 @@ static void nvmet_ctrl_free(struct kref *ref)

 	ida_simple_remove(&cntlid_ida, ctrl->cntlid);

+	nvmet_async_events_free(ctrl);
 	kfree(ctrl->sqs);
 	kfree(ctrl->cqs);
 	kfree(ctrl->changed_ns_list);
-- cgit v1.2.3


From 1cdf9f7670a7d74e27177d5c390c2f8b3b9ba338 Mon Sep 17 00:00:00 2001
From: David Milburn
Date: Mon, 18 May 2020 13:59:55 -0500
Subject: nvmet: clean up the loop in nvmet_async_events_process

Take ctrl->lock once around the loop instead of re-acquiring it on every
iteration, dropping it only while completing each request.

Based-on-a-patch-by: Christoph Hellwig
Tested-by: Yi Zhang
Signed-off-by: David Milburn
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/target/core.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 48f5123d875b..6392bcd30bd7 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -134,15 +134,10 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
 	struct nvmet_async_event *aen;
 	struct nvmet_req *req;

-	while (1) {
-		mutex_lock(&ctrl->lock);
-		aen = list_first_entry_or_null(&ctrl->async_events,
-				struct nvmet_async_event, entry);
-		if (!aen || !ctrl->nr_async_event_cmds) {
-			mutex_unlock(&ctrl->lock);
-			break;
-		}
-
+	mutex_lock(&ctrl->lock);
+	while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
+		aen = list_first_entry(&ctrl->async_events,
+				       struct nvmet_async_event, entry);
 		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
 		if (status == 0)
 			nvmet_set_result(req, nvmet_async_event_result(aen));
@@ -153,7 +148,9 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
 		mutex_unlock(&ctrl->lock);
 		trace_nvmet_async_event(ctrl, req->cqe->result.u32);
 		nvmet_req_complete(req, status);
+		mutex_lock(&ctrl->lock);
 	}
+	mutex_unlock(&ctrl->lock);
 }

 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
-- cgit v1.2.3


From 3b2a1ebceba3e03b17ef0970bb7757a3a64cdc8b Mon Sep 17 00:00:00 2001
From: Keith Busch
Date: Wed, 20 May 2020 19:22:53 -0700
Subject: nvme: set dma alignment to qword

The default dma alignment mask is 511, which is much larger than any
nvme controller requires. NVMe controllers accept qword-aligned DMA
addresses, so set the request_queue constraints to that. This can help
avoid bounce buffers on user passthrough commands.

Signed-off-by: Keith Busch
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a3a4dbc59af1..569671e264b5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2322,6 +2322,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
 	}
 	blk_queue_virt_boundary(q, ctrl->page_size - 1);
+	blk_queue_dma_alignment(q, 7);
 	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
 		vwc = true;
 	blk_queue_write_cache(q, vwc, vwc);
-- cgit v1.2.3
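
A userspace view of the last change (hypothetical sketch: the device path,
command choice, and lack of error handling are illustrative only): with the
queue's dma_alignment mask now 7, a passthrough data buffer needs only
8-byte alignment to avoid the kernel's bounce-buffer copy.

  #include <fcntl.h>
  #include <stdint.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/nvme_ioctl.h>

  int main(void)
  {
  	struct nvme_admin_cmd cmd;
  	void *buf;
  	int fd = open("/dev/nvme0", O_RDONLY);

  	/* qword (8-byte) alignment now suffices for the data buffer */
  	if (fd < 0 || posix_memalign(&buf, 8, 4096))
  		return 1;

  	memset(&cmd, 0, sizeof(cmd));
  	cmd.opcode = 0x06;		/* Identify */
  	cmd.cdw10 = 1;			/* CNS 1: Identify Controller */
  	cmd.addr = (uintptr_t)buf;
  	cmd.data_len = 4096;

  	return ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd) < 0;
  }

Before this change the same buffer would have needed 512-byte alignment to
stay on the zero-copy path.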