From f6e8bd59c4e84820fc5f6c404730ef872439548a Mon Sep 17 00:00:00 2001 From: Amit Engel Date: Thu, 22 Apr 2021 15:33:16 +0300 Subject: nvmet: move ka_work initialization to nvmet_alloc_ctrl Initialize keep-alive work only once, as part of alloc_ctrl and not each time that nvmet_start_keep_alive_timer is being called Signed-off-by: Amit Engel Reviewed-by: Hou Pu --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/target/core.c') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 1853db38b682..4ae4bea6625d 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -412,7 +412,6 @@ void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) pr_debug("ctrl %d start keep-alive timer for %d secs\n", ctrl->cntlid, ctrl->kato); - INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer); schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); } @@ -1352,6 +1351,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, INIT_LIST_HEAD(&ctrl->async_events); INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL); INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); + INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer); memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE); memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE); -- cgit v1.2.3 From e13b061589ace0aee18bdbf86f3ddb2b6b5b5ab8 Mon Sep 17 00:00:00 2001 From: Noam Gottlieb Date: Mon, 7 Jun 2021 12:23:21 +0300 Subject: nvmet: change sn size and check validity According to the NVM specification, the serial_number should be 20 bytes (bytes 23:04 of the Identify Controller data structure), and should contain only ASCII characters. In accordance, the serial_number size is changed to 20 bytes and before any attempt to store a new value in serial_number we check that the input is valid - i.e. contains only ASCII characters, is not empty and does not exceed 20 bytes. Signed-off-by: Max Gurtovoy Signed-off-by: Noam Gottlieb Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 4 +--- drivers/nvme/target/configfs.c | 33 +++++++++++++++++++++++---------- drivers/nvme/target/core.c | 4 +++- drivers/nvme/target/discovery.c | 4 +--- drivers/nvme/target/nvmet.h | 3 ++- 5 files changed, 30 insertions(+), 18 deletions(-) (limited to 'drivers/nvme/target/core.c') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index dcd49a72f2f3..9c73dbfb8228 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -357,9 +357,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->vid = 0; id->ssvid = 0; - memset(id->sn, ' ', sizeof(id->sn)); - bin2hex(id->sn, &ctrl->subsys->serial, - min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); + memcpy(id->sn, ctrl->subsys->serial, NVMET_SN_MAX_SIZE); memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number, strlen(subsys->model_number), ' '); memcpy_and_pad(id->fr, sizeof(id->fr), diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 65a0cf99f557..027b28aaf7cd 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1030,24 +1030,43 @@ static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, } CONFIGFS_ATTR(nvmet_subsys_, attr_version); +/* See Section 1.5 of NVMe 1.4 */ +static bool nvmet_is_ascii(const char c) +{ + return c >= 0x20 && c <= 0x7e; +} + static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item, char *page) { struct nvmet_subsys *subsys = to_subsys(item); - return snprintf(page, PAGE_SIZE, "%llx\n", subsys->serial); + return snprintf(page, PAGE_SIZE, "%s\n", subsys->serial); } static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item, const char *page, size_t count) { - u64 serial; + struct nvmet_subsys *subsys = to_subsys(item); + int pos, len = strcspn(page, "\n"); - if (sscanf(page, "%llx\n", &serial) != 1) + if (!len || len > NVMET_SN_MAX_SIZE) { + pr_err("Serial Number can not be empty or exceed %d Bytes\n", + NVMET_SN_MAX_SIZE); return -EINVAL; + } + + for (pos = 0; pos < len; pos++) { + if (!nvmet_is_ascii(page[pos])) { + pr_err("Serial Number must contain only ASCII strings\n"); + return -EINVAL; + } + } down_write(&nvmet_config_sem); - to_subsys(item)->serial = serial; + mutex_lock(&subsys->lock); + memcpy_and_pad(subsys->serial, NVMET_SN_MAX_SIZE, page, len, ' '); + mutex_unlock(&subsys->lock); up_write(&nvmet_config_sem); return count; @@ -1128,12 +1147,6 @@ static ssize_t nvmet_subsys_attr_model_show(struct config_item *item, return ret; } -/* See Section 1.5 of NVMe 1.4 */ -static bool nvmet_is_ascii(const char c) -{ - return c >= 0x20 && c <= 0x7e; -} - static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys, const char *page, size_t count) { diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 4ae4bea6625d..213a0c2af4f7 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1493,6 +1493,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, enum nvme_subsys_type type) { struct nvmet_subsys *subsys; + char serial[NVMET_SN_MAX_SIZE / 2]; subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); if (!subsys) @@ -1500,7 +1501,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, subsys->ver = NVMET_DEFAULT_VS; /* generate a random serial number as our controllers are ephemeral: */ - get_random_bytes(&subsys->serial, sizeof(subsys->serial)); + get_random_bytes(&serial, sizeof(serial)); + bin2hex(subsys->serial, &serial, sizeof(serial)); switch (type) { case NVME_NQN_NVME: diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index fc3645fc2c24..b7fdad13094a 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -262,9 +262,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) goto out; } - memset(id->sn, ' ', sizeof(id->sn)); - bin2hex(id->sn, &ctrl->subsys->serial, - min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); + memcpy(id->sn, ctrl->subsys->serial, NVMET_SN_MAX_SIZE); memset(id->fr, ' ', sizeof(id->fr)); memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' '); memcpy_and_pad(id->fr, sizeof(id->fr), diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index d69a409515d6..0ae809ca428c 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -28,6 +28,7 @@ #define NVMET_NO_ERROR_LOC ((u16)-1) #define NVMET_DEFAULT_CTRL_MODEL "Linux" #define NVMET_MN_MAX_SIZE 40 +#define NVMET_SN_MAX_SIZE 20 /* * Supported optional AENs: @@ -229,7 +230,7 @@ struct nvmet_subsys { u16 max_qid; u64 ver; - u64 serial; + char serial[NVMET_SN_MAX_SIZE]; char *subsysnqn; bool pi_support; -- cgit v1.2.3 From 0d148efdf0f0414b2ed2dd9c31e71302bb9ce123 Mon Sep 17 00:00:00 2001 From: Noam Gottlieb Date: Mon, 7 Jun 2021 12:23:23 +0300 Subject: nvmet: allow mn change if subsys not discovered Currently, once the subsystem's model_number is set for the first time there is no way to change it. However, as long as no connection was established to nvmf target, there is no reason for such restriction and we should allow to change the subsystem's model_number as many times as needed. In addition, in order to simplfy the changes and make the model number flow more similar to the rest of the attributes in the Identify Controller data structure, we set a default value for the model number at the initiation of the subsystem. Reviewed-by: Max Gurtovoy Signed-off-by: Noam Gottlieb Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 26 -------------------------- drivers/nvme/target/configfs.c | 10 ++-------- drivers/nvme/target/core.c | 21 +++++++++++++++++---- drivers/nvme/target/discovery.c | 4 ++-- 4 files changed, 21 insertions(+), 40 deletions(-) (limited to 'drivers/nvme/target/core.c') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 5f15c3285a5b..cd60a8184d04 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -313,22 +313,6 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); } -static u16 nvmet_set_model_number(struct nvmet_subsys *subsys) -{ - u16 status = 0; - - mutex_lock(&subsys->lock); - if (!subsys->model_number) { - subsys->model_number = - kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL); - if (!subsys->model_number) - status = NVME_SC_INTERNAL; - } - mutex_unlock(&subsys->lock); - - return status; -} - static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -343,16 +327,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) mutex_unlock(&subsys->lock); } - /* - * If there is no model number yet, set it now. It will then remain - * stable for the life time of the subsystem. - */ - if (!subsys->model_number) { - status = nvmet_set_model_number(subsys); - if (status) - goto out; - } - id = kzalloc(sizeof(*id), GFP_KERNEL); if (!id) { status = NVME_SC_INTERNAL; diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index a13da86fb374..9ef8708b92c6 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1154,14 +1154,8 @@ static ssize_t nvmet_subsys_attr_model_show(struct config_item *item, char *page) { struct nvmet_subsys *subsys = to_subsys(item); - int ret; - - mutex_lock(&subsys->lock); - ret = snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number ? - subsys->model_number : NVMET_DEFAULT_CTRL_MODEL); - mutex_unlock(&subsys->lock); - return ret; + return snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number); } static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys, @@ -1169,7 +1163,7 @@ static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys, { int pos = 0, len; - if (subsys->model_number) { + if (subsys->subsys_discovered) { pr_err("Can't set model number. %s is already assigned\n", subsys->model_number); return -EINVAL; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 213a0c2af4f7..146909486b8f 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1494,6 +1494,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, { struct nvmet_subsys *subsys; char serial[NVMET_SN_MAX_SIZE / 2]; + int ret; subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); if (!subsys) @@ -1504,6 +1505,12 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, get_random_bytes(&serial, sizeof(serial)); bin2hex(subsys->serial, &serial, sizeof(serial)); + subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL); + if (!subsys->model_number) { + ret = -ENOMEM; + goto free_subsys; + } + switch (type) { case NVME_NQN_NVME: subsys->max_qid = NVMET_NR_QUEUES; @@ -1513,15 +1520,15 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, break; default: pr_err("%s: Unknown Subsystem type - %d\n", __func__, type); - kfree(subsys); - return ERR_PTR(-EINVAL); + ret = -EINVAL; + goto free_mn; } subsys->type = type; subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE, GFP_KERNEL); if (!subsys->subsysnqn) { - kfree(subsys); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto free_mn; } subsys->cntlid_min = NVME_CNTLID_MIN; subsys->cntlid_max = NVME_CNTLID_MAX; @@ -1533,6 +1540,12 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, INIT_LIST_HEAD(&subsys->hosts); return subsys; + +free_mn: + kfree(subsys->model_number); +free_subsys: + kfree(subsys); + return ERR_PTR(ret); } static void nvmet_subsys_free(struct kref *ref) diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index b7fdad13094a..7aa62bc6ae84 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -244,7 +244,6 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvme_id_ctrl *id; - const char model[] = "Linux"; u16 status = 0; if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) @@ -264,7 +263,8 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) memcpy(id->sn, ctrl->subsys->serial, NVMET_SN_MAX_SIZE); memset(id->fr, ' ', sizeof(id->fr)); - memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' '); + memcpy_and_pad(id->mn, sizeof(id->mn), ctrl->subsys->model_number, + strlen(ctrl->subsys->model_number), ' '); memcpy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE), ' '); -- cgit v1.2.3 From 245067e37d52185a741d269e658afee40d467287 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 13 Jun 2021 18:58:49 -0700 Subject: nvmet: use u32 type for the local variable nsid In function nvmet_max_nsid() we calculate the max nsid by iterating over the XArray and store it in the variable nsid that has type of unsigned long. Since the value of this function is stored into the subsys->max_nsid which is of type u32, change the local variable nsid type and the return type of the same function to u32. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/target/core.c') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 146909486b8f..8494a132da35 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -122,11 +122,11 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len) return 0; } -static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys) +static u32 nvmet_max_nsid(struct nvmet_subsys *subsys) { - unsigned long nsid = 0; struct nvmet_ns *cur; unsigned long idx; + u32 nsid = 0; xa_for_each(&subsys->namespaces, idx, cur) nsid = cur->nsid; -- cgit v1.2.3 From 8bb6cb9b97ef0b0ae4a492db5a90f8156d2cbe85 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 13 Jun 2021 18:58:50 -0700 Subject: nvmet: use nvme status value directly There is no point in keeping the status variable that is used only once in the function nvmet_async_events_failall(). Remove the variable and use the value directly. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/nvme/target/core.c') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 8494a132da35..45a5b273b525 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -141,14 +141,13 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen) static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl) { - u16 status = NVME_SC_INTERNAL | NVME_SC_DNR; struct nvmet_req *req; mutex_lock(&ctrl->lock); while (ctrl->nr_async_event_cmds) { req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; mutex_unlock(&ctrl->lock); - nvmet_req_complete(req, status); + nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); mutex_lock(&ctrl->lock); } mutex_unlock(&ctrl->lock); -- cgit v1.2.3 From 7860569ad47f9fbd7c9f93a5c2b7d2a18e4af831 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 13 Jun 2021 18:58:51 -0700 Subject: nvmet: remove local variable In function errno_to_nvme_status() we store the value of the NVMe status into the local variable and don't do anything useful with that but just return. Remove the local variable and return the value directly from switch. This also removed extra break statements. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'drivers/nvme/target/core.c') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 45a5b273b525..c8708dcaeaa5 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -43,43 +43,34 @@ DECLARE_RWSEM(nvmet_ana_sem); inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno) { - u16 status; - switch (errno) { case 0: - status = NVME_SC_SUCCESS; - break; + return NVME_SC_SUCCESS; case -ENOSPC: req->error_loc = offsetof(struct nvme_rw_command, length); - status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR; - break; + return NVME_SC_CAP_EXCEEDED | NVME_SC_DNR; case -EREMOTEIO: req->error_loc = offsetof(struct nvme_rw_command, slba); - status = NVME_SC_LBA_RANGE | NVME_SC_DNR; - break; + return NVME_SC_LBA_RANGE | NVME_SC_DNR; case -EOPNOTSUPP: req->error_loc = offsetof(struct nvme_common_command, opcode); switch (req->cmd->common.opcode) { case nvme_cmd_dsm: case nvme_cmd_write_zeroes: - status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR; - break; + return NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR; default: - status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } break; case -ENODATA: req->error_loc = offsetof(struct nvme_rw_command, nsid); - status = NVME_SC_ACCESS_DENIED; - break; + return NVME_SC_ACCESS_DENIED; case -EIO: fallthrough; default: req->error_loc = offsetof(struct nvme_common_command, opcode); - status = NVME_SC_INTERNAL | NVME_SC_DNR; + return NVME_SC_INTERNAL | NVME_SC_DNR; } - - return status; } u16 nvmet_report_invalid_opcode(struct nvmet_req *req) -- cgit v1.2.3 From ab5d0b38c0475d6ff59f1a6ccf7c668b9ec2e0a4 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 9 Jun 2021 18:32:51 -0700 Subject: nvmet: add Command Set Identifier support NVMe TP 4056 allows controllers to support different command sets. NVMeoF target currently only supports namespaces that contain traditional logical blocks that may be randomly read and written. In some applications there is a value in exposing namespaces that contain logical blocks that have special access rules (e.g. sequentially write required namespace such as Zoned Namespace (ZNS)). In order to support the Zoned Block Devices (ZBD) backend, controllers need to have support for ZNS Command Set Identifier (CSI). In this preparation patch, we adjust the code such that it can now support the default command set identifier. We update the namespace data structure to store the CSI value which defaults to NVME_CSI_NVM that represents traditional logical blocks namespace type. The CSI support is required to implement the ZBD backend for NVMeOF with host side NVMe ZNS interface, since ZNS commands belong to the different command set than the default one. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 75 ++++++++++++++++++++++++++++++++++------- drivers/nvme/target/core.c | 28 ++++++++++++--- drivers/nvme/target/nvmet.h | 1 + include/linux/nvme.h | 1 + 4 files changed, 87 insertions(+), 18 deletions(-) (limited to 'drivers/nvme/target/core.c') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 3de6a6c99b01..93aaa7479e71 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -162,15 +162,8 @@ out: nvmet_req_complete(req, status); } -static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) +static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log) { - u16 status = NVME_SC_INTERNAL; - struct nvme_effects_log *log; - - log = kzalloc(sizeof(*log), GFP_KERNEL); - if (!log) - goto out; - log->acs[nvme_admin_get_log_page] = cpu_to_le32(1 << 0); log->acs[nvme_admin_identify] = cpu_to_le32(1 << 0); log->acs[nvme_admin_abort_cmd] = cpu_to_le32(1 << 0); @@ -184,9 +177,30 @@ static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) log->iocs[nvme_cmd_flush] = cpu_to_le32(1 << 0); log->iocs[nvme_cmd_dsm] = cpu_to_le32(1 << 0); log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0); +} - status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); +static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) +{ + struct nvme_effects_log *log; + u16 status = NVME_SC_SUCCESS; + + log = kzalloc(sizeof(*log), GFP_KERNEL); + if (!log) { + status = NVME_SC_INTERNAL; + goto out; + } + + switch (req->cmd->get_log_page.csi) { + case NVME_CSI_NVM: + nvmet_get_cmd_effects_nvm(log); + break; + default: + status = NVME_SC_INVALID_LOG_PAGE; + goto free; + } + status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); +free: kfree(log); out: nvmet_req_complete(req, status); @@ -613,6 +627,12 @@ static void nvmet_execute_identify_desclist(struct nvmet_req *req) goto out; } + status = nvmet_copy_ns_identifier(req, NVME_NIDT_CSI, + NVME_NIDT_CSI_LEN, + &req->ns->csi, &off); + if (status) + goto out; + if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off, off) != NVME_IDENTIFY_DATA_SIZE - off) status = NVME_SC_INTERNAL | NVME_SC_DNR; @@ -621,6 +641,17 @@ out: nvmet_req_complete(req, status); } +static bool nvmet_handle_identify_desclist(struct nvmet_req *req) +{ + switch (req->cmd->identify.csi) { + case NVME_CSI_NVM: + nvmet_execute_identify_desclist(req); + return true; + default: + return false; + } +} + static void nvmet_execute_identify(struct nvmet_req *req) { if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) @@ -628,13 +659,31 @@ static void nvmet_execute_identify(struct nvmet_req *req) switch (req->cmd->identify.cns) { case NVME_ID_CNS_NS: - return nvmet_execute_identify_ns(req); + switch (req->cmd->identify.csi) { + case NVME_CSI_NVM: + return nvmet_execute_identify_ns(req); + default: + break; + } + break; case NVME_ID_CNS_CTRL: - return nvmet_execute_identify_ctrl(req); + switch (req->cmd->identify.csi) { + case NVME_CSI_NVM: + return nvmet_execute_identify_ctrl(req); + } + break; case NVME_ID_CNS_NS_ACTIVE_LIST: - return nvmet_execute_identify_nslist(req); + switch (req->cmd->identify.csi) { + case NVME_CSI_NVM: + return nvmet_execute_identify_nslist(req); + default: + break; + } + break; case NVME_ID_CNS_NS_DESC_LIST: - return nvmet_execute_identify_desclist(req); + if (nvmet_handle_identify_desclist(req) == true) + return; + break; } nvmet_req_cns_error_complete(req); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index c8708dcaeaa5..77873d56cff5 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -682,6 +682,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) uuid_gen(&ns->uuid); ns->buffered_io = false; + ns->csi = NVME_CSI_NVM; return ns; } @@ -877,10 +878,14 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req) return ret; } - if (req->ns->file) - return nvmet_file_parse_io_cmd(req); - - return nvmet_bdev_parse_io_cmd(req); + switch (req->ns->csi) { + case NVME_CSI_NVM: + if (req->ns->file) + return nvmet_file_parse_io_cmd(req); + return nvmet_bdev_parse_io_cmd(req); + default: + return NVME_SC_INVALID_IO_CMD_SET; + } } bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, @@ -1102,6 +1107,17 @@ static inline u8 nvmet_cc_iocqes(u32 cc) return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf; } +static inline bool nvmet_css_supported(u8 cc_css) +{ + switch (cc_css <<= NVME_CC_CSS_SHIFT) { + case NVME_CC_CSS_NVM: + case NVME_CC_CSS_CSI: + return true; + default: + return false; + } +} + static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) { lockdep_assert_held(&ctrl->lock); @@ -1121,7 +1137,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) if (nvmet_cc_mps(ctrl->cc) != 0 || nvmet_cc_ams(ctrl->cc) != 0 || - nvmet_cc_css(ctrl->cc) != 0) { + !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) { ctrl->csts = NVME_CSTS_CFS; return; } @@ -1172,6 +1188,8 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl) { /* command sets supported: NVMe command set: */ ctrl->cap = (1ULL << 37); + /* Controller supports one or more I/O Command Sets */ + ctrl->cap |= (1ULL << 43); /* CC.EN timeout in 500msec units: */ ctrl->cap |= (15ULL << 24); /* maximum queue entries supported: */ diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 29b386de1d07..cc71918ff8fe 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -83,6 +83,7 @@ struct nvmet_ns { struct pci_dev *p2p_dev; int pi_type; int metadata_size; + u8 csi; }; static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index edcbd60b88b9..c7ba83144d52 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1504,6 +1504,7 @@ enum { NVME_SC_NS_WRITE_PROTECTED = 0x20, NVME_SC_CMD_INTERRUPTED = 0x21, NVME_SC_TRANSIENT_TR_ERR = 0x22, + NVME_SC_INVALID_IO_CMD_SET = 0x2C, NVME_SC_LBA_RANGE = 0x80, NVME_SC_CAP_EXCEEDED = 0x81, -- cgit v1.2.3 From aaf2e048af2704da5869f27b508b288f36d5c7b7 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 9 Jun 2021 18:32:52 -0700 Subject: nvmet: add ZBD over ZNS backend support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVMe TP 4053 – Zoned Namespaces (ZNS) allows host software to communicate with a non-volatile memory subsystem using zones for NVMe protocol-based controllers. NVMeOF already support the ZNS NVMe Protocol compliant devices on the target in the passthru mode. There are generic zoned block devices like Shingled Magnetic Recording (SMR) HDDs that are not based on the NVMe protocol. This patch adds ZNS backend support for non-ZNS zoned block devices as NVMeOF targets. This support includes implementing the new command set NVME_CSI_ZNS, adding different command handlers for ZNS command set such as NVMe Identify Controller, NVMe Identify Namespace, NVMe Zone Append, NVMe Zone Management Send and NVMe Zone Management Receive. With the new command set identifier, we also update the target command effects logs to reflect the ZNS compliant commands. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Signed-off-by: Christoph Hellwig --- drivers/nvme/target/Makefile | 1 + drivers/nvme/target/admin-cmd.c | 41 +++ drivers/nvme/target/core.c | 15 +- drivers/nvme/target/io-cmd-bdev.c | 26 +- drivers/nvme/target/nvmet.h | 20 ++ drivers/nvme/target/zns.c | 615 ++++++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 7 + 7 files changed, 714 insertions(+), 11 deletions(-) create mode 100644 drivers/nvme/target/zns.c (limited to 'drivers/nvme/target/core.c') diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile index ebf91fc4c72e..9837e580fa7e 100644 --- a/drivers/nvme/target/Makefile +++ b/drivers/nvme/target/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \ discovery.o io-cmd-file.o io-cmd-bdev.o nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o +nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o nvme-loop-y += loop.o nvmet-rdma-y += rdma.o nvmet-fc-y += fc.o diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 93aaa7479e71..363e357d2f20 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -179,6 +179,13 @@ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log) log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0); } +static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log) +{ + log->iocs[nvme_cmd_zone_append] = cpu_to_le32(1 << 0); + log->iocs[nvme_cmd_zone_mgmt_send] = cpu_to_le32(1 << 0); + log->iocs[nvme_cmd_zone_mgmt_recv] = cpu_to_le32(1 << 0); +} + static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) { struct nvme_effects_log *log; @@ -194,6 +201,14 @@ static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) case NVME_CSI_NVM: nvmet_get_cmd_effects_nvm(log); break; + case NVME_CSI_ZNS: + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { + status = NVME_SC_INVALID_IO_CMD_SET; + goto free; + } + nvmet_get_cmd_effects_nvm(log); + nvmet_get_cmd_effects_zns(log); + break; default: status = NVME_SC_INVALID_LOG_PAGE; goto free; @@ -647,6 +662,12 @@ static bool nvmet_handle_identify_desclist(struct nvmet_req *req) case NVME_CSI_NVM: nvmet_execute_identify_desclist(req); return true; + case NVME_CSI_ZNS: + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { + nvmet_execute_identify_desclist(req); + return true; + } + return false; default: return false; } @@ -666,12 +687,32 @@ static void nvmet_execute_identify(struct nvmet_req *req) break; } break; + case NVME_ID_CNS_CS_NS: + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { + switch (req->cmd->identify.csi) { + case NVME_CSI_ZNS: + return nvmet_execute_identify_cns_cs_ns(req); + default: + break; + } + } + break; case NVME_ID_CNS_CTRL: switch (req->cmd->identify.csi) { case NVME_CSI_NVM: return nvmet_execute_identify_ctrl(req); } break; + case NVME_ID_CNS_CS_CTRL: + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { + switch (req->cmd->identify.csi) { + case NVME_CSI_ZNS: + return nvmet_execute_identify_cns_cs_ctrl(req); + default: + break; + } + } + break; case NVME_ID_CNS_NS_ACTIVE_LIST: switch (req->cmd->identify.csi) { case NVME_CSI_NVM: diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 77873d56cff5..dd16704c9b6b 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -16,6 +16,7 @@ #include "nvmet.h" struct workqueue_struct *buffered_io_wq; +struct workqueue_struct *zbd_wq; static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static DEFINE_IDA(cntlid_ida); @@ -883,6 +884,10 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req) if (req->ns->file) return nvmet_file_parse_io_cmd(req); return nvmet_bdev_parse_io_cmd(req); + case NVME_CSI_ZNS: + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) + return nvmet_bdev_zns_parse_io_cmd(req); + return NVME_SC_INVALID_IO_CMD_SET; default: return NVME_SC_INVALID_IO_CMD_SET; } @@ -1592,11 +1597,15 @@ static int __init nvmet_init(void) nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1; + zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0); + if (!zbd_wq) + return -ENOMEM; + buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq", WQ_MEM_RECLAIM, 0); if (!buffered_io_wq) { error = -ENOMEM; - goto out; + goto out_free_zbd_work_queue; } error = nvmet_init_discovery(); @@ -1612,7 +1621,8 @@ out_exit_discovery: nvmet_exit_discovery(); out_free_work_queue: destroy_workqueue(buffered_io_wq); -out: +out_free_zbd_work_queue: + destroy_workqueue(zbd_wq); return error; } @@ -1622,6 +1632,7 @@ static void __exit nvmet_exit(void) nvmet_exit_discovery(); ida_destroy(&cntlid_ida); destroy_workqueue(buffered_io_wq); + destroy_workqueue(zbd_wq); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 019cc994efcd..0fc2781ab970 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -47,6 +47,14 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) id->nows = to0based(ql->io_opt / ql->logical_block_size); } +void nvmet_bdev_ns_disable(struct nvmet_ns *ns) +{ + if (ns->bdev) { + blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ); + ns->bdev = NULL; + } +} + static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns) { struct blk_integrity *bi = bdev_get_integrity(ns->bdev); @@ -86,15 +94,15 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10)) nvmet_bdev_ns_enable_integrity(ns); - return 0; -} - -void nvmet_bdev_ns_disable(struct nvmet_ns *ns) -{ - if (ns->bdev) { - blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ); - ns->bdev = NULL; + if (bdev_is_zoned(ns->bdev)) { + if (!nvmet_bdev_zns_enable(ns)) { + nvmet_bdev_ns_disable(ns); + return -EINVAL; + } + ns->csi = NVME_CSI_ZNS; } + + return 0; } void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns) @@ -102,7 +110,7 @@ void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns) ns->size = i_size_read(ns->bdev->bd_inode); } -static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) +u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) { u16 status = NVME_SC_SUCCESS; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index cc71918ff8fe..d719a1cd5dda 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -250,6 +250,10 @@ struct nvmet_subsys { unsigned int admin_timeout; unsigned int io_timeout; #endif /* CONFIG_NVME_TARGET_PASSTHRU */ + +#ifdef CONFIG_BLK_DEV_ZONED + u8 zasl; +#endif /* CONFIG_BLK_DEV_ZONED */ }; static inline struct nvmet_subsys *to_subsys(struct config_item *item) @@ -335,6 +339,12 @@ struct nvmet_req { struct work_struct work; bool use_workqueue; } p; +#ifdef CONFIG_BLK_DEV_ZONED + struct { + struct bio inline_bio; + struct work_struct zmgmt_work; + } z; +#endif /* CONFIG_BLK_DEV_ZONED */ }; int sg_cnt; int metadata_sg_cnt; @@ -354,6 +364,7 @@ struct nvmet_req { }; extern struct workqueue_struct *buffered_io_wq; +extern struct workqueue_struct *zbd_wq; static inline void nvmet_set_result(struct nvmet_req *req, u32 result) { @@ -403,6 +414,7 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req); void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id); u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req); u16 nvmet_file_parse_io_cmd(struct nvmet_req *req); +u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req); u16 nvmet_parse_admin_cmd(struct nvmet_req *req); u16 nvmet_parse_discovery_cmd(struct nvmet_req *req); u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); @@ -530,6 +542,14 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid); void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); int nvmet_file_ns_revalidate(struct nvmet_ns *ns); void nvmet_ns_revalidate(struct nvmet_ns *ns); +u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts); + +bool nvmet_bdev_zns_enable(struct nvmet_ns *ns); +void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req); +void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req); +void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req); +void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req); +void nvmet_bdev_execute_zone_append(struct nvmet_req *req); static inline u32 nvmet_rw_data_len(struct nvmet_req *req) { diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c new file mode 100644 index 000000000000..17f8b7a45f21 --- /dev/null +++ b/drivers/nvme/target/zns.c @@ -0,0 +1,615 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVMe ZNS-ZBD command implementation. + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include "nvmet.h" + +/* + * We set the Memory Page Size Minimum (MPSMIN) for target controller to 0 + * which gets added by 12 in the nvme_enable_ctrl() which results in 2^12 = 4k + * as page_shift value. When calculating the ZASL use shift by 12. + */ +#define NVMET_MPSMIN_SHIFT 12 + +static inline u8 nvmet_zasl(unsigned int zone_append_sects) +{ + /* + * Zone Append Size Limit (zasl) is expressed as a power of 2 value + * with the minimum memory page size (i.e. 12) as unit. + */ + return ilog2(zone_append_sects >> (NVMET_MPSMIN_SHIFT - 9)); +} + +static int validate_conv_zones_cb(struct blk_zone *z, + unsigned int i, void *data) +{ + if (z->type == BLK_ZONE_TYPE_CONVENTIONAL) + return -EOPNOTSUPP; + return 0; +} + +bool nvmet_bdev_zns_enable(struct nvmet_ns *ns) +{ + struct request_queue *q = ns->bdev->bd_disk->queue; + u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q)); + struct gendisk *bd_disk = ns->bdev->bd_disk; + int ret; + + if (ns->subsys->zasl) { + if (ns->subsys->zasl > zasl) + return false; + } + ns->subsys->zasl = zasl; + + /* + * Generic zoned block devices may have a smaller last zone which is + * not supported by ZNS. Exclude zoned drives that have such smaller + * last zone. + */ + if (get_capacity(bd_disk) & (bdev_zone_sectors(ns->bdev) - 1)) + return false; + /* + * ZNS does not define a conventional zone type. If the underlying + * device has a bitmap set indicating the existence of conventional + * zones, reject the device. Otherwise, use report zones to detect if + * the device has conventional zones. + */ + if (ns->bdev->bd_disk->queue->conv_zones_bitmap) + return false; + + ret = blkdev_report_zones(ns->bdev, 0, blkdev_nr_zones(bd_disk), + validate_conv_zones_cb, NULL); + if (ret < 0) + return false; + + ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); + + return true; +} + +void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req) +{ + u8 zasl = req->sq->ctrl->subsys->zasl; + struct nvmet_ctrl *ctrl = req->sq->ctrl; + struct nvme_id_ctrl_zns *id; + u16 status; + + id = kzalloc(sizeof(*id), GFP_KERNEL); + if (!id) { + status = NVME_SC_INTERNAL; + goto out; + } + + if (ctrl->ops->get_mdts) + id->zasl = min_t(u8, ctrl->ops->get_mdts(ctrl), zasl); + else + id->zasl = zasl; + + status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); + + kfree(id); +out: + nvmet_req_complete(req, status); +} + +void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req) +{ + struct nvme_id_ns_zns *id_zns; + u64 zsze; + u16 status; + + if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) { + req->error_loc = offsetof(struct nvme_identify, nsid); + status = NVME_SC_INVALID_NS | NVME_SC_DNR; + goto out; + } + + id_zns = kzalloc(sizeof(*id_zns), GFP_KERNEL); + if (!id_zns) { + status = NVME_SC_INTERNAL; + goto out; + } + + status = nvmet_req_find_ns(req); + if (status) { + status = NVME_SC_INTERNAL; + goto done; + } + + if (!bdev_is_zoned(req->ns->bdev)) { + req->error_loc = offsetof(struct nvme_identify, nsid); + status = NVME_SC_INVALID_NS | NVME_SC_DNR; + goto done; + } + + nvmet_ns_revalidate(req->ns); + zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >> + req->ns->blksize_shift; + id_zns->lbafe[0].zsze = cpu_to_le64(zsze); + id_zns->mor = cpu_to_le32(bdev_max_open_zones(req->ns->bdev)); + id_zns->mar = cpu_to_le32(bdev_max_active_zones(req->ns->bdev)); + +done: + status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns)); + kfree(id_zns); +out: + nvmet_req_complete(req, status); +} + +static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req) +{ + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba); + u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2; + + if (sect >= get_capacity(req->ns->bdev->bd_disk)) { + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, slba); + return NVME_SC_LBA_RANGE | NVME_SC_DNR; + } + + if (out_bufsize < sizeof(struct nvme_zone_report)) { + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, numd); + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; + } + + if (req->cmd->zmr.zra != NVME_ZRA_ZONE_REPORT) { + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, zra); + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; + } + + switch (req->cmd->zmr.pr) { + case 0: + case 1: + break; + default: + req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, pr); + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; + } + + switch (req->cmd->zmr.zrasf) { + case NVME_ZRASF_ZONE_REPORT_ALL: + case NVME_ZRASF_ZONE_STATE_EMPTY: + case NVME_ZRASF_ZONE_STATE_IMP_OPEN: + case NVME_ZRASF_ZONE_STATE_EXP_OPEN: + case NVME_ZRASF_ZONE_STATE_CLOSED: + case NVME_ZRASF_ZONE_STATE_FULL: + case NVME_ZRASF_ZONE_STATE_READONLY: + case NVME_ZRASF_ZONE_STATE_OFFLINE: + break; + default: + req->error_loc = + offsetof(struct nvme_zone_mgmt_recv_cmd, zrasf); + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; + } + + return NVME_SC_SUCCESS; +} + +struct nvmet_report_zone_data { + struct nvmet_req *req; + u64 out_buf_offset; + u64 out_nr_zones; + u64 nr_zones; + u8 zrasf; +}; + +static int nvmet_bdev_report_zone_cb(struct blk_zone *z, unsigned i, void *d) +{ + static const unsigned int nvme_zrasf_to_blk_zcond[] = { + [NVME_ZRASF_ZONE_STATE_EMPTY] = BLK_ZONE_COND_EMPTY, + [NVME_ZRASF_ZONE_STATE_IMP_OPEN] = BLK_ZONE_COND_IMP_OPEN, + [NVME_ZRASF_ZONE_STATE_EXP_OPEN] = BLK_ZONE_COND_EXP_OPEN, + [NVME_ZRASF_ZONE_STATE_CLOSED] = BLK_ZONE_COND_CLOSED, + [NVME_ZRASF_ZONE_STATE_READONLY] = BLK_ZONE_COND_READONLY, + [NVME_ZRASF_ZONE_STATE_FULL] = BLK_ZONE_COND_FULL, + [NVME_ZRASF_ZONE_STATE_OFFLINE] = BLK_ZONE_COND_OFFLINE, + }; + struct nvmet_report_zone_data *rz = d; + + if (rz->zrasf != NVME_ZRASF_ZONE_REPORT_ALL && + z->cond != nvme_zrasf_to_blk_zcond[rz->zrasf]) + return 0; + + if (rz->nr_zones < rz->out_nr_zones) { + struct nvme_zone_descriptor zdesc = { }; + u16 status; + + zdesc.zcap = nvmet_sect_to_lba(rz->req->ns, z->capacity); + zdesc.zslba = nvmet_sect_to_lba(rz->req->ns, z->start); + zdesc.wp = nvmet_sect_to_lba(rz->req->ns, z->wp); + zdesc.za = z->reset ? 1 << 2 : 0; + zdesc.zs = z->cond << 4; + zdesc.zt = z->type; + + status = nvmet_copy_to_sgl(rz->req, rz->out_buf_offset, &zdesc, + sizeof(zdesc)); + if (status) + return -EINVAL; + + rz->out_buf_offset += sizeof(zdesc); + } + + rz->nr_zones++; + + return 0; +} + +static unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req) +{ + unsigned int sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba); + + return blkdev_nr_zones(req->ns->bdev->bd_disk) - + (sect >> ilog2(bdev_zone_sectors(req->ns->bdev))); +} + +static unsigned long get_nr_zones_from_buf(struct nvmet_req *req, u32 bufsize) +{ + if (bufsize <= sizeof(struct nvme_zone_report)) + return 0; + + return (bufsize - sizeof(struct nvme_zone_report)) / + sizeof(struct nvme_zone_descriptor); +} + +static void nvmet_bdev_zone_zmgmt_recv_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, z.zmgmt_work); + sector_t start_sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba); + unsigned long req_slba_nr_zones = nvmet_req_nr_zones_from_slba(req); + u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2; + __le64 nr_zones; + u16 status; + int ret; + struct nvmet_report_zone_data rz_data = { + .out_nr_zones = get_nr_zones_from_buf(req, out_bufsize), + /* leave the place for report zone header */ + .out_buf_offset = sizeof(struct nvme_zone_report), + .zrasf = req->cmd->zmr.zrasf, + .nr_zones = 0, + .req = req, + }; + + status = nvmet_bdev_validate_zone_mgmt_recv(req); + if (status) + goto out; + + if (!req_slba_nr_zones) { + status = NVME_SC_SUCCESS; + goto out; + } + + ret = blkdev_report_zones(req->ns->bdev, start_sect, req_slba_nr_zones, + nvmet_bdev_report_zone_cb, &rz_data); + if (ret < 0) { + status = NVME_SC_INTERNAL; + goto out; + } + + /* + * When partial bit is set nr_zones must indicate the number of zone + * descriptors actually transferred. + */ + if (req->cmd->zmr.pr) + rz_data.nr_zones = min(rz_data.nr_zones, rz_data.out_nr_zones); + + nr_zones = cpu_to_le64(rz_data.nr_zones); + status = nvmet_copy_to_sgl(req, 0, &nr_zones, sizeof(nr_zones)); + +out: + nvmet_req_complete(req, status); +} + +void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req) +{ + INIT_WORK(&req->z.zmgmt_work, nvmet_bdev_zone_zmgmt_recv_work); + queue_work(zbd_wq, &req->z.zmgmt_work); +} + +static inline enum req_opf zsa_req_op(u8 zsa) +{ + switch (zsa) { + case NVME_ZONE_OPEN: + return REQ_OP_ZONE_OPEN; + case NVME_ZONE_CLOSE: + return REQ_OP_ZONE_CLOSE; + case NVME_ZONE_FINISH: + return REQ_OP_ZONE_FINISH; + case NVME_ZONE_RESET: + return REQ_OP_ZONE_RESET; + default: + return REQ_OP_LAST; + } +} + +static u16 blkdev_zone_mgmt_errno_to_nvme_status(int ret) +{ + switch (ret) { + case 0: + return NVME_SC_SUCCESS; + case -EINVAL: + case -EIO: + return NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR; + default: + return NVME_SC_INTERNAL; + } +} + +struct nvmet_zone_mgmt_send_all_data { + unsigned long *zbitmap; + struct nvmet_req *req; +}; + +static int zmgmt_send_scan_cb(struct blk_zone *z, unsigned i, void *d) +{ + struct nvmet_zone_mgmt_send_all_data *data = d; + + switch (zsa_req_op(data->req->cmd->zms.zsa)) { + case REQ_OP_ZONE_OPEN: + switch (z->cond) { + case BLK_ZONE_COND_CLOSED: + break; + default: + return 0; + } + break; + case REQ_OP_ZONE_CLOSE: + switch (z->cond) { + case BLK_ZONE_COND_IMP_OPEN: + case BLK_ZONE_COND_EXP_OPEN: + break; + default: + return 0; + } + break; + case REQ_OP_ZONE_FINISH: + switch (z->cond) { + case BLK_ZONE_COND_IMP_OPEN: + case BLK_ZONE_COND_EXP_OPEN: + case BLK_ZONE_COND_CLOSED: + break; + default: + return 0; + } + break; + default: + return -EINVAL; + } + + set_bit(i, data->zbitmap); + + return 0; +} + +static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req) +{ + struct block_device *bdev = req->ns->bdev; + unsigned int nr_zones = blkdev_nr_zones(bdev->bd_disk); + struct request_queue *q = bdev_get_queue(bdev); + struct bio *bio = NULL; + sector_t sector = 0; + int ret; + struct nvmet_zone_mgmt_send_all_data d = { + .req = req, + }; + + d.zbitmap = kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(*(d.zbitmap)), + GFP_NOIO, q->node); + if (!d.zbitmap) { + ret = -ENOMEM; + goto out; + } + + /* Scan and build bitmap of the eligible zones */ + ret = blkdev_report_zones(bdev, 0, nr_zones, zmgmt_send_scan_cb, &d); + if (ret != nr_zones) { + if (ret > 0) + ret = -EIO; + goto out; + } else { + /* We scanned all the zones */ + ret = 0; + } + + while (sector < get_capacity(bdev->bd_disk)) { + if (test_bit(blk_queue_zone_no(q, sector), d.zbitmap)) { + bio = blk_next_bio(bio, 0, GFP_KERNEL); + bio->bi_opf = zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC; + bio->bi_iter.bi_sector = sector; + bio_set_dev(bio, bdev); + /* This may take a while, so be nice to others */ + cond_resched(); + } + sector += blk_queue_zone_sectors(q); + } + + if (bio) { + ret = submit_bio_wait(bio); + bio_put(bio); + } + +out: + kfree(d.zbitmap); + + return blkdev_zone_mgmt_errno_to_nvme_status(ret); +} + +static u16 nvmet_bdev_execute_zmgmt_send_all(struct nvmet_req *req) +{ + int ret; + + switch (zsa_req_op(req->cmd->zms.zsa)) { + case REQ_OP_ZONE_RESET: + ret = blkdev_zone_mgmt(req->ns->bdev, REQ_OP_ZONE_RESET, 0, + get_capacity(req->ns->bdev->bd_disk), + GFP_KERNEL); + if (ret < 0) + return blkdev_zone_mgmt_errno_to_nvme_status(ret); + break; + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: + return nvmet_bdev_zone_mgmt_emulate_all(req); + default: + /* this is needed to quiet compiler warning */ + req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa); + return NVME_SC_INVALID_FIELD | NVME_SC_DNR; + } + + return NVME_SC_SUCCESS; +} + +static void nvmet_bdev_zmgmt_send_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, z.zmgmt_work); + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba); + enum req_opf op = zsa_req_op(req->cmd->zms.zsa); + struct block_device *bdev = req->ns->bdev; + sector_t zone_sectors = bdev_zone_sectors(bdev); + u16 status = NVME_SC_SUCCESS; + int ret; + + if (op == REQ_OP_LAST) { + req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa); + status = NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR; + goto out; + } + + /* when select all bit is set slba field is ignored */ + if (req->cmd->zms.select_all) { + status = nvmet_bdev_execute_zmgmt_send_all(req); + goto out; + } + + if (sect >= get_capacity(bdev->bd_disk)) { + req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba); + status = NVME_SC_LBA_RANGE | NVME_SC_DNR; + goto out; + } + + if (sect & (zone_sectors - 1)) { + req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba); + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + goto out; + } + + ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors, GFP_KERNEL); + if (ret < 0) + status = blkdev_zone_mgmt_errno_to_nvme_status(ret); + +out: + nvmet_req_complete(req, status); +} + +void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req) +{ + INIT_WORK(&req->z.zmgmt_work, nvmet_bdev_zmgmt_send_work); + queue_work(zbd_wq, &req->z.zmgmt_work); +} + +static void nvmet_bdev_zone_append_bio_done(struct bio *bio) +{ + struct nvmet_req *req = bio->bi_private; + + if (bio->bi_status == BLK_STS_OK) { + req->cqe->result.u64 = + nvmet_sect_to_lba(req->ns, bio->bi_iter.bi_sector); + } + + nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status)); + nvmet_req_bio_put(req, bio); +} + +void nvmet_bdev_execute_zone_append(struct nvmet_req *req) +{ + sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba); + u16 status = NVME_SC_SUCCESS; + unsigned int total_len = 0; + struct scatterlist *sg; + struct bio *bio; + int sg_cnt; + + /* Request is completed on len mismatch in nvmet_check_transter_len() */ + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) + return; + + if (!req->sg_cnt) { + nvmet_req_complete(req, 0); + return; + } + + if (sect >= get_capacity(req->ns->bdev->bd_disk)) { + req->error_loc = offsetof(struct nvme_rw_command, slba); + status = NVME_SC_LBA_RANGE | NVME_SC_DNR; + goto out; + } + + if (sect & (bdev_zone_sectors(req->ns->bdev) - 1)) { + req->error_loc = offsetof(struct nvme_rw_command, slba); + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + goto out; + } + + if (nvmet_use_inline_bvec(req)) { + bio = &req->z.inline_bio; + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + } else { + bio = bio_alloc(GFP_KERNEL, req->sg_cnt); + } + + bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE; + bio->bi_end_io = nvmet_bdev_zone_append_bio_done; + bio_set_dev(bio, req->ns->bdev); + bio->bi_iter.bi_sector = sect; + bio->bi_private = req; + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) + bio->bi_opf |= REQ_FUA; + + for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) { + struct page *p = sg_page(sg); + unsigned int l = sg->length; + unsigned int o = sg->offset; + unsigned int ret; + + ret = bio_add_zone_append_page(bio, p, l, o); + if (ret != sg->length) { + status = NVME_SC_INTERNAL; + goto out_put_bio; + } + total_len += sg->length; + } + + if (total_len != nvmet_rw_data_len(req)) { + status = NVME_SC_INTERNAL | NVME_SC_DNR; + goto out_put_bio; + } + + submit_bio(bio); + return; + +out_put_bio: + nvmet_req_bio_put(req, bio); +out: + nvmet_req_complete(req, status); +} + +u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req) +{ + struct nvme_command *cmd = req->cmd; + + switch (cmd->common.opcode) { + case nvme_cmd_zone_append: + req->execute = nvmet_bdev_execute_zone_append; + return 0; + case nvme_cmd_zone_mgmt_recv: + req->execute = nvmet_bdev_execute_zone_mgmt_recv; + return 0; + case nvme_cmd_zone_mgmt_send: + req->execute = nvmet_bdev_execute_zone_mgmt_send; + return 0; + default: + return nvmet_bdev_parse_io_cmd(req); + } +} diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c7ba83144d52..cb1197f1cfed 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -944,6 +944,13 @@ struct nvme_zone_mgmt_recv_cmd { enum { NVME_ZRA_ZONE_REPORT = 0, NVME_ZRASF_ZONE_REPORT_ALL = 0, + NVME_ZRASF_ZONE_STATE_EMPTY = 0x01, + NVME_ZRASF_ZONE_STATE_IMP_OPEN = 0x02, + NVME_ZRASF_ZONE_STATE_EXP_OPEN = 0x03, + NVME_ZRASF_ZONE_STATE_CLOSED = 0x04, + NVME_ZRASF_ZONE_STATE_READONLY = 0x05, + NVME_ZRASF_ZONE_STATE_FULL = 0x06, + NVME_ZRASF_ZONE_STATE_OFFLINE = 0x07, NVME_REPORT_ZONE_PARTIAL = 1, }; -- cgit v1.2.3