summaryrefslogtreecommitdiffstats
path: root/drivers/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/core.c323
-rw-r--r--drivers/nvme/host/fabrics.h1
-rw-r--r--drivers/nvme/host/multipath.c2
-rw-r--r--drivers/nvme/host/nvme.h5
-rw-r--r--drivers/nvme/host/pci.c1
-rw-r--r--drivers/nvme/host/rdma.c1
-rw-r--r--drivers/nvme/host/tcp.c74
-rw-r--r--drivers/nvme/target/configfs.c38
-rw-r--r--drivers/nvme/target/core.c3
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c1
10 files changed, 216 insertions, 233 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f204c6f78b5b..efb85c6d8e2d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -77,10 +77,6 @@ module_param(apst_secondary_latency_tol_us, ulong, 0644);
MODULE_PARM_DESC(apst_secondary_latency_tol_us,
"secondary APST latency tolerance in us");
-static bool streams;
-module_param(streams, bool, 0644);
-MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
-
/*
* nvme_wq - hosts nvme related works that are not reset or delete
* nvme_reset_wq - hosts nvme reset works
@@ -401,6 +397,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq);
void nvme_complete_batch_req(struct request *req)
{
+ trace_nvme_complete_rq(req);
nvme_cleanup_cmd(req);
nvme_end_req_zoned(req);
}
@@ -719,108 +716,6 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
}
EXPORT_SYMBOL_GPL(__nvme_check_ready);
-static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
-{
- struct nvme_command c = { };
-
- c.directive.opcode = nvme_admin_directive_send;
- c.directive.nsid = cpu_to_le32(NVME_NSID_ALL);
- c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
- c.directive.dtype = NVME_DIR_IDENTIFY;
- c.directive.tdtype = NVME_DIR_STREAMS;
- c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
-
- return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
-}
-
-static int nvme_disable_streams(struct nvme_ctrl *ctrl)
-{
- return nvme_toggle_streams(ctrl, false);
-}
-
-static int nvme_enable_streams(struct nvme_ctrl *ctrl)
-{
- return nvme_toggle_streams(ctrl, true);
-}
-
-static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
- struct streams_directive_params *s, u32 nsid)
-{
- struct nvme_command c = { };
-
- memset(s, 0, sizeof(*s));
-
- c.directive.opcode = nvme_admin_directive_recv;
- c.directive.nsid = cpu_to_le32(nsid);
- c.directive.numd = cpu_to_le32(nvme_bytes_to_numd(sizeof(*s)));
- c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
- c.directive.dtype = NVME_DIR_STREAMS;
-
- return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
-}
-
-static int nvme_configure_directives(struct nvme_ctrl *ctrl)
-{
- struct streams_directive_params s;
- u16 nssa;
- int ret;
-
- if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
- return 0;
- if (!streams)
- return 0;
-
- ret = nvme_enable_streams(ctrl);
- if (ret)
- return ret;
-
- ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
- if (ret)
- goto out_disable_stream;
-
- nssa = le16_to_cpu(s.nssa);
- if (nssa < BLK_MAX_WRITE_HINTS - 1) {
- dev_info(ctrl->device, "too few streams (%u) available\n",
- nssa);
- /* this condition is not an error: streams are optional */
- ret = 0;
- goto out_disable_stream;
- }
-
- ctrl->nr_streams = min_t(u16, nssa, BLK_MAX_WRITE_HINTS - 1);
- dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
- return 0;
-
-out_disable_stream:
- nvme_disable_streams(ctrl);
- return ret;
-}
-
-/*
- * Check if 'req' has a write hint associated with it. If it does, assign
- * a valid namespace stream to the write.
- */
-static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
- struct request *req, u16 *control,
- u32 *dsmgmt)
-{
- enum rw_hint streamid = req->write_hint;
-
- if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
- streamid = 0;
- else {
- streamid--;
- if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
- return;
-
- *control |= NVME_RW_DTYPE_STREAMS;
- *dsmgmt |= streamid << 16;
- }
-
- if (streamid < ARRAY_SIZE(req->q->write_hints))
- req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
-}
-
static inline void nvme_setup_flush(struct nvme_ns *ns,
struct nvme_command *cmnd)
{
@@ -890,6 +785,30 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
return BLK_STS_OK;
}
+static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd,
+ struct request *req)
+{
+ u32 upper, lower;
+ u64 ref48;
+
+ /* both rw and write zeroes share the same reftag format */
+ switch (ns->guard_type) {
+ case NVME_NVM_NS_16B_GUARD:
+ cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
+ break;
+ case NVME_NVM_NS_64B_GUARD:
+ ref48 = ext_pi_ref_tag(req);
+ lower = lower_32_bits(ref48);
+ upper = upper_32_bits(ref48);
+
+ cmnd->rw.reftag = cpu_to_le32(lower);
+ cmnd->rw.cdw3 = cpu_to_le32(upper);
+ break;
+ default:
+ break;
+ }
+}
+
static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd)
{
@@ -911,8 +830,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE1:
case NVME_NS_DPS_PI_TYPE2:
- cmnd->write_zeroes.reftag =
- cpu_to_le32(t10_pi_ref_tag(req));
+ nvme_set_ref_tag(ns, cmnd, req);
break;
}
}
@@ -924,7 +842,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd,
enum nvme_opcode op)
{
- struct nvme_ctrl *ctrl = ns->ctrl;
u16 control = 0;
u32 dsmgmt = 0;
@@ -939,7 +856,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
cmnd->rw.opcode = op;
cmnd->rw.flags = 0;
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
- cmnd->rw.rsvd2 = 0;
+ cmnd->rw.cdw2 = 0;
+ cmnd->rw.cdw3 = 0;
cmnd->rw.metadata = 0;
cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
@@ -947,9 +865,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
cmnd->rw.apptag = 0;
cmnd->rw.appmask = 0;
- if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
- nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
-
if (ns->ms) {
/*
* If formated with metadata, the block layer always provides a
@@ -973,7 +888,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
NVME_RW_PRINFO_PRCHK_REF;
if (op == nvme_cmd_zone_append)
control |= NVME_RW_APPEND_PIREMAP;
- cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
+ nvme_set_ref_tag(ns, cmnd, req);
break;
}
}
@@ -1629,33 +1544,58 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
+static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
u32 max_integrity_segments)
{
struct blk_integrity integrity = { };
- switch (pi_type) {
+ switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
- integrity.profile = &t10_pi_type3_crc;
- integrity.tag_size = sizeof(u16) + sizeof(u32);
- integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ switch (ns->guard_type) {
+ case NVME_NVM_NS_16B_GUARD:
+ integrity.profile = &t10_pi_type3_crc;
+ integrity.tag_size = sizeof(u16) + sizeof(u32);
+ integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ break;
+ case NVME_NVM_NS_64B_GUARD:
+ integrity.profile = &ext_pi_type3_crc64;
+ integrity.tag_size = sizeof(u16) + 6;
+ integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ break;
+ default:
+ integrity.profile = NULL;
+ break;
+ }
break;
case NVME_NS_DPS_PI_TYPE1:
case NVME_NS_DPS_PI_TYPE2:
- integrity.profile = &t10_pi_type1_crc;
- integrity.tag_size = sizeof(u16);
- integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ switch (ns->guard_type) {
+ case NVME_NVM_NS_16B_GUARD:
+ integrity.profile = &t10_pi_type1_crc;
+ integrity.tag_size = sizeof(u16);
+ integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ break;
+ case NVME_NVM_NS_64B_GUARD:
+ integrity.profile = &ext_pi_type1_crc64;
+ integrity.tag_size = sizeof(u16);
+ integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ break;
+ default:
+ integrity.profile = NULL;
+ break;
+ }
break;
default:
integrity.profile = NULL;
break;
}
- integrity.tuple_size = ms;
+
+ integrity.tuple_size = ns->ms;
blk_integrity_register(disk, &integrity);
blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
}
#else
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
+static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
u32 max_integrity_segments)
{
}
@@ -1672,9 +1612,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
return;
}
- if (ctrl->nr_streams && ns->sws && ns->sgs)
- size *= ns->sws * ns->sgs;
-
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
@@ -1700,48 +1637,77 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
a->csi == b->csi;
}
-static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- u32 *phys_bs, u32 *io_opt)
+static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
{
- struct streams_directive_params s;
- int ret;
+ bool first = id->dps & NVME_NS_DPS_PI_FIRST;
+ unsigned lbaf = nvme_lbaf_index(id->flbas);
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ struct nvme_command c = { };
+ struct nvme_id_ns_nvm *nvm;
+ int ret = 0;
+ u32 elbaf;
+
+ ns->pi_size = 0;
+ ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+ if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
+ ns->pi_size = sizeof(struct t10_pi_tuple);
+ ns->guard_type = NVME_NVM_NS_16B_GUARD;
+ goto set_pi;
+ }
- if (!ctrl->nr_streams)
- return 0;
+ nvm = kzalloc(sizeof(*nvm), GFP_KERNEL);
+ if (!nvm)
+ return -ENOMEM;
+
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.nsid = cpu_to_le32(ns->head->ns_id);
+ c.identify.cns = NVME_ID_CNS_CS_NS;
+ c.identify.csi = NVME_CSI_NVM;
- ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id);
+ ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, nvm, sizeof(*nvm));
if (ret)
- return ret;
+ goto free_data;
- ns->sws = le32_to_cpu(s.sws);
- ns->sgs = le16_to_cpu(s.sgs);
+ elbaf = le32_to_cpu(nvm->elbaf[lbaf]);
- if (ns->sws) {
- *phys_bs = ns->sws * (1 << ns->lba_shift);
- if (ns->sgs)
- *io_opt = *phys_bs * ns->sgs;
+ /* no support for storage tag formats right now */
+ if (nvme_elbaf_sts(elbaf))
+ goto free_data;
+
+ ns->guard_type = nvme_elbaf_guard_type(elbaf);
+ switch (ns->guard_type) {
+ case NVME_NVM_NS_64B_GUARD:
+ ns->pi_size = sizeof(struct crc64_pi_tuple);
+ break;
+ case NVME_NVM_NS_16B_GUARD:
+ ns->pi_size = sizeof(struct t10_pi_tuple);
+ break;
+ default:
+ break;
}
- return 0;
+free_data:
+ kfree(nvm);
+set_pi:
+ if (ns->pi_size && (first || ns->ms == ns->pi_size))
+ ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
+ else
+ ns->pi_type = 0;
+
+ return ret;
}
-static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
+static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
- /*
- * The PI implementation requires the metadata size to be equal to the
- * t10 pi tuple size.
- */
- ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
- if (ns->ms == sizeof(struct t10_pi_tuple))
- ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
- else
- ns->pi_type = 0;
+ if (nvme_init_ms(ns, id))
+ return;
ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
- return 0;
+ return;
+
if (ctrl->ops->flags & NVME_F_FABRICS) {
/*
* The NVMe over Fabrics specification only supports metadata as
@@ -1749,7 +1715,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
* remap the separate metadata buffer from the block layer.
*/
if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
- return -EINVAL;
+ return;
ns->features |= NVME_NS_EXT_LBAS;
@@ -1776,8 +1742,6 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
else
ns->features |= NVME_NS_METADATA_SUPPORTED;
}
-
- return 0;
}
static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
@@ -1817,7 +1781,6 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_integrity_unregister(disk);
atomic_bs = phys_bs = bs;
- nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt);
if (id->nabo == 0) {
/*
* Bit 1 indicates whether NAWUPF is defined for this namespace
@@ -1856,7 +1819,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
if (ns->ms) {
if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
(ns->features & NVME_NS_METADATA_SUPPORTED))
- nvme_init_integrity(disk, ns->ms, ns->pi_type,
+ nvme_init_integrity(disk, ns,
ns->ctrl->max_integrity_segments);
else if (!nvme_ns_has_pi(ns))
capacity = 0;
@@ -1908,16 +1871,14 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
{
- unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
+ unsigned lbaf = nvme_lbaf_index(id->flbas);
int ret;
blk_mq_freeze_queue(ns->disk->queue);
ns->lba_shift = id->lbaf[lbaf].ds;
nvme_set_queue_limits(ns->ctrl, ns->queue);
- ret = nvme_configure_metadata(ns, id);
- if (ret)
- goto out_unfreeze;
+ nvme_configure_metadata(ns, id);
nvme_set_chunk_sectors(ns, id);
nvme_update_disk_info(ns->disk, ns, id);
@@ -1935,7 +1896,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
if (blk_queue_is_zoned(ns->queue)) {
ret = nvme_revalidate_zones(ns);
if (ret && !nvme_first_scan(ns->disk))
- goto out;
+ return ret;
}
if (nvme_ns_head_multipath(ns->head)) {
@@ -1953,16 +1914,16 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
return 0;
out_unfreeze:
- blk_mq_unfreeze_queue(ns->disk->queue);
-out:
/*
* If probing fails due an unsupported feature, hide the block device,
* but still allow other access.
*/
if (ret == -ENODEV) {
ns->disk->flags |= GENHD_FL_HIDDEN;
+ set_bit(NVME_NS_READY, &ns->flags);
ret = 0;
}
+ blk_mq_unfreeze_queue(ns->disk->queue);
return ret;
}
@@ -2260,20 +2221,27 @@ static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
return ret;
}
-static int nvme_configure_acre(struct nvme_ctrl *ctrl)
+static int nvme_configure_host_options(struct nvme_ctrl *ctrl)
{
struct nvme_feat_host_behavior *host;
+ u8 acre = 0, lbafee = 0;
int ret;
/* Don't bother enabling the feature if retry delay is not reported */
- if (!ctrl->crdt[0])
+ if (ctrl->crdt[0])
+ acre = NVME_ENABLE_ACRE;
+ if (ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)
+ lbafee = NVME_ENABLE_LBAFEE;
+
+ if (!acre && !lbafee)
return 0;
host = kzalloc(sizeof(*host), GFP_KERNEL);
if (!host)
return 0;
- host->acre = NVME_ENABLE_ACRE;
+ host->acre = acre;
+ host->lbafee = lbafee;
ret = nvme_set_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
host, sizeof(*host), NULL);
kfree(host);
@@ -3111,11 +3079,7 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
if (ret < 0)
return ret;
- ret = nvme_configure_directives(ctrl);
- if (ret < 0)
- return ret;
-
- ret = nvme_configure_acre(ctrl);
+ ret = nvme_configure_host_options(ctrl);
if (ret < 0)
return ret;
@@ -4378,7 +4342,14 @@ static void nvme_async_event_work(struct work_struct *work)
container_of(work, struct nvme_ctrl, async_event_work);
nvme_aen_uevent(ctrl);
- ctrl->ops->submit_async_event(ctrl);
+
+ /*
+ * The transport drivers must guarantee AER submission here is safe by
+ * flushing ctrl async_event_work after changing the controller state
+ * from LIVE and before freeing the admin queue.
+ */
+ if (ctrl->state == NVME_CTRL_LIVE)
+ ctrl->ops->submit_async_event(ctrl);
}
static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
@@ -4694,7 +4665,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
return;
- blk_set_queue_dying(ns->queue);
+ blk_mark_disk_dead(ns->disk);
nvme_start_ns_queue(ns);
set_capacity_and_notify(ns->disk, 0);
@@ -4856,12 +4827,14 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_feat_host_behavior) != 512);
}
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index c3203ff1c654..1e3a09cad961 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -170,6 +170,7 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts)
{
if (ctrl->state == NVME_CTRL_DELETING ||
+ ctrl->state == NVME_CTRL_DELETING_NOIO ||
ctrl->state == NVME_CTRL_DEAD ||
strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 7fc58e1f6b09..d464fdf978fb 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -848,7 +848,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
- blk_set_queue_dying(head->disk->queue);
+ blk_mark_disk_dead(head->disk);
/* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a1f8403ffd78..1393bbf82d71 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -280,7 +280,6 @@ struct nvme_ctrl {
u16 crdt[3];
u16 oncs;
u16 oacs;
- u16 nr_streams;
u16 sqsize;
u32 max_namespaces;
atomic_t abort_limit;
@@ -454,9 +453,11 @@ struct nvme_ns {
int lba_shift;
u16 ms;
+ u16 pi_size;
u16 sgs;
u32 sws;
u8 pi_type;
+ u8 guard_type;
#ifdef CONFIG_BLK_DEV_ZONED
u64 zsze;
#endif
@@ -479,7 +480,7 @@ struct nvme_ns {
/* NVMe ns supports metadata actions by the controller (generate/strip) */
static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
{
- return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
+ return ns->pi_type && ns->ms == ns->pi_size;
}
struct nvme_ctrl_ops {
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 66f1eee2509a..d817ca17463e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -15,6 +15,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/memremap.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index c49b9c3c46f2..d9f19d901313 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1198,6 +1198,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
struct nvme_rdma_ctrl, err_work);
nvme_stop_keep_alive(&ctrl->ctrl);
+ flush_work(&ctrl->ctrl.async_event_work);
nvme_rdma_teardown_io_queues(ctrl, false);
nvme_start_queues(&ctrl->ctrl);
nvme_rdma_teardown_admin_queue(ctrl, false);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 10fc45d95b86..ad3a2bf2f1e9 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -82,6 +82,8 @@ struct nvme_tcp_request {
u32 data_len;
u32 pdu_len;
u32 pdu_sent;
+ u32 h2cdata_left;
+ u32 h2cdata_offset;
u16 ttag;
__le16 status;
struct list_head entry;
@@ -133,6 +135,7 @@ struct nvme_tcp_queue {
struct nvme_tcp_request *request;
int queue_size;
+ u32 maxh2cdata;
size_t cmnd_capsule_len;
struct nvme_tcp_ctrl *ctrl;
unsigned long flags;
@@ -610,23 +613,26 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
return ret;
}
-static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
- struct nvme_tcp_r2t_pdu *pdu)
+static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
{
struct nvme_tcp_data_pdu *data = req->pdu;
struct nvme_tcp_queue *queue = req->queue;
struct request *rq = blk_mq_rq_from_pdu(req);
+ u32 h2cdata_sent = req->pdu_len;
u8 hdgst = nvme_tcp_hdgst_len(queue);
u8 ddgst = nvme_tcp_ddgst_len(queue);
req->state = NVME_TCP_SEND_H2C_PDU;
req->offset = 0;
- req->pdu_len = le32_to_cpu(pdu->r2t_length);
+ req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata);
req->pdu_sent = 0;
+ req->h2cdata_left -= req->pdu_len;
+ req->h2cdata_offset += h2cdata_sent;
memset(data, 0, sizeof(*data));
data->hdr.type = nvme_tcp_h2c_data;
- data->hdr.flags = NVME_TCP_F_DATA_LAST;
+ if (!req->h2cdata_left)
+ data->hdr.flags = NVME_TCP_F_DATA_LAST;
if (queue->hdr_digest)
data->hdr.flags |= NVME_TCP_F_HDGST;
if (queue->data_digest)
@@ -635,9 +641,9 @@ static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
data->hdr.pdo = data->hdr.hlen + hdgst;
data->hdr.plen =
cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
- data->ttag = pdu->ttag;
+ data->ttag = req->ttag;
data->command_id = nvme_cid(rq);
- data->data_offset = pdu->r2t_offset;
+ data->data_offset = cpu_to_le32(req->h2cdata_offset);
data->data_length = cpu_to_le32(req->pdu_len);
}
@@ -647,6 +653,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
struct nvme_tcp_request *req;
struct request *rq;
u32 r2t_length = le32_to_cpu(pdu->r2t_length);
+ u32 r2t_offset = le32_to_cpu(pdu->r2t_offset);
rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) {
@@ -671,14 +678,19 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
return -EPROTO;
}
- if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
+ if (unlikely(r2t_offset < req->data_sent)) {
dev_err(queue->ctrl->ctrl.device,
"req %d unexpected r2t offset %u (expected %zu)\n",
- rq->tag, le32_to_cpu(pdu->r2t_offset), req->data_sent);
+ rq->tag, r2t_offset, req->data_sent);
return -EPROTO;
}
- nvme_tcp_setup_h2c_data_pdu(req, pdu);
+ req->pdu_len = 0;
+ req->h2cdata_left = r2t_length;
+ req->h2cdata_offset = r2t_offset;
+ req->ttag = pdu->ttag;
+
+ nvme_tcp_setup_h2c_data_pdu(req);
nvme_tcp_queue_request(req, false, true);
return 0;
@@ -951,13 +963,22 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
{
- nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR);
+ if (nvme_tcp_async_req(req)) {
+ union nvme_result res = {};
+
+ nvme_complete_async_event(&req->queue->ctrl->ctrl,
+ cpu_to_le16(NVME_SC_HOST_PATH_ERROR), &res);
+ } else {
+ nvme_tcp_end_request(blk_mq_rq_from_pdu(req),
+ NVME_SC_HOST_PATH_ERROR);
+ }
}
static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
int req_data_len = req->data_len;
+ u32 h2cdata_left = req->h2cdata_left;
while (true) {
struct page *page = nvme_tcp_req_cur_page(req);
@@ -1002,7 +1023,10 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
req->state = NVME_TCP_SEND_DDGST;
req->offset = 0;
} else {
- nvme_tcp_done_send_req(queue);
+ if (h2cdata_left)
+ nvme_tcp_setup_h2c_data_pdu(req);
+ else
+ nvme_tcp_done_send_req(queue);
}
return 1;
}
@@ -1060,9 +1084,14 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
if (queue->hdr_digest && !req->offset)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
- ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
- offset_in_page(pdu) + req->offset, len,
- MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
+ if (!req->h2cdata_left)
+ ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
+ offset_in_page(pdu) + req->offset, len,
+ MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
+ else
+ ret = sock_no_sendpage(queue->sock, virt_to_page(pdu),
+ offset_in_page(pdu) + req->offset, len,
+ MSG_DONTWAIT | MSG_MORE);
if (unlikely(ret <= 0))
return ret;
@@ -1082,6 +1111,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
size_t offset = req->offset;
+ u32 h2cdata_left = req->h2cdata_left;
int ret;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = {
@@ -1099,7 +1129,10 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
return ret;
if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
- nvme_tcp_done_send_req(queue);
+ if (h2cdata_left)
+ nvme_tcp_setup_h2c_data_pdu(req);
+ else
+ nvme_tcp_done_send_req(queue);
return 1;
}
@@ -1291,6 +1324,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
struct msghdr msg = {};
struct kvec iov;
bool ctrl_hdgst, ctrl_ddgst;
+ u32 maxh2cdata;
int ret;
icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
@@ -1374,6 +1408,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
goto free_icresp;
}
+ maxh2cdata = le32_to_cpu(icresp->maxdata);
+ if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) {
+ pr_err("queue %d: invalid maxh2cdata returned %u\n",
+ nvme_tcp_queue_id(queue), maxh2cdata);
+ goto free_icresp;
+ }
+ queue->maxh2cdata = maxh2cdata;
+
ret = 0;
free_icresp:
kfree(icresp);
@@ -2133,6 +2175,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
nvme_stop_keep_alive(ctrl);
+ flush_work(&ctrl->async_event_work);
nvme_tcp_teardown_io_queues(ctrl, false);
/* unquiesce to fail fast pending requests */
nvme_start_queues(ctrl);
@@ -2357,6 +2400,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
req->data_sent = 0;
req->pdu_len = 0;
req->pdu_sent = 0;
+ req->h2cdata_left = 0;
req->data_len = blk_rq_nr_phys_segments(rq) ?
blk_rq_payload_bytes(rq) : 0;
req->curr_bio = rq->bio;
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index c542f8457b1e..e44b2988759e 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1234,43 +1234,6 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
}
CONFIGFS_ATTR(nvmet_subsys_, attr_model);
-static ssize_t nvmet_subsys_attr_discovery_nqn_show(struct config_item *item,
- char *page)
-{
- return snprintf(page, PAGE_SIZE, "%s\n", nvmet_disc_subsys->subsysnqn);
-}
-
-static ssize_t nvmet_subsys_attr_discovery_nqn_store(struct config_item *item,
- const char *page, size_t count)
-{
- struct nvmet_subsys *subsys = to_subsys(item);
- char *subsysnqn;
- int len;
-
- len = strcspn(page, "\n");
- if (!len)
- return -EINVAL;
-
- subsysnqn = kmemdup_nul(page, len, GFP_KERNEL);
- if (!subsysnqn)
- return -ENOMEM;
-
- /*
- * The discovery NQN must be different from subsystem NQN.
- */
- if (!strcmp(subsysnqn, subsys->subsysnqn)) {
- kfree(subsysnqn);
- return -EBUSY;
- }
- down_write(&nvmet_config_sem);
- kfree(nvmet_disc_subsys->subsysnqn);
- nvmet_disc_subsys->subsysnqn = subsysnqn;
- up_write(&nvmet_config_sem);
-
- return count;
-}
-CONFIGFS_ATTR(nvmet_subsys_, attr_discovery_nqn);
-
#ifdef CONFIG_BLK_DEV_INTEGRITY
static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
char *page)
@@ -1300,7 +1263,6 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_cntlid_min,
&nvmet_subsys_attr_attr_cntlid_max,
&nvmet_subsys_attr_attr_model,
- &nvmet_subsys_attr_attr_discovery_nqn,
#ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_subsys_attr_attr_pi_enable,
#endif
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 7b2b72cf4423..90e75324dae0 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1495,8 +1495,7 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
if (!port)
return NULL;
- if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn) ||
- !strcmp(nvmet_disc_subsys->subsysnqn, subsysnqn)) {
+ if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
return NULL;
return nvmet_disc_subsys;
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index e9194804ddee..d886c2c59554 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
+#include <linux/memremap.h>
#include <linux/module.h>
#include "nvmet.h"