summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-11 15:36:52 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-11 15:36:52 -0700
commit130568d5eac5537cbd64cfb12103550af90edb79 (patch)
tree56d582ec11543bf5480822c3ef6c2b118bb70505 /drivers
parent908b852df1d5d27d289e915fea7bfc16d38b8a76 (diff)
parentb222dd2fdd53a40dd8f1d3082ae98e52883cce0d (diff)
downloadlinux-130568d5eac5537cbd64cfb12103550af90edb79.tar.bz2
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe: "This is a followup for block changes, that didn't make the initial pull request. It's a bit of a mixed bag, this contains: - A followup pull request from Sagi for NVMe. Outside of fixups for NVMe, it also includes a series for ensuring that we properly quiesce hardware queues when browsing live tags. - Set of integrity fixes from Dmitry (mostly), fixing various issues for folks using DIF/DIX. - Fix for a bug introduced in cciss, with the req init changes. From Christoph. - Fix for a bug in BFQ, from Paolo. - Two followup fixes for lightnvm/pblk from Javier. - Depth fix from Ming for blk-mq-sched. - Also from Ming, performance fix for mtip32xx that was introduced with the dynamic initialization of commands" * 'for-linus' of git://git.kernel.dk/linux-block: (44 commits) block: call bio_uninit in bio_endio nvmet: avoid unneeded assignment of submit_bio return value nvme-pci: add module parameter for io queue depth nvme-pci: compile warnings in nvme_alloc_host_mem() nvmet_fc: Accept variable pad lengths on Create Association LS nvme_fc/nvmet_fc: revise Create Association descriptor length lightnvm: pblk: remove unnecessary checks lightnvm: pblk: control I/O flow also on tear down cciss: initialize struct scsi_req null_blk: fix error flow for shared tags during module_init block: Fix __blkdev_issue_zeroout loop nvme-rdma: unconditionally recycle the request mr nvme: split nvme_uninit_ctrl into stop and uninit virtio_blk: quiesce/unquiesce live IO when entering PM states mtip32xx: quiesce request queues to make sure no submissions are inflight nbd: quiesce request queues to make sure no submissions are inflight nvme: kick requeue list when requeueing a request instead of when starting the queues nvme-pci: quiesce/unquiesce admin_q instead of start/stop its hw queues nvme-loop: quiesce/unquiesce admin_q instead of start/stop its hw queues nvme-fc: quiesce/unquiesce admin_q instead of start/stop its hw queues ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/cciss.c8
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c23
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h1
-rw-r--r--drivers/block/nbd.c4
-rw-r--r--drivers/block/null_blk.c18
-rw-r--r--drivers/block/virtio_blk.c4
-rw-r--r--drivers/lightnvm/pblk-core.c61
-rw-r--r--drivers/lightnvm/pblk-recovery.c31
-rw-r--r--drivers/lightnvm/pblk-write.c26
-rw-r--r--drivers/lightnvm/pblk.h2
-rw-r--r--drivers/md/dm.c2
-rw-r--r--drivers/nvdimm/blk.c16
-rw-r--r--drivers/nvdimm/btt.c16
-rw-r--r--drivers/nvme/host/core.c40
-rw-r--r--drivers/nvme/host/fc.c83
-rw-r--r--drivers/nvme/host/nvme.h4
-rw-r--r--drivers/nvme/host/pci.c96
-rw-r--r--drivers/nvme/host/rdma.c108
-rw-r--r--drivers/nvme/target/fc.c20
-rw-r--r--drivers/nvme/target/io-cmd.c2
-rw-r--r--drivers/nvme/target/loop.c47
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c5
-rw-r--r--drivers/scsi/qla2xxx/qla_isr.c8
-rw-r--r--drivers/target/target_core_sbc.c2
24 files changed, 346 insertions, 281 deletions
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 02a611993bb4..678af946be30 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1944,6 +1944,13 @@ static void cciss_get_serial_no(ctlr_info_t *h, int logvol,
return;
}
+static void cciss_initialize_rq(struct request *rq)
+{
+ struct scsi_request *sreq = blk_mq_rq_to_pdu(rq);
+
+ scsi_req_init(sreq);
+}
+
/*
* cciss_add_disk sets up the block device queue for a logical drive
*/
@@ -1956,6 +1963,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
disk->queue->cmd_size = sizeof(struct scsi_request);
disk->queue->request_fn = do_cciss_request;
+ disk->queue->initialize_rq_fn = cciss_initialize_rq;
disk->queue->queue_lock = &h->lock;
queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
if (blk_init_allocated_queue(disk->queue) < 0)
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 61b046f256ca..4a3cfc7940de 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -174,7 +174,6 @@ static void mtip_init_cmd_header(struct request *rq)
{
struct driver_data *dd = rq->q->queuedata;
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
- u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
/* Point the command headers at the command tables. */
cmd->command_header = dd->port->command_list +
@@ -182,7 +181,7 @@ static void mtip_init_cmd_header(struct request *rq)
cmd->command_header_dma = dd->port->command_list_dma +
(sizeof(struct mtip_cmd_hdr) * rq->tag);
- if (host_cap_64)
+ if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
@@ -386,6 +385,7 @@ static void mtip_init_port(struct mtip_port *port)
port->mmio + PORT_LST_ADDR_HI);
writel((port->rxfis_dma >> 16) >> 16,
port->mmio + PORT_FIS_ADDR_HI);
+ set_bit(MTIP_PF_HOST_CAP_64, &port->flags);
}
writel(port->command_list_dma & 0xFFFFFFFF,
@@ -950,7 +950,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
unsigned long to;
bool active = true;
- blk_mq_stop_hw_queues(port->dd->queue);
+ blk_mq_quiesce_queue(port->dd->queue);
to = jiffies + msecs_to_jiffies(timeout);
do {
@@ -970,10 +970,10 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
break;
} while (time_before(jiffies, to));
- blk_mq_start_stopped_hw_queues(port->dd->queue, true);
+ blk_mq_unquiesce_queue(port->dd->queue);
return active ? -EBUSY : 0;
err_fault:
- blk_mq_start_stopped_hw_queues(port->dd->queue, true);
+ blk_mq_unquiesce_queue(port->dd->queue);
return -EFAULT;
}
@@ -2737,6 +2737,9 @@ static void mtip_abort_cmd(struct request *req, void *data,
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
struct driver_data *dd = data;
+ if (!blk_mq_request_started(req))
+ return;
+
dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
clear_bit(req->tag, dd->port->cmds_to_issue);
@@ -2749,6 +2752,9 @@ static void mtip_queue_cmd(struct request *req, void *data,
{
struct driver_data *dd = data;
+ if (!blk_mq_request_started(req))
+ return;
+
set_bit(req->tag, dd->port->cmds_to_issue);
blk_abort_request(req);
}
@@ -2814,6 +2820,8 @@ restart_eh:
dev_warn(&dd->pdev->dev,
"Completion workers still active!");
+ blk_mq_quiesce_queue(dd->queue);
+
spin_lock(dd->queue->queue_lock);
blk_mq_tagset_busy_iter(&dd->tags,
mtip_queue_cmd, dd);
@@ -2826,6 +2834,8 @@ restart_eh:
mtip_abort_cmd, dd);
clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags);
+
+ blk_mq_unquiesce_queue(dd->queue);
}
if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
@@ -3995,8 +4005,9 @@ static int mtip_block_remove(struct driver_data *dd)
dd->disk->disk_name);
blk_freeze_queue_start(dd->queue);
- blk_mq_stop_hw_queues(dd->queue);
+ blk_mq_quiesce_queue(dd->queue);
blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd);
+ blk_mq_unquiesce_queue(dd->queue);
/*
* Delete our gendisk structure. This also removes the device
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index e8286af50e16..e20e55dab443 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -140,6 +140,7 @@ enum {
(1 << MTIP_PF_SE_ACTIVE_BIT) |
(1 << MTIP_PF_DM_ACTIVE_BIT) |
(1 << MTIP_PF_TO_ACTIVE_BIT)),
+ MTIP_PF_HOST_CAP_64 = 10, /* cache HOST_CAP_64 */
MTIP_PF_SVC_THD_ACTIVE_BIT = 4,
MTIP_PF_ISSUE_CMDS_BIT = 5,
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 977ec960dd2f..dea7d85134ee 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -661,9 +661,9 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
static void nbd_clear_que(struct nbd_device *nbd)
{
- blk_mq_stop_hw_queues(nbd->disk->queue);
+ blk_mq_quiesce_queue(nbd->disk->queue);
blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
- blk_mq_start_hw_queues(nbd->disk->queue);
+ blk_mq_unquiesce_queue(nbd->disk->queue);
dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
}
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 71f4422eba81..85c24cace973 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -844,9 +844,6 @@ static int __init null_init(void)
queue_mode = NULL_Q_MQ;
}
- if (queue_mode == NULL_Q_MQ && shared_tags)
- null_init_tag_set(&tag_set);
-
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
if (submit_queues < nr_online_nodes) {
pr_warn("null_blk: submit_queues param is set to %u.",
@@ -858,11 +855,19 @@ static int __init null_init(void)
else if (!submit_queues)
submit_queues = 1;
+ if (queue_mode == NULL_Q_MQ && shared_tags) {
+ ret = null_init_tag_set(&tag_set);
+ if (ret)
+ return ret;
+ }
+
mutex_init(&lock);
null_major = register_blkdev(0, "nullb");
- if (null_major < 0)
- return null_major;
+ if (null_major < 0) {
+ ret = null_major;
+ goto err_tagset;
+ }
if (use_lightnvm) {
ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
@@ -891,6 +896,9 @@ err_dev:
kmem_cache_destroy(ppa_cache);
err_ppa:
unregister_blkdev(null_major, "nullb");
+err_tagset:
+ if (queue_mode == NULL_Q_MQ && shared_tags)
+ blk_mq_free_tag_set(&tag_set);
return ret;
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0297ad7c1452..4e02aa5fdac0 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -840,7 +840,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
/* Make sure no work handler is accessing the device. */
flush_work(&vblk->config_work);
- blk_mq_stop_hw_queues(vblk->disk->queue);
+ blk_mq_quiesce_queue(vblk->disk->queue);
vdev->config->del_vqs(vdev);
return 0;
@@ -857,7 +857,7 @@ static int virtblk_restore(struct virtio_device *vdev)
virtio_device_ready(vdev);
- blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
+ blk_mq_unquiesce_queue(vblk->disk->queue);
return 0;
}
#endif
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 11fe0c5b2a9c..81501644fb15 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -1670,13 +1670,10 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
queue_work(wq, &line_ws->ws);
}
-void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
- unsigned long *lun_bitmap)
+static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
+ int nr_ppas, int pos)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+ struct pblk_lun *rlun = &pblk->luns[pos];
int ret;
/*
@@ -1690,14 +1687,8 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
ppa_list[0].g.ch != ppa_list[i].g.ch);
#endif
- /* If the LUN has been locked for this same request, do no attempt to
- * lock it again
- */
- if (test_and_set_bit(pos, lun_bitmap))
- return;
- rlun = &pblk->luns[pos];
- ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+ ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
if (ret) {
switch (ret) {
case -ETIME:
@@ -1710,6 +1701,50 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
}
}
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+ __pblk_down_page(pblk, ppa_list, nr_ppas, pos);
+}
+
+void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+ unsigned long *lun_bitmap)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+ /* If the LUN has been locked for this same request, do no attempt to
+ * lock it again
+ */
+ if (test_and_set_bit(pos, lun_bitmap))
+ return;
+
+ __pblk_down_page(pblk, ppa_list, nr_ppas, pos);
+}
+
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_lun *rlun;
+ int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+#ifdef CONFIG_NVM_DEBUG
+ int i;
+
+ for (i = 1; i < nr_ppas; i++)
+ WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
+ ppa_list[0].g.ch != ppa_list[i].g.ch);
+#endif
+
+ rlun = &pblk->luns[pos];
+ up(&rlun->wr_sem);
+}
+
void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
unsigned long *lun_bitmap)
{
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 0e48d3e4e143..cb556e06673e 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -340,9 +340,14 @@ static void pblk_end_io_recov(struct nvm_rq *rqd)
struct pblk *pblk = pad_rq->pblk;
struct nvm_tgt_dev *dev = pblk->dev;
- kref_put(&pad_rq->ref, pblk_recov_complete);
+ pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
+ bio_put(rqd->bio);
nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
pblk_free_rqd(pblk, rqd, WRITE);
+
+ atomic_dec(&pblk->inflight_io);
+ kref_put(&pad_rq->ref, pblk_recov_complete);
}
static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
@@ -385,7 +390,7 @@ next_pad_rq:
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (rq_ppas < pblk->min_write_pgs) {
pr_err("pblk: corrupted pad line %d\n", line->id);
- goto free_rq;
+ goto fail_free_pad;
}
rq_len = rq_ppas * geo->sec_size;
@@ -393,7 +398,7 @@ next_pad_rq:
meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
if (!meta_list) {
ret = -ENOMEM;
- goto free_data;
+ goto fail_free_pad;
}
ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
@@ -404,9 +409,9 @@ next_pad_rq:
ret = PTR_ERR(rqd);
goto fail_free_meta;
}
- memset(rqd, 0, pblk_w_rq_size);
- bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+ bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+ PBLK_VMALLOC_META, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
goto fail_free_rqd;
@@ -453,15 +458,15 @@ next_pad_rq:
}
kref_get(&pad_rq->ref);
+ pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
ret = pblk_submit_io(pblk, rqd);
if (ret) {
pr_err("pblk: I/O submission failed: %d\n", ret);
- goto free_data;
+ pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+ goto fail_free_bio;
}
- atomic_dec(&pblk->inflight_io);
-
left_line_ppas -= rq_ppas;
left_ppas -= rq_ppas;
if (left_ppas && left_line_ppas)
@@ -475,17 +480,23 @@ next_pad_rq:
ret = -ETIME;
}
+ if (!pblk_line_is_full(line))
+ pr_err("pblk: corrupted padded line: %d\n", line->id);
+
+ vfree(data);
free_rq:
kfree(pad_rq);
-free_data:
- vfree(data);
return ret;
+fail_free_bio:
+ bio_put(bio);
fail_free_rqd:
pblk_free_rqd(pblk, rqd, WRITE);
fail_free_meta:
nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+fail_free_pad:
kfree(pad_rq);
+ vfree(data);
return ret;
}
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index d62a8f4faaf4..3ad9e56d2473 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -39,9 +39,7 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
- if (rqd->meta_list)
- nvm_dev_dma_free(dev->parent, rqd->meta_list,
- rqd->dma_meta_list);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
bio_put(rqd->bio);
pblk_free_rqd(pblk, rqd, WRITE);
@@ -178,15 +176,12 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
struct pblk_line *line = m_ctx->private;
struct pblk_emeta *emeta = line->emeta;
- int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
- struct pblk_lun *rlun = &pblk->luns[pos];
int sync;
- up(&rlun->wr_sem);
+ pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
if (rqd->error) {
pblk_log_write_err(pblk, rqd);
@@ -203,6 +198,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
pblk->close_wq);
bio_put(rqd->bio);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
pblk_free_rqd(pblk, rqd, READ);
atomic_dec(&pblk->inflight_io);
@@ -226,9 +222,6 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
if (!rqd->meta_list)
return -ENOMEM;
- if (unlikely(nr_secs == 1))
- return 0;
-
rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
@@ -367,7 +360,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_emeta *emeta = meta_line->emeta;
struct pblk_g_ctx *m_ctx;
- struct pblk_lun *rlun;
struct bio *bio;
struct nvm_rq *rqd;
void *data;
@@ -411,13 +403,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
}
- rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
- ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
- if (ret) {
- pr_err("pblk: lun semaphore timed out (%d)\n", ret);
- goto fail_free_bio;
- }
-
emeta->mem += rq_len;
if (emeta->mem >= lm->emeta_len[0]) {
spin_lock(&l_mg->close_lock);
@@ -427,6 +412,8 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
spin_unlock(&l_mg->close_lock);
}
+ pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
ret = pblk_submit_io(pblk, rqd);
if (ret) {
pr_err("pblk: emeta I/O submission failed: %d\n", ret);
@@ -436,10 +423,13 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
return NVM_IO_OK;
fail_rollback:
+ pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
spin_lock(&l_mg->close_lock);
pblk_dealloc_page(pblk, meta_line, rq_ppas);
list_add(&meta_line->list, &meta_line->list);
spin_unlock(&l_mg->close_lock);
+
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
fail_free_bio:
if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
bio_put(bio);
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 15931381348c..0c5692cc2f60 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -739,8 +739,10 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
unsigned long secs_to_flush);
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
unsigned long *lun_bitmap);
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
unsigned long *lun_bitmap);
void pblk_end_bio_sync(struct bio *bio);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 10cabe961bdb..2edbcc2d7d3f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1279,7 +1279,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
clone->bi_iter.bi_size = to_bytes(len);
if (unlikely(bio_integrity(bio) != NULL))
- bio_integrity_trim(clone, 0, len);
+ bio_integrity_trim(clone);
return 0;
}
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index f12d23c49771..345acca576b3 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -106,7 +106,8 @@ static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
len -= cur_len;
dev_offset += cur_len;
- bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+ if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+ return -EIO;
}
return err;
@@ -179,16 +180,8 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
int err = 0, rw;
bool do_acct;
- /*
- * bio_integrity_enabled also checks if the bio already has an
- * integrity payload attached. If it does, we *don't* do a
- * bio_integrity_prep here - the payload has been generated by
- * another kernel subsystem, and we just pass it through.
- */
- if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
- bio->bi_status = BLK_STS_IOERR;
- goto out;
- }
+ if (!bio_integrity_prep(bio))
+ return BLK_QC_T_NONE;
bip = bio_integrity(bio);
nsblk = q->queuedata;
@@ -212,7 +205,6 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
if (do_acct)
nd_iostat_end(bio, start);
- out:
bio_endio(bio);
return BLK_QC_T_NONE;
}
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 64216dea5278..14323faf8bd9 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -985,7 +985,8 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
len -= cur_len;
meta_nsoff += cur_len;
- bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+ if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+ return -EIO;
}
return ret;
@@ -1203,16 +1204,8 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
int err = 0;
bool do_acct;
- /*
- * bio_integrity_enabled also checks if the bio already has an
- * integrity payload attached. If it does, we *don't* do a
- * bio_integrity_prep here - the payload has been generated by
- * another kernel subsystem, and we just pass it through.
- */
- if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
- bio->bi_status = BLK_STS_IOERR;
- goto out;
- }
+ if (!bio_integrity_prep(bio))
+ return BLK_QC_T_NONE;
do_acct = nd_iostat_start(bio, &start);
bio_for_each_segment(bvec, bio, iter) {
@@ -1239,7 +1232,6 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
if (do_acct)
nd_iostat_end(bio, start);
-out:
bio_endio(bio);
return BLK_QC_T_NONE;
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d70df1d0072d..cb96f4a7ae3a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -131,7 +131,7 @@ void nvme_complete_rq(struct request *req)
{
if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
nvme_req(req)->retries++;
- blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
+ blk_mq_requeue_request(req, true);
return;
}
@@ -2591,12 +2591,29 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl)
spin_unlock(&dev_list_lock);
}
-void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
{
+ nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work);
- nvme_remove_namespaces(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
+void nvme_start_ctrl(struct nvme_ctrl *ctrl)
+{
+ if (ctrl->kato)
+ nvme_start_keep_alive(ctrl);
+
+ if (ctrl->queue_count > 1) {
+ nvme_queue_scan(ctrl);
+ nvme_queue_async_events(ctrl);
+ nvme_start_queues(ctrl);
+ }
+}
+EXPORT_SYMBOL_GPL(nvme_start_ctrl);
+
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+{
device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
spin_lock(&dev_list_lock);
@@ -2694,9 +2711,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
/* Forcibly unquiesce queues to avoid blocking dispatch */
blk_mq_unquiesce_queue(ctrl->admin_q);
- /* Forcibly start all queues to avoid having stuck requests */
- blk_mq_start_hw_queues(ctrl->admin_q);
-
list_for_each_entry(ns, &ctrl->namespaces, list) {
/*
* Revalidating a dead namespace sets capacity to 0. This will
@@ -2709,16 +2723,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
/* Forcibly unquiesce queues to avoid blocking dispatch */
blk_mq_unquiesce_queue(ns->queue);
-
- /*
- * Forcibly start all queues to avoid having stuck requests.
- * Note that we must ensure the queues are not stopped
- * when the final removal happens.
- */
- blk_mq_start_hw_queues(ns->queue);
-
- /* draining requests in requeue list */
- blk_mq_kick_requeue_list(ns->queue);
}
mutex_unlock(&ctrl->namespaces_mutex);
}
@@ -2787,10 +2791,8 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
struct nvme_ns *ns;
mutex_lock(&ctrl->namespaces_mutex);
- list_for_each_entry(ns, &ctrl->namespaces, list) {
+ list_for_each_entry(ns, &ctrl->namespaces, list)
blk_mq_unquiesce_queue(ns->queue);
- blk_mq_kick_requeue_list(ns->queue);
- }
mutex_unlock(&ctrl->namespaces_mutex);
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index ed87214fdc0e..d666ada39a9b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -148,13 +148,10 @@ struct nvme_fc_ctrl {
struct device *dev;
struct nvme_fc_lport *lport;
struct nvme_fc_rport *rport;
- u32 queue_count;
u32 cnum;
u64 association_id;
- u64 cap;
-
struct list_head ctrl_list; /* rport->ctrl_list */
struct blk_mq_tag_set admin_tag_set;
@@ -1614,7 +1611,7 @@ nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
{
int i;
- for (i = 1; i < ctrl->queue_count; i++)
+ for (i = 1; i < ctrl->ctrl.queue_count; i++)
nvme_fc_free_queue(&ctrl->queues[i]);
}
@@ -1635,10 +1632,10 @@ __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl,
static void
nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl)
{
- struct nvme_fc_queue *queue = &ctrl->queues[ctrl->queue_count - 1];
+ struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1];
int i;
- for (i = ctrl->queue_count - 1; i >= 1; i--, queue--)
+ for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--)
__nvme_fc_delete_hw_queue(ctrl, queue, i);
}
@@ -1648,7 +1645,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
struct nvme_fc_queue *queue = &ctrl->queues[1];
int i, ret;
- for (i = 1; i < ctrl->queue_count; i++, queue++) {
+ for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) {
ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
if (ret)
goto delete_queues;
@@ -1667,7 +1664,7 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
{
int i, ret = 0;
- for (i = 1; i < ctrl->queue_count; i++) {
+ for (i = 1; i < ctrl->ctrl.queue_count; i++) {
ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize,
(qsize / 5));
if (ret)
@@ -1685,7 +1682,7 @@ nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
{
int i;
- for (i = 1; i < ctrl->queue_count; i++)
+ for (i = 1; i < ctrl->ctrl.queue_count; i++)
nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize);
}
@@ -1706,6 +1703,7 @@ nvme_fc_ctrl_free(struct kref *ref)
list_del(&ctrl->ctrl_list);
spin_unlock_irqrestore(&ctrl->rport->lock, flags);
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
blk_cleanup_queue(ctrl->ctrl.admin_q);
blk_mq_free_tag_set(&ctrl->admin_tag_set);
@@ -1969,10 +1967,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
if (ret != -EBUSY)
return BLK_STS_IOERR;
- if (op->rq) {
- blk_mq_stop_hw_queues(op->rq->q);
- blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
- }
+ if (op->rq)
+ blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
+
return BLK_STS_RESOURCE;
}
@@ -2178,17 +2175,20 @@ static int
nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+ unsigned int nr_io_queues;
int ret;
- ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+ nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+ ctrl->lport->ops->max_hw_queues);
+ ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret) {
dev_info(ctrl->ctrl.device,
"set_queue_count failed: %d\n", ret);
return ret;
}
- ctrl->queue_count = opts->nr_io_queues + 1;
- if (!opts->nr_io_queues)
+ ctrl->ctrl.queue_count = nr_io_queues + 1;
+ if (!nr_io_queues)
return 0;
nvme_fc_init_io_queues(ctrl);
@@ -2204,7 +2204,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
sizeof(struct scatterlist)) +
ctrl->lport->ops->fcprqst_priv_sz;
ctrl->tag_set.driver_data = ctrl;
- ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+ ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
@@ -2232,7 +2232,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
out_delete_hw_queues:
nvme_fc_delete_hw_io_queues(ctrl);
out_cleanup_blk_queue:
- nvme_stop_keep_alive(&ctrl->ctrl);
blk_cleanup_queue(ctrl->ctrl.connect_q);
out_free_tag_set:
blk_mq_free_tag_set(&ctrl->tag_set);
@@ -2248,17 +2247,21 @@ static int
nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+ unsigned int nr_io_queues;
int ret;
- ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+ nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+ ctrl->lport->ops->max_hw_queues);
+ ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret) {
dev_info(ctrl->ctrl.device,
"set_queue_count failed: %d\n", ret);
return ret;
}
+ ctrl->ctrl.queue_count = nr_io_queues + 1;
/* check for io queues existing */
- if (ctrl->queue_count == 1)
+ if (ctrl->ctrl.queue_count == 1)
return 0;
nvme_fc_init_io_queues(ctrl);
@@ -2275,6 +2278,8 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
if (ret)
goto out_delete_hw_queues;
+ blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+
return 0;
out_delete_hw_queues:
@@ -2316,7 +2321,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
goto out_delete_hw_queue;
if (ctrl->ctrl.state != NVME_CTRL_NEW)
- blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
ret = nvmf_connect_admin_queue(&ctrl->ctrl);
if (ret)
@@ -2329,7 +2334,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
* prior connection values
*/
- ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+ ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
if (ret) {
dev_err(ctrl->ctrl.device,
"prop_get NVME_REG_CAP failed\n");
@@ -2337,9 +2342,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
}
ctrl->ctrl.sqsize =
- min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+ min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize);
- ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+ ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
if (ret)
goto out_disconnect_admin_queue;
@@ -2360,8 +2365,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
goto out_disconnect_admin_queue;
}
- nvme_start_keep_alive(&ctrl->ctrl);
-
/* FC-NVME supports normal SGL Data Block Descriptors */
if (opts->queue_size > ctrl->ctrl.maxcmd) {
@@ -2381,7 +2384,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
* Create the io queues
*/
- if (ctrl->queue_count > 1) {
+ if (ctrl->ctrl.queue_count > 1) {
if (ctrl->ctrl.state == NVME_CTRL_NEW)
ret = nvme_fc_create_io_queues(ctrl);
else
@@ -2395,17 +2398,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
ctrl->ctrl.nr_reconnects = 0;
- if (ctrl->queue_count > 1) {
- nvme_start_queues(&ctrl->ctrl);
- nvme_queue_scan(&ctrl->ctrl);
- nvme_queue_async_events(&ctrl->ctrl);
- }
+ nvme_start_ctrl(&ctrl->ctrl);
return 0; /* Success */
out_term_aen_ops:
nvme_fc_term_aen_ops(ctrl);
- nvme_stop_keep_alive(&ctrl->ctrl);
out_disconnect_admin_queue:
/* send a Disconnect(association) LS to fc-nvme target */
nvme_fc_xmt_disconnect_assoc(ctrl);
@@ -2428,8 +2426,6 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
{
unsigned long flags;
- nvme_stop_keep_alive(&ctrl->ctrl);
-
spin_lock_irqsave(&ctrl->lock, flags);
ctrl->flags |= FCCTRL_TERMIO;
ctrl->iocnt = 0;
@@ -2447,7 +2443,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
* io requests back to the block layer as part of normal completions
* (but with error status).
*/
- if (ctrl->queue_count > 1) {
+ if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
@@ -2470,7 +2466,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
* use blk_mq_tagset_busy_itr() and the transport routine to
* terminate the exchanges.
*/
- blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+ blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
@@ -2511,7 +2507,8 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
cancel_work_sync(&ctrl->ctrl.reset_work);
cancel_delayed_work_sync(&ctrl->connect_work);
-
+ nvme_stop_ctrl(&ctrl->ctrl);
+ nvme_remove_namespaces(&ctrl->ctrl);
/*
* kill the association on the link side. this will block
* waiting for io to terminate
@@ -2606,6 +2603,7 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
int ret;
+ nvme_stop_ctrl(&ctrl->ctrl);
/* will block will waiting for io to terminate */
nvme_fc_delete_association(ctrl);
@@ -2702,18 +2700,17 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
spin_lock_init(&ctrl->lock);
/* io queue count */
- ctrl->queue_count = min_t(unsigned int,
+ ctrl->ctrl.queue_count = min_t(unsigned int,
opts->nr_io_queues,
lport->ops->max_hw_queues);
- opts->nr_io_queues = ctrl->queue_count; /* so opts has valid value */
- ctrl->queue_count++; /* +1 for admin queue */
+ ctrl->ctrl.queue_count++; /* +1 for admin queue */
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;
ret = -ENOMEM;
- ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue),
- GFP_KERNEL);
+ ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
+ sizeof(struct nvme_fc_queue), GFP_KERNEL);
if (!ctrl->queues)
goto out_free_ida;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d70ff0fdd36b..8f2a168ddc01 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -142,7 +142,9 @@ struct nvme_ctrl {
u16 cntlid;
u32 ctrl_config;
+ u32 queue_count;
+ u64 cap;
u32 page_size;
u32 max_hw_sectors;
u16 oncs;
@@ -278,6 +280,8 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
const struct nvme_ctrl_ops *ops, unsigned long quirks);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
+void nvme_start_ctrl(struct nvme_ctrl *ctrl);
+void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b7a84c523475..d10d2f279d19 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -35,7 +35,6 @@
#include "nvme.h"
-#define NVME_Q_DEPTH 1024
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
@@ -57,6 +56,16 @@ module_param(max_host_mem_size_mb, uint, 0444);
MODULE_PARM_DESC(max_host_mem_size_mb,
"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops io_queue_depth_ops = {
+ .set = io_queue_depth_set,
+ .get = param_get_int,
+};
+
+static int io_queue_depth = 1024;
+module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+
struct nvme_dev;
struct nvme_queue;
@@ -74,7 +83,6 @@ struct nvme_dev {
struct device *dev;
struct dma_pool *prp_page_pool;
struct dma_pool *prp_small_pool;
- unsigned queue_count;
unsigned online_queues;
unsigned max_qid;
int q_depth;
@@ -105,6 +113,17 @@ struct nvme_dev {
void **host_mem_desc_bufs;
};
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
+{
+ int n = 0, ret;
+
+ ret = kstrtoint(val, 10, &n);
+ if (ret != 0 || n < 2)
+ return -EINVAL;
+
+ return param_set_int(val, kp);
+}
+
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
{
return qid * 2 * stride;
@@ -1099,9 +1118,9 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
int i;
- for (i = dev->queue_count - 1; i >= lowest; i--) {
+ for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
struct nvme_queue *nvmeq = dev->queues[i];
- dev->queue_count--;
+ dev->ctrl.queue_count--;
dev->queues[i] = NULL;
nvme_free_queue(nvmeq);
}
@@ -1126,7 +1145,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
spin_unlock_irq(&nvmeq->q_lock);
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
- blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
+ blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq);
@@ -1145,8 +1164,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
if (shutdown)
nvme_shutdown_ctrl(&dev->ctrl);
else
- nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
- dev->bar + NVME_REG_CAP));
+ nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
@@ -1221,7 +1239,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->qid = qid;
nvmeq->cq_vector = -1;
dev->queues[qid] = nvmeq;
- dev->queue_count++;
+ dev->ctrl.queue_count++;
return nvmeq;
@@ -1317,7 +1335,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
* user requests may be waiting on a stopped queue. Start the
* queue to flush these to completion.
*/
- blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+ blk_mq_unquiesce_queue(dev->ctrl.admin_q);
blk_cleanup_queue(dev->ctrl.admin_q);
blk_mq_free_tag_set(&dev->admin_tagset);
}
@@ -1354,7 +1372,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
return -ENODEV;
}
} else
- blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+ blk_mq_unquiesce_queue(dev->ctrl.admin_q);
return 0;
}
@@ -1385,11 +1403,10 @@ static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
return 0;
}
-static int nvme_configure_admin_queue(struct nvme_dev *dev)
+static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
{
int result;
u32 aqa;
- u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq;
result = nvme_remap_bar(dev, db_bar_size(dev, 0));
@@ -1397,13 +1414,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
return result;
dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
- NVME_CAP_NSSRC(cap) : 0;
+ NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
if (dev->subsystem &&
(readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
- result = nvme_disable_ctrl(&dev->ctrl, cap);
+ result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
if (result < 0)
return result;
@@ -1422,7 +1439,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
- result = nvme_enable_ctrl(&dev->ctrl, cap);
+ result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
if (result)
return result;
@@ -1441,7 +1458,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
unsigned i, max;
int ret = 0;
- for (i = dev->queue_count; i <= dev->max_qid; i++) {
+ for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
/* vector == qid - 1, match nvme_create_queue */
if (!nvme_alloc_queue(dev, i, dev->q_depth,
pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
@@ -1450,7 +1467,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
}
}
- max = min(dev->max_qid, dev->queue_count - 1);
+ max = min(dev->max_qid, dev->ctrl.queue_count - 1);
for (i = dev->online_queues; i <= max; i++) {
ret = nvme_create_queue(dev->queues[i], i);
if (ret)
@@ -1585,9 +1602,10 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{
struct nvme_host_mem_buf_desc *descs;
- u32 chunk_size, max_entries, i = 0;
+ u32 chunk_size, max_entries;
+ int i = 0;
void **bufs;
- u64 size, tmp;
+ u64 size = 0, tmp;
/* start big and work our way down */
chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
@@ -1866,7 +1884,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
static int nvme_pci_enable(struct nvme_dev *dev)
{
- u64 cap;
int result = -ENOMEM;
struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -1893,10 +1910,11 @@ static int nvme_pci_enable(struct nvme_dev *dev)
if (result < 0)
return result;
- cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+ dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
- dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
- dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
+ dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+ io_queue_depth);
+ dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
dev->dbs = dev->bar + 4096;
/*
@@ -1908,6 +1926,12 @@ static int nvme_pci_enable(struct nvme_dev *dev)
dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
"set queue depth=%u to work around controller resets\n",
dev->q_depth);
+ } else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
+ (pdev->device == 0xa821 || pdev->device == 0xa822) &&
+ NVME_CAP_MQES(dev->ctrl.cap) == 0) {
+ dev->q_depth = 64;
+ dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
+ "set queue depth=%u\n", dev->q_depth);
}
/*
@@ -1996,7 +2020,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_stop_queues(&dev->ctrl);
queues = dev->online_queues - 1;
- for (i = dev->queue_count - 1; i > 0; i--)
+ for (i = dev->ctrl.queue_count - 1; i > 0; i--)
nvme_suspend_queue(dev->queues[i]);
if (dead) {
@@ -2004,7 +2028,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
* probe, before the admin queue is configured. Thus,
* queue_count can be 0 here.
*/
- if (dev->queue_count)
+ if (dev->ctrl.queue_count)
nvme_suspend_queue(dev->queues[0]);
} else {
nvme_disable_io_queues(dev, queues);
@@ -2094,7 +2118,7 @@ static void nvme_reset_work(struct work_struct *work)
if (result)
goto out;
- result = nvme_configure_admin_queue(dev);
+ result = nvme_pci_configure_admin_queue(dev);
if (result)
goto out;
@@ -2133,15 +2157,6 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
/*
- * A controller that can not execute IO typically requires user
- * intervention to correct. For such degraded controllers, the driver
- * should not submit commands the user did not request, so skip
- * registering for asynchronous event notification on this condition.
- */
- if (dev->online_queues > 1)
- nvme_queue_async_events(&dev->ctrl);
-
- /*
* Keep the controller around but remove all namespaces if we don't have
* any working I/O queue.
*/
@@ -2161,8 +2176,7 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
- if (dev->online_queues > 1)
- nvme_queue_scan(&dev->ctrl);
+ nvme_start_ctrl(&dev->ctrl);
return;
out:
@@ -2341,11 +2355,13 @@ static void nvme_remove(struct pci_dev *pdev)
}
flush_work(&dev->ctrl.reset_work);
- nvme_uninit_ctrl(&dev->ctrl);
+ nvme_stop_ctrl(&dev->ctrl);
+ nvme_remove_namespaces(&dev->ctrl);
nvme_dev_disable(dev, true);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
+ nvme_uninit_ctrl(&dev->ctrl);
nvme_release_prp_pools(dev);
nvme_dev_unmap(dev);
nvme_put_ctrl(&dev->ctrl);
@@ -2458,6 +2474,10 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6d4119dfbdaa..da04df1af231 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -86,7 +86,7 @@ enum nvme_rdma_queue_flags {
struct nvme_rdma_queue {
struct nvme_rdma_qe *rsp_ring;
- u8 sig_count;
+ atomic_t sig_count;
int queue_size;
size_t cmnd_capsule_len;
struct nvme_rdma_ctrl *ctrl;
@@ -103,7 +103,6 @@ struct nvme_rdma_queue {
struct nvme_rdma_ctrl {
/* read only in the hot path */
struct nvme_rdma_queue *queues;
- u32 queue_count;
/* other member variables */
struct blk_mq_tag_set tag_set;
@@ -119,7 +118,6 @@ struct nvme_rdma_ctrl {
struct blk_mq_tag_set admin_tag_set;
struct nvme_rdma_device *device;
- u64 cap;
u32 max_fr_pages;
struct sockaddr_storage addr;
@@ -274,9 +272,6 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq)
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
int ret = 0;
- if (!req->mr->need_inval)
- goto out;
-
ib_dereg_mr(req->mr);
req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
@@ -349,7 +344,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
struct nvme_rdma_ctrl *ctrl = data;
struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1];
- BUG_ON(hctx_idx >= ctrl->queue_count);
+ BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
hctx->driver_data = queue;
return 0;
@@ -525,6 +520,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
queue->cmnd_capsule_len = sizeof(struct nvme_command);
queue->queue_size = queue_size;
+ atomic_set(&queue->sig_count, 0);
queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
RDMA_PS_TCP, IB_QPT_RC);
@@ -587,7 +583,7 @@ static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
{
int i;
- for (i = 1; i < ctrl->queue_count; i++)
+ for (i = 1; i < ctrl->ctrl.queue_count; i++)
nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
}
@@ -595,7 +591,7 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl)
{
int i, ret = 0;
- for (i = 1; i < ctrl->queue_count; i++) {
+ for (i = 1; i < ctrl->ctrl.queue_count; i++) {
ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
if (ret) {
dev_info(ctrl->ctrl.device,
@@ -623,14 +619,14 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
if (ret)
return ret;
- ctrl->queue_count = nr_io_queues + 1;
- if (ctrl->queue_count < 2)
+ ctrl->ctrl.queue_count = nr_io_queues + 1;
+ if (ctrl->ctrl.queue_count < 2)
return 0;
dev_info(ctrl->ctrl.device,
"creating %d I/O queues.\n", nr_io_queues);
- for (i = 1; i < ctrl->queue_count; i++) {
+ for (i = 1; i < ctrl->ctrl.queue_count; i++) {
ret = nvme_rdma_init_queue(ctrl, i,
ctrl->ctrl.opts->queue_size);
if (ret) {
@@ -705,7 +701,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
++ctrl->ctrl.nr_reconnects;
- if (ctrl->queue_count > 1) {
+ if (ctrl->ctrl.queue_count > 1) {
nvme_rdma_free_io_queues(ctrl);
ret = blk_mq_reinit_tagset(&ctrl->tag_set);
@@ -729,13 +725,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
- ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+ ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
if (ret)
goto requeue;
- nvme_start_keep_alive(&ctrl->ctrl);
-
- if (ctrl->queue_count > 1) {
+ if (ctrl->ctrl.queue_count > 1) {
ret = nvme_rdma_init_io_queues(ctrl);
if (ret)
goto requeue;
@@ -743,16 +737,16 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
ret = nvme_rdma_connect_io_queues(ctrl);
if (ret)
goto requeue;
+
+ blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+ ctrl->ctrl.queue_count - 1);
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
ctrl->ctrl.nr_reconnects = 0;
- if (ctrl->queue_count > 1) {
- nvme_queue_scan(&ctrl->ctrl);
- nvme_queue_async_events(&ctrl->ctrl);
- }
+ nvme_start_ctrl(&ctrl->ctrl);
dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
@@ -770,17 +764,17 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
struct nvme_rdma_ctrl, err_work);
int i;
- nvme_stop_keep_alive(&ctrl->ctrl);
+ nvme_stop_ctrl(&ctrl->ctrl);
- for (i = 0; i < ctrl->queue_count; i++)
+ for (i = 0; i < ctrl->ctrl.queue_count; i++)
clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
- if (ctrl->queue_count > 1)
+ if (ctrl->ctrl.queue_count > 1)
nvme_stop_queues(&ctrl->ctrl);
- blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+ blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
/* We must take care of fastfail/requeue all our inflight requests */
- if (ctrl->queue_count > 1)
+ if (ctrl->ctrl.queue_count > 1)
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
@@ -790,7 +784,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
* queues are not a live anymore, so restart the queues to fail fast
* new IO
*/
- blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_start_queues(&ctrl->ctrl);
nvme_rdma_reconnect_or_remove(ctrl);
@@ -1008,17 +1002,16 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
nvme_rdma_wr_error(cq, wc, "SEND");
}
-static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
+/*
+ * We want to signal completion at least every queue depth/2. This returns the
+ * largest power of two that is not above half of (queue size + 1) to optimize
+ * (avoid divisions).
+ */
+static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
{
- int sig_limit;
+ int limit = 1 << ilog2((queue->queue_size + 1) / 2);
- /*
- * We signal completion every queue depth/2 and also handle the
- * degenerated case of a device with queue_depth=1, where we
- * would need to signal every message.
- */
- sig_limit = max(queue->queue_size / 2, 1);
- return (++queue->sig_count % sig_limit) == 0;
+ return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0;
}
static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
@@ -1574,7 +1567,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
- error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+ error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP,
+ &ctrl->ctrl.cap);
if (error) {
dev_err(ctrl->ctrl.device,
"prop_get NVME_REG_CAP failed\n");
@@ -1582,9 +1576,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
}
ctrl->ctrl.sqsize =
- min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
+ min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
- error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+ error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
if (error)
goto out_cleanup_queue;
@@ -1601,8 +1595,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
if (error)
goto out_cleanup_queue;
- nvme_start_keep_alive(&ctrl->ctrl);
-
return 0;
out_cleanup_queue:
@@ -1620,11 +1612,10 @@ out_free_queue:
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
{
- nvme_stop_keep_alive(&ctrl->ctrl);
cancel_work_sync(&ctrl->err_work);
cancel_delayed_work_sync(&ctrl->reconnect_work);
- if (ctrl->queue_count > 1) {
+ if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
@@ -1634,18 +1625,21 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
nvme_shutdown_ctrl(&ctrl->ctrl);
- blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+ blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_destroy_admin_queue(ctrl);
}
static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
- nvme_uninit_ctrl(&ctrl->ctrl);
+ nvme_stop_ctrl(&ctrl->ctrl);
+ nvme_remove_namespaces(&ctrl->ctrl);
if (shutdown)
nvme_rdma_shutdown_ctrl(ctrl);
+ nvme_uninit_ctrl(&ctrl->ctrl);
if (ctrl->ctrl.tagset) {
blk_cleanup_queue(ctrl->ctrl.connect_q);
blk_mq_free_tag_set(&ctrl->tag_set);
@@ -1707,6 +1701,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
int ret;
bool changed;
+ nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl);
ret = nvme_rdma_configure_admin_queue(ctrl);
@@ -1716,7 +1711,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
goto del_dead_ctrl;
}
- if (ctrl->queue_count > 1) {
+ if (ctrl->ctrl.queue_count > 1) {
ret = blk_mq_reinit_tagset(&ctrl->tag_set);
if (ret)
goto del_dead_ctrl;
@@ -1728,16 +1723,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
ret = nvme_rdma_connect_io_queues(ctrl);
if (ret)
goto del_dead_ctrl;
+
+ blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+ ctrl->ctrl.queue_count - 1);
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
- if (ctrl->queue_count > 1) {
- nvme_start_queues(&ctrl->ctrl);
- nvme_queue_scan(&ctrl->ctrl);
- nvme_queue_async_events(&ctrl->ctrl);
- }
+ nvme_start_ctrl(&ctrl->ctrl);
return;
@@ -1785,7 +1779,7 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl)
ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) +
SG_CHUNK_SIZE * sizeof(struct scatterlist);
ctrl->tag_set.driver_data = ctrl;
- ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+ ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
@@ -1863,12 +1857,12 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
- ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
+ ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;
ret = -ENOMEM;
- ctrl->queues = kcalloc(ctrl->queue_count, sizeof(*ctrl->queues),
+ ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
GFP_KERNEL);
if (!ctrl->queues)
goto out_uninit_ctrl;
@@ -1925,15 +1919,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
mutex_unlock(&nvme_rdma_ctrl_mutex);
- if (opts->nr_io_queues) {
- nvme_queue_scan(&ctrl->ctrl);
- nvme_queue_async_events(&ctrl->ctrl);
- }
+ nvme_start_ctrl(&ctrl->ctrl);
return &ctrl->ctrl;
out_remove_admin_queue:
- nvme_stop_keep_alive(&ctrl->ctrl);
nvme_rdma_destroy_admin_queue(ctrl);
out_kfree_queues:
kfree(ctrl->queues);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 7692a96c9065..1e6dcc241b3c 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1164,18 +1164,24 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
memset(acc, 0, sizeof(*acc));
- if (iod->rqstdatalen < sizeof(struct fcnvme_ls_cr_assoc_rqst))
+ /*
+ * FC-NVME spec changes. There are initiators sending different
+ * lengths as padding sizes for Create Association Cmd descriptor
+ * was incorrect.
+ * Accept anything of "minimum" length. Assume format per 1.15
+ * spec (with HOSTID reduced to 16 bytes), ignore how long the
+ * trailing pad length is.
+ */
+ if (iod->rqstdatalen < FCNVME_LSDESC_CRA_RQST_MINLEN)
ret = VERR_CR_ASSOC_LEN;
- else if (rqst->desc_list_len !=
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_ls_cr_assoc_rqst)))
+ else if (rqst->desc_list_len <
+ cpu_to_be32(FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN))
ret = VERR_CR_ASSOC_RQST_LEN;
else if (rqst->assoc_cmd.desc_tag !=
cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD))
ret = VERR_CR_ASSOC_CMD;
- else if (rqst->assoc_cmd.desc_len !=
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)))
+ else if (rqst->assoc_cmd.desc_len <
+ cpu_to_be32(FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN))
ret = VERR_CR_ASSOC_CMD_LEN;
else if (!rqst->assoc_cmd.ersp_ratio ||
(be16_to_cpu(rqst->assoc_cmd.ersp_ratio) >=
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 40128793e613..3b4d47a6abdb 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -85,7 +85,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
bio_set_op_attrs(bio, op, op_flags);
bio_chain(bio, prev);
- cookie = submit_bio(prev);
+ submit_bio(prev);
}
sector += sg->length >> 9;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 5f55c683b338..717ed7ddb2f6 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -44,12 +44,10 @@ struct nvme_loop_iod {
struct nvme_loop_ctrl {
struct nvme_loop_queue *queues;
- u32 queue_count;
struct blk_mq_tag_set admin_tag_set;
struct list_head list;
- u64 cap;
struct blk_mq_tag_set tag_set;
struct nvme_loop_iod async_event_iod;
struct nvme_ctrl ctrl;
@@ -241,7 +239,7 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
struct nvme_loop_ctrl *ctrl = data;
struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1];
- BUG_ON(hctx_idx >= ctrl->queue_count);
+ BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
hctx->driver_data = queue;
return 0;
@@ -307,7 +305,7 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
{
int i;
- for (i = 1; i < ctrl->queue_count; i++)
+ for (i = 1; i < ctrl->ctrl.queue_count; i++)
nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
}
@@ -330,7 +328,7 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
if (ret)
goto out_destroy_queues;
- ctrl->queue_count++;
+ ctrl->ctrl.queue_count++;
}
return 0;
@@ -344,7 +342,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl)
{
int i, ret;
- for (i = 1; i < ctrl->queue_count; i++) {
+ for (i = 1; i < ctrl->ctrl.queue_count; i++) {
ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
if (ret)
return ret;
@@ -372,7 +370,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
error = nvmet_sq_init(&ctrl->queues[0].nvme_sq);
if (error)
return error;
- ctrl->queue_count = 1;
+ ctrl->ctrl.queue_count = 1;
error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
if (error)
@@ -388,7 +386,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
if (error)
goto out_cleanup_queue;
- error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+ error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
if (error) {
dev_err(ctrl->ctrl.device,
"prop_get NVME_REG_CAP failed\n");
@@ -396,9 +394,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
}
ctrl->ctrl.sqsize =
- min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
+ min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
- error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+ error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
if (error)
goto out_cleanup_queue;
@@ -409,8 +407,6 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
if (error)
goto out_cleanup_queue;
- nvme_start_keep_alive(&ctrl->ctrl);
-
return 0;
out_cleanup_queue:
@@ -424,9 +420,7 @@ out_free_sq:
static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
{
- nvme_stop_keep_alive(&ctrl->ctrl);
-
- if (ctrl->queue_count > 1) {
+ if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
@@ -436,9 +430,10 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
if (ctrl->ctrl.state == NVME_CTRL_LIVE)
nvme_shutdown_ctrl(&ctrl->ctrl);
- blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+ blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_loop_destroy_admin_queue(ctrl);
}
@@ -447,8 +442,10 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work)
struct nvme_loop_ctrl *ctrl = container_of(work,
struct nvme_loop_ctrl, delete_work);
- nvme_uninit_ctrl(&ctrl->ctrl);
+ nvme_stop_ctrl(&ctrl->ctrl);
+ nvme_remove_namespaces(&ctrl->ctrl);
nvme_loop_shutdown_ctrl(ctrl);
+ nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
@@ -496,6 +493,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
bool changed;
int ret;
+ nvme_stop_ctrl(&ctrl->ctrl);
nvme_loop_shutdown_ctrl(ctrl);
ret = nvme_loop_configure_admin_queue(ctrl);
@@ -510,13 +508,13 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
if (ret)
goto out_destroy_io;
+ blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+ ctrl->ctrl.queue_count - 1);
+
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
- nvme_queue_scan(&ctrl->ctrl);
- nvme_queue_async_events(&ctrl->ctrl);
-
- nvme_start_queues(&ctrl->ctrl);
+ nvme_start_ctrl(&ctrl->ctrl);
return;
@@ -559,7 +557,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
SG_CHUNK_SIZE * sizeof(struct scatterlist);
ctrl->tag_set.driver_data = ctrl;
- ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+ ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
ctrl->ctrl.tagset = &ctrl->tag_set;
@@ -651,10 +649,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
mutex_unlock(&nvme_loop_ctrl_mutex);
- if (opts->nr_io_queues) {
- nvme_queue_scan(&ctrl->ctrl);
- nvme_queue_async_events(&ctrl->ctrl);
- }
+ nvme_start_ctrl(&ctrl->ctrl);
return &ctrl->ctrl;
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index cfe1d01eb73f..adc784539061 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -26,6 +26,7 @@
#include <linux/export.h>
#include <linux/delay.h>
#include <asm/unaligned.h>
+#include <linux/t10-pi.h>
#include <linux/crc-t10dif.h>
#include <net/checksum.h>
@@ -2934,8 +2935,8 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
* First check to see if a protection data
* check is valid
*/
- if ((src->ref_tag == 0xffffffff) ||
- (src->app_tag == 0xffff)) {
+ if ((src->ref_tag == T10_PI_REF_ESCAPE) ||
+ (src->app_tag == T10_PI_APP_ESCAPE)) {
start_ref_tag++;
goto skipit;
}
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 6c6e624a5aa6..7b3b702ef622 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -2040,9 +2040,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
* For type 3: ref & app tag is all 'f's
* For type 0,1,2: app tag is all 'f's
*/
- if ((a_app_tag == 0xffff) &&
+ if ((a_app_tag == T10_PI_APP_ESCAPE) &&
((scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3) ||
- (a_ref_tag == 0xffffffff))) {
+ (a_ref_tag == T10_PI_REF_ESCAPE))) {
uint32_t blocks_done, resid;
sector_t lba_s = scsi_get_lba(cmd);
@@ -2084,9 +2084,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
spt = page_address(sg_page(sg)) + sg->offset;
spt += j;
- spt->app_tag = 0xffff;
+ spt->app_tag = T10_PI_APP_ESCAPE;
if (scsi_get_prot_type(cmd) == SCSI_PROT_DIF_TYPE3)
- spt->ref_tag = 0xffffffff;
+ spt->ref_tag = T10_PI_REF_ESCAPE;
}
return 0;
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 4316f7b65fb7..dc9456e7dac9 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -1450,7 +1450,7 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors,
(unsigned long long)sector, sdt->guard_tag,
sdt->app_tag, be32_to_cpu(sdt->ref_tag));
- if (sdt->app_tag == cpu_to_be16(0xffff)) {
+ if (sdt->app_tag == T10_PI_APP_ESCAPE) {
dsg_off += block_size;
goto next;
}