summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-03-03 10:53:35 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-03-03 10:53:35 -0800
commite0d072250a54669dce876d8ade70e417356aae74 (patch)
treeecbb2fc170349231f3885749f07748779225805f /drivers
parent1827adb11ad26b2290dc9fe2aaf54976b2439865 (diff)
parent165a5e22fafb127ecb5914e12e8c32a1f0d3f820 (diff)
downloadlinux-e0d072250a54669dce876d8ade70e417356aae74.tar.bz2
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block layer fixes from Jens Axboe: "A collection of fixes for this merge window, either fixes for existing issues, or parts that were waiting for acks to come in. This pull request contains: - Allocation of nvme queues on the right node from Shaohua. This was ready long before the merge window, but waiting on an ack from Bjorn on the PCI bit. Now that we have that, the three patches can go in. - Two fixes for blk-mq-sched with nvmeof, which uses hctx specific request allocations. This caused an oops. One part from Sagi, one part from Omar. - A loop partition scan deadlock fix from Omar, fixing a regression in this merge window. - A three-patch series from Keith, closing up a hole on clearing out requests on shutdown/resume. - A stable fix for nbd from Josef, fixing a leak of sockets. - Two fixes for a regression in this window from Jan, fixing a problem with one of his earlier patches dealing with queue vs bdi life times. - A fix for a regression with virtio-blk, causing an IO stall if scheduling is used. From me. - A fix for an io context lock ordering problem. From me" * 'for-linus' of git://git.kernel.dk/linux-block: block: Move bdi_unregister() to del_gendisk() blk-mq: ensure that bd->last is always set correctly block: don't call ioc_exit_icq() with the queue lock held for blk-mq block: Initialize bd_bdi on inode initialization loop: fix LO_FLAGS_PARTSCAN hang nvme: Complete all stuck requests blk-mq: Provide freeze queue timeout blk-mq: Export blk_mq_freeze_queue_wait nbd: stop leaking sockets blk-mq: move update of tags->rqs to __blk_mq_alloc_request() blk-mq: kill blk_mq_set_alloc_data() blk-mq: make blk_mq_alloc_request_hctx() allocate a scheduler request blk-mq-sched: Allocate sched reserved tags as specified in the original queue tagset nvme: allocate nvme_queue in correct node PCI: add an API to get node from vector blk-mq: allocate blk_mq_tags and requests in correct node
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/loop.c15
-rw-r--r--drivers/block/nbd.c4
-rw-r--r--drivers/nvme/host/core.c47
-rw-r--r--drivers/nvme/host/nvme.h4
-rw-r--r--drivers/nvme/host/pci.c45
-rw-r--r--drivers/pci/msi.c16
6 files changed, 114 insertions, 17 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index eeb1db73f44e..0712365f2729 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1142,13 +1142,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
(info->lo_flags & LO_FLAGS_AUTOCLEAR))
lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
- if ((info->lo_flags & LO_FLAGS_PARTSCAN) &&
- !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
- lo->lo_flags |= LO_FLAGS_PARTSCAN;
- lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
- loop_reread_partitions(lo, lo->lo_device);
- }
-
lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
lo->lo_init[0] = info->lo_init[0];
lo->lo_init[1] = info->lo_init[1];
@@ -1163,6 +1156,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
exit:
blk_mq_unfreeze_queue(lo->lo_queue);
+
+ if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) &&
+ !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
+ lo->lo_flags |= LO_FLAGS_PARTSCAN;
+ lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
+ loop_reread_partitions(lo, lo->lo_device);
+ }
+
return err;
}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 1541cb880744..7e4287bc19e5 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -675,8 +675,10 @@ static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
nbd->num_connections) {
int i;
- for (i = 0; i < nbd->num_connections; i++)
+ for (i = 0; i < nbd->num_connections; i++) {
+ sockfd_put(nbd->socks[i]->sock);
kfree(nbd->socks[i]);
+ }
kfree(nbd->socks);
nbd->socks = NULL;
nbd->num_connections = 0;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 25ec4e585220..9b3b57fef446 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2344,6 +2344,53 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_kill_queues);
+void nvme_unfreeze(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ blk_mq_unfreeze_queue(ns->queue);
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_unfreeze);
+
+void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list) {
+ timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
+ if (timeout <= 0)
+ break;
+ }
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
+
+void nvme_wait_freeze(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ blk_mq_freeze_queue_wait(ns->queue);
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_wait_freeze);
+
+void nvme_start_freeze(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ blk_mq_freeze_queue_start(ns->queue);
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_start_freeze);
+
void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a3da1e90b99d..2aa20e3e5675 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -294,6 +294,10 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl);
void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
void nvme_kill_queues(struct nvme_ctrl *ctrl);
+void nvme_unfreeze(struct nvme_ctrl *ctrl);
+void nvme_wait_freeze(struct nvme_ctrl *ctrl);
+void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
+void nvme_start_freeze(struct nvme_ctrl *ctrl);
#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 57a1af52b06e..26a5fd05fe88 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1038,9 +1038,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
}
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
- int depth)
+ int depth, int node)
{
- struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
+ struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL,
+ node);
if (!nvmeq)
return NULL;
@@ -1217,7 +1218,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
nvmeq = dev->queues[0];
if (!nvmeq) {
- nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
+ nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
+ dev_to_node(dev->dev));
if (!nvmeq)
return -ENOMEM;
}
@@ -1309,7 +1311,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
int ret = 0;
for (i = dev->queue_count; i <= dev->max_qid; i++) {
- if (!nvme_alloc_queue(dev, i, dev->q_depth)) {
+ /* vector == qid - 1, match nvme_create_queue */
+ if (!nvme_alloc_queue(dev, i, dev->q_depth,
+ pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
ret = -ENOMEM;
break;
}
@@ -1671,21 +1675,34 @@ static void nvme_pci_disable(struct nvme_dev *dev)
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
int i, queues;
- u32 csts = -1;
+ bool dead = true;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
del_timer_sync(&dev->watchdog_timer);
mutex_lock(&dev->shutdown_lock);
- if (pci_is_enabled(to_pci_dev(dev->dev))) {
- nvme_stop_queues(&dev->ctrl);
- csts = readl(dev->bar + NVME_REG_CSTS);
+ if (pci_is_enabled(pdev)) {
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ if (dev->ctrl.state == NVME_CTRL_LIVE)
+ nvme_start_freeze(&dev->ctrl);
+ dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
+ pdev->error_state != pci_channel_io_normal);
}
+ /*
+ * Give the controller a chance to complete all entered requests if
+ * doing a safe shutdown.
+ */
+ if (!dead && shutdown)
+ nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+ nvme_stop_queues(&dev->ctrl);
+
queues = dev->online_queues - 1;
for (i = dev->queue_count - 1; i > 0; i--)
nvme_suspend_queue(dev->queues[i]);
- if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
+ if (dead) {
/* A device might become IO incapable very soon during
* probe, before the admin queue is configured. Thus,
* queue_count can be 0 here.
@@ -1700,6 +1717,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
+
+ /*
+ * The driver will not be starting up queues again if shutting down so
+ * must flush all entered requests to their failed completion to avoid
+ * deadlocking blk-mq hot-cpu notifier.
+ */
+ if (shutdown)
+ nvme_start_queues(&dev->ctrl);
mutex_unlock(&dev->shutdown_lock);
}
@@ -1822,7 +1847,9 @@ static void nvme_reset_work(struct work_struct *work)
nvme_remove_namespaces(&dev->ctrl);
} else {
nvme_start_queues(&dev->ctrl);
+ nvme_wait_freeze(&dev->ctrl);
nvme_dev_add(dev);
+ nvme_unfreeze(&dev->ctrl);
}
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 980eaf588281..d571bc330686 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1298,6 +1298,22 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
}
EXPORT_SYMBOL(pci_irq_get_affinity);
+/**
+ * pci_irq_get_node - return the numa node of a particular msi vector
+ * @pdev: PCI device to operate on
+ * @vec: device-relative interrupt vector index (0-based).
+ */
+int pci_irq_get_node(struct pci_dev *pdev, int vec)
+{
+ const struct cpumask *mask;
+
+ mask = pci_irq_get_affinity(pdev, vec);
+ if (mask)
+ return local_memory_node(cpu_to_node(cpumask_first(mask)));
+ return dev_to_node(&pdev->dev);
+}
+EXPORT_SYMBOL(pci_irq_get_node);
+
struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
{
return to_pci_dev(desc->dev);