From f929d42ceb18a8acfd47e0e7b7d90b5d49bd9258 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Fri, 4 Sep 2015 12:08:07 +0200 Subject: xen/blkback: free requests on disconnection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is due to commit 86839c56dee28c315a4c19b7bfee450ccd84cd25 "xen/block: add multi-page ring support" When using an guest under UEFI - after the domain is destroyed the following warning comes from blkback. ------------[ cut here ]------------ WARNING: CPU: 2 PID: 95 at /home/julien/works/linux/drivers/block/xen-blkback/xenbus.c:274 xen_blkif_deferred_free+0x1f4/0x1f8() Modules linked in: CPU: 2 PID: 95 Comm: kworker/2:1 Tainted: G W 4.2.0 #85 Hardware name: APM X-Gene Mustang board (DT) Workqueue: events xen_blkif_deferred_free Call trace: [] dump_backtrace+0x0/0x124 [] show_stack+0x10/0x1c [] dump_stack+0x78/0x98 [] warn_slowpath_common+0x9c/0xd4 [] warn_slowpath_null+0x14/0x20 [] xen_blkif_deferred_free+0x1f0/0x1f8 [] process_one_work+0x160/0x3b4 [] worker_thread+0x140/0x494 [] kthread+0xd8/0xf0 ---[ end trace 6f859b7883c88cdd ]--- Request allocation has been moved to connect_ring, which is called every time blkback connects to the frontend (this can happen multiple times during a blkback instance life cycle). On the other hand, request freeing has not been moved, so it's only called when destroying the backend instance. Due to this mismatch, blkback can allocate the request pool multiple times, without freeing it. In order to fix it, move the freeing of requests to xen_blkif_disconnect to restore the symmetry between request allocation and freeing. Reported-by: Julien Grall Signed-off-by: Roger Pau Monné Tested-by: Julien Grall Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Cc: xen-devel@lists.xenproject.org CC: stable@vger.kernel.org # 4.2 Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index deb3f001791f..767657565de6 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -212,6 +212,9 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref, static int xen_blkif_disconnect(struct xen_blkif *blkif) { + struct pending_req *req, *n; + int i = 0, j; + if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); wake_up(&blkif->shutdown_wq); @@ -238,13 +241,28 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) /* Remove all persistent grants and the cache of ballooned pages. */ xen_blkbk_free_caches(blkif); + /* Check that there is no request in use */ + list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { + list_del(&req->free_list); + + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) + kfree(req->segments[j]); + + for (j = 0; j < MAX_INDIRECT_PAGES; j++) + kfree(req->indirect_pages[j]); + + kfree(req); + i++; + } + + WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); + blkif->nr_ring_pages = 0; + return 0; } static void xen_blkif_free(struct xen_blkif *blkif) { - struct pending_req *req, *n; - int i = 0, j; xen_blkif_disconnect(blkif); xen_vbd_free(&blkif->vbd); @@ -257,22 +275,6 @@ static void xen_blkif_free(struct xen_blkif *blkif) BUG_ON(!list_empty(&blkif->free_pages)); BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); - /* Check that there is no request in use */ - list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { - list_del(&req->free_list); - - for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) - kfree(req->segments[j]); - - for (j = 0; j < MAX_INDIRECT_PAGES; j++) - kfree(req->indirect_pages[j]); - - kfree(req); - i++; - } - - WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); - kmem_cache_free(xen_blkif_cachep, blkif); } -- cgit v1.2.3 From bda4e0fb3126aca15586d165b5a15a37edc0a984 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 3 Sep 2015 08:18:17 -0600 Subject: NVMe: Set affinity after allocating request queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The asynchronous namespace scanning caused affinity hints to be set before its tagset initialized, so there was no cpu mask to set the hint. This patch moves the affinity hint setting to after namespaces are scanned. Reported-by: 김경산 Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index b97fc3fe0916..30758bdf69ea 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2439,6 +2439,22 @@ static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) list_sort(NULL, &dev->namespaces, ns_cmp); } +static void nvme_set_irq_hints(struct nvme_dev *dev) +{ + struct nvme_queue *nvmeq; + int i; + + for (i = 0; i < dev->online_queues; i++) { + nvmeq = dev->queues[i]; + + if (!nvmeq->tags || !(*nvmeq->tags)) + continue; + + irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, + blk_mq_tags_cpumask(*nvmeq->tags)); + } +} + static void nvme_dev_scan(struct work_struct *work) { struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work); @@ -2450,6 +2466,7 @@ static void nvme_dev_scan(struct work_struct *work) return; nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn)); kfree(ctrl); + nvme_set_irq_hints(dev); } /* @@ -2953,22 +2970,6 @@ static const struct file_operations nvme_dev_fops = { .compat_ioctl = nvme_dev_ioctl, }; -static void nvme_set_irq_hints(struct nvme_dev *dev) -{ - struct nvme_queue *nvmeq; - int i; - - for (i = 0; i < dev->online_queues; i++) { - nvmeq = dev->queues[i]; - - if (!nvmeq->tags || !(*nvmeq->tags)) - continue; - - irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, - blk_mq_tags_cpumask(*nvmeq->tags)); - } -} - static int nvme_dev_start(struct nvme_dev *dev) { int result; @@ -3010,8 +3011,6 @@ static int nvme_dev_start(struct nvme_dev *dev) if (result) goto free_tags; - nvme_set_irq_hints(dev); - dev->event_limit = 1; return result; @@ -3062,7 +3061,6 @@ static int nvme_dev_resume(struct nvme_dev *dev) } else { nvme_unfreeze_queues(dev); nvme_dev_add(dev); - nvme_set_irq_hints(dev); } return 0; } -- cgit v1.2.3 From f4829a9b7a61e159367350008a608b062c4f6840 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 27 Sep 2015 21:01:50 +0200 Subject: blk-mq: fix racy updates of rq->errors blk_mq_complete_request may be a no-op if the request has already been completed by others means (e.g. a timeout or cancellation), but currently drivers have to set rq->errors before calling blk_mq_complete_request, which might leave us with the wrong error value. Add an error parameter to blk_mq_complete_request so that we can defer setting rq->errors until we known we won the race to complete the request. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/blk-mq.c | 12 ++++++------ drivers/block/loop.c | 11 +++++------ drivers/block/null_blk.c | 2 +- drivers/block/nvme-core.c | 16 +++++++--------- drivers/block/virtio_blk.c | 2 +- drivers/block/xen-blkfront.c | 19 ++++++++++--------- drivers/scsi/scsi_lib.c | 2 +- include/linux/blk-mq.h | 2 +- 8 files changed, 32 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/block/blk-mq.c b/block/blk-mq.c index 31c0c6259c4c..2306330530e8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -393,14 +393,16 @@ void __blk_mq_complete_request(struct request *rq) * Ends all I/O on a request. It does not handle partial completions. * The actual completion happens out-of-order, through a IPI handler. **/ -void blk_mq_complete_request(struct request *rq) +void blk_mq_complete_request(struct request *rq, int error) { struct request_queue *q = rq->q; if (unlikely(blk_should_fake_timeout(q))) return; - if (!blk_mark_rq_complete(rq)) + if (!blk_mark_rq_complete(rq)) { + rq->errors = error; __blk_mq_complete_request(rq); + } } EXPORT_SYMBOL(blk_mq_complete_request); @@ -616,10 +618,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, * If a request wasn't started before the queue was * marked dying, kill it here or it'll go unnoticed. */ - if (unlikely(blk_queue_dying(rq->q))) { - rq->errors = -EIO; - blk_mq_complete_request(rq); - } + if (unlikely(blk_queue_dying(rq->q))) + blk_mq_complete_request(rq, -EIO); return; } if (rq->cmd_flags & REQ_NO_TIMEOUT) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f9889b6bc02c..674f800a3b57 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1486,17 +1486,16 @@ static void loop_handle_cmd(struct loop_cmd *cmd) { const bool write = cmd->rq->cmd_flags & REQ_WRITE; struct loop_device *lo = cmd->rq->q->queuedata; - int ret = -EIO; + int ret = 0; - if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) + if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { + ret = -EIO; goto failed; + } ret = do_req_filebacked(lo, cmd->rq); - failed: - if (ret) - cmd->rq->errors = -EIO; - blk_mq_complete_request(cmd->rq); + blk_mq_complete_request(cmd->rq, ret ? -EIO : 0); } static void loop_queue_write_work(struct work_struct *work) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index a295b98c6bae..1c9e4fe5aa44 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -289,7 +289,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd) case NULL_IRQ_SOFTIRQ: switch (queue_mode) { case NULL_Q_MQ: - blk_mq_complete_request(cmd->rq); + blk_mq_complete_request(cmd->rq, cmd->rq->errors); break; case NULL_Q_RQ: blk_complete_request(cmd->rq); diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 30758bdf69ea..6f04771f1019 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -618,16 +618,15 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, spin_unlock_irqrestore(req->q->queue_lock, flags); return; } + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { if (cmd_rq->ctx == CMD_CTX_CANCELLED) - req->errors = -EINTR; - else - req->errors = status; + status = -EINTR; } else { - req->errors = nvme_error_status(status); + status = nvme_error_status(status); } - } else - req->errors = 0; + } + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { u32 result = le32_to_cpup(&cqe->result); req->special = (void *)(uintptr_t)result; @@ -650,7 +649,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } nvme_free_iod(nvmeq->dev, iod); - blk_mq_complete_request(req); + blk_mq_complete_request(req, status); } /* length is in bytes. gfp flags indicates whether we may sleep. */ @@ -863,8 +862,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (ns && ns->ms && !blk_integrity_rq(req)) { if (!(ns->pi_type && ns->ms == 8) && req->cmd_type != REQ_TYPE_DRV_PRIV) { - req->errors = -EFAULT; - blk_mq_complete_request(req); + blk_mq_complete_request(req, -EFAULT); return BLK_MQ_RQ_QUEUE_OK; } } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index e93899cc6f60..6ca35495a5be 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -144,7 +144,7 @@ static void virtblk_done(struct virtqueue *vq) do { virtqueue_disable_cb(vq); while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { - blk_mq_complete_request(vbr->req); + blk_mq_complete_request(vbr->req, vbr->req->errors); req_done = true; } if (unlikely(virtqueue_is_broken(vq))) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 0823a96902f8..611170896b8c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1142,6 +1142,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) RING_IDX i, rp; unsigned long flags; struct blkfront_info *info = (struct blkfront_info *)dev_id; + int error; spin_lock_irqsave(&info->io_lock, flags); @@ -1182,37 +1183,37 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) continue; } - req->errors = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; + error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; switch (bret->operation) { case BLKIF_OP_DISCARD: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - req->errors = -EOPNOTSUPP; + error = -EOPNOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); } - blk_mq_complete_request(req); + blk_mq_complete_request(req, error); break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - req->errors = -EOPNOTSUPP; + error = -EOPNOTSUPP; } if (unlikely(bret->status == BLKIF_RSP_ERROR && info->shadow[id].req.u.rw.nr_segments == 0)) { printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - req->errors = -EOPNOTSUPP; + error = -EOPNOTSUPP; } - if (unlikely(req->errors)) { - if (req->errors == -EOPNOTSUPP) - req->errors = 0; + if (unlikely(error)) { + if (error == -EOPNOTSUPP) + error = 0; info->feature_flush = 0; xlvbd_flush(info); } @@ -1223,7 +1224,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " "request: %x\n", bret->status); - blk_mq_complete_request(req); + blk_mq_complete_request(req, error); break; default: BUG(); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index cbfc5990052b..126a48c6431e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1957,7 +1957,7 @@ static int scsi_mq_prep_fn(struct request *req) static void scsi_mq_done(struct scsi_cmnd *cmd) { trace_scsi_dispatch_cmd_done(cmd); - blk_mq_complete_request(cmd->request); + blk_mq_complete_request(cmd->request, cmd->request->errors); } static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b80ba4572a31..c1b5c867ff07 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -214,7 +214,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); void blk_mq_cancel_requeue_work(struct request_queue *q); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_abort_requeue_list(struct request_queue *q); -void blk_mq_complete_request(struct request *rq); +void blk_mq_complete_request(struct request *rq, int error); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); -- cgit v1.2.3