From 538b75341835e3c2041ff066408de10d24fdc830 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 16 Sep 2014 10:37:37 -0600
Subject: blk-mq: request deadline must be visible before marking rq as started

When we start the request, we set the deadline and flip the bits marking
the request as started and non-complete. However, it's important that the
deadline store is ordered before flipping the bits, otherwise we could
have a small window where the request is marked started but with an
invalid deadline. This can confuse the timeout handling.

Suggested-by: Ming Lei
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'block/blk-mq.c')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 383ea0cb1f0a..a13c40ca8230 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -392,6 +392,12 @@ static void blk_mq_start_request(struct request *rq, bool last)
 
         blk_add_timer(rq);
 
+        /*
+         * Ensure that ->deadline is visible before set the started
+         * flag and clear the completed flag.
+         */
+        smp_mb__before_atomic();
+
         /*
          * Mark us as started and clear complete. Complete might have been
          * set if requeue raced with timeout, which then marked it as
--
cgit v1.2.3

From 683d0e126232d898a481daa3a4ca032c2b1a9660 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Thu, 18 Sep 2014 11:04:31 +0200
Subject: blk-mq: Avoid race condition with uninitialized requests

This patch should fix the bug reported in
https://lkml.org/lkml/2014/9/11/249.

We have to initialize at least the atomic_flags and the cmd_flags when
allocating storage for the requests. Otherwise blk_mq_timeout_check()
might dereference uninitialized pointers when racing with the creation
of a request.

Also move the reset of cmd_flags for the initializing code to the point
where a request is freed. So we will never end up with pending flush
request indicators that might trigger dereferences of invalid pointers
in blk_mq_timeout_check().

Cc: stable@vger.kernel.org
Signed-off-by: David Hildenbrand
Reported-by: Paulo De Rezende Pinatti
Tested-by: Paulo De Rezende Pinatti
Acked-by: Christian Borntraeger
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'block/blk-mq.c')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a13c40ca8230..1583ed28ea03 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -203,7 +203,6 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
         if (tag != BLK_MQ_TAG_FAIL) {
                 rq = data->hctx->tags->rqs[tag];
 
-                rq->cmd_flags = 0;
                 if (blk_mq_tag_busy(data->hctx)) {
                         rq->cmd_flags = REQ_MQ_INFLIGHT;
                         atomic_inc(&data->hctx->nr_active);
@@ -258,6 +257,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 
         if (rq->cmd_flags & REQ_MQ_INFLIGHT)
                 atomic_dec(&hctx->nr_active);
+        rq->cmd_flags = 0;
 
         clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
         blk_mq_put_tag(hctx, tag, &ctx->last_tag);
@@ -1410,6 +1410,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
                 left -= to_do * rq_size;
                 for (j = 0; j < to_do; j++) {
                         tags->rqs[i] = p;
+                        tags->rqs[i]->atomic_flags = 0;
+                        tags->rqs[i]->cmd_flags = 0;
                         if (set->ops->init_request) {
                                 if (set->ops->init_request(set->driver_data,
                                                 tags->rqs[i], hctx_idx, i,
--
cgit v1.2.3
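As an aside on the first patch above ("request deadline must be visible before
marking rq as started"): the same publication pattern can be modeled outside
the kernel. The sketch below is self-contained userspace C11, not kernel code;
it stands in a C11 release store for the kernel's smp_mb__before_atomic() +
set_bit() pair and an acquire load for the timeout checker's side, and the
names start_request() and timeout_check() are made up for illustration. The
point mirrors the commit message: if the started flag were published before
the deadline store, a concurrent checker could see started == true with a
stale deadline.

/*
 * Userspace C11 sketch (not kernel code) of the ordering requirement:
 * store the deadline first, then publish "started" with release
 * semantics so a reader that observes the flag also sees the deadline.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <threads.h>
#include <time.h>

static _Atomic unsigned long deadline;
static atomic_bool started;

static int start_request(void *arg)
{
        (void)arg;
        /* Set the deadline first... */
        atomic_store_explicit(&deadline, (unsigned long)time(NULL) + 30,
                              memory_order_relaxed);
        /* ...then publish "started"; release ordering keeps the deadline
         * store from being reordered after the flag. */
        atomic_store_explicit(&started, true, memory_order_release);
        return 0;
}

static int timeout_check(void *arg)
{
        (void)arg;
        /* Acquire pairs with the release above: seeing "started" implies
         * the deadline store is visible, too. */
        if (atomic_load_explicit(&started, memory_order_acquire))
                printf("deadline = %lu\n",
                       atomic_load_explicit(&deadline, memory_order_relaxed));
        return 0;
}

int main(void)
{
        thrd_t a, b;

        thrd_create(&a, start_request, NULL);
        thrd_create(&b, timeout_check, NULL);
        thrd_join(a, NULL);
        thrd_join(b, NULL);
        return 0;
}
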
From a57a178a490345c7236b0077b3de005754389ed6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 16 Sep 2014 14:44:07 -0700
Subject: blk-mq: avoid infinite recursion with the FUA flag

We should not insert requests into the flush state machine from
blk_mq_insert_request. All incoming flush requests come through
blk_{m,s}q_make_request and are handled there, while blk_execute_rq_nowait
should only be called for BLOCK_PC requests. All other callers deal with
requests that already went through the flush state machine and shouldn't
be reinserted into it.

Reported-by: Robert Elliott
Debugged-by: Ming Lei
Signed-off-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 block/blk-exec.c |  1 +
 block/blk-mq.c   | 11 +++--------
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'block/blk-mq.c')

diff --git a/block/blk-exec.c b/block/blk-exec.c
index f4d27b12c90b..9924725fa50d 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -56,6 +56,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
         bool is_pm_resume;
 
         WARN_ON(irqs_disabled());
+        WARN_ON(rq->cmd_type == REQ_TYPE_FS);
 
         rq->rq_disk = bd_disk;
         rq->end_io = done;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1583ed28ea03..a7d70a1bbf36 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -963,14 +963,9 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
 
         hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-        if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) &&
-            !(rq->cmd_flags & (REQ_FLUSH_SEQ))) {
-                blk_insert_flush(rq);
-        } else {
-                spin_lock(&ctx->lock);
-                __blk_mq_insert_request(hctx, rq, at_head);
-                spin_unlock(&ctx->lock);
-        }
+        spin_lock(&ctx->lock);
+        __blk_mq_insert_request(hctx, rq, at_head);
+        spin_unlock(&ctx->lock);
 
         if (run_queue)
                 blk_mq_run_hw_queue(hctx, async);
--
cgit v1.2.3

From 6b55e1f2d0a5e462e52678278ab749468f1db81c Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 19 Sep 2014 08:04:53 -0600
Subject: blk-mq: fix potential oops on out-of-memory in __blk_mq_alloc_rq_maps()

__blk_mq_alloc_rq_maps() can be invoked multiple times, if we scale back
the queue depth when we are low on memory. So don't clear set->tags when
we fail, this is handled directly in the parent function,
blk_mq_alloc_tag_set().

Reported-by: Robert Elliott
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'block/blk-mq.c')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a7d70a1bbf36..e83d306907da 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1959,7 +1959,6 @@ out_unwind:
         while (--i >= 0)
                 blk_mq_free_rq_map(set, set->tags[i], i);
 
-        set->tags = NULL;
         return -ENOMEM;
 }
 
--
cgit v1.2.3

From 8b95741569eabc5eb17da71d1d3668cdb0bef86c Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 19 Sep 2014 13:10:29 -0600
Subject: blk-mq: use blk_mq_start_hw_queues() when running requeue work

When requests are retried due to hw or sw resource shortages, we often
stop the associated hardware queue. So ensure that we restart the queues
when running the requeue work, otherwise the queue run will be a no-op.

Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'block/blk-mq.c')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e83d306907da..c88e6089746d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -479,7 +479,11 @@ static void blk_mq_requeue_work(struct work_struct *work)
                 blk_mq_insert_request(rq, false, false, false);
         }
 
-        blk_mq_run_queues(q, false);
+        /*
+         * Use the start variant of queue running here, so that running
+         * the requeue work will kick stopped queues.
+         */
+        blk_mq_start_hw_queues(q);
 }
 
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
--
cgit v1.2.3
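As an aside on the last patch above ("use blk_mq_start_hw_queues() when running
requeue work"): running a stopped queue is a no-op, so the requeue work has to
clear the stopped state before dispatching. The sketch below is a self-contained
userspace C toy model, not the blk-mq implementation; run_queue() and
start_queue() are hypothetical stand-ins for blk_mq_run_hw_queue() and
blk_mq_start_hw_queues(), and the struct is invented for illustration.

/*
 * Toy model (not blk-mq code) of why the requeue work must use the
 * "start" variant: run_queue() does nothing while the queue is stopped,
 * so requeued requests would never be dispatched unless the stopped
 * state is cleared first, which is what start_queue() does.
 */
#include <stdbool.h>
#include <stdio.h>

struct hw_queue {
        bool stopped;   /* set when the driver hit a resource shortage */
        int pending;    /* requests waiting to be dispatched */
};

/* Analogue of "run": a no-op if the queue is stopped. */
static void run_queue(struct hw_queue *q)
{
        if (q->stopped)
                return;
        while (q->pending > 0) {
                printf("dispatched one request\n");
                q->pending--;
        }
}

/* Analogue of "start": clear the stopped state, then run. */
static void start_queue(struct hw_queue *q)
{
        q->stopped = false;
        run_queue(q);
}

int main(void)
{
        struct hw_queue q = { .stopped = true, .pending = 2 };

        run_queue(&q);          /* no-op: the queue was stopped */
        start_queue(&q);        /* the fix: kick stopped queues, then dispatch */
        return 0;
}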