From 538b75341835e3c2041ff066408de10d24fdc830 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 16 Sep 2014 10:37:37 -0600
Subject: blk-mq: request deadline must be visible before marking rq as started

When we start the request, we set the deadline and flip the bits marking
the request as started and non-complete. However, it's important that the
deadline store is ordered before flipping the bits, otherwise we could
have a small window where the request is marked started but with an
invalid deadline. This can confuse the timeout handling.

Suggested-by: Ming Lei
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'block/blk-mq.c')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 383ea0cb1f0a..a13c40ca8230 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -392,6 +392,12 @@ static void blk_mq_start_request(struct request *rq, bool last)
 
         blk_add_timer(rq);
 
+        /*
+         * Ensure that ->deadline is visible before set the started
+         * flag and clear the completed flag.
+         */
+        smp_mb__before_atomic();
+
         /*
          * Mark us as started and clear complete. Complete might have been
          * set if requeue raced with timeout, which then marked it as
--
cgit v1.2.3

From 683d0e126232d898a481daa3a4ca032c2b1a9660 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Thu, 18 Sep 2014 11:04:31 +0200
Subject: blk-mq: Avoid race condition with uninitialized requests

This patch should fix the bug reported in
https://lkml.org/lkml/2014/9/11/249.

We have to initialize at least the atomic_flags and the cmd_flags when
allocating storage for the requests. Otherwise blk_mq_timeout_check()
might dereference uninitialized pointers when racing with the creation
of a request.

Also move the reset of cmd_flags for the initializing code to the point
where a request is freed. So we will never end up with pending flush
request indicators that might trigger dereferences of invalid pointers
in blk_mq_timeout_check().

Cc: stable@vger.kernel.org
Signed-off-by: David Hildenbrand
Reported-by: Paulo De Rezende Pinatti
Tested-by: Paulo De Rezende Pinatti
Acked-by: Christian Borntraeger
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'block/blk-mq.c')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a13c40ca8230..1583ed28ea03 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -203,7 +203,6 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
         if (tag != BLK_MQ_TAG_FAIL) {
                 rq = data->hctx->tags->rqs[tag];
 
-                rq->cmd_flags = 0;
                 if (blk_mq_tag_busy(data->hctx)) {
                         rq->cmd_flags = REQ_MQ_INFLIGHT;
                         atomic_inc(&data->hctx->nr_active);
@@ -258,6 +257,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 
         if (rq->cmd_flags & REQ_MQ_INFLIGHT)
                 atomic_dec(&hctx->nr_active);
+        rq->cmd_flags = 0;
 
         clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
         blk_mq_put_tag(hctx, tag, &ctx->last_tag);
@@ -1410,6 +1410,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
                 left -= to_do * rq_size;
                 for (j = 0; j < to_do; j++) {
                         tags->rqs[i] = p;
+                        tags->rqs[i]->atomic_flags = 0;
+                        tags->rqs[i]->cmd_flags = 0;
                         if (set->ops->init_request) {
                                 if (set->ops->init_request(set->driver_data,
                                                 tags->rqs[i], hctx_idx, i,
--
cgit v1.2.3
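As an aside on the first patch above ("request deadline must be visible before
marking rq as started"): the same publication pattern can be modeled outside
the kernel. The sketch below is self-contained userspace C11, not kernel code;
it stands in a C11 release store for the kernel's smp_mb__before_atomic() +
set_bit() pair and an acquire load for the timeout checker's side, and the
names start_request() and timeout_check() are made up for illustration. The
point mirrors the commit message: if the started flag were published before
the deadline store, a concurrent checker could see started == true with a
stale deadline.

/*
 * Userspace C11 sketch (not kernel code) of the ordering requirement:
 * store the deadline first, then publish "started" with release
 * semantics so a reader that observes the flag also sees the deadline.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <threads.h>
#include <time.h>

static _Atomic unsigned long deadline;
static atomic_bool started;

static int start_request(void *arg)
{
        (void)arg;
        /* Set the deadline first... */
        atomic_store_explicit(&deadline, (unsigned long)time(NULL) + 30,
                              memory_order_relaxed);
        /* ...then publish "started"; release ordering keeps the deadline
         * store from being reordered after the flag. */
        atomic_store_explicit(&started, true, memory_order_release);
        return 0;
}

static int timeout_check(void *arg)
{
        (void)arg;
        /* Acquire pairs with the release above: seeing "started" implies
         * the deadline store is visible, too. */
        if (atomic_load_explicit(&started, memory_order_acquire))
                printf("deadline = %lu\n",
                       atomic_load_explicit(&deadline, memory_order_relaxed));
        return 0;
}

int main(void)
{
        thrd_t a, b;

        thrd_create(&a, start_request, NULL);
        thrd_create(&b, timeout_check, NULL);
        thrd_join(a, NULL);
        thrd_join(b, NULL);
        return 0;
}
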
From a57a178a490345c7236b0077b3de005754389ed6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 16 Sep 2014 14:44:07 -0700
Subject: blk-mq: avoid infinite recursion with the FUA flag

We should not insert requests into the flush state machine from
blk_mq_insert_request. All incoming flush requests come through
blk_{m,s}q_make_request and are handled there, while blk_execute_rq_nowait
should only be called for BLOCK_PC requests. All other callers deal with
requests that already went through the flush state machine and shouldn't
be reinserted into it.

Reported-by: Robert Elliott
Debugged-by: Ming Lei
Signed-off-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 block/blk-exec.c |  1 +
 block/blk-mq.c   | 11 +++--------
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'block/blk-mq.c')

diff --git a/block/blk-exec.c b/block/blk-exec.c
index f4d27b12c90b..9924725fa50d 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -56,6 +56,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
         bool is_pm_resume;
 
         WARN_ON(irqs_disabled());
+        WARN_ON(rq->cmd_type == REQ_TYPE_FS);
 
         rq->rq_disk = bd_disk;
         rq->end_io = done;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1583ed28ea03..a7d70a1bbf36 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -963,14 +963,9 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
 
         hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-        if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) &&
-            !(rq->cmd_flags & (REQ_FLUSH_SEQ))) {
-                blk_insert_flush(rq);
-        } else {
-                spin_lock(&ctx->lock);
-                __blk_mq_insert_request(hctx, rq, at_head);
-                spin_unlock(&ctx->lock);
-        }
+        spin_lock(&ctx->lock);
+        __blk_mq_insert_request(hctx, rq, at_head);
+        spin_unlock(&ctx->lock);
 
         if (run_queue)
                 blk_mq_run_hw_queue(hctx, async);
--
cgit v1.2.3

From 6b55e1f2d0a5e462e52678278ab749468f1db81c Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 19 Sep 2014 08:04:53 -0600
Subject: blk-mq: fix potential oops on out-of-memory in __blk_mq_alloc_rq_maps()

__blk_mq_alloc_rq_maps() can be invoked multiple times, if we scale back
the queue depth when we are low on memory. So don't clear set->tags when
we fail, this is handled directly in the parent function,
blk_mq_alloc_tag_set().

Reported-by: Robert Elliott
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'block/blk-mq.c')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a7d70a1bbf36..e83d306907da 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1959,7 +1959,6 @@ out_unwind:
         while (--i >= 0)
                 blk_mq_free_rq_map(set, set->tags[i], i);
 
-        set->tags = NULL;
         return -ENOMEM;
 }
 
--
cgit v1.2.3

From 8b95741569eabc5eb17da71d1d3668cdb0bef86c Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 19 Sep 2014 13:10:29 -0600
Subject: blk-mq: use blk_mq_start_hw_queues() when running requeue work

When requests are retried due to hw or sw resource shortages, we often
stop the associated hardware queue. So ensure that we restart the queues
when running the requeue work, otherwise the queue run will be a no-op.

Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'block/blk-mq.c')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e83d306907da..c88e6089746d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -479,7 +479,11 @@ static void blk_mq_requeue_work(struct work_struct *work)
                 blk_mq_insert_request(rq, false, false, false);
         }
 
-        blk_mq_run_queues(q, false);
+        /*
+         * Use the start variant of queue running here, so that running
+         * the requeue work will kick stopped queues.
+         */
+        blk_mq_start_hw_queues(q);
 }
 
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
--
cgit v1.2.3
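As an aside on the last patch above ("use blk_mq_start_hw_queues() when running
requeue work"): running a stopped queue is a no-op, so the requeue work has to
clear the stopped state before dispatching. The sketch below is a self-contained
userspace C toy model, not the blk-mq implementation; run_queue() and
start_queue() are hypothetical stand-ins for blk_mq_run_hw_queue() and
blk_mq_start_hw_queues(), and the struct is invented for illustration.

/*
 * Toy model (not blk-mq code) of why the requeue work must use the
 * "start" variant: run_queue() does nothing while the queue is stopped,
 * so requeued requests would never be dispatched unless the stopped
 * state is cleared first, which is what start_queue() does.
 */
#include <stdbool.h>
#include <stdio.h>

struct hw_queue {
        bool stopped;   /* set when the driver hit a resource shortage */
        int pending;    /* requests waiting to be dispatched */
};

/* Analogue of "run": a no-op if the queue is stopped. */
static void run_queue(struct hw_queue *q)
{
        if (q->stopped)
                return;
        while (q->pending > 0) {
                printf("dispatched one request\n");
                q->pending--;
        }
}

/* Analogue of "start": clear the stopped state, then run. */
static void start_queue(struct hw_queue *q)
{
        q->stopped = false;
        run_queue(q);
}

int main(void)
{
        struct hw_queue q = { .stopped = true, .pending = 2 };

        run_queue(&q);          /* no-op: the queue was stopped */
        start_queue(&q);        /* the fix: kick stopped queues, then dispatch */
        return 0;
}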