From 889fa31f00b218a2cef96c32a6b3f57e6d3bf918 Mon Sep 17 00:00:00 2001 From: Chong Yuan Date: Wed, 15 Apr 2015 11:39:29 -0600 Subject: blk-mq: reduce unnecessary software queue looping In flush_busy_ctxs() and blk_mq_hctx_has_pending(), regardless of how many ctxs assigned to one hctx, they will all loop hctx->ctx_map.map_size times. Here hctx->ctx_map.map_size is a const ALIGN(nr_cpu_ids, 8) / 8. Especially, flush_busy_ctxs() is in hot code path. And it's unnecessary. Change ->map_size to contain the actually mapped software queues, so we only loop for as many iterations as we have to. And remove cpumask setting and nr_ctx count in blk_mq_init_cpu_queues() since they are all re-done in blk_mq_map_swqueue(). blk_mq_map_swqueue(). Signed-off-by: Chong Yuan Reviewed-by: Wenbo Wang Updated by me for formatting and commenting. Signed-off-by: Jens Axboe --- block/blk-mq.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 1192f85e5ff3..0b49e42e5310 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1522,8 +1522,6 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node) if (!bitmap->map) return -ENOMEM; - bitmap->map_size = num_maps; - total = nr_cpu_ids; for (i = 0; i < num_maps; i++) { bitmap->map[i].depth = min(total, bitmap->bits_per_word); @@ -1764,8 +1762,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, continue; hctx = q->mq_ops->map_queue(q, i); - cpumask_set_cpu(i, hctx->cpumask); - hctx->nr_ctx++; /* * Set local node, IFF we have more than one hw queue. If @@ -1802,6 +1798,8 @@ static void blk_mq_map_swqueue(struct request_queue *q) } queue_for_each_hw_ctx(q, hctx, i) { + struct blk_mq_ctxmap *map = &hctx->ctx_map; + /* * If no software queues are mapped to this hardware queue, * disable it and free the request entries. @@ -1817,6 +1815,13 @@ static void blk_mq_map_swqueue(struct request_queue *q) continue; } + /* + * Set the map size to the number of mapped software queues. + * This is more accurate and more efficient than looping + * over all possibly mapped software queues. + */ + map->map_size = hctx->nr_ctx / map->bits_per_word; + /* * Initialize batch roundrobin counts */ -- cgit v1.2.3