From dece16353ef47d8d33f5302bc158072a9d65e26f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Nov 2015 10:41:16 -0700 Subject: block: change ->make_request_fn() and users to return a queue cookie No functional changes in this patch, but it prepares us for returning a more useful cookie related to the IO that was queued up. Signed-off-by: Jens Axboe Acked-by: Christoph Hellwig Acked-by: Keith Busch --- block/blk-core.c | 26 ++++++++++++++++---------- block/blk-mq.c | 26 ++++++++++++++------------ 2 files changed, 30 insertions(+), 22 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 89eec7965870..e93df6d386a0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -809,7 +809,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) } EXPORT_SYMBOL(blk_init_queue_node); -static void blk_queue_bio(struct request_queue *q, struct bio *bio); +static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio); struct request_queue * blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, @@ -1678,7 +1678,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) blk_rq_bio_prep(req->q, req, bio); } -static void blk_queue_bio(struct request_queue *q, struct bio *bio) +static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) { const bool sync = !!(bio->bi_rw & REQ_SYNC); struct blk_plug *plug; @@ -1698,7 +1698,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio) if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio->bi_error = -EIO; bio_endio(bio); - return; + return BLK_QC_T_NONE; } if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { @@ -1713,7 +1713,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio) */ if (!blk_queue_nomerges(q)) { if (blk_attempt_plug_merge(q, bio, &request_count, NULL)) - return; + return BLK_QC_T_NONE; } else request_count = blk_plug_queued_count(q); @@ -1791,6 +1791,8 @@ get_rq: out_unlock: spin_unlock_irq(q->queue_lock); } + + return BLK_QC_T_NONE; } /* @@ -1996,12 +1998,13 @@ end_io: * a lower device by calling into generic_make_request recursively, which * means the bio should NOT be touched after the call to ->make_request_fn. */ -void generic_make_request(struct bio *bio) +blk_qc_t generic_make_request(struct bio *bio) { struct bio_list bio_list_on_stack; + blk_qc_t ret = BLK_QC_T_NONE; if (!generic_make_request_checks(bio)) - return; + goto out; /* * We only want one ->make_request_fn to be active at a time, else @@ -2015,7 +2018,7 @@ void generic_make_request(struct bio *bio) */ if (current->bio_list) { bio_list_add(current->bio_list, bio); - return; + goto out; } /* following loop may be a bit non-obvious, and so deserves some @@ -2040,7 +2043,7 @@ void generic_make_request(struct bio *bio) if (likely(blk_queue_enter(q, __GFP_WAIT) == 0)) { - q->make_request_fn(q, bio); + ret = q->make_request_fn(q, bio); blk_queue_exit(q); @@ -2053,6 +2056,9 @@ void generic_make_request(struct bio *bio) } } while (bio); current->bio_list = NULL; /* deactivate */ + +out: + return ret; } EXPORT_SYMBOL(generic_make_request); @@ -2066,7 +2072,7 @@ EXPORT_SYMBOL(generic_make_request); * interfaces; @bio must be presetup and ready for I/O. * */ -void submit_bio(int rw, struct bio *bio) +blk_qc_t submit_bio(int rw, struct bio *bio) { bio->bi_rw |= rw; @@ -2100,7 +2106,7 @@ void submit_bio(int rw, struct bio *bio) } } - generic_make_request(bio); + return generic_make_request(bio); } EXPORT_SYMBOL(submit_bio); diff --git a/block/blk-mq.c b/block/blk-mq.c index 1c27b3eaef64..65f43bd696a0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1235,7 +1235,7 @@ static int blk_mq_direct_issue_request(struct request *rq) * but will attempt to bypass the hctx queueing if we can go straight to * hardware for SYNC IO. */ -static void blk_mq_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = rw_is_sync(bio->bi_rw); const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); @@ -1249,7 +1249,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio_io_error(bio); - return; + return BLK_QC_T_NONE; } blk_queue_split(q, &bio, q->bio_split); @@ -1257,13 +1257,13 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) if (!is_flush_fua && !blk_queue_nomerges(q)) { if (blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) - return; + return BLK_QC_T_NONE; } else request_count = blk_plug_queued_count(q); rq = blk_mq_map_request(q, bio, &data); if (unlikely(!rq)) - return; + return BLK_QC_T_NONE; if (unlikely(is_flush_fua)) { blk_mq_bio_to_request(rq, bio); @@ -1302,11 +1302,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) old_rq = rq; blk_mq_put_ctx(data.ctx); if (!old_rq) - return; + return BLK_QC_T_NONE; if (!blk_mq_direct_issue_request(old_rq)) - return; + return BLK_QC_T_NONE; blk_mq_insert_request(old_rq, false, true, true); - return; + return BLK_QC_T_NONE; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -1320,13 +1320,14 @@ run_queue: blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); } blk_mq_put_ctx(data.ctx); + return BLK_QC_T_NONE; } /* * Single hardware queue variant. This will attempt to use any per-process * plug for merging and IO deferral. */ -static void blk_sq_make_request(struct request_queue *q, struct bio *bio) +static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = rw_is_sync(bio->bi_rw); const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); @@ -1339,18 +1340,18 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio_io_error(bio); - return; + return BLK_QC_T_NONE; } blk_queue_split(q, &bio, q->bio_split); if (!is_flush_fua && !blk_queue_nomerges(q) && blk_attempt_plug_merge(q, bio, &request_count, NULL)) - return; + return BLK_QC_T_NONE; rq = blk_mq_map_request(q, bio, &data); if (unlikely(!rq)) - return; + return BLK_QC_T_NONE; if (unlikely(is_flush_fua)) { blk_mq_bio_to_request(rq, bio); @@ -1374,7 +1375,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) } list_add_tail(&rq->queuelist, &plug->mq_list); blk_mq_put_ctx(data.ctx); - return; + return BLK_QC_T_NONE; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -1389,6 +1390,7 @@ run_queue: } blk_mq_put_ctx(data.ctx); + return BLK_QC_T_NONE; } /* -- cgit v1.2.3 From 7b371636fb6d187873d9d2730c2b1febc48a9b47 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Nov 2015 10:41:40 -0700 Subject: blk-mq: return tag/queue combo in the make_request_fn handlers Return a cookie, blk_qc_t, from the blk-mq make request functions, that allows a later caller to uniquely identify a specific IO. The cookie doesn't mean anything to the caller, but the caller can use it to later pass back to the block layer. The block layer can then identify the hardware queue and request from that cookie. Signed-off-by: Jens Axboe Acked-by: Christoph Hellwig Acked-by: Keith Busch --- block/blk-mq.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 65f43bd696a0..66f3cf9c436d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1198,7 +1198,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, return rq; } -static int blk_mq_direct_issue_request(struct request *rq) +static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie) { int ret; struct request_queue *q = rq->q; @@ -1209,6 +1209,7 @@ static int blk_mq_direct_issue_request(struct request *rq) .list = NULL, .last = 1 }; + blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num); /* * For OK queue, we are done. For error, kill it. Any other @@ -1216,18 +1217,21 @@ static int blk_mq_direct_issue_request(struct request *rq) * would have done */ ret = q->mq_ops->queue_rq(hctx, &bd); - if (ret == BLK_MQ_RQ_QUEUE_OK) + if (ret == BLK_MQ_RQ_QUEUE_OK) { + *cookie = new_cookie; return 0; - else { - __blk_mq_requeue_request(rq); + } - if (ret == BLK_MQ_RQ_QUEUE_ERROR) { - rq->errors = -EIO; - blk_mq_end_request(rq, rq->errors); - return 0; - } - return -1; + __blk_mq_requeue_request(rq); + + if (ret == BLK_MQ_RQ_QUEUE_ERROR) { + *cookie = BLK_QC_T_NONE; + rq->errors = -EIO; + blk_mq_end_request(rq, rq->errors); + return 0; } + + return -1; } /* @@ -1244,6 +1248,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) unsigned int request_count = 0; struct blk_plug *plug; struct request *same_queue_rq = NULL; + blk_qc_t cookie; blk_queue_bounce(q, &bio); @@ -1265,6 +1270,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (unlikely(!rq)) return BLK_QC_T_NONE; + cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); + if (unlikely(is_flush_fua)) { blk_mq_bio_to_request(rq, bio); blk_insert_flush(rq); @@ -1302,11 +1309,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) old_rq = rq; blk_mq_put_ctx(data.ctx); if (!old_rq) - return BLK_QC_T_NONE; - if (!blk_mq_direct_issue_request(old_rq)) - return BLK_QC_T_NONE; + goto done; + if (!blk_mq_direct_issue_request(old_rq, &cookie)) + goto done; blk_mq_insert_request(old_rq, false, true, true); - return BLK_QC_T_NONE; + goto done; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -1320,7 +1327,8 @@ run_queue: blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); } blk_mq_put_ctx(data.ctx); - return BLK_QC_T_NONE; +done: + return cookie; } /* @@ -1335,6 +1343,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) unsigned int request_count = 0; struct blk_map_ctx data; struct request *rq; + blk_qc_t cookie; blk_queue_bounce(q, &bio); @@ -1353,6 +1362,8 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) if (unlikely(!rq)) return BLK_QC_T_NONE; + cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); + if (unlikely(is_flush_fua)) { blk_mq_bio_to_request(rq, bio); blk_insert_flush(rq); @@ -1375,7 +1386,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) } list_add_tail(&rq->queuelist, &plug->mq_list); blk_mq_put_ctx(data.ctx); - return BLK_QC_T_NONE; + return cookie; } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { @@ -1390,7 +1401,7 @@ run_queue: } blk_mq_put_ctx(data.ctx); - return BLK_QC_T_NONE; + return cookie; } /* -- cgit v1.2.3 From 05229beeddf7e75e2e616ddaad4b70e7fca9528d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Nov 2015 10:44:55 -0700 Subject: block: add block polling support Add basic support for polling for specific IO to complete. This uses the cookie that blk-mq passes back, which enables the block layer to pass this cookie to the driver to spin for a specific request. This will be combined with request latency tracking, so we can make qualified decisions about when to poll and when not to. For now, for benchmark purposes, we add a sysfs file that controls whether polling is enabled or not. Signed-off-by: Jens Axboe Acked-by: Christoph Hellwig Acked-by: Keith Busch --- block/blk-core.c | 41 +++++++++++++++++++++++++++++++++++++++++ block/blk-mq-sysfs.c | 10 ++++++++++ block/blk-sysfs.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/blk-mq.h | 10 ++++++++++ include/linux/blkdev.h | 3 +++ 5 files changed, 99 insertions(+) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index e93df6d386a0..fa36b4ff7d63 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3312,6 +3312,47 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); +bool blk_poll(struct request_queue *q, blk_qc_t cookie) +{ + struct blk_plug *plug; + long state; + + if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) || + !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return false; + + plug = current->plug; + if (plug) + blk_flush_plug_list(plug, false); + + state = current->state; + while (!need_resched()) { + unsigned int queue_num = blk_qc_t_to_queue_num(cookie); + struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[queue_num]; + int ret; + + hctx->poll_invoked++; + + ret = q->mq_ops->poll(hctx, blk_qc_t_to_tag(cookie)); + if (ret > 0) { + hctx->poll_success++; + set_current_state(TASK_RUNNING); + return true; + } + + if (signal_pending_state(state, current)) + set_current_state(TASK_RUNNING); + + if (current->state == TASK_RUNNING) + return true; + if (ret < 0) + break; + cpu_relax(); + } + + return false; +} + #ifdef CONFIG_PM /** * blk_pm_runtime_init - Block layer runtime PM initialization routine diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 6f57a110289c..1cf18784c5cf 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -174,6 +174,11 @@ static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page) return ret; } +static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page) +{ + return sprintf(page, "invoked=%lu, success=%lu\n", hctx->poll_invoked, hctx->poll_success); +} + static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx, char *page) { @@ -295,6 +300,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { .attr = {.name = "cpu_list", .mode = S_IRUGO }, .show = blk_mq_hw_sysfs_cpus_show, }; +static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = { + .attr = {.name = "io_poll", .mode = S_IRUGO }, + .show = blk_mq_hw_sysfs_poll_show, +}; static struct attribute *default_hw_ctx_attrs[] = { &blk_mq_hw_sysfs_queued.attr, @@ -304,6 +313,7 @@ static struct attribute *default_hw_ctx_attrs[] = { &blk_mq_hw_sysfs_tags.attr, &blk_mq_hw_sysfs_cpus.attr, &blk_mq_hw_sysfs_active.attr, + &blk_mq_hw_sysfs_poll.attr, NULL, }; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 31849e328b45..565b8dac5782 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -317,6 +317,34 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) return ret; } +static ssize_t queue_poll_show(struct request_queue *q, char *page) +{ + return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page); +} + +static ssize_t queue_poll_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long poll_on; + ssize_t ret; + + if (!q->mq_ops || !q->mq_ops->poll) + return -EINVAL; + + ret = queue_var_store(&poll_on, page, count); + if (ret < 0) + return ret; + + spin_lock_irq(q->queue_lock); + if (poll_on) + queue_flag_set(QUEUE_FLAG_POLL, q); + else + queue_flag_clear(QUEUE_FLAG_POLL, q); + spin_unlock_irq(q->queue_lock); + + return ret; +} + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -442,6 +470,12 @@ static struct queue_sysfs_entry queue_random_entry = { .store = queue_store_random, }; +static struct queue_sysfs_entry queue_poll_entry = { + .attr = {.name = "io_poll", .mode = S_IRUGO | S_IWUSR }, + .show = queue_poll_show, + .store = queue_poll_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -466,6 +500,7 @@ static struct attribute *default_attrs[] = { &queue_rq_affinity_entry.attr, &queue_iostats_entry.attr, &queue_random_entry.attr, + &queue_poll_entry.attr, NULL, }; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 83cc9d4e5455..daf17d70aeca 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -59,6 +59,9 @@ struct blk_mq_hw_ctx { struct blk_mq_cpu_notifier cpu_notifier; struct kobject kobj; + + unsigned long poll_invoked; + unsigned long poll_success; }; struct blk_mq_tag_set { @@ -97,6 +100,8 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int, typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef void (busy_tag_iter_fn)(struct request *, void *, bool); +typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); + struct blk_mq_ops { /* @@ -114,6 +119,11 @@ struct blk_mq_ops { */ timeout_fn *timeout; + /* + * Called to poll for completion of a specific tag. + */ + poll_fn *poll; + softirq_done_fn *complete; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5ee0f5243025..3fe27f8d91f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -487,6 +487,7 @@ struct request_queue { #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ #define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ +#define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -814,6 +815,8 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); +bool blk_poll(struct request_queue *q, blk_qc_t cookie); + static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { return bdev->bd_disk->queue; /* this is never NULL */ -- cgit v1.2.3