diff options
author | Artur Paszkiewicz <artur.paszkiewicz@intel.com> | 2020-07-03 11:13:09 +0200 |
---|---|---|
committer | Song Liu <songliubraving@fb.com> | 2020-07-13 23:06:56 -0700 |
commit | 41d2d848e5c09209bdb57ff9c0ca34075e22783d (patch) | |
tree | e4c114e4a7d2c0381cd30ebc8eaf5caa118ecbb4 | |
parent | 9a5a85972c073f720d81a7ebd08bfe278e6e16db (diff) | |
download | linux-41d2d848e5c09209bdb57ff9c0ca34075e22783d.tar.bz2 |
md: improve io stats accounting
Use generic io accounting functions to manage io stats. There was an
attempt to do this earlier in commit 18c0b223cf99 ("md: use generic io
stats accounting functions to simplify io stat accounting"), but it did
not include a call to generic_end_io_acct() and caused issues with
tracking in-flight IOs, so it was later removed in commit 74672d069b29
("md: fix md io stats accounting broken").
This patch attempts to fix this by using both disk_start_io_acct() and
disk_end_io_acct(). To make it possible, a struct md_io is allocated for
every new md bio, which includes the io start_time. A new mempool is
introduced for this purpose. We override bio->bi_end_io with our own
callback and call disk_start_io_acct() before passing the bio to
md_handle_request(). When it completes, we call disk_end_io_acct() and
the original bi_end_io callback.
This adds correct statistics about in-flight IOs and IO processing time,
interpreted e.g. in iostat as await, svctm, aqu-sz and %util.
It also fixes a situation where too many IOs were reported if a bio was
re-submitted to the mddev, because io accounting is now performed only
on newly arriving bios.
Acked-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
-rw-r--r-- | drivers/md/md.c | 57 | ||||
-rw-r--r-- | drivers/md/md.h | 1 |
2 files changed, 46 insertions, 12 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 49452149ac72..07e5b67a2c48 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -463,12 +463,33 @@ check_suspended: } EXPORT_SYMBOL(md_handle_request); +struct md_io { + struct mddev *mddev; + bio_end_io_t *orig_bi_end_io; + void *orig_bi_private; + unsigned long start_time; +}; + +static void md_end_io(struct bio *bio) +{ + struct md_io *md_io = bio->bi_private; + struct mddev *mddev = md_io->mddev; + + disk_end_io_acct(mddev->gendisk, bio_op(bio), md_io->start_time); + + bio->bi_end_io = md_io->orig_bi_end_io; + bio->bi_private = md_io->orig_bi_private; + + mempool_free(md_io, &mddev->md_io_pool); + + if (bio->bi_end_io) + bio->bi_end_io(bio); +} + static blk_qc_t md_submit_bio(struct bio *bio) { const int rw = bio_data_dir(bio); - const int sgrp = op_stat_group(bio_op(bio)); struct mddev *mddev = bio->bi_disk->private_data; - unsigned int sectors; if (mddev == NULL || mddev->pers == NULL) { bio_io_error(bio); @@ -489,21 +510,27 @@ static blk_qc_t md_submit_bio(struct bio *bio) return BLK_QC_T_NONE; } - /* - * save the sectors now since our bio can - * go away inside make_request - */ - sectors = bio_sectors(bio); + if (bio->bi_end_io != md_end_io) { + struct md_io *md_io; + + md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO); + md_io->mddev = mddev; + md_io->orig_bi_end_io = bio->bi_end_io; + md_io->orig_bi_private = bio->bi_private; + + bio->bi_end_io = md_end_io; + bio->bi_private = md_io; + + md_io->start_time = disk_start_io_acct(mddev->gendisk, + bio_sectors(bio), + bio_op(bio)); + } + /* bio could be mergeable after passing to underlayer */ bio->bi_opf &= ~REQ_NOMERGE; md_handle_request(mddev, bio); - part_stat_lock(); - part_stat_inc(&mddev->gendisk->part0, ios[sgrp]); - part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors); - part_stat_unlock(); - return BLK_QC_T_NONE; } @@ -5546,6 +5573,7 @@ static void md_free(struct kobject *ko) bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); 
+ mempool_exit(&mddev->md_io_pool); kfree(mddev); } @@ -5641,6 +5669,11 @@ static int md_alloc(dev_t dev, char *name) */ mddev->hold_active = UNTIL_STOP; + error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE, + sizeof(struct md_io)); + if (error) + goto abort; + error = -ENOMEM; mddev->queue = blk_alloc_queue(NUMA_NO_NODE); if (!mddev->queue) diff --git a/drivers/md/md.h b/drivers/md/md.h index 612814d07d35..c26fa8bd41e7 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -481,6 +481,7 @@ struct mddev { struct bio_set sync_set; /* for sync operations like * metadata and bitmap writes */ + mempool_t md_io_pool; /* Generic flush handling. * The last to finish preflush schedules a worker to submit |