From 9f6dc633761006f974701d4c88da71ab68670749 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:02 -0500 Subject: dm: interlock pending dm_io and dm_wait_for_bios_completion Commit d208b89401e0 ("dm: fix mempool NULL pointer race when completing IO") didn't go far enough. When bio_end_io_acct ends the count of in-flight I/Os may reach zero and the DM device may be suspended. There is a possibility that the suspend races with dm_stats_account_io. Fix this by adding percpu "pending_io" counters to track outstanding dm_io. Move kicking of suspend queue to dm_io_dec_pending(). Also, rename md_in_flight_bios() to dm_in_flight_bios() and update it to iterate all pending_io counters. Fixes: d208b89401e0 ("dm: fix mempool NULL pointer race when completing IO") Cc: stable@vger.kernel.org Co-developed-by: Mikulas Patocka Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 2 ++ drivers/md/dm.c | 35 +++++++++++++++++++++++------------ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 72d18c3fbf1f..7ec2d75d9292 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -64,6 +64,8 @@ struct mapped_device { struct gendisk *disk; struct dax_device *dax_dev; + unsigned long __percpu *pending_io; + /* * A list of ios that arrived while we were suspended. 
*/ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ab9cc91931f9..18329b33e8cd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -508,10 +508,6 @@ static void end_io_acct(struct mapped_device *md, struct bio *bio, dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), true, duration, stats_aux); - - /* nudge anyone waiting on suspend queue */ - if (unlikely(wq_has_sleeper(&md->wait))) - wake_up(&md->wait); } static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) @@ -530,6 +526,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) io->magic = DM_IO_MAGIC; io->status = 0; atomic_set(&io->io_count, 1); + this_cpu_inc(*md->pending_io); io->orig_bio = bio; io->md = md; spin_lock_init(&io->endio_lock); @@ -827,6 +824,12 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) stats_aux = io->stats_aux; free_io(md, io); end_io_acct(md, bio, start_time, &stats_aux); + smp_wmb(); + this_cpu_dec(*md->pending_io); + + /* nudge anyone waiting on suspend queue */ + if (unlikely(wq_has_sleeper(&md->wait))) + wake_up(&md->wait); if (io_error == BLK_STS_DM_REQUEUE) return; @@ -1569,6 +1572,11 @@ static void cleanup_mapped_device(struct mapped_device *md) blk_cleanup_disk(md->disk); } + if (md->pending_io) { + free_percpu(md->pending_io); + md->pending_io = NULL; + } + cleanup_srcu_struct(&md->io_barrier); mutex_destroy(&md->suspend_lock); @@ -1671,6 +1679,10 @@ static struct mapped_device *alloc_dev(int minor) if (!md->wq) goto bad; + md->pending_io = alloc_percpu(unsigned long); + if (!md->pending_io) + goto bad; + dm_stats_init(&md->stats); /* Populate the mapping, nobody knows we exist yet */ @@ -2078,16 +2090,13 @@ void dm_put(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_put); -static bool md_in_flight_bios(struct mapped_device *md) +static bool dm_in_flight_bios(struct mapped_device *md) { int cpu; - struct block_device *part = dm_disk(md)->part0; - long sum = 0; + unsigned long 
sum = 0; - for_each_possible_cpu(cpu) { - sum += part_stat_local_read_cpu(part, in_flight[0], cpu); - sum += part_stat_local_read_cpu(part, in_flight[1], cpu); - } + for_each_possible_cpu(cpu) + sum += *per_cpu_ptr(md->pending_io, cpu); return sum != 0; } @@ -2100,7 +2109,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta while (true) { prepare_to_wait(&md->wait, &wait, task_state); - if (!md_in_flight_bios(md)) + if (!dm_in_flight_bios(md)) break; if (signal_pending_state(task_state, current)) { @@ -2112,6 +2121,8 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta } finish_wait(&md->wait, &wait); + smp_rmb(); + return r; } -- cgit v1.2.3 From 8d394bc4adf588ca4a0650745167cb83f86c18c9 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:39:57 -0500 Subject: dm: fix double accounting of flush with data DM handles a flush with data by first issuing an empty flush and then once it completes the REQ_PREFLUSH flag is removed and the payload is issued. The problem fixed by this commit is that both the empty flush bio and the data payload will account the full extent of the data payload. Fix this by factoring out dm_io_acct() and having it wrap all IO accounting to set the size of bio with REQ_PREFLUSH to 0, account the IO, and then restore the original size. 
Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-stats.c | 6 ++++-- drivers/md/dm-stats.h | 2 +- drivers/md/dm.c | 47 +++++++++++++++++++++++++++++++++-------------- 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 35d368c418d0..80328f1d1083 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -621,13 +621,14 @@ static void __dm_stat_bio(struct dm_stat *s, int bi_rw, void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, sector_t bi_sector, unsigned bi_sectors, bool end, - unsigned long duration_jiffies, + unsigned long start_time, struct dm_stats_aux *stats_aux) { struct dm_stat *s; sector_t end_sector; struct dm_stats_last_position *last; bool got_precise_time; + unsigned long duration_jiffies = 0; if (unlikely(!bi_sectors)) return; @@ -647,7 +648,8 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, )); WRITE_ONCE(last->last_sector, end_sector); WRITE_ONCE(last->last_rw, bi_rw); - } + } else + duration_jiffies = jiffies - start_time; rcu_read_lock(); diff --git a/drivers/md/dm-stats.h b/drivers/md/dm-stats.h index 2ddfae678f32..09fa579e2e24 100644 --- a/drivers/md/dm-stats.h +++ b/drivers/md/dm-stats.h @@ -32,7 +32,7 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv, void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, sector_t bi_sector, unsigned bi_sectors, bool end, - unsigned long duration_jiffies, + unsigned long start_time, struct dm_stats_aux *aux); static inline bool dm_stats_used(struct dm_stats *st) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 18329b33e8cd..c72a271ea999 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -485,29 +485,48 @@ u64 dm_start_time_ns_from_clone(struct bio *bio) } EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone); -static void start_io_acct(struct dm_io *io) +static bool bio_is_flush_with_data(struct bio *bio) { - struct mapped_device *md 
= io->md; - struct bio *bio = io->orig_bio; + return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size); +} + +static void dm_io_acct(bool end, struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) +{ + bool is_flush_with_data; + unsigned int bi_size; + + /* If REQ_PREFLUSH set save any payload but do not account it */ + is_flush_with_data = bio_is_flush_with_data(bio); + if (is_flush_with_data) { + bi_size = bio->bi_iter.bi_size; + bio->bi_iter.bi_size = 0; + } + + if (!end) + bio_start_io_acct_time(bio, start_time); + else + bio_end_io_acct(bio, start_time); - bio_start_io_acct_time(bio, io->start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), - false, 0, &io->stats_aux); + end, start_time, stats_aux); + + /* Restore bio's payload so it does get accounted upon requeue */ + if (is_flush_with_data) + bio->bi_iter.bi_size = bi_size; +} + +static void start_io_acct(struct dm_io *io) +{ + dm_io_acct(false, io->md, io->orig_bio, io->start_time, &io->stats_aux); } static void end_io_acct(struct mapped_device *md, struct bio *bio, unsigned long start_time, struct dm_stats_aux *stats_aux) { - unsigned long duration = jiffies - start_time; - - bio_end_io_acct(bio, start_time); - - if (unlikely(dm_stats_used(&md->stats))) - dm_stats_account_io(&md->stats, bio_data_dir(bio), - bio->bi_iter.bi_sector, bio_sectors(bio), - true, duration, stats_aux); + dm_io_acct(true, md, bio, start_time, stats_aux); } static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) @@ -834,7 +853,7 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) if (io_error == BLK_STS_DM_REQUEUE) return; - if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) { + if (bio_is_flush_with_data(bio)) { /* * Preflush done for flush with data, reissue * without REQ_PREFLUSH. 
-- cgit v1.2.3 From 0cdb90f0f306384ecbc60dfd6dc48cdbc1f2d0d8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:39:59 -0500 Subject: dm stats: fix too short end duration_ns when using precise_timestamps dm_stats_account_io()'s STAT_PRECISE_TIMESTAMPS support doesn't handle the fact that with commit b879f915bc48 ("dm: properly fix redundant bio-based IO accounting") io->start_time _may_ be in the past (meaning the start_io_acct() was deferred until later). Add a new dm_stats_recalc_precise_timestamps() helper that will set/clear a new 'precise_timestamps' flag in the dm_stats struct based on whether any configured stats enable STAT_PRECISE_TIMESTAMPS. And update DM core's alloc_io() to use dm_stats_record_start() to set stats_aux.duration_ns if stats->precise_timestamps is true. Also, remove unused 'last_sector' and 'last_rw' members from the dm_stats struct. Fixes: b879f915bc48 ("dm: properly fix redundant bio-based IO accounting") Cc: stable@vger.kernel.org Co-developed-by: Mikulas Patocka Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-stats.c | 28 +++++++++++++++++++++++++--- drivers/md/dm-stats.h | 9 +++++++-- drivers/md/dm.c | 2 ++ 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 80328f1d1083..0e039a8c0bf2 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -195,6 +195,7 @@ void dm_stats_init(struct dm_stats *stats) mutex_init(&stats->mutex); INIT_LIST_HEAD(&stats->list); + stats->precise_timestamps = false; stats->last = alloc_percpu(struct dm_stats_last_position); for_each_possible_cpu(cpu) { last = per_cpu_ptr(stats->last, cpu); @@ -231,6 +232,22 @@ void dm_stats_cleanup(struct dm_stats *stats) mutex_destroy(&stats->mutex); } +static void dm_stats_recalc_precise_timestamps(struct dm_stats *stats) +{ + struct list_head *l; + struct dm_stat *tmp_s; + bool precise_timestamps = false; + + list_for_each(l, &stats->list) { + tmp_s = 
container_of(l, struct dm_stat, list_entry); + if (tmp_s->stat_flags & STAT_PRECISE_TIMESTAMPS) { + precise_timestamps = true; + break; + } + } + stats->precise_timestamps = precise_timestamps; +} + static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, sector_t step, unsigned stat_flags, unsigned n_histogram_entries, @@ -376,6 +393,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, } ret_id = s->id; list_add_tail_rcu(&s->list_entry, l); + + dm_stats_recalc_precise_timestamps(stats); + mutex_unlock(&stats->mutex); resume_callback(md); @@ -418,6 +438,9 @@ static int dm_stats_delete(struct dm_stats *stats, int id) } list_del_rcu(&s->list_entry); + + dm_stats_recalc_precise_timestamps(stats); + mutex_unlock(&stats->mutex); /* @@ -656,9 +679,8 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, got_precise_time = false; list_for_each_entry_rcu(s, &stats->list, list_entry) { if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) { - if (!end) - stats_aux->duration_ns = ktime_to_ns(ktime_get()); - else + /* start (!end) duration_ns is set by DM core's alloc_io() */ + if (end) stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns; got_precise_time = true; } diff --git a/drivers/md/dm-stats.h b/drivers/md/dm-stats.h index 09fa579e2e24..09c81a1ec057 100644 --- a/drivers/md/dm-stats.h +++ b/drivers/md/dm-stats.h @@ -13,8 +13,7 @@ struct dm_stats { struct mutex mutex; struct list_head list; /* list of struct dm_stat */ struct dm_stats_last_position __percpu *last; - sector_t last_sector; - unsigned last_rw; + bool precise_timestamps; }; struct dm_stats_aux { @@ -40,4 +39,10 @@ static inline bool dm_stats_used(struct dm_stats *st) return !list_empty(&st->list); } +static inline void dm_stats_record_start(struct dm_stats *stats, struct dm_stats_aux *aux) +{ + if (unlikely(stats->precise_timestamps)) + aux->duration_ns = ktime_to_ns(ktime_get()); +} + #endif diff 
--git a/drivers/md/dm.c b/drivers/md/dm.c index c72a271ea999..3ae76900023b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -552,6 +552,8 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) io->start_time = jiffies; + dm_stats_record_start(&md->stats, &io->stats_aux); + return io; } -- cgit v1.2.3 From 0ab30b4079e103b517bdfc4331507bf8d8dd8f7b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sun, 20 Feb 2022 12:57:11 -0500 Subject: dm: eliminate copying of dm_io fields in dm_io_dec_pending There is no need for dm_io_dec_pending() to copy dm_io fields anymore now that DM provides its own pending_io counters again. The race documented in commit d208b89401e0 ("dm: fix mempool NULL pointer race when completing IO") no longer exists now that block core's in_flight counters aren't used to signal all dm_io is complete. Also, rename {start,end}_io_acct to dm_{start,end}_io_acct. Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3ae76900023b..33376e679ef5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -518,15 +518,14 @@ static void dm_io_acct(bool end, struct mapped_device *md, struct bio *bio, bio->bi_iter.bi_size = bi_size; } -static void start_io_acct(struct dm_io *io) +static void dm_start_io_acct(struct dm_io *io) { dm_io_acct(false, io->md, io->orig_bio, io->start_time, &io->stats_aux); } -static void end_io_acct(struct mapped_device *md, struct bio *bio, - unsigned long start_time, struct dm_stats_aux *stats_aux) +static void dm_end_io_acct(struct dm_io *io) { - dm_io_acct(true, md, bio, start_time, stats_aux); + dm_io_acct(true, io->md, io->orig_bio, io->start_time, &io->stats_aux); } static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) @@ -808,8 +807,6 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) blk_status_t io_error; struct bio *bio; struct mapped_device *md = 
io->md; - unsigned long start_time = 0; - struct dm_stats_aux stats_aux; /* Push-back supersedes any I/O errors */ if (unlikely(error)) { @@ -841,10 +838,8 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) } io_error = io->status; - start_time = io->start_time; - stats_aux = io->stats_aux; + dm_end_io_acct(io); free_io(md, io); - end_io_acct(md, bio, start_time, &stats_aux); smp_wmb(); this_cpu_dec(*md->pending_io); @@ -1444,8 +1439,8 @@ static void __split_and_process_bio(struct mapped_device *md, * so that it gets handled *after* bios already submitted * have been completely processed. * We take a clone of the original to store in - * ci.io->orig_bio to be used by end_io_acct() and - * for dec_pending to use for completion handling. + * ci.io->orig_bio to be used by dm_end_io_acct() and + * for dm_io_dec_pending() to use for completion handling. */ struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count, GFP_NOIO, &md->queue->bio_split); @@ -1456,7 +1451,7 @@ static void __split_and_process_bio(struct mapped_device *md, submit_bio_noacct(bio); } } - start_io_acct(ci.io); + dm_start_io_acct(ci.io); /* drop the extra reference count */ dm_io_dec_pending(ci.io, errno_to_blk_status(error)); -- cgit v1.2.3 From 205649d84cdbfb64c4a86c497f0555221b67c160 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:04 -0500 Subject: dm: reorder members in mapped_device struct Improves alignment and groups related members relative to cachelines. 
Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 7ec2d75d9292..253b6bb5a0fc 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -64,13 +64,21 @@ struct mapped_device { struct gendisk *disk; struct dax_device *dax_dev; + wait_queue_head_t wait; unsigned long __percpu *pending_io; + /* forced geometry settings */ + struct hd_geometry geometry; + + /* + * Processing queue (flush) + */ + struct workqueue_struct *wq; + /* * A list of ios that arrived while we were suspended. */ struct work_struct work; - wait_queue_head_t wait; spinlock_t deferred_lock; struct bio_list deferred; @@ -85,36 +93,28 @@ struct mapped_device { struct list_head uevent_list; spinlock_t uevent_lock; /* Protect access to uevent_list */ + /* for blk-mq request-based DM support */ + bool init_tio_pdu:1; + struct blk_mq_tag_set *tag_set; + + struct dm_stats stats; + /* the number of internal suspends */ unsigned internal_suspend_count; + int swap_bios; + struct semaphore swap_bios_semaphore; + struct mutex swap_bios_lock; + /* * io objects are allocated from here. */ struct bio_set io_bs; struct bio_set bs; - /* - * Processing queue (flush) - */ - struct workqueue_struct *wq; - - /* forced geometry settings */ - struct hd_geometry geometry; - /* kobject and completion */ struct dm_kobject_holder kobj_holder; - int swap_bios; - struct semaphore swap_bios_semaphore; - struct mutex swap_bios_lock; - - struct dm_stats stats; - - /* for blk-mq request-based DM support */ - struct blk_mq_tag_set *tag_set; - bool init_tio_pdu:1; - struct srcu_struct io_barrier; #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From 96c9865cb6dd068a74f844e1c14114b7e676f727 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:07 -0500 Subject: dm: rename split functions Rename __split_and_process_bio to dm_split_and_process_bio. 
Rename __split_and_process_non_flush to __split_and_process_bio. Also fix a stale comment and whitespace. Reviewed-by: Christoph Hellwig Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 33376e679ef5..7d1bf59c4663 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1378,7 +1378,7 @@ static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, /* * Select the correct strategy for processing a non-flush bio. */ -static int __split_and_process_non_flush(struct clone_info *ci) +static int __split_and_process_bio(struct clone_info *ci) { struct dm_target *ti; unsigned len; @@ -1414,8 +1414,8 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md, /* * Entry point to split a bio into clones and submit them to the targets. */ -static void __split_and_process_bio(struct mapped_device *md, - struct dm_table *map, struct bio *bio) +static void dm_split_and_process_bio(struct mapped_device *md, + struct dm_table *map, struct bio *bio) { struct clone_info ci; int error = 0; @@ -1428,19 +1428,19 @@ static void __split_and_process_bio(struct mapped_device *md, } else if (op_is_zone_mgmt(bio_op(bio))) { ci.bio = bio; ci.sector_count = 0; - error = __split_and_process_non_flush(&ci); + error = __split_and_process_bio(&ci); } else { ci.bio = bio; ci.sector_count = bio_sectors(bio); - error = __split_and_process_non_flush(&ci); + error = __split_and_process_bio(&ci); if (ci.sector_count && !error) { /* * Remainder must be passed to submit_bio_noacct() * so that it gets handled *after* bios already submitted * have been completely processed. * We take a clone of the original to store in - * ci.io->orig_bio to be used by dm_end_io_acct() and - * for dm_io_dec_pending() to use for completion handling. 
+ * ci.io->orig_bio to be used by dm_end_io_acct() and for + * dm_io_dec_pending() to use for completion handling. */ struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count, GFP_NOIO, &md->queue->bio_split); @@ -1489,7 +1489,7 @@ static void dm_submit_bio(struct bio *bio) if (is_abnormal_io(bio)) blk_queue_split(&bio); - __split_and_process_bio(md, map, bio); + dm_split_and_process_bio(md, map, bio); out: dm_put_live_table(md, srcu_idx); } @@ -2310,11 +2310,11 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, /* * Here we must make sure that no processes are submitting requests * to target drivers i.e. no one may be executing - * __split_and_process_bio from dm_submit_bio. + * dm_split_and_process_bio from dm_submit_bio. * - * To get all processes out of __split_and_process_bio in dm_submit_bio, + * To get all processes out of dm_split_and_process_bio in dm_submit_bio, * we take the write lock. To prevent any process from reentering - * __split_and_process_bio from dm_submit_bio and quiesce the thread + * dm_split_and_process_bio from dm_submit_bio and quiesce the thread * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call * flush_workqueue(md->wq). */ -- cgit v1.2.3 From 66bdaa4302d3b58d1f8f2afcdd424addc9a80e3c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:09 -0500 Subject: dm: fold __clone_and_map_data_bio into __split_and_process_bio Fold __clone_and_map_data_bio into its only caller. 
Reviewed-by: Christoph Hellwig Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7d1bf59c4663..bed236d604f8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1207,25 +1207,6 @@ static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len) bio->bi_iter.bi_size = to_bytes(len); } -/* - * Creates a bio that consists of range of complete bvecs. - */ -static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, - sector_t sector, unsigned *len) -{ - struct bio *bio = ci->bio, *clone; - - clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); - bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); - clone->bi_iter.bi_size = to_bytes(*len); - - if (bio_integrity(bio)) - bio_integrity_trim(clone); - - __map_bio(clone); - return 0; -} - static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, struct dm_target *ti, unsigned num_bios, unsigned *len) @@ -1380,6 +1361,7 @@ static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, */ static int __split_and_process_bio(struct clone_info *ci) { + struct bio *clone; struct dm_target *ti; unsigned len; int r; @@ -1393,9 +1375,13 @@ static int __split_and_process_bio(struct clone_info *ci) len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); - r = __clone_and_map_data_bio(ci, ti, ci->sector, &len); - if (r < 0) - return r; + clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO); + bio_advance(clone, to_bytes(ci->sector - clone->bi_iter.bi_sector)); + clone->bi_iter.bi_size = to_bytes(len); + if (bio_integrity(clone)) + bio_integrity_trim(clone); + + __map_bio(clone); ci->sector += len; ci->sector_count -= len; -- cgit v1.2.3 From d41e077ab6ea3c21ecd844498ff1a3a9abe3c731 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:11 -0500 Subject: dm: refactor 
dm_split_and_process_bio a bit Remove needless branching and indentation. Leaves code to catch malformed op_is_zone_mgmt bios (they shouldn't have a payload). Reviewed-by: Christoph Hellwig Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 54 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index bed236d604f8..c01bfa3b6544 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1394,7 +1394,13 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md, { ci->map = map; ci->io = alloc_io(md, bio); + ci->bio = bio; ci->sector = bio->bi_iter.bi_sector; + ci->sector_count = bio_sectors(bio); + + /* Shouldn't happen but sector_count was being set to 0 so... */ + if (WARN_ON_ONCE(op_is_zone_mgmt(bio_op(bio)) && ci->sector_count)) + ci->sector_count = 0; } /* @@ -1404,6 +1410,7 @@ static void dm_split_and_process_bio(struct mapped_device *md, struct dm_table *map, struct bio *bio) { struct clone_info ci; + struct bio *b; int error = 0; init_clone_info(&ci, md, map, bio); @@ -1411,34 +1418,29 @@ static void dm_split_and_process_bio(struct mapped_device *md, if (bio->bi_opf & REQ_PREFLUSH) { error = __send_empty_flush(&ci); /* dm_io_dec_pending submits any data associated with flush */ - } else if (op_is_zone_mgmt(bio_op(bio))) { - ci.bio = bio; - ci.sector_count = 0; - error = __split_and_process_bio(&ci); - } else { - ci.bio = bio; - ci.sector_count = bio_sectors(bio); - error = __split_and_process_bio(&ci); - if (ci.sector_count && !error) { - /* - * Remainder must be passed to submit_bio_noacct() - * so that it gets handled *after* bios already submitted - * have been completely processed. - * We take a clone of the original to store in - * ci.io->orig_bio to be used by dm_end_io_acct() and for - * dm_io_dec_pending() to use for completion handling. 
- */ - struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count, - GFP_NOIO, &md->queue->bio_split); - ci.io->orig_bio = b; - - bio_chain(b, bio); - trace_block_split(b, bio->bi_iter.bi_sector); - submit_bio_noacct(bio); - } + goto out; } - dm_start_io_acct(ci.io); + error = __split_and_process_bio(&ci); + if (error || !ci.sector_count) + goto out; + + /* + * Remainder must be passed to submit_bio_noacct() so it gets handled + * *after* bios already submitted have been completely processed. + * We take a clone of the original to store in ci.io->orig_bio to be + * used by dm_end_io_acct() and for dm_io_dec_pending() to use for + * completion handling. + */ + b = bio_split(bio, bio_sectors(bio) - ci.sector_count, + GFP_NOIO, &md->queue->bio_split); + ci.io->orig_bio = b; + + bio_chain(b, bio); + trace_block_split(b, bio->bi_iter.bi_sector); + submit_bio_noacct(bio); +out: + dm_start_io_acct(ci.io); /* drop the extra reference count */ dm_io_dec_pending(ci.io, errno_to_blk_status(error)); } -- cgit v1.2.3 From 90a2326edede9e801e45c24b3c71f3092b279b66 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:14 -0500 Subject: dm: reduce code duplication in __map_bio Error path code (for handling DM_MAPIO_REQUEUE and DM_MAPIO_KILL) is effectively identical. 
Reviewed-by: Christoph Hellwig Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c01bfa3b6544..fb129fe94a0d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1180,20 +1180,14 @@ static void __map_bio(struct bio *clone) submit_bio_noacct(clone); break; case DM_MAPIO_KILL: - if (unlikely(swap_bios_limit(ti, clone))) { - struct mapped_device *md = io->md; - up(&md->swap_bios_semaphore); - } - free_tio(clone); - dm_io_dec_pending(io, BLK_STS_IOERR); - break; case DM_MAPIO_REQUEUE: - if (unlikely(swap_bios_limit(ti, clone))) { - struct mapped_device *md = io->md; - up(&md->swap_bios_semaphore); - } + if (unlikely(swap_bios_limit(ti, clone))) + up(&io->md->swap_bios_semaphore); free_tio(clone); - dm_io_dec_pending(io, BLK_STS_DM_REQUEUE); + if (r == DM_MAPIO_KILL) + dm_io_dec_pending(io, BLK_STS_IOERR); + else + dm_io_dec_pending(io, BLK_STS_DM_REQUEUE); break; default: DMWARN("unimplemented target map return value: %d", r); -- cgit v1.2.3 From 5b27b8ddbf37962fae8131cf595231b1be2c2b27 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:16 -0500 Subject: dm: remove impossible BUG_ON in __send_empty_flush The flush_bio in question was just initialized to be empty, so there is no way bio_has_data() will return true. So remove stale BUG_ON(). 
Reviewed-by: Christoph Hellwig Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fb129fe94a0d..c7246b00d363 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1274,7 +1274,6 @@ static int __send_empty_flush(struct clone_info *ci) ci->bio = &flush_bio; ci->sector_count = 0; - BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); -- cgit v1.2.3 From 0119ab14c31587198daed671a5bf949df2c14552 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:18 -0500 Subject: dm: remove unused mapped_device argument from free_tio Reviewed-by: Christoph Hellwig Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c7246b00d363..b8cdd8c78c0e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -556,7 +556,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) return io; } -static void free_io(struct mapped_device *md, struct dm_io *io) +static void free_io(struct dm_io *io) { bio_put(&io->tio.clone); } @@ -839,7 +839,7 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) io_error = io->status; dm_end_io_acct(io); - free_io(md, io); + free_io(io); smp_wmb(); this_cpu_dec(*md->pending_io); -- cgit v1.2.3 From 77c11720a48cbbd5ee1071e8540d0aa0f26d98f4 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:21 -0500 Subject: dm: remove legacy code only needed before submit_bio recursion Commit 8615cb65bd63 ("dm: remove useless loop in __split_and_process_bio") showcased that we no longer loop. Remove the bio_advance() in __split_and_process_bio() that was only needed when looping was possible. 
Similarly there is no need to advance the bio, using ci->sector cursor, in __send_duplicate_bios(). Reviewed-by: Christoph Hellwig Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b8cdd8c78c0e..280c17ad5b1e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1195,12 +1195,6 @@ static void __map_bio(struct bio *clone) } } -static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len) -{ - bio->bi_iter.bi_sector = sector; - bio->bi_iter.bi_size = to_bytes(len); -} - static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, struct dm_target *ti, unsigned num_bios, unsigned *len) @@ -1243,14 +1237,14 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, case 1: clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); if (len) - bio_setup_sector(clone, ci->sector, *len); + clone->bi_iter.bi_size = to_bytes(*len); __map_bio(clone); break; default: alloc_multiple_bios(&blist, ci, ti, num_bios, len); while ((clone = bio_list_pop(&blist))) { if (len) - bio_setup_sector(clone, ci->sector, *len); + clone->bi_iter.bi_size = to_bytes(*len); __map_bio(clone); } break; @@ -1369,7 +1363,6 @@ static int __split_and_process_bio(struct clone_info *ci) len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO); - bio_advance(clone, to_bytes(ci->sector - clone->bi_iter.bi_sector)); clone->bi_iter.bi_size = to_bytes(len); if (bio_integrity(clone)) bio_integrity_trim(clone); -- cgit v1.2.3 From 743598f049df473869138d599d6ad3b7c740a73f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:23 -0500 Subject: dm: record old_sector in dm_target_io before calling map function Prep for being able to defer trace_block_bio_remap() until when the bio is remapped and submitted by the DM target. 
Reviewed-by: Christoph Hellwig Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 1 + drivers/md/dm.c | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 253b6bb5a0fc..e2128d176831 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -216,6 +216,7 @@ struct dm_target_io { unsigned int target_bio_nr; unsigned int *len_ptr; bool inside_dm_io; + sector_t old_sector; struct bio clone; }; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 280c17ad5b1e..08689b3e501a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -584,6 +584,7 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, tio->ti = ti; tio->target_bio_nr = target_bio_nr; tio->len_ptr = len; + tio->old_sector = 0; return &tio->clone; } @@ -1139,7 +1140,6 @@ static void __map_bio(struct bio *clone) { struct dm_target_io *tio = clone_to_tio(clone); int r; - sector_t sector; struct dm_io *io = tio->io; struct dm_target *ti = tio->ti; @@ -1151,7 +1151,7 @@ static void __map_bio(struct bio *clone) * this io. 
*/ dm_io_inc_pending(io); - sector = clone->bi_iter.bi_sector; + tio->old_sector = clone->bi_iter.bi_sector; if (unlikely(swap_bios_limit(ti, clone))) { struct mapped_device *md = io->md; @@ -1176,7 +1176,8 @@ static void __map_bio(struct bio *clone) break; case DM_MAPIO_REMAPPED: /* the bio has been remapped so dispatch it */ - trace_block_bio_remap(clone, bio_dev(io->orig_bio), sector); + trace_block_bio_remap(clone, bio_dev(io->orig_bio), + tio->old_sector); submit_bio_noacct(clone); break; case DM_MAPIO_KILL: -- cgit v1.2.3 From 018b05ebbff4f3ed611e950fe5f8760d2348b814 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:25 -0500 Subject: dm: move duplicate code from callers of alloc_tio into alloc_tio Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 08689b3e501a..ec230d3a99da 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -565,13 +565,16 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask) { struct dm_target_io *tio; + struct bio *clone; if (!ci->io->tio.io) { /* the dm_target_io embedded in ci->io is available */ tio = &ci->io->tio; + /* alloc_io() already initialized embedded clone */ + clone = &tio->clone; } else { - struct bio *clone = bio_alloc_clone(ci->bio->bi_bdev, ci->bio, - gfp_mask, &ci->io->md->bs); + clone = bio_alloc_clone(ci->bio->bi_bdev, ci->bio, + gfp_mask, &ci->io->md->bs); if (!clone) return NULL; @@ -586,7 +589,13 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, tio->len_ptr = len; tio->old_sector = 0; - return &tio->clone; + if (len) { + clone->bi_iter.bi_size = to_bytes(*len); + if (bio_integrity(clone)) + bio_integrity_trim(clone); + } + + return clone; } static void free_tio(struct 
bio *clone) @@ -1237,17 +1246,12 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, break; case 1: clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); - if (len) - clone->bi_iter.bi_size = to_bytes(*len); __map_bio(clone); break; default: alloc_multiple_bios(&blist, ci, ti, num_bios, len); - while ((clone = bio_list_pop(&blist))) { - if (len) - clone->bi_iter.bi_size = to_bytes(*len); + while ((clone = bio_list_pop(&blist))) __map_bio(clone); - } break; } } @@ -1362,12 +1366,7 @@ static int __split_and_process_bio(struct clone_info *ci) return r; len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); - clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO); - clone->bi_iter.bi_size = to_bytes(len); - if (bio_integrity(clone)) - bio_integrity_trim(clone); - __map_bio(clone); ci->sector += len; -- cgit v1.2.3 From 300432f58b99449076f8a662504bb74cb452efcf Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:28 -0500 Subject: dm: reduce dm_io and dm_target_io struct sizes Remove one 4 byte hole in dm_io struct. Remove two 4 byte holes in dm_target_io struct. 
Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index e2128d176831..a86b5dacef0e 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -211,9 +211,9 @@ struct dm_table { #define DM_TIO_MAGIC 7282014 struct dm_target_io { unsigned int magic; + unsigned int target_bio_nr; struct dm_io *io; struct dm_target *ti; - unsigned int target_bio_nr; unsigned int *len_ptr; bool inside_dm_io; sector_t old_sector; @@ -227,10 +227,10 @@ struct dm_target_io { #define DM_IO_MAGIC 5191977 struct dm_io { unsigned int magic; - struct mapped_device *md; - blk_status_t status; atomic_t io_count; + struct mapped_device *md; struct bio *orig_bio; + blk_status_t status; unsigned long start_time; spinlock_t endio_lock; struct dm_stats_aux stats_aux; -- cgit v1.2.3 From e6fc9f62ce6e412acb1699a5373174aa42ca2bd3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:30 -0500 Subject: dm: flag clones created by __send_duplicate_bios Formally disallow dm_accept_partial_bio() on clones created by __send_duplicate_bios() because their len_ptr points to a shared unsigned int. __send_duplicate_bios() is only used for flush bios and other "abnormal" bios (discards, writezeroes, etc). And dm_accept_partial_bio() already didn't support flush bios. Also refactor __send_changing_extent_only() to reflect it cannot fail. As such __send_changing_extent_only() can update the clone_info before __send_duplicate_bios() is called to fan-out __map_bio() calls. 
Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 3 ++- drivers/md/dm.c | 48 ++++++++++++++++++++++++++++-------------------- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index a86b5dacef0e..33ef92e90462 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -215,7 +215,8 @@ struct dm_target_io { struct dm_io *io; struct dm_target *ti; unsigned int *len_ptr; - bool inside_dm_io; + bool inside_dm_io:1; + bool is_duplicate_bio:1; sector_t old_sector; struct bio clone; }; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ec230d3a99da..d1c618c3f6c6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -586,6 +586,7 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, tio->io = ci->io; tio->ti = ti; tio->target_bio_nr = target_bio_nr; + tio->is_duplicate_bio = false; tio->len_ptr = len; tio->old_sector = 0; @@ -1087,7 +1088,8 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, /* * A target may call dm_accept_partial_bio only from the map routine. It is * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management - * operations and REQ_OP_ZONE_APPEND (zone append writes). + * operations, REQ_OP_ZONE_APPEND (zone append writes) and any bio serviced by + * __send_duplicate_bios(). 
* * dm_accept_partial_bio informs the dm that the target only wants to process * additional n_sectors sectors of the bio and the rest of the data should be @@ -1118,7 +1120,7 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) struct dm_target_io *tio = clone_to_tio(bio); unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT; - BUG_ON(bio->bi_opf & REQ_PREFLUSH); + BUG_ON(tio->is_duplicate_bio); BUG_ON(op_is_zone_mgmt(bio_op(bio))); BUG_ON(bio_op(bio) == REQ_OP_ZONE_APPEND); BUG_ON(bi_size > *tio->len_ptr); @@ -1246,12 +1248,15 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, break; case 1: clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); + clone_to_tio(clone)->is_duplicate_bio = true; __map_bio(clone); break; default: alloc_multiple_bios(&blist, ci, ti, num_bios, len); - while ((clone = bio_list_pop(&blist))) + while ((clone = bio_list_pop(&blist))) { + clone_to_tio(clone)->is_duplicate_bio = true; __map_bio(clone); + } break; } } @@ -1280,29 +1285,22 @@ static int __send_empty_flush(struct clone_info *ci) return 0; } -static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, - unsigned num_bios) +static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, + unsigned num_bios) { unsigned len; - /* - * Even though the device advertised support for this type of - * request, that does not mean every target supports it, and - * reconfiguration might also have changed that since the - * check was performed. - */ - if (!num_bios) - return -EOPNOTSUPP; - len = min_t(sector_t, ci->sector_count, max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector))); - __send_duplicate_bios(ci, ti, num_bios, &len); - + /* + * dm_accept_partial_bio cannot be used with duplicate bios, + * so update clone_info cursor before __send_duplicate_bios(). 
+ */ ci->sector += len; ci->sector_count -= len; - return 0; + __send_duplicate_bios(ci, ti, num_bios, &len); } static bool is_abnormal_io(struct bio *bio) @@ -1324,10 +1322,9 @@ static bool is_abnormal_io(struct bio *bio) static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, int *result) { - struct bio *bio = ci->bio; unsigned num_bios = 0; - switch (bio_op(bio)) { + switch (bio_op(ci->bio)) { case REQ_OP_DISCARD: num_bios = ti->num_discard_bios; break; @@ -1344,7 +1341,18 @@ static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, return false; } - *result = __send_changing_extent_only(ci, ti, num_bios); + /* + * Even though the device advertised support for this type of + * request, that does not mean every target supports it, and + * reconfiguration might also have changed that since the + * check was performed. + */ + if (!num_bios) + *result = -EOPNOTSUPP; + else { + __send_changing_extent_only(ci, ti, num_bios); + *result = 0; + } return true; } -- cgit v1.2.3 From 0fbb4d93b38bce1f8235aacfa37e90ad8f011473 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:32 -0500 Subject: dm: add dm_submit_bio_remap interface Where possible, switch from early bio-based IO accounting (at the time DM clones each incoming bio) to late IO accounting just before each remapped bio is issued to underlying device via submit_bio_noacct(). Allows more precise bio-based IO accounting for DM targets that use their own workqueues to perform additional processing of each bio in conjunction with their DM_MAPIO_SUBMITTED return from their map function. When a target is updated to use dm_submit_bio_remap() they must also set ti->accounts_remapped_io to true. Use xchg() in start_io_acct(), as suggested by Mikulas, to ensure each IO is only started once. The xchg race only happens if __send_duplicate_bios() sends multiple bios -- that case is reflected via tio->is_duplicate_bio. 
Given the niche nature of this race, it is best to avoid any xchg performance penalty for normal IO. For IO that was never submitted with dm_bio_submit_remap(), but the target completes the clone with bio_endio, accounting is started then ended and pending_io counter decremented. Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 2 + drivers/md/dm.c | 127 +++++++++++++++++++++++++++++++++++------- include/linux/device-mapper.h | 7 +++ include/uapi/linux/dm-ioctl.h | 4 +- 4 files changed, 118 insertions(+), 22 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 33ef92e90462..8078b6c155ef 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -232,6 +232,8 @@ struct dm_io { struct mapped_device *md; struct bio *orig_bio; blk_status_t status; + bool start_io_acct:1; + int was_accounted; unsigned long start_time; spinlock_t endio_lock; struct dm_stats_aux stats_aux; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d1c618c3f6c6..082366d0ad49 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -518,14 +518,33 @@ static void dm_io_acct(bool end, struct mapped_device *md, struct bio *bio, bio->bi_iter.bi_size = bi_size; } -static void dm_start_io_acct(struct dm_io *io) +static void __dm_start_io_acct(struct dm_io *io, struct bio *bio) { - dm_io_acct(false, io->md, io->orig_bio, io->start_time, &io->stats_aux); + dm_io_acct(false, io->md, bio, io->start_time, &io->stats_aux); } -static void dm_end_io_acct(struct dm_io *io) +static void dm_start_io_acct(struct dm_io *io, struct bio *clone) { - dm_io_acct(true, io->md, io->orig_bio, io->start_time, &io->stats_aux); + /* Must account IO to DM device in terms of orig_bio */ + struct bio *bio = io->orig_bio; + + /* + * Ensure IO accounting is only ever started once. + * Expect no possibility for race unless is_duplicate_bio. 
+ */ + if (!clone || likely(!clone_to_tio(clone)->is_duplicate_bio)) { + if (WARN_ON(io->was_accounted)) + return; + io->was_accounted = 1; + } else if (xchg(&io->was_accounted, 1) == 1) + return; + + __dm_start_io_acct(io, bio); +} + +static void dm_end_io_acct(struct dm_io *io, struct bio *bio) +{ + dm_io_acct(true, io->md, bio, io->start_time, &io->stats_aux); } static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) @@ -545,11 +564,13 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) io->status = 0; atomic_set(&io->io_count, 1); this_cpu_inc(*md->pending_io); - io->orig_bio = bio; + io->orig_bio = NULL; io->md = md; spin_lock_init(&io->endio_lock); io->start_time = jiffies; + io->start_io_acct = false; + io->was_accounted = 0; dm_stats_record_start(&md->stats, &io->stats_aux); @@ -849,7 +870,16 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) } io_error = io->status; - dm_end_io_acct(io); + if (io->was_accounted) + dm_end_io_acct(io, bio); + else if (!io_error) { + /* + * Must handle target that DM_MAPIO_SUBMITTED only to + * then bio_endio() rather than dm_submit_bio_remap() + */ + __dm_start_io_acct(io, bio); + dm_end_io_acct(io, bio); + } free_io(io); smp_wmb(); this_cpu_dec(*md->pending_io); @@ -1131,6 +1161,56 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) } EXPORT_SYMBOL_GPL(dm_accept_partial_bio); +static inline void __dm_submit_bio_remap(struct bio *clone, + dev_t dev, sector_t old_sector) +{ + trace_block_bio_remap(clone, dev, old_sector); + submit_bio_noacct(clone); +} + +/* + * @clone: clone bio that DM core passed to target's .map function + * @tgt_clone: clone of @clone bio that target needs submitted + * @from_wq: caller is a workqueue thread managed by DM target + * + * Targets should use this interface to submit bios they take + * ownership of when returning DM_MAPIO_SUBMITTED. 
+ * + * Target should also enable ti->accounts_remapped_io + */ +void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone, + bool from_wq) +{ + struct dm_target_io *tio = clone_to_tio(clone); + struct dm_io *io = tio->io; + + /* establish bio that will get submitted */ + if (!tgt_clone) + tgt_clone = clone; + + /* + * Account io->origin_bio to DM dev on behalf of target + * that took ownership of IO with DM_MAPIO_SUBMITTED. + */ + if (!from_wq) { + /* Still in target's map function */ + io->start_io_acct = true; + } else { + /* + * Called by another thread, managed by DM target, + * wait for dm_split_and_process_bio() to store + * io->orig_bio + */ + while (unlikely(!smp_load_acquire(&io->orig_bio))) + msleep(1); + dm_start_io_acct(io, clone); + } + + __dm_submit_bio_remap(tgt_clone, disk_devt(io->md->disk), + tio->old_sector); +} +EXPORT_SYMBOL_GPL(dm_submit_bio_remap); + static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch) { mutex_lock(&md->swap_bios_lock); @@ -1157,9 +1237,7 @@ static void __map_bio(struct bio *clone) clone->bi_end_io = clone_endio; /* - * Map the clone. If r == 0 we don't need to do - * anything, the target has assumed ownership of - * this io. + * Map the clone. */ dm_io_inc_pending(io); tio->old_sector = clone->bi_iter.bi_sector; @@ -1184,12 +1262,18 @@ static void __map_bio(struct bio *clone) switch (r) { case DM_MAPIO_SUBMITTED: + /* target has assumed ownership of this io */ + if (!ti->accounts_remapped_io) + io->start_io_acct = true; break; case DM_MAPIO_REMAPPED: - /* the bio has been remapped so dispatch it */ - trace_block_bio_remap(clone, bio_dev(io->orig_bio), + /* + * the bio has been remapped so dispatch it, but defer + * dm_start_io_acct() until after possible bio_split(). 
+ */ + __dm_submit_bio_remap(clone, disk_devt(io->md->disk), tio->old_sector); - submit_bio_noacct(clone); + io->start_io_acct = true; break; case DM_MAPIO_KILL: case DM_MAPIO_REQUEUE: @@ -1404,7 +1488,7 @@ static void dm_split_and_process_bio(struct mapped_device *md, struct dm_table *map, struct bio *bio) { struct clone_info ci; - struct bio *b; + struct bio *orig_bio = NULL; int error = 0; init_clone_info(&ci, md, map, bio); @@ -1426,15 +1510,18 @@ static void dm_split_and_process_bio(struct mapped_device *md, * used by dm_end_io_acct() and for dm_io_dec_pending() to use for * completion handling. */ - b = bio_split(bio, bio_sectors(bio) - ci.sector_count, - GFP_NOIO, &md->queue->bio_split); - ci.io->orig_bio = b; - - bio_chain(b, bio); - trace_block_split(b, bio->bi_iter.bi_sector); + orig_bio = bio_split(bio, bio_sectors(bio) - ci.sector_count, + GFP_NOIO, &md->queue->bio_split); + bio_chain(orig_bio, bio); + trace_block_split(orig_bio, bio->bi_iter.bi_sector); submit_bio_noacct(bio); out: - dm_start_io_acct(ci.io); + if (!orig_bio) + orig_bio = bio; + smp_store_release(&ci.io->orig_bio, orig_bio); + if (ci.io->start_io_acct) + dm_start_io_acct(ci.io, NULL); + /* drop the extra reference count */ dm_io_dec_pending(ci.io, errno_to_blk_status(error)); } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index b26fecf6c8e8..7752d14d13f8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -362,6 +362,12 @@ struct dm_target { * zone append operations using regular writes. */ bool emulate_zone_append:1; + + /* + * Set if the target will submit IO using dm_submit_bio_remap() + * after returning DM_MAPIO_SUBMITTED from its map function. 
+ */ + bool accounts_remapped_io:1; }; void *dm_per_bio_data(struct bio *bio, size_t data_size); @@ -465,6 +471,7 @@ int dm_suspended(struct dm_target *ti); int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); +void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone, bool from_wq); union map_info *dm_get_rq_mapinfo(struct request *rq); #ifdef CONFIG_BLK_DEV_ZONED diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index c12ce30b52df..2e9550fef90f 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -286,9 +286,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 45 +#define DM_VERSION_MINOR 46 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2021-03-22)" +#define DM_VERSION_EXTRA "-ioctl (2022-02-22)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From e5524e128f6f80b7b8bb901465aded2518b44051 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:35 -0500 Subject: dm crypt: use dm_submit_bio_remap Care was taken to support kcryptd_io_read being called from crypt_map or workqueue. Use of an intermediate CRYPT_MAP_READ_GFP gfp_t (defined as GFP_NOWAIT) should protect from maintenance burden if that flag were to change for some reason. 
Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index a5006cb6ee8a..f310f2ba8bad 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1828,6 +1828,8 @@ static void crypt_endio(struct bio *clone) crypt_dec_pending(io); } +#define CRYPT_MAP_READ_GFP GFP_NOWAIT + static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) { struct crypt_config *cc = io->cc; @@ -1855,7 +1857,7 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) return 1; } - submit_bio_noacct(clone); + dm_submit_bio_remap(io->base_bio, clone, (gfp != CRYPT_MAP_READ_GFP)); return 0; } @@ -1881,7 +1883,7 @@ static void kcryptd_io_write(struct dm_crypt_io *io) { struct bio *clone = io->ctx.bio_out; - submit_bio_noacct(clone); + dm_submit_bio_remap(io->base_bio, clone, true); } #define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node) @@ -1960,7 +1962,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) if ((likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) || test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) { - submit_bio_noacct(clone); + dm_submit_bio_remap(io->base_bio, clone, true); return; } @@ -3363,6 +3365,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = 1; ti->limit_swap_bios = true; + ti->accounts_remapped_io = true; dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1); return 0; @@ -3431,7 +3434,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) io->ctx.r.req = (struct skcipher_request *)(io + 1); if (bio_data_dir(io->base_bio) == READ) { - if (kcryptd_io_read(io, GFP_NOWAIT)) + if (kcryptd_io_read(io, CRYPT_MAP_READ_GFP)) kcryptd_queue_read(io); } else kcryptd_queue_crypt(io); @@ -3626,7 +3629,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct 
target_type crypt_target = { .name = "crypt", - .version = {1, 23, 0}, + .version = {1, 24, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, -- cgit v1.2.3 From c357342186dce510564bb6351e8567328e19541c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:37 -0500 Subject: dm delay: use dm_submit_bio_remap Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-delay.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 59e51d285b0e..b25b45011b11 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -72,7 +72,7 @@ static void flush_bios(struct bio *bio) while (bio) { n = bio->bi_next; bio->bi_next = NULL; - submit_bio_noacct(bio); + dm_submit_bio_remap(bio, NULL, true); bio = n; } } @@ -232,6 +232,7 @@ out: ti->num_flush_bios = 1; ti->num_discard_bios = 1; + ti->accounts_remapped_io = true; ti->per_io_data_size = sizeof(struct dm_delay_info); return 0; @@ -355,7 +356,7 @@ out: static struct target_type delay_target = { .name = "delay", - .version = {1, 2, 1}, + .version = {1, 3, 0}, .features = DM_TARGET_PASSES_INTEGRITY, .module = THIS_MODULE, .ctr = delay_ctr, -- cgit v1.2.3 From 302f035141d3d4faee36f8543d34a32fb36a886d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 23 Jan 2022 22:47:40 +0000 Subject: dm cache policy smq: make static read-only array table const The 'table' static array is read-only so it make sense to make it const. Add in the int type to clean up checkpatch warning. 
Signed-off-by: Colin Ian King Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-policy-smq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index b61aac00ff40..a3d281fc14c3 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1026,7 +1026,9 @@ static unsigned default_promote_level(struct smq_policy *mq) * This scheme reminds me of a graph of entropy vs probability of a * binary variable. */ - static unsigned table[] = {1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1}; + static const unsigned int table[] = { + 1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1 + }; unsigned hits = mq->cache_stats.hits; unsigned misses = mq->cache_stats.misses; -- cgit v1.2.3 From 118f31b4964fa9fce65b0901ef598c734553cfc7 Mon Sep 17 00:00:00 2001 From: Thore Sommer Date: Tue, 25 Jan 2022 15:05:47 +0100 Subject: dm ima: fix wrong length calculation for no_data string All entries measured by dm ima are prefixed by a version string (dm_version=N.N.N). When there is no data to measure, the entire buffer is overwritten with a string containing the version string again and the length of that string is added to the length of the version string. The new length is now wrong because it contains the version string twice. 
This caused entries like this: dm_version=4.45.0;name=test,uuid=test;table_clear=no_data; \ \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \ current_device_capacity=204808; Signed-off-by: Thore Sommer Signed-off-by: Mike Snitzer --- drivers/md/dm-ima.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c index 957999998d70..1842d3a958ef 100644 --- a/drivers/md/dm-ima.c +++ b/drivers/md/dm-ima.c @@ -455,7 +455,7 @@ void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, "%sname=%s,uuid=%s;device_resume=no_data;", DM_IMA_VERSION_STR, dev_name, dev_uuid); - l += strlen(device_table_data); + l = strlen(device_table_data); } @@ -568,7 +568,7 @@ void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, "%sname=%s,uuid=%s;device_remove=no_data;", DM_IMA_VERSION_STR, dev_name, dev_uuid); - l += strlen(device_table_data); + l = strlen(device_table_data); } memcpy(device_table_data + l, remove_all_str, remove_all_len); @@ -654,7 +654,7 @@ void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, "%sname=%s,uuid=%s;table_clear=no_data;", DM_IMA_VERSION_STR, dev_name, dev_uuid); - l += strlen(device_table_data); + l = strlen(device_table_data); } capacity_len = strlen(capacity_str); -- cgit v1.2.3 From a8b9d116cda047f38ba29ce26df49c57479ffaa2 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 26 Jan 2022 05:18:26 -0800 Subject: dm: cleanup double word in comment Remove the second 'a'. 
Signed-off-by: Tom Rix Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 7752d14d13f8..70cd9449275a 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -358,7 +358,7 @@ struct dm_target { bool limit_swap_bios:1; /* - * Set if this target implements a a zoned device and needs emulation of + * Set if this target implements a zoned device and needs emulation of * zone append operations using regular writes. */ bool emulate_zone_append:1; -- cgit v1.2.3 From cd9c88da171a62c4b0f1c70e50c75845969fbc18 Mon Sep 17 00:00:00 2001 From: Jordy Zomer Date: Sat, 29 Jan 2022 15:58:39 +0100 Subject: dm ioctl: prevent potential spectre v1 gadget It appears like cmd could be a Spectre v1 gadget as it's supplied by a user and used as an array index. Prevent the contents of kernel memory from being leaked to userspace via speculative execution by using array_index_nospec. 
Signed-off-by: Jordy Zomer Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 21fe8652b095..901abd6dea41 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -1788,6 +1789,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) if (unlikely(cmd >= ARRAY_SIZE(_ioctls))) return NULL; + cmd = array_index_nospec(cmd, ARRAY_SIZE(_ioctls)); *ioctl_flags = _ioctls[cmd].flags; return _ioctls[cmd].fn; } -- cgit v1.2.3 From 588b7f5df0cb64f281290c7672470c006abe7160 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 1 Feb 2022 11:39:52 +0300 Subject: dm: fix use-after-free in dm_cleanup_zoned_dev() dm_cleanup_zoned_dev() uses queue, so it must be called before blk_cleanup_disk() starts its killing: blk_cleanup_disk->blk_cleanup_queue()->kobject_put()->blk_release_queue()-> ->...RCU...->blk_free_queue_rcu()->kmem_cache_free() Otherwise, RCU callback may be executed first and dm_cleanup_zoned_dev() will touch free'd memory: BUG: KASAN: use-after-free in dm_cleanup_zoned_dev+0x33/0xd0 Read of size 8 at addr ffff88805ac6e430 by task dmsetup/681 CPU: 4 PID: 681 Comm: dmsetup Not tainted 5.17.0-rc2+ #6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x7d print_address_description.constprop.0+0x1f/0x150 ? dm_cleanup_zoned_dev+0x33/0xd0 kasan_report.cold+0x7f/0x11b ? dm_cleanup_zoned_dev+0x33/0xd0 dm_cleanup_zoned_dev+0x33/0xd0 __dm_destroy+0x26a/0x400 ? dm_blk_ioctl+0x230/0x230 ? up_write+0xd8/0x270 dev_remove+0x156/0x1d0 ctl_ioctl+0x269/0x530 ? table_clear+0x140/0x140 ? lock_release+0xb2/0x750 ? remove_all+0x40/0x40 ? rcu_read_lock_sched_held+0x12/0x70 ? lock_downgrade+0x3c0/0x3c0 ? 
rcu_read_lock_sched_held+0x12/0x70 dm_ctl_ioctl+0xa/0x10 __x64_sys_ioctl+0xb9/0xf0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fb6dfa95c27 Fixes: bb37d77239af ("dm: introduce zone append emulation") Cc: stable@vger.kernel.org Signed-off-by: Kirill Tkhai Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 082366d0ad49..ba1068f4cb52 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1645,6 +1645,7 @@ static void cleanup_mapped_device(struct mapped_device *md) md->dax_dev = NULL; } + dm_cleanup_zoned_dev(md); if (md->disk) { spin_lock(&_minor_lock); md->disk->private_data = NULL; @@ -1670,7 +1671,6 @@ static void cleanup_mapped_device(struct mapped_device *md) mutex_destroy(&md->swap_bios_lock); dm_mq_cleanup_mapped_device(md); - dm_cleanup_zoned_dev(md); } /* -- cgit v1.2.3 From 6fc51504388c1a1a53db8faafe9fff78fccc7c87 Mon Sep 17 00:00:00 2001 From: Aashish Sharma Date: Fri, 11 Feb 2022 12:15:38 +0000 Subject: dm crypt: fix get_key_size compiler warning if !CONFIG_KEYS Explicitly convert unsigned int in the right of the conditional expression to int to match the left side operand and the return type, fixing the following compiler warning: drivers/md/dm-crypt.c:2593:43: warning: signed and unsigned type in conditional expression [-Wsign-compare] Fixes: c538f6ec9f56 ("dm crypt: add ability to use keys from the kernel key retention service") Signed-off-by: Aashish Sharma Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f310f2ba8bad..ec746cc4b1f8 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2582,7 +2582,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string static int get_key_size(char **key_string) { - return (*key_string[0] == ':') 
? -EINVAL : strlen(*key_string) >> 1; + return (*key_string[0] == ':') ? -EINVAL : (int)(strlen(*key_string) >> 1); } #endif /* CONFIG_KEYS */ -- cgit v1.2.3 From 8ca8b1e147b7235527bef1a4ea274cc8894e98c5 Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Mon, 14 Feb 2022 17:55:44 -0800 Subject: dm thin: use time_is_before_jiffies instead of open coding it Use time_is_before_jiffies() to improve code readability. Signed-off-by: Wang Qing Signed-off-by: Mike Snitzer --- drivers/md/dm-thin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index f4234d615aa1..2c13731a3b44 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -161,7 +161,7 @@ static void throttle_work_start(struct throttle *t) static void throttle_work_update(struct throttle *t) { - if (!t->throttle_applied && jiffies > t->threshold) { + if (!t->throttle_applied && time_is_before_jiffies(t->threshold)) { down_write(&t->lock); t->throttle_applied = true; } -- cgit v1.2.3 From 75274a4bf2da6b17155f8b4e54c4bb3b665c95e3 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Wed, 16 Feb 2022 14:37:03 +0800 Subject: dm thin metadata: remove unused dm_thin_remove_block and __remove Signed-off-by: Zhiqiang Liu Signed-off-by: Mike Snitzer --- drivers/md/dm-thin-metadata.c | 28 ---------------------------- drivers/md/dm-thin-metadata.h | 1 - 2 files changed, 29 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 1a96a07cbf44..2db7030aba00 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1665,22 +1665,6 @@ int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, return r; } -static int __remove(struct dm_thin_device *td, dm_block_t block) -{ - int r; - struct dm_pool_metadata *pmd = td->pmd; - dm_block_t keys[2] = { td->id, block }; - - r = dm_btree_remove(&pmd->info, pmd->root, keys, &pmd->root); - if (r) - return r; - - td->mapped_blocks--; - td->changed = true; - 
- return 0; -} - static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end) { int r; @@ -1740,18 +1724,6 @@ static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_ return dm_btree_insert(&pmd->tl_info, pmd->root, keys, &value, &pmd->root); } -int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) -{ - int r = -EINVAL; - - pmd_write_lock(td->pmd); - if (!td->pmd->fail_io) - r = __remove(td, block); - pmd_write_unlock(td->pmd); - - return r; -} - int dm_thin_remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end) { diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 7ef56bd2a7e3..4d7a2caf21d9 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -166,7 +166,6 @@ int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result); int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, dm_block_t data_block); -int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block); int dm_thin_remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end); -- cgit v1.2.3 From a6a4901a5ef35070297c769120fea04145df0aaa Mon Sep 17 00:00:00 2001 From: Barry Song <21cnbao@gmail.com> Date: Sat, 19 Feb 2022 06:27:24 +0800 Subject: dm io: remove stale comment block for dm_io() Commit 7eaceaccab5f ("block: remove per-queue plugging") dropped unplug_delay and blk_unplug(). Plus, the current kernel has no fundamental difference between sync_io() and async_io() except sync_io() uses sync_io_complete() as the notify.fn and explicitly calls wait_for_completion_io() to sync. The comment isn't valid any more. 
Reviewed-by: Christoph Hellwig Signed-off-by: Barry Song Signed-off-by: Mike Snitzer --- drivers/md/dm-io.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 23e038f8dc84..32f8f4ed5e0d 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -525,14 +525,6 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, return 0; } -/* - * New collapsed (a)synchronous interface. - * - * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug - * the queue with blk_unplug() some time later or set REQ_SYNC in - * io_req->bi_opf. If you fail to do one of these, the IO will be submitted to - * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. - */ int dm_io(struct dm_io_request *io_req, unsigned num_regions, struct dm_io_region *where, unsigned long *sync_error_bits) { -- cgit v1.2.3 From fa247089de9936a46e290d4724cb5f0b845600f5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 22 Feb 2022 13:28:12 -0500 Subject: dm: requeue IO if mapping table not yet available Update both bio-based and request-based DM to requeue IO if the mapping table not available. This race of IO being submitted before the DM device ready is so narrow, yet possible for initial table load given that the DM device's request_queue is created prior, that it best to requeue IO to handle this unlikely case. 
Reported-by: Zhang Yi Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 7 ++++++- drivers/md/dm.c | 11 +++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 6948d5db9092..3dd040a56318 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -491,8 +491,13 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(!ti)) { int srcu_idx; - struct dm_table *map = dm_get_live_table(md, &srcu_idx); + struct dm_table *map; + map = dm_get_live_table(md, &srcu_idx); + if (unlikely(!map)) { + dm_put_live_table(md, srcu_idx); + return BLK_STS_RESOURCE; + } ti = dm_table_find_target(map, 0); dm_put_live_table(md, srcu_idx); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ba1068f4cb52..3640ef4bf327 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1533,15 +1533,10 @@ static void dm_submit_bio(struct bio *bio) struct dm_table *map; map = dm_get_live_table(md, &srcu_idx); - if (unlikely(!map)) { - DMERR_LIMIT("%s: mapping table unavailable, erroring io", - dm_device_name(md)); - bio_io_error(bio); - goto out; - } - /* If suspended, queue this IO for later */ - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { + /* If suspended, or map not yet available, queue this IO for later */ + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || + unlikely(!map)) { if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); else if (bio->bi_opf & REQ_RAHEAD) -- cgit v1.2.3 From f5b4aee10c024a35901cc2f1b9b4d80986a93937 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 22 Feb 2022 13:38:02 -0500 Subject: dm: remove unnecessary local variables in __bind Also remove empty newline before 'out:' label at end of __bind. 
Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3640ef4bf327..454d39bc7745 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1872,8 +1872,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, struct queue_limits *limits) { struct dm_table *old_map; - struct request_queue *q = md->queue; - bool request_based = dm_table_request_based(t); sector_t size; int ret; @@ -1894,7 +1892,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, dm_table_event_callback(t, event_callback, md); - if (request_based) { + if (dm_table_request_based(t)) { /* * Leverage the fact that request-based DM targets are * immutable singletons - used to optimize dm_mq_queue_rq. @@ -1908,7 +1906,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, goto out; } - ret = dm_table_set_restrictions(t, q, limits); + ret = dm_table_set_restrictions(t, md->queue, limits); if (ret) { old_map = ERR_PTR(ret); goto out; @@ -1920,7 +1918,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, if (old_map) dm_sync_table(md); - out: return old_map; } -- cgit v1.2.3 From 977ff73e64151d94811f4ba905419c90573f63e1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Mar 2022 10:38:14 +0200 Subject: dm-zoned: remove the ->name field in struct dmz_dev Just use the %pg format specifier to print the block device name directly. 
Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 4 ++-- drivers/md/dm-zoned-target.c | 1 - drivers/md/dm-zoned.h | 9 ++++----- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index e5f1eb27ce2e..d1ea66114d14 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1101,8 +1101,8 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb, */ static int dmz_read_sb(struct dmz_metadata *zmd, struct dmz_sb *sb, int set) { - dmz_zmd_debug(zmd, "read superblock set %d dev %s block %llu", - set, sb->dev->name, sb->block); + dmz_zmd_debug(zmd, "read superblock set %d dev %pg block %llu", + set, sb->dev->bdev, sb->block); return dmz_rdwr_block(sb->dev, REQ_OP_READ, sb->block, sb->mblk->page); diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index a3f6d3ef3817..cac295cc8840 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -730,7 +730,6 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path, } dev->bdev = bdev; dev->dev_idx = idx; - (void)bdevname(dev->bdev, dev->name); dev->capacity = bdev_nr_sectors(bdev); if (ti->begin) { diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h index 22f11440b423..a02744a0846c 100644 --- a/drivers/md/dm-zoned.h +++ b/drivers/md/dm-zoned.h @@ -56,7 +56,6 @@ struct dmz_dev { struct dmz_metadata *metadata; struct dmz_reclaim *reclaim; - char name[BDEVNAME_SIZE]; uuid_t uuid; sector_t capacity; @@ -176,16 +175,16 @@ enum { * Message functions. */ #define dmz_dev_info(dev, format, args...) \ - DMINFO("(%s): " format, (dev)->name, ## args) + DMINFO("(%pg): " format, (dev)->bdev, ## args) #define dmz_dev_err(dev, format, args...) 
\ - DMERR("(%s): " format, (dev)->name, ## args) + DMERR("(%pg): " format, (dev)->bdev, ## args) #define dmz_dev_warn(dev, format, args...) \ - DMWARN("(%s): " format, (dev)->name, ## args) + DMWARN("(%pg): " format, (dev)->bdev, ## args) #define dmz_dev_debug(dev, format, args...) \ - DMDEBUG("(%s): " format, (dev)->name, ## args) + DMDEBUG("(%pg): " format, (dev)->bdev, ## args) /* * Functions defined in dm-zoned-metadata.c -- cgit v1.2.3 From 385411ffba0c3305491346b98ba4d2cd8063f002 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Mar 2022 10:38:15 +0200 Subject: dm: stop using bdevname Just use the %pg format specifier instead. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 10 ++++------ drivers/md/dm-clone-target.c | 10 ++++------ drivers/md/dm-mpath.c | 6 ++---- drivers/md/dm-table.c | 30 ++++++++++++++---------------- drivers/md/dm-thin.c | 8 +++----- 5 files changed, 27 insertions(+), 37 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 89fdfb49d564..d46c06f2ffcc 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2015,7 +2015,6 @@ static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as, { int r; sector_t metadata_dev_size; - char b[BDEVNAME_SIZE]; if (!at_least_one_arg(as, error)) return -EINVAL; @@ -2029,8 +2028,8 @@ static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as, metadata_dev_size = get_dev_size(ca->metadata_dev); if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING) - DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", - bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS); + DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.", + ca->metadata_dev->bdev, THIN_METADATA_MAX_SECTORS); return 0; } @@ -3345,7 +3344,6 @@ static void 
disable_passdown_if_not_supported(struct cache *cache) struct block_device *origin_bdev = cache->origin_dev->bdev; struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits; const char *reason = NULL; - char buf[BDEVNAME_SIZE]; if (!cache->features.discard_passdown) return; @@ -3357,8 +3355,8 @@ static void disable_passdown_if_not_supported(struct cache *cache) reason = "max discard sectors smaller than a block"; if (reason) { - DMWARN("Origin device (%s) %s: Disabling discard passdown.", - bdevname(origin_bdev, buf), reason); + DMWARN("Origin device (%pg) %s: Disabling discard passdown.", + origin_bdev, reason); cache->features.discard_passdown = false; } } diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index 4599632d7a84..128316a73d01 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -1682,7 +1682,6 @@ static int parse_metadata_dev(struct clone *clone, struct dm_arg_set *as, char * { int r; sector_t metadata_dev_size; - char b[BDEVNAME_SIZE]; r = dm_get_device(clone->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, &clone->metadata_dev); @@ -1693,8 +1692,8 @@ static int parse_metadata_dev(struct clone *clone, struct dm_arg_set *as, char * metadata_dev_size = get_dev_size(clone->metadata_dev); if (metadata_dev_size > DM_CLONE_METADATA_MAX_SECTORS_WARNING) - DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", - bdevname(clone->metadata_dev->bdev, b), DM_CLONE_METADATA_MAX_SECTORS); + DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.", + clone->metadata_dev->bdev, DM_CLONE_METADATA_MAX_SECTORS); return 0; } @@ -2033,7 +2032,6 @@ static void disable_passdown_if_not_supported(struct clone *clone) struct block_device *dest_dev = clone->dest_dev->bdev; struct queue_limits *dest_limits = &bdev_get_queue(dest_dev)->limits; const char *reason = NULL; - char buf[BDEVNAME_SIZE]; if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) 
return; @@ -2044,8 +2042,8 @@ static void disable_passdown_if_not_supported(struct clone *clone) reason = "max discard sectors smaller than a region"; if (reason) { - DMWARN("Destination device (%s) %s: Disabling discard passdown.", - bdevname(dest_dev, buf), reason); + DMWARN("Destination device (%pd) %s: Disabling discard passdown.", + dest_dev, reason); clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags); } } diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index f4719b65e5e3..411403b3ec43 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -899,10 +899,8 @@ retain: if (m->hw_handler_name) { r = scsi_dh_attach(q, m->hw_handler_name); if (r == -EBUSY) { - char b[BDEVNAME_SIZE]; - - printk(KERN_INFO "dm-mpath: retaining handler on device %s\n", - bdevname(bdev, b)); + printk(KERN_INFO "dm-mpath: retaining handler on device %pg\n", + bdev); goto retain; } if (r < 0) { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e43096cfe9e2..f4ed756ab391 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -230,15 +230,14 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, sector_t dev_size = bdev_nr_sectors(bdev); unsigned short logical_block_size_sectors = limits->logical_block_size >> SECTOR_SHIFT; - char b[BDEVNAME_SIZE]; if (!dev_size) return 0; if ((start >= dev_size) || (start + len > dev_size)) { - DMWARN("%s: %s too small for target: " + DMWARN("%s: %pg too small for target: " "start=%llu, len=%llu, dev_size=%llu", - dm_device_name(ti->table->md), bdevname(bdev, b), + dm_device_name(ti->table->md), bdev, (unsigned long long)start, (unsigned long long)len, (unsigned long long)dev_size); @@ -253,10 +252,10 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, unsigned int zone_sectors = bdev_zone_sectors(bdev); if (start & (zone_sectors - 1)) { - DMWARN("%s: start=%llu not aligned to h/w zone size %u of %s", + DMWARN("%s: start=%llu not aligned to h/w zone size %u of 
%pg", dm_device_name(ti->table->md), (unsigned long long)start, - zone_sectors, bdevname(bdev, b)); + zone_sectors, bdev); return 1; } @@ -270,10 +269,10 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, * the sector range. */ if (len & (zone_sectors - 1)) { - DMWARN("%s: len=%llu not aligned to h/w zone size %u of %s", + DMWARN("%s: len=%llu not aligned to h/w zone size %u of %pg", dm_device_name(ti->table->md), (unsigned long long)len, - zone_sectors, bdevname(bdev, b)); + zone_sectors, bdev); return 1; } } @@ -283,19 +282,19 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, if (start & (logical_block_size_sectors - 1)) { DMWARN("%s: start=%llu not aligned to h/w " - "logical block size %u of %s", + "logical block size %u of %pg", dm_device_name(ti->table->md), (unsigned long long)start, - limits->logical_block_size, bdevname(bdev, b)); + limits->logical_block_size, bdev); return 1; } if (len & (logical_block_size_sectors - 1)) { DMWARN("%s: len=%llu not aligned to h/w " - "logical block size %u of %s", + "logical block size %u of %pg", dm_device_name(ti->table->md), (unsigned long long)len, - limits->logical_block_size, bdevname(bdev, b)); + limits->logical_block_size, bdev); return 1; } @@ -400,20 +399,19 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, struct queue_limits *limits = data; struct block_device *bdev = dev->bdev; struct request_queue *q = bdev_get_queue(bdev); - char b[BDEVNAME_SIZE]; if (unlikely(!q)) { - DMWARN("%s: Cannot set limits for nonexistent device %s", - dm_device_name(ti->table->md), bdevname(bdev, b)); + DMWARN("%s: Cannot set limits for nonexistent device %pg", + dm_device_name(ti->table->md), bdev); return 0; } if (blk_stack_limits(limits, &q->limits, get_start_sect(bdev) + start) < 0) - DMWARN("%s: adding target device %s caused an alignment inconsistency: " + DMWARN("%s: adding target device %pg caused an alignment inconsistency: " 
"physical_block_size=%u, logical_block_size=%u, " "alignment_offset=%u, start=%llu", - dm_device_name(ti->table->md), bdevname(bdev, b), + dm_device_name(ti->table->md), bdev, q->limits.physical_block_size, q->limits.logical_block_size, q->limits.alignment_offset, diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 2c13731a3b44..a01aa4ef2042 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2824,7 +2824,6 @@ static void disable_passdown_if_not_supported(struct pool_c *pt) struct block_device *data_bdev = pt->data_dev->bdev; struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits; const char *reason = NULL; - char buf[BDEVNAME_SIZE]; if (!pt->adjusted_pf.discard_passdown) return; @@ -2836,7 +2835,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt) reason = "max discard sectors smaller than a block"; if (reason) { - DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason); + DMWARN("Data device (%pg) %s: Disabling discard passdown.", data_bdev, reason); pt->adjusted_pf.discard_passdown = false; } } @@ -3201,11 +3200,10 @@ static sector_t get_dev_size(struct block_device *bdev) static void warn_if_metadata_device_too_big(struct block_device *bdev) { sector_t metadata_dev_size = get_dev_size(bdev); - char buffer[BDEVNAME_SIZE]; if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) - DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", - bdevname(bdev, buffer), THIN_METADATA_MAX_SECTORS); + DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.", + bdev, THIN_METADATA_MAX_SECTORS); } static sector_t get_metadata_dev_size(struct block_device *bdev) -- cgit v1.2.3 From 168678d765d3659fddffe80a70668488066a9569 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 2 Mar 2022 12:19:47 -0500 Subject: dm mpath: use DMINFO instead of printk with KERN_INFO Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 3 +-- 
1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 411403b3ec43..70ffe4efc03e 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -899,8 +899,7 @@ retain: if (m->hw_handler_name) { r = scsi_dh_attach(q, m->hw_handler_name); if (r == -EBUSY) { - printk(KERN_INFO "dm-mpath: retaining handler on device %pg\n", - bdev); + DMINFO("retaining handler on device %pg", bdev); goto retain; } if (r < 0) { -- cgit v1.2.3 From 69fe0f29892077f14b56e2a479b6bcf533209d53 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 4 Mar 2022 21:08:03 -0500 Subject: block: add ->poll_bio to block_device_operations Prepare for supporting IO polling for bio-based driver. Add ->poll_bio callback so that bio-based driver can provide their own logic for polling bio. Also fix ->submit_bio_bio typo in comment block above __submit_bio_noacct. Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer --- block/blk-core.c | 14 +++++++++----- block/genhd.c | 4 ++++ include/linux/blkdev.h | 2 ++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 94bf37f8e61d..ce08f0aa9dfc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -708,7 +708,7 @@ static void __submit_bio(struct bio *bio) * * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio. * bio_list_on_stack[1] contains bios that were submitted before the current - * ->submit_bio_bio, but that haven't been processed yet. + * ->submit_bio, but that haven't been processed yet. 
*/ static void __submit_bio_noacct(struct bio *bio) { @@ -975,7 +975,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); blk_qc_t cookie = READ_ONCE(bio->bi_cookie); - int ret; + int ret = 0; if (cookie == BLK_QC_T_NONE || !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) @@ -985,10 +985,14 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT)) return 0; - if (WARN_ON_ONCE(!queue_is_mq(q))) - ret = 0; /* not yet implemented, should not happen */ - else + if (queue_is_mq(q)) { ret = blk_mq_poll(q, cookie, iob, flags); + } else { + struct gendisk *disk = q->disk; + + if (disk && disk->fops->poll_bio) + ret = disk->fops->poll_bio(bio, iob, flags); + } blk_queue_exit(q); return ret; } diff --git a/block/genhd.c b/block/genhd.c index e351fac41bf2..1ed46a6f94f5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -410,6 +410,10 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, struct device *ddev = disk_to_dev(disk); int ret; + /* Only makes sense for bio-based to set ->poll_bio */ + if (queue_is_mq(disk->queue) && disk->fops->poll_bio) + return -EINVAL; + /* * The disk queue should now be all set with enough information about * the device for the elevator code to pick an adequate default diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f757f9c2871f..51f1b1ddbed2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1455,6 +1455,8 @@ enum blk_unique_id { struct block_device_operations { void (*submit_bio)(struct bio *bio); + int (*poll_bio)(struct bio *bio, struct io_comp_batch *iob, + unsigned int flags); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); -- cgit v1.2.3 From b99fdcdc36369d2a3a26bf44736d420eb629d8b7 Mon Sep 17 00:00:00 2001 From: 
Ming Lei Date: Fri, 4 Mar 2022 21:08:04 -0500 Subject: dm: support bio polling Support bio polling (REQ_POLLED) in the following approach: 1) only support io polling on normal READ/WRITE, and other abnormal IOs still fallback to IRQ mode, so the target io (and DM's clone bio) is exactly inside the dm io. 2) hold one refcnt on io->io_count after submitting this dm bio with REQ_POLLED 3) support dm native bio splitting, any dm io instance associated with current bio will be added into one list which head is bio->bi_private which will be recovered before ending this bio 4) implement .poll_bio() callback, call bio_poll() on the single target bio inside the dm io which is retrieved via bio->bi_bio_drv_data; call dm_io_dec_pending() after the target io is done in .poll_bio() 5) enable QUEUE_FLAG_POLL if all underlying queues enable QUEUE_FLAG_POLL, which is based on Jeffle's previous patch. These changes are good for a 30-35% IOPS improvement for polled IO. For detailed test results please see (Jens, thanks for testing!): https://listman.redhat.com/archives/dm-devel/2022-March/049868.html or https://marc.info/?l=linux-block&m=164684246214700&w=2 Tested-by: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 2 + drivers/md/dm-table.c | 27 ++++++++++ drivers/md/dm.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 169 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 8078b6c155ef..8cc03c0c262e 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -235,6 +235,8 @@ struct dm_io { bool start_io_acct:1; int was_accounted; unsigned long start_time; + void *data; + struct hlist_node node; spinlock_t endio_lock; struct dm_stats_aux stats_aux; /* last member of dm_target_io is 'struct bio' */ diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f4ed756ab391..c0be4f60b427 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1481,6 +1481,14 @@ 
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) return &t->targets[(KEYS_PER_NODE * n) + k]; } +static int device_not_poll_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return !test_bit(QUEUE_FLAG_POLL, &q->queue_flags); +} + /* * type->iterate_devices() should be called when the sanity check needs to * iterate and check all underlying data devices. iterate_devices() will @@ -1531,6 +1539,11 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev, return 0; } +static int dm_table_supports_poll(struct dm_table *t) +{ + return !dm_table_any_dev_attr(t, device_not_poll_capable, NULL); +} + /* * Check whether a table has no data devices attached using each * target's iterate_devices method. @@ -2067,6 +2080,20 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, dm_update_crypto_profile(q, t); disk_update_readahead(t->md->disk); + /* + * Check for request-based device is left to + * dm_mq_init_request_queue()->blk_mq_init_allocated_queue(). + * + * For bio-based device, only set QUEUE_FLAG_POLL when all + * underlying devices supporting polling. + */ + if (__table_type_bio_based(t->type)) { + if (dm_table_supports_poll(t)) + blk_queue_flag_set(QUEUE_FLAG_POLL, q); + else + blk_queue_flag_clear(QUEUE_FLAG_POLL, q); + } + return 0; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 454d39bc7745..d9111e17f0fc 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -40,6 +40,13 @@ #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE" #define DM_COOKIE_LENGTH 24 +/* + * For REQ_POLLED fs bio, this flag is set if we link mapped underlying + * dm_io into one list, and reuse bio->bi_private as the list head. Before + * ending this fs bio, we will recover its ->bi_private. 
+ */ +#define REQ_DM_POLL_LIST REQ_DRV + static const char *_name = DM_NAME; static unsigned int major = 0; @@ -73,6 +80,7 @@ struct clone_info { struct dm_io *io; sector_t sector; unsigned sector_count; + bool submit_as_polled; }; #define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone)) @@ -599,6 +607,9 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, if (!clone) return NULL; + /* REQ_DM_POLL_LIST shouldn't be inherited */ + clone->bi_opf &= ~REQ_DM_POLL_LIST; + tio = clone_to_tio(clone); tio->inside_dm_io = false; } @@ -888,8 +899,15 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) if (unlikely(wq_has_sleeper(&md->wait))) wake_up(&md->wait); - if (io_error == BLK_STS_DM_REQUEUE) + if (io_error == BLK_STS_DM_REQUEUE) { + /* + * Upper layer won't help us poll split bio, io->orig_bio + * may only reflect a subset of the pre-split original, + * so clear REQ_POLLED in case of requeue + */ + bio->bi_opf &= ~REQ_POLLED; return; + } if (bio_is_flush_with_data(bio)) { /* @@ -1440,6 +1458,47 @@ static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, return true; } +/* + * Reuse ->bi_private as hlist head for storing all dm_io instances + * associated with this bio, and this bio's bi_private needs to be + * stored in dm_io->data before the reuse. + * + * bio->bi_private is owned by fs or upper layer, so block layer won't + * touch it after splitting. Meantime it won't be changed by anyone after + * bio is submitted. So this reuse is safe. 
+ */ +static inline struct hlist_head *dm_get_bio_hlist_head(struct bio *bio) +{ + return (struct hlist_head *)&bio->bi_private; +} + +static void dm_queue_poll_io(struct bio *bio, struct dm_io *io) +{ + struct hlist_head *head = dm_get_bio_hlist_head(bio); + + if (!(bio->bi_opf & REQ_DM_POLL_LIST)) { + bio->bi_opf |= REQ_DM_POLL_LIST; + /* + * Save .bi_private into dm_io, so that we can reuse + * .bi_private as hlist head for storing dm_io list + */ + io->data = bio->bi_private; + + INIT_HLIST_HEAD(head); + + /* tell block layer to poll for completion */ + bio->bi_cookie = ~BLK_QC_T_NONE; + } else { + /* + * bio recursed due to split, reuse original poll list, + * and save bio->bi_private too. + */ + io->data = hlist_entry(head->first, struct dm_io, node)->data; + } + + hlist_add_head(&io->node, head); +} + /* * Select the correct strategy for processing a non-flush bio. */ @@ -1457,6 +1516,12 @@ static int __split_and_process_bio(struct clone_info *ci) if (__process_abnormal_io(ci, ti, &r)) return r; + /* + * Only support bio polling for normal IO, and the target io is + * exactly inside the dm_io instance (verified in dm_poll_dm_io) + */ + ci->submit_as_polled = ci->bio->bi_opf & REQ_POLLED; + len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO); __map_bio(clone); @@ -1473,6 +1538,7 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md, ci->map = map; ci->io = alloc_io(md, bio); ci->bio = bio; + ci->submit_as_polled = false; ci->sector = bio->bi_iter.bi_sector; ci->sector_count = bio_sectors(bio); @@ -1522,8 +1588,17 @@ out: if (ci.io->start_io_acct) dm_start_io_acct(ci.io, NULL); - /* drop the extra reference count */ - dm_io_dec_pending(ci.io, errno_to_blk_status(error)); + /* + * Drop the extra reference count for non-POLLED bio, and hold one + * reference for POLLED bio, which will be released in dm_poll_bio + * + * Add every dm_io instance into the hlist_head which is 
stored in + * bio->bi_private, so that dm_poll_bio can poll them all. + */ + if (error || !ci.submit_as_polled) + dm_io_dec_pending(ci.io, errno_to_blk_status(error)); + else + dm_queue_poll_io(bio, ci.io); } static void dm_submit_bio(struct bio *bio) @@ -1558,6 +1633,67 @@ out: dm_put_live_table(md, srcu_idx); } +static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob, + unsigned int flags) +{ + WARN_ON_ONCE(!io->tio.inside_dm_io); + + /* don't poll if the mapped io is done */ + if (atomic_read(&io->io_count) > 1) + bio_poll(&io->tio.clone, iob, flags); + + /* bio_poll holds the last reference */ + return atomic_read(&io->io_count) == 1; +} + +static int dm_poll_bio(struct bio *bio, struct io_comp_batch *iob, + unsigned int flags) +{ + struct hlist_head *head = dm_get_bio_hlist_head(bio); + struct hlist_head tmp = HLIST_HEAD_INIT; + struct hlist_node *next; + struct dm_io *io; + + /* Only poll normal bio which was marked as REQ_DM_POLL_LIST */ + if (!(bio->bi_opf & REQ_DM_POLL_LIST)) + return 0; + + WARN_ON_ONCE(hlist_empty(head)); + + hlist_move_list(head, &tmp); + + /* + * Restore .bi_private before possibly completing dm_io. + * + * bio_poll() is only possible once @bio has been completely + * submitted via submit_bio_noacct()'s depth-first submission. + * So there is no dm_queue_poll_io() race associated with + * clearing REQ_DM_POLL_LIST here. + */ + bio->bi_opf &= ~REQ_DM_POLL_LIST; + bio->bi_private = hlist_entry(tmp.first, struct dm_io, node)->data; + + hlist_for_each_entry_safe(io, next, &tmp, node) { + if (dm_poll_dm_io(io, iob, flags)) { + hlist_del_init(&io->node); + /* + * clone_endio() has already occurred, so passing + * error as 0 here doesn't override io->status + */ + dm_io_dec_pending(io, 0); + } + } + + /* Not done? 
*/ + if (!hlist_empty(&tmp)) { + bio->bi_opf |= REQ_DM_POLL_LIST; + /* Reset bio->bi_private to dm_io list head */ + hlist_move_list(&tmp, head); + return 0; + } + return 1; +} + /*----------------------------------------------------------------- * An IDR is used to keep track of allocated minor numbers. *---------------------------------------------------------------*/ @@ -2983,6 +3119,7 @@ static const struct pr_ops dm_pr_ops = { static const struct block_device_operations dm_blk_dops = { .submit_bio = dm_submit_bio, + .poll_bio = dm_poll_bio, .open = dm_blk_open, .release = dm_blk_close, .ioctl = dm_blk_ioctl, -- cgit v1.2.3 From 0a8e9599b91db84b25e4f13e89ed9b5cc3c2d15f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 8 Mar 2022 17:15:19 -0500 Subject: dm: add WARN_ON_ONCE to dm_submit_bio_remap If a target uses dm_submit_bio_remap() it should set ti->accounts_remapped_io. Also, switch dm_start_io_acct() WARN_ON to WARN_ON_ONCE. Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d9111e17f0fc..1c5c9036a20e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -541,7 +541,7 @@ static void dm_start_io_acct(struct dm_io *io, struct bio *clone) * Expect no possibility for race unless is_duplicate_bio. 
*/ if (!clone || likely(!clone_to_tio(clone)->is_duplicate_bio)) { - if (WARN_ON(io->was_accounted)) + if (WARN_ON_ONCE(io->was_accounted)) return; io->was_accounted = 1; } else if (xchg(&io->was_accounted, 1) == 1) @@ -1202,6 +1202,8 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone, struct dm_target_io *tio = clone_to_tio(clone); struct dm_io *io = tio->io; + WARN_ON_ONCE(!tio->ti->accounts_remapped_io); + /* establish bio that will get submitted */ if (!tgt_clone) tgt_clone = clone; -- cgit v1.2.3 From a92512819b1f2971674675edbf695b717d43629c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 8 Mar 2022 17:18:07 -0500 Subject: dm thin: use dm_submit_bio_remap Signed-off-by: Mike Snitzer --- drivers/md/dm-thin.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index a01aa4ef2042..ba74bc22ba42 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -755,7 +755,7 @@ static void issue(struct thin_c *tc, struct bio *bio) struct pool *pool = tc->pool; if (!bio_triggers_commit(tc, bio)) { - submit_bio_noacct(bio); + dm_submit_bio_remap(bio, NULL, true); return; } @@ -2383,7 +2383,7 @@ static void process_deferred_bios(struct pool *pool) if (bio->bi_opf & REQ_PREFLUSH) bio_endio(bio); else - submit_bio_noacct(bio); + dm_submit_bio_remap(bio, NULL, true); } } @@ -4231,6 +4231,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->num_flush_bios = 1; ti->flush_supported = true; + ti->accounts_remapped_io = true; ti->per_io_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ -- cgit v1.2.3 From b7f8dff09827c96032c34a945ee7757e394b5952 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 10 Mar 2022 11:45:58 -0500 Subject: dm: simplify dm_submit_bio_remap interface Remove the from_wq argument from dm_submit_bio_remap(). 
Eliminates the need for dm_submit_bio_remap() callers to know whether they are calling for a workqueue or from the original dm_submit_bio(). Add map_task to dm_io struct, record the map_task in alloc_io and clear it after all target ->map() calls have completed. Update dm_submit_bio_remap to check if 'current' matches io->map_task rather than rely on passed 'from_wq' argument. This change really simplifies the chore of porting each DM target to using dm_submit_bio_remap() because there is no longer the risk of programming error by not completely knowing all the different contexts a particular method that calls dm_submit_bio_remap() might be used in. Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 1 + drivers/md/dm-crypt.c | 6 +++--- drivers/md/dm-delay.c | 2 +- drivers/md/dm-thin.c | 4 ++-- drivers/md/dm.c | 8 ++++---- include/linux/device-mapper.h | 2 +- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 8cc03c0c262e..8d3d11887343 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -237,6 +237,7 @@ struct dm_io { unsigned long start_time; void *data; struct hlist_node node; + struct task_struct *map_task; spinlock_t endio_lock; struct dm_stats_aux stats_aux; /* last member of dm_target_io is 'struct bio' */ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ec746cc4b1f8..a1730cc8ae27 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1857,7 +1857,7 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) return 1; } - dm_submit_bio_remap(io->base_bio, clone, (gfp != CRYPT_MAP_READ_GFP)); + dm_submit_bio_remap(io->base_bio, clone); return 0; } @@ -1883,7 +1883,7 @@ static void kcryptd_io_write(struct dm_crypt_io *io) { struct bio *clone = io->ctx.bio_out; - dm_submit_bio_remap(io->base_bio, clone, true); + dm_submit_bio_remap(io->base_bio, clone); } #define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node) @@ -1962,7 +1962,7 
@@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) if ((likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) || test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) { - dm_submit_bio_remap(io->base_bio, clone, true); + dm_submit_bio_remap(io->base_bio, clone); return; } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index b25b45011b11..9a51bf51a859 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -72,7 +72,7 @@ static void flush_bios(struct bio *bio) while (bio) { n = bio->bi_next; bio->bi_next = NULL; - dm_submit_bio_remap(bio, NULL, true); + dm_submit_bio_remap(bio, NULL); bio = n; } } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ba74bc22ba42..4d25d0e27031 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -755,7 +755,7 @@ static void issue(struct thin_c *tc, struct bio *bio) struct pool *pool = tc->pool; if (!bio_triggers_commit(tc, bio)) { - dm_submit_bio_remap(bio, NULL, true); + dm_submit_bio_remap(bio, NULL); return; } @@ -2383,7 +2383,7 @@ static void process_deferred_bios(struct pool *pool) if (bio->bi_opf & REQ_PREFLUSH) bio_endio(bio); else - dm_submit_bio_remap(bio, NULL, true); + dm_submit_bio_remap(bio, NULL); } } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1c5c9036a20e..c470f54f9193 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -574,6 +574,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) this_cpu_inc(*md->pending_io); io->orig_bio = NULL; io->md = md; + io->map_task = current; spin_lock_init(&io->endio_lock); io->start_time = jiffies; @@ -1189,15 +1190,13 @@ static inline void __dm_submit_bio_remap(struct bio *clone, /* * @clone: clone bio that DM core passed to target's .map function * @tgt_clone: clone of @clone bio that target needs submitted - * @from_wq: caller is a workqueue thread managed by DM target * * Targets should use this interface to submit bios they take * ownership of when returning 
DM_MAPIO_SUBMITTED. * * Target should also enable ti->accounts_remapped_io */ -void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone, - bool from_wq) +void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone) { struct dm_target_io *tio = clone_to_tio(clone); struct dm_io *io = tio->io; @@ -1212,7 +1211,7 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone, * Account io->origin_bio to DM dev on behalf of target * that took ownership of IO with DM_MAPIO_SUBMITTED. */ - if (!from_wq) { + if (io->map_task == current) { /* Still in target's map function */ io->start_io_acct = true; } else { @@ -1568,6 +1567,7 @@ static void dm_split_and_process_bio(struct mapped_device *md, } error = __split_and_process_bio(&ci); + ci.io->map_task = NULL; if (error || !ci.sector_count) goto out; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 70cd9449275a..901ec191250c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -471,7 +471,7 @@ int dm_suspended(struct dm_target *ti); int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); -void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone, bool from_wq); +void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone); union map_info *dm_get_rq_mapinfo(struct request *rq); #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From 69596f555b81942b41b10669e5faefc6ce883abb Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 8 Mar 2022 20:01:37 -0500 Subject: dm cache: use dm_submit_bio_remap Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index d46c06f2ffcc..780a61bc6cc0 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -803,7 +803,7 @@ static void 
accounted_complete(struct cache *cache, struct bio *bio) static void accounted_request(struct cache *cache, struct bio *bio) { accounted_begin(cache, bio); - submit_bio_noacct(bio); + dm_submit_bio_remap(bio, NULL); } static void issue_op(struct bio *bio, void *context) @@ -1708,7 +1708,7 @@ static bool process_bio(struct cache *cache, struct bio *bio) bool commit_needed; if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED) - submit_bio_noacct(bio); + dm_submit_bio_remap(bio, NULL); return commit_needed; } @@ -1774,7 +1774,7 @@ static bool process_discard_bio(struct cache *cache, struct bio *bio) if (cache->features.discard_passdown) { remap_to_origin(cache, bio); - submit_bio_noacct(bio); + dm_submit_bio_remap(bio, NULL); } else bio_endio(bio); @@ -2356,6 +2356,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) cache->ti = ca->ti; ti->private = cache; + ti->accounts_remapped_io = true; ti->num_flush_bios = 2; ti->flush_supported = true; -- cgit v1.2.3 From e27363472f9bc96db831ceb2c31cf8b9a7c5b6f3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 9 Mar 2022 16:07:06 -0500 Subject: dm: factor out dm_io_complete Optimizes dm_io_dec_pending() slightly by avoiding local variables. Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 149 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 77 insertions(+), 72 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c470f54f9193..f8df6e3bcdc5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -841,89 +841,94 @@ static int __noflush_suspending(struct mapped_device *md) return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); } +static void dm_io_complete(struct dm_io *io) +{ + blk_status_t io_error; + struct mapped_device *md = io->md; + struct bio *bio = io->orig_bio; + + if (io->status == BLK_STS_DM_REQUEUE) { + unsigned long flags; + /* + * Target requested pushing back the I/O. 
+ */ + spin_lock_irqsave(&md->deferred_lock, flags); + if (__noflush_suspending(md) && + !WARN_ON_ONCE(dm_is_zone_write(md, bio))) { + /* NOTE early return due to BLK_STS_DM_REQUEUE below */ + bio_list_add_head(&md->deferred, bio); + } else { + /* + * noflush suspend was interrupted or this is + * a write to a zoned target. + */ + io->status = BLK_STS_IOERR; + } + spin_unlock_irqrestore(&md->deferred_lock, flags); + } + + io_error = io->status; + if (io->was_accounted) + dm_end_io_acct(io, bio); + else if (!io_error) { + /* + * Must handle target that DM_MAPIO_SUBMITTED only to + * then bio_endio() rather than dm_submit_bio_remap() + */ + __dm_start_io_acct(io, bio); + dm_end_io_acct(io, bio); + } + free_io(io); + smp_wmb(); + this_cpu_dec(*md->pending_io); + + /* nudge anyone waiting on suspend queue */ + if (unlikely(wq_has_sleeper(&md->wait))) + wake_up(&md->wait); + + if (io_error == BLK_STS_DM_REQUEUE) { + /* + * Upper layer won't help us poll split bio, io->orig_bio + * may only reflect a subset of the pre-split original, + * so clear REQ_POLLED in case of requeue + */ + bio->bi_opf &= ~REQ_POLLED; + return; + } + + if (bio_is_flush_with_data(bio)) { + /* + * Preflush done for flush with data, reissue + * without REQ_PREFLUSH. + */ + bio->bi_opf &= ~REQ_PREFLUSH; + queue_io(md, bio); + } else { + /* done with normal IO or empty flush */ + if (io_error) + bio->bi_status = io_error; + bio_endio(bio); + } +} + /* * Decrements the number of outstanding ios that a bio has been * cloned into, completing the original io if necc. 
*/ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) { - unsigned long flags; - blk_status_t io_error; - struct bio *bio; - struct mapped_device *md = io->md; - /* Push-back supersedes any I/O errors */ if (unlikely(error)) { + unsigned long flags; spin_lock_irqsave(&io->endio_lock, flags); - if (!(io->status == BLK_STS_DM_REQUEUE && __noflush_suspending(md))) + if (!(io->status == BLK_STS_DM_REQUEUE && + __noflush_suspending(io->md))) io->status = error; spin_unlock_irqrestore(&io->endio_lock, flags); } - if (atomic_dec_and_test(&io->io_count)) { - bio = io->orig_bio; - if (io->status == BLK_STS_DM_REQUEUE) { - /* - * Target requested pushing back the I/O. - */ - spin_lock_irqsave(&md->deferred_lock, flags); - if (__noflush_suspending(md) && - !WARN_ON_ONCE(dm_is_zone_write(md, bio))) { - /* NOTE early return due to BLK_STS_DM_REQUEUE below */ - bio_list_add_head(&md->deferred, bio); - } else { - /* - * noflush suspend was interrupted or this is - * a write to a zoned target. - */ - io->status = BLK_STS_IOERR; - } - spin_unlock_irqrestore(&md->deferred_lock, flags); - } - - io_error = io->status; - if (io->was_accounted) - dm_end_io_acct(io, bio); - else if (!io_error) { - /* - * Must handle target that DM_MAPIO_SUBMITTED only to - * then bio_endio() rather than dm_submit_bio_remap() - */ - __dm_start_io_acct(io, bio); - dm_end_io_acct(io, bio); - } - free_io(io); - smp_wmb(); - this_cpu_dec(*md->pending_io); - - /* nudge anyone waiting on suspend queue */ - if (unlikely(wq_has_sleeper(&md->wait))) - wake_up(&md->wait); - - if (io_error == BLK_STS_DM_REQUEUE) { - /* - * Upper layer won't help us poll split bio, io->orig_bio - * may only reflect a subset of the pre-split original, - * so clear REQ_POLLED in case of requeue - */ - bio->bi_opf &= ~REQ_POLLED; - return; - } - - if (bio_is_flush_with_data(bio)) { - /* - * Preflush done for flush with data, reissue - * without REQ_PREFLUSH. 
- */ - bio->bi_opf &= ~REQ_PREFLUSH; - queue_io(md, bio); - } else { - /* done with normal IO or empty flush */ - if (io_error) - bio->bi_status = io_error; - bio_endio(bio); - } - } + if (atomic_dec_and_test(&io->io_count)) + dm_io_complete(io); } void disable_discard(struct mapped_device *md) @@ -1562,7 +1567,7 @@ static void dm_split_and_process_bio(struct mapped_device *md, if (bio->bi_opf & REQ_PREFLUSH) { error = __send_empty_flush(&ci); - /* dm_io_dec_pending submits any data associated with flush */ + /* dm_io_complete submits any data associated with flush */ goto out; } @@ -1575,7 +1580,7 @@ static void dm_split_and_process_bio(struct mapped_device *md, * Remainder must be passed to submit_bio_noacct() so it gets handled * *after* bios already submitted have been completely processed. * We take a clone of the original to store in ci.io->orig_bio to be - * used by dm_end_io_acct() and for dm_io_dec_pending() to use for + * used by dm_end_io_acct() and for dm_io_complete() to use for * completion handling. 
*/ orig_bio = bio_split(bio, bio_sectors(bio) - ci.sector_count, -- cgit v1.2.3 From 332f2b1e7360dc118d95bc6f15bcb6830b73a8aa Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 10 Mar 2022 16:25:28 -0500 Subject: dm: return void from __send_empty_flush Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f8df6e3bcdc5..df7664f3028c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1369,7 +1369,7 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, } } -static int __send_empty_flush(struct clone_info *ci) +static void __send_empty_flush(struct clone_info *ci) { unsigned target_nr = 0; struct dm_target *ti; @@ -1390,7 +1390,6 @@ static int __send_empty_flush(struct clone_info *ci) __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); bio_uninit(ci->bio); - return 0; } static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, @@ -1566,7 +1565,7 @@ static void dm_split_and_process_bio(struct mapped_device *md, init_clone_info(&ci, md, map, bio); if (bio->bi_opf & REQ_PREFLUSH) { - error = __send_empty_flush(&ci); + __send_empty_flush(&ci); /* dm_io_complete submits any data associated with flush */ goto out; } -- cgit v1.2.3 From e9567332a4a0bf3533b4126e50992d5a569ea675 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 11 Mar 2022 14:16:47 -0500 Subject: dm: update email address in MAINTAINERS Update my email address to kernel.org to allow distinction between my "upstream" and "Red" Hats. 
Signed-off-by: Mike Snitzer Signed-off-by: Mike Snitzer --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f88a16ba8b47..49620663c230 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5587,7 +5587,7 @@ F: include/linux/devm-helpers.h DEVICE-MAPPER (LVM) M: Alasdair Kergon -M: Mike Snitzer +M: Mike Snitzer M: dm-devel@redhat.com L: dm-devel@redhat.com S: Maintained -- cgit v1.2.3 From 82f6cdcc3676c68abaad2aac51a32f4e5d67d01e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 18 Mar 2022 00:15:28 -0400 Subject: dm: switch dm_io booleans over to proper flags Add flags to dm_io and manage them using the same pattern used for bi_flags in struct bio. Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 22 ++++++++++++++++++++-- drivers/md/dm.c | 30 ++++++++++++++++++------------ 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 8d3d11887343..e127cbcaf33d 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -232,18 +232,36 @@ struct dm_io { struct mapped_device *md; struct bio *orig_bio; blk_status_t status; - bool start_io_acct:1; - int was_accounted; + unsigned short flags; unsigned long start_time; void *data; struct hlist_node node; struct task_struct *map_task; + spinlock_t startio_lock; spinlock_t endio_lock; struct dm_stats_aux stats_aux; /* last member of dm_target_io is 'struct bio' */ struct dm_target_io tio; }; +/* + * dm_io flags + */ +enum { + DM_IO_START_ACCT, + DM_IO_ACCOUNTED +}; + +static inline bool dm_io_flagged(struct dm_io *io, unsigned int bit) +{ + return (io->flags & (1U << bit)) != 0; +} + +static inline void dm_io_set_flag(struct dm_io *io, unsigned int bit) +{ + io->flags |= (1U << bit); +} + static inline void dm_io_inc_pending(struct dm_io *io) { atomic_inc(&io->io_count); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index df7664f3028c..83328f03bcb2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ 
-541,11 +541,18 @@ static void dm_start_io_acct(struct dm_io *io, struct bio *clone) * Expect no possibility for race unless is_duplicate_bio. */ if (!clone || likely(!clone_to_tio(clone)->is_duplicate_bio)) { - if (WARN_ON_ONCE(io->was_accounted)) + if (WARN_ON_ONCE(dm_io_flagged(io, DM_IO_ACCOUNTED))) return; - io->was_accounted = 1; - } else if (xchg(&io->was_accounted, 1) == 1) - return; + dm_io_set_flag(io, DM_IO_ACCOUNTED); + } else { + unsigned long flags; + if (dm_io_flagged(io, DM_IO_ACCOUNTED)) + return; + /* Can afford locking given is_duplicate_bio */ + spin_lock_irqsave(&io->startio_lock, flags); + dm_io_set_flag(io, DM_IO_ACCOUNTED); + spin_unlock_irqrestore(&io->startio_lock, flags); + } __dm_start_io_acct(io, bio); } @@ -575,11 +582,10 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) io->orig_bio = NULL; io->md = md; io->map_task = current; + spin_lock_init(&io->startio_lock); spin_lock_init(&io->endio_lock); - io->start_time = jiffies; - io->start_io_acct = false; - io->was_accounted = 0; + io->flags = 0; dm_stats_record_start(&md->stats, &io->stats_aux); @@ -868,7 +874,7 @@ static void dm_io_complete(struct dm_io *io) } io_error = io->status; - if (io->was_accounted) + if (dm_io_flagged(io, DM_IO_ACCOUNTED)) dm_end_io_acct(io, bio); else if (!io_error) { /* @@ -1218,7 +1224,7 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone) */ if (io->map_task == current) { /* Still in target's map function */ - io->start_io_acct = true; + dm_io_set_flag(io, DM_IO_START_ACCT); } else { /* * Called by another thread, managed by DM target, @@ -1288,7 +1294,7 @@ static void __map_bio(struct bio *clone) case DM_MAPIO_SUBMITTED: /* target has assumed ownership of this io */ if (!ti->accounts_remapped_io) - io->start_io_acct = true; + dm_io_set_flag(io, DM_IO_START_ACCT); break; case DM_MAPIO_REMAPPED: /* @@ -1297,7 +1303,7 @@ static void __map_bio(struct bio *clone) */ __dm_submit_bio_remap(clone, disk_devt(io->md->disk), 
tio->old_sector); - io->start_io_acct = true; + dm_io_set_flag(io, DM_IO_START_ACCT); break; case DM_MAPIO_KILL: case DM_MAPIO_REQUEUE: @@ -1591,7 +1597,7 @@ out: if (!orig_bio) orig_bio = bio; smp_store_release(&ci.io->orig_bio, orig_bio); - if (ci.io->start_io_acct) + if (dm_io_flagged(ci.io, DM_IO_START_ACCT)) dm_start_io_acct(ci.io, NULL); /* -- cgit v1.2.3 From 655f3aad7aa4858d06cdaca6c4b14635cc3c0eba Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 19 Mar 2022 18:04:20 -0400 Subject: dm: switch dm_target_io booleans over to proper flags Add flags to dm_target_io and manage them using the same pattern used for bi_flags in struct bio. Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 21 +++++++++++++++++++-- drivers/md/dm.c | 31 +++++++++++++++++++------------ 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index e127cbcaf33d..feb53232d777 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -215,12 +215,29 @@ struct dm_target_io { struct dm_io *io; struct dm_target *ti; unsigned int *len_ptr; - bool inside_dm_io:1; - bool is_duplicate_bio:1; + unsigned short flags; sector_t old_sector; struct bio clone; }; +/* + * dm_target_io flags + */ +enum { + DM_TIO_INSIDE_DM_IO, + DM_TIO_IS_DUPLICATE_BIO +}; + +static inline bool dm_tio_flagged(struct dm_target_io *tio, unsigned int bit) +{ + return (tio->flags & (1U << bit)) != 0; +} + +static inline void dm_tio_set_flag(struct dm_target_io *tio, unsigned int bit) +{ + tio->flags |= (1U << bit); +} + /* * One of these is allocated per original bio. * It contains the first clone used for that original. 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 83328f03bcb2..cca11cf42cc3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -94,7 +94,7 @@ static inline struct dm_target_io *clone_to_tio(struct bio *clone) void *dm_per_bio_data(struct bio *bio, size_t data_size) { - if (!clone_to_tio(bio)->inside_dm_io) + if (!dm_tio_flagged(clone_to_tio(bio), DM_TIO_INSIDE_DM_IO)) return (char *)bio - DM_TARGET_IO_BIO_OFFSET - data_size; return (char *)bio - DM_IO_BIO_OFFSET - data_size; } @@ -538,9 +538,10 @@ static void dm_start_io_acct(struct dm_io *io, struct bio *clone) /* * Ensure IO accounting is only ever started once. - * Expect no possibility for race unless is_duplicate_bio. + * Expect no possibility for race unless DM_TIO_IS_DUPLICATE_BIO. */ - if (!clone || likely(!clone_to_tio(clone)->is_duplicate_bio)) { + if (!clone || + likely(!dm_tio_flagged(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO))) { if (WARN_ON_ONCE(dm_io_flagged(io, DM_IO_ACCOUNTED))) return; dm_io_set_flag(io, DM_IO_ACCOUNTED); @@ -548,7 +549,7 @@ static void dm_start_io_acct(struct dm_io *io, struct bio *clone) unsigned long flags; if (dm_io_flagged(io, DM_IO_ACCOUNTED)) return; - /* Can afford locking given is_duplicate_bio */ + /* Can afford locking given DM_TIO_IS_DUPLICATE_BIO */ spin_lock_irqsave(&io->startio_lock, flags); dm_io_set_flag(io, DM_IO_ACCOUNTED); spin_unlock_irqrestore(&io->startio_lock, flags); @@ -571,7 +572,8 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) clone = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, &md->io_bs); tio = clone_to_tio(clone); - tio->inside_dm_io = true; + tio->flags = 0; + dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO); tio->io = NULL; io = container_of(tio, struct dm_io, tio); @@ -618,14 +620,13 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, clone->bi_opf &= ~REQ_DM_POLL_LIST; tio = clone_to_tio(clone); - tio->inside_dm_io = false; + tio->flags = 0; /* also clears DM_TIO_INSIDE_DM_IO */ } 
tio->magic = DM_TIO_MAGIC; tio->io = ci->io; tio->ti = ti; tio->target_bio_nr = target_bio_nr; - tio->is_duplicate_bio = false; tio->len_ptr = len; tio->old_sector = 0; @@ -640,7 +641,7 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, static void free_tio(struct bio *clone) { - if (clone_to_tio(clone)->inside_dm_io) + if (dm_tio_flagged(clone_to_tio(clone), DM_TIO_INSIDE_DM_IO)) return; bio_put(clone); } @@ -917,6 +918,12 @@ static void dm_io_complete(struct dm_io *io) } } +static inline bool dm_tio_is_normal(struct dm_target_io *tio) +{ + return (dm_tio_flagged(tio, DM_TIO_INSIDE_DM_IO) && + !dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO)); +} + /* * Decrements the number of outstanding ios that a bio has been * cloned into, completing the original io if necc. @@ -1180,7 +1187,7 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) struct dm_target_io *tio = clone_to_tio(bio); unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT; - BUG_ON(tio->is_duplicate_bio); + BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO)); BUG_ON(op_is_zone_mgmt(bio_op(bio))); BUG_ON(bio_op(bio) == REQ_OP_ZONE_APPEND); BUG_ON(bi_size > *tio->len_ptr); @@ -1362,13 +1369,13 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, break; case 1: clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); - clone_to_tio(clone)->is_duplicate_bio = true; + dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); __map_bio(clone); break; default: alloc_multiple_bios(&blist, ci, ti, num_bios, len); while ((clone = bio_list_pop(&blist))) { - clone_to_tio(clone)->is_duplicate_bio = true; + dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); __map_bio(clone); } break; @@ -1648,7 +1655,7 @@ out: static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob, unsigned int flags) { - WARN_ON_ONCE(!io->tio.inside_dm_io); + WARN_ON_ONCE(!dm_tio_is_normal(&io->tio)); /* don't poll if the mapped io is done */ if 
(atomic_read(&io->io_count) > 1) -- cgit v1.2.3 From bd4a6dd241ae0a0bf36274d61e1a1fbf80b99ecb Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 19 Mar 2022 18:29:50 -0400 Subject: dm: reduce size of dm_io and dm_target_io structs Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index feb53232d777..f186d7afaeab 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -208,14 +208,14 @@ struct dm_table { /* * One of these is allocated per clone bio. */ -#define DM_TIO_MAGIC 7282014 +#define DM_TIO_MAGIC 28714 struct dm_target_io { - unsigned int magic; + unsigned short magic; + unsigned short flags; unsigned int target_bio_nr; struct dm_io *io; struct dm_target *ti; unsigned int *len_ptr; - unsigned short flags; sector_t old_sector; struct bio clone; }; @@ -242,14 +242,14 @@ static inline void dm_tio_set_flag(struct dm_target_io *tio, unsigned int bit) * One of these is allocated per original bio. * It contains the first clone used for that original. */ -#define DM_IO_MAGIC 5191977 +#define DM_IO_MAGIC 19577 struct dm_io { - unsigned int magic; + unsigned short magic; + unsigned short flags; atomic_t io_count; struct mapped_device *md; struct bio *orig_bio; blk_status_t status; - unsigned short flags; unsigned long start_time; void *data; struct hlist_node node; -- cgit v1.2.3 From 4d7bca13dd9a5033174b0735056c5658cb893e76 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 19 Mar 2022 18:41:16 -0400 Subject: dm: consolidate spinlocks in dm_io struct No reason to have separate startio_lock and endio_lock given endio_lock could be used during submission anyway. This change leaves the dm_io struct weighing in at 256 bytes (down from 272 bytes, so saves a cacheline). 
Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 3 +-- drivers/md/dm.c | 11 +++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index f186d7afaeab..2c49aa6501b0 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -250,12 +250,11 @@ struct dm_io { struct mapped_device *md; struct bio *orig_bio; blk_status_t status; + spinlock_t lock; unsigned long start_time; void *data; struct hlist_node node; struct task_struct *map_task; - spinlock_t startio_lock; - spinlock_t endio_lock; struct dm_stats_aux stats_aux; /* last member of dm_target_io is 'struct bio' */ struct dm_target_io tio; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index cca11cf42cc3..b762a48d3fdf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -550,9 +550,9 @@ static void dm_start_io_acct(struct dm_io *io, struct bio *clone) if (dm_io_flagged(io, DM_IO_ACCOUNTED)) return; /* Can afford locking given DM_TIO_IS_DUPLICATE_BIO */ - spin_lock_irqsave(&io->startio_lock, flags); + spin_lock_irqsave(&io->lock, flags); dm_io_set_flag(io, DM_IO_ACCOUNTED); - spin_unlock_irqrestore(&io->startio_lock, flags); + spin_unlock_irqrestore(&io->lock, flags); } __dm_start_io_acct(io, bio); @@ -584,8 +584,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) io->orig_bio = NULL; io->md = md; io->map_task = current; - spin_lock_init(&io->startio_lock); - spin_lock_init(&io->endio_lock); + spin_lock_init(&io->lock); io->start_time = jiffies; io->flags = 0; @@ -933,11 +932,11 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) /* Push-back supersedes any I/O errors */ if (unlikely(error)) { unsigned long flags; - spin_lock_irqsave(&io->endio_lock, flags); + spin_lock_irqsave(&io->lock, flags); if (!(io->status == BLK_STS_DM_REQUEUE && __noflush_suspending(io->md))) io->status = error; - spin_unlock_irqrestore(&io->endio_lock, flags); + spin_unlock_irqrestore(&io->lock, flags); } if 
(atomic_dec_and_test(&io->io_count)) -- cgit v1.2.3