summaryrefslogtreecommitdiffstats
path: root/drivers/md/dm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--drivers/md/dm.c189
1 files changed, 102 insertions, 87 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 109e81f33edb..87cf45f619fd 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
@@ -142,6 +143,7 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr);
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_DEFERRED_REMOVE 6
#define DMF_SUSPENDED_INTERNALLY 7
+#define DMF_POST_SUSPENDING 8
#define DM_NUMA_NODE NUMA_NO_NODE
static int dm_numa_node = DM_NUMA_NODE;
@@ -654,28 +656,6 @@ static void free_tio(struct dm_target_io *tio)
bio_put(&tio->clone);
}
-static bool md_in_flight_bios(struct mapped_device *md)
-{
- int cpu;
- struct hd_struct *part = &dm_disk(md)->part0;
- long sum = 0;
-
- for_each_possible_cpu(cpu) {
- sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
- sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
- }
-
- return sum != 0;
-}
-
-static bool md_in_flight(struct mapped_device *md)
-{
- if (queue_is_mq(md->queue))
- return blk_mq_queue_inflight(md->queue);
- else
- return md_in_flight_bios(md);
-}
-
u64 dm_start_time_ns_from_clone(struct bio *bio)
{
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
@@ -1009,6 +989,7 @@ static void clone_endio(struct bio *bio)
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
+ struct bio *orig_bio = io->orig_bio;
if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) {
if (bio_op(bio) == REQ_OP_DISCARD &&
@@ -1022,6 +1003,18 @@ static void clone_endio(struct bio *bio)
disable_write_zeroes(md);
}
+ /*
+ * For zone-append bios get offset in zone of the written
+ * sector and add that to the original bio sector pos.
+ */
+ if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
+ sector_t written_sector = bio->bi_iter.bi_sector;
+ struct request_queue *q = orig_bio->bi_disk->queue;
+ u64 mask = (u64)blk_queue_zone_sectors(q) - 1;
+
+ orig_bio->bi_iter.bi_sector += written_sector & mask;
+ }
+
if (endio) {
int r = endio(tio->ti, bio, &error);
switch (r) {
@@ -1280,7 +1273,6 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
sector_t sector;
struct bio *clone = &tio->clone;
struct dm_io *io = tio->io;
- struct mapped_device *md = io->md;
struct dm_target *ti = tio->ti;
blk_qc_t ret = BLK_QC_T_NONE;
@@ -1302,10 +1294,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
/* the bio has been remapped so dispatch it */
trace_block_bio_remap(clone->bi_disk->queue, clone,
bio_dev(io->orig_bio), sector);
- if (md->type == DM_TYPE_NVME_BIO_BASED)
- ret = direct_make_request(clone);
- else
- ret = generic_make_request(clone);
+ ret = submit_bio_noacct(clone);
break;
case DM_MAPIO_KILL:
free_tio(tio);
@@ -1452,9 +1441,6 @@ static int __send_empty_flush(struct clone_info *ci)
BUG_ON(bio_has_data(ci->bio));
while ((ti = dm_table_get_target(ci->map, target_nr++)))
__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
-
- bio_disassociate_blkg(ci->bio);
-
return 0;
}
@@ -1642,6 +1628,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
ci.bio = &flush_bio;
ci.sector_count = 0;
error = __send_empty_flush(&ci);
+ bio_uninit(ci.bio);
/* dec_pending submits any data associated with flush */
} else if (op_is_zone_mgmt(bio_op(bio))) {
ci.bio = bio;
@@ -1654,7 +1641,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
error = __split_and_process_non_flush(&ci);
if (current->bio_list && ci.sector_count && !error) {
/*
- * Remainder must be passed to generic_make_request()
+ * Remainder must be passed to submit_bio_noacct()
* so that it gets handled *after* bios already submitted
* have been completely processed.
* We take a clone of the original to store in
@@ -1679,7 +1666,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
bio_chain(b, bio);
trace_block_split(md->queue, b, bio->bi_iter.bi_sector);
- ret = generic_make_request(bio);
+ ret = submit_bio_noacct(bio);
break;
}
}
@@ -1716,6 +1703,7 @@ static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map,
ci.bio = &flush_bio;
ci.sector_count = 0;
error = __send_empty_flush(&ci);
+ bio_uninit(ci.bio);
/* dec_pending submits any data associated with flush */
} else {
struct dm_target_io *tio;
@@ -1746,7 +1734,7 @@ static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struc
bio_chain(split, *bio);
trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector);
- generic_make_request(*bio);
+ submit_bio_noacct(*bio);
*bio = split;
}
}
@@ -1771,13 +1759,13 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
}
/*
- * If in ->make_request_fn we need to use blk_queue_split(), otherwise
+ * If in ->queue_bio we need to use blk_queue_split(), otherwise
* queue_limits for abnormal requests (e.g. discard, writesame, etc)
* won't be imposed.
*/
if (current->bio_list) {
if (is_abnormal_io(bio))
- blk_queue_split(md->queue, &bio);
+ blk_queue_split(&bio);
else
dm_queue_split(md, ti, &bio);
}
@@ -1788,9 +1776,9 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
return __split_and_process_bio(md, map, bio);
}
-static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
+static blk_qc_t dm_submit_bio(struct bio *bio)
{
- struct mapped_device *md = q->queuedata;
+ struct mapped_device *md = bio->bi_disk->private_data;
blk_qc_t ret = BLK_QC_T_NONE;
int srcu_idx;
struct dm_table *map;
@@ -1799,12 +1787,12 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
/*
* We are called with a live reference on q_usage_counter, but
* that one will be released as soon as we return. Grab an
- * extra one as blk_mq_make_request expects to be able to
- * consume a reference (which lives until the request is freed
- * in case a request is allocated).
+ * extra one as blk_mq_submit_bio expects to be able to consume
+ * a reference (which lives until the request is freed in case a
+ * request is allocated).
*/
- percpu_ref_get(&q->q_usage_counter);
- return blk_mq_make_request(q, bio);
+ percpu_ref_get(&bio->bi_disk->queue->q_usage_counter);
+ return blk_mq_submit_bio(bio);
}
map = dm_get_live_table(md, &srcu_idx);
@@ -1826,31 +1814,6 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
return ret;
}
-static int dm_any_congested(void *congested_data, int bdi_bits)
-{
- int r = bdi_bits;
- struct mapped_device *md = congested_data;
- struct dm_table *map;
-
- if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
- if (dm_request_based(md)) {
- /*
- * With request-based DM we only need to check the
- * top-level queue for congestion.
- */
- struct backing_dev_info *bdi = md->queue->backing_dev_info;
- r = bdi->wb.congested->state & bdi_bits;
- } else {
- map = dm_get_live_table_fast(md);
- if (map)
- r = dm_table_any_congested(map, bdi_bits);
- dm_put_live_table_fast(md);
- }
- }
-
- return r;
-}
-
/*-----------------------------------------------------------------
* An IDR is used to keep track of allocated minor numbers.
*---------------------------------------------------------------*/
@@ -1989,14 +1952,13 @@ static struct mapped_device *alloc_dev(int minor)
spin_lock_init(&md->uevent_lock);
/*
- * default to bio-based required ->make_request_fn until DM
- * table is loaded and md->type established. If request-based
- * table is loaded: blk-mq will override accordingly.
+ * default to bio-based until DM table is loaded and md->type
+ * established. If request-based table is loaded: blk-mq will
+ * override accordingly.
*/
- md->queue = blk_alloc_queue(dm_make_request, numa_node_id);
+ md->queue = blk_alloc_queue(numa_node_id);
if (!md->queue)
goto bad;
- md->queue->queuedata = md;
md->disk = alloc_disk_node(1, md->numa_node_id);
if (!md->disk)
@@ -2290,12 +2252,6 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_get_queue_limits);
-static void dm_init_congested_fn(struct mapped_device *md)
-{
- md->queue->backing_dev_info->congested_data = md;
- md->queue->backing_dev_info->congested_fn = dm_any_congested;
-}
-
/*
* Setup the DM device's queue based on md's type
*/
@@ -2312,12 +2268,10 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
DMERR("Cannot initialize queue for request-based dm-mq mapped device");
return r;
}
- dm_init_congested_fn(md);
break;
case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED:
case DM_TYPE_NVME_BIO_BASED:
- dm_init_congested_fn(md);
break;
case DM_TYPE_NONE:
WARN_ON_ONCE(true);
@@ -2417,6 +2371,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
if (!dm_suspended_md(md)) {
dm_table_presuspend_targets(map);
set_bit(DMF_SUSPENDED, &md->flags);
+ set_bit(DMF_POST_SUSPENDING, &md->flags);
dm_table_postsuspend_targets(map);
}
/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
@@ -2457,15 +2412,29 @@ void dm_put(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_put);
-static int dm_wait_for_completion(struct mapped_device *md, long task_state)
+static bool md_in_flight_bios(struct mapped_device *md)
+{
+ int cpu;
+ struct hd_struct *part = &dm_disk(md)->part0;
+ long sum = 0;
+
+ for_each_possible_cpu(cpu) {
+ sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
+ sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
+ }
+
+ return sum != 0;
+}
+
+static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state)
{
int r = 0;
DEFINE_WAIT(wait);
- while (1) {
+ while (true) {
prepare_to_wait(&md->wait, &wait, task_state);
- if (!md_in_flight(md))
+ if (!md_in_flight_bios(md))
break;
if (signal_pending_state(task_state, current)) {
@@ -2480,6 +2449,28 @@ static int dm_wait_for_completion(struct mapped_device *md, long task_state)
return r;
}
+static int dm_wait_for_completion(struct mapped_device *md, long task_state)
+{
+ int r = 0;
+
+ if (!queue_is_mq(md->queue))
+ return dm_wait_for_bios_completion(md, task_state);
+
+ while (true) {
+ if (!blk_mq_queue_inflight(md->queue))
+ break;
+
+ if (signal_pending_state(task_state, current)) {
+ r = -EINTR;
+ break;
+ }
+
+ msleep(5);
+ }
+
+ return r;
+}
+
/*
* Process the deferred bios
*/
@@ -2502,7 +2493,7 @@ static void dm_wq_work(struct work_struct *work)
break;
if (dm_request_based(md))
- (void) generic_make_request(c);
+ (void) submit_bio_noacct(c);
else
(void) dm_process_bio(md, map, c);
}
@@ -2739,7 +2730,9 @@ retry:
if (r)
goto out_unlock;
+ set_bit(DMF_POST_SUSPENDING, &md->flags);
dm_table_postsuspend_targets(map);
+ clear_bit(DMF_POST_SUSPENDING, &md->flags);
out_unlock:
mutex_unlock(&md->suspend_lock);
@@ -2836,7 +2829,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla
(void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE,
DMF_SUSPENDED_INTERNALLY);
+ set_bit(DMF_POST_SUSPENDING, &md->flags);
dm_table_postsuspend_targets(map);
+ clear_bit(DMF_POST_SUSPENDING, &md->flags);
}
static void __dm_internal_resume(struct mapped_device *md)
@@ -2913,17 +2908,25 @@ EXPORT_SYMBOL_GPL(dm_internal_resume_fast);
int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
unsigned cookie)
{
+ int r;
+ unsigned noio_flag;
char udev_cookie[DM_COOKIE_LENGTH];
char *envp[] = { udev_cookie, NULL };
+ noio_flag = memalloc_noio_save();
+
if (!cookie)
- return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
+ r = kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
else {
snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
DM_COOKIE_ENV_VAR_NAME, cookie);
- return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
- action, envp);
+ r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
+ action, envp);
}
+
+ memalloc_noio_restore(noio_flag);
+
+ return r;
}
uint32_t dm_next_uevent_seq(struct mapped_device *md)
@@ -2989,6 +2992,11 @@ int dm_suspended_md(struct mapped_device *md)
return test_bit(DMF_SUSPENDED, &md->flags);
}
+static int dm_post_suspending_md(struct mapped_device *md)
+{
+ return test_bit(DMF_POST_SUSPENDING, &md->flags);
+}
+
int dm_suspended_internally_md(struct mapped_device *md)
{
return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
@@ -3005,6 +3013,12 @@ int dm_suspended(struct dm_target *ti)
}
EXPORT_SYMBOL_GPL(dm_suspended);
+int dm_post_suspending(struct dm_target *ti)
+{
+ return dm_post_suspending_md(dm_table_get_md(ti->table));
+}
+EXPORT_SYMBOL_GPL(dm_post_suspending);
+
int dm_noflush_suspending(struct dm_target *ti)
{
return __noflush_suspending(dm_table_get_md(ti->table));
@@ -3234,6 +3248,7 @@ static const struct pr_ops dm_pr_ops = {
};
static const struct block_device_operations dm_blk_dops = {
+ .submit_bio = dm_submit_bio,
.open = dm_blk_open,
.release = dm_blk_close,
.ioctl = dm_blk_ioctl,