summaryrefslogtreecommitdiffstats
path: root/drivers/md/dm.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 14:21:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 14:21:25 -0700
commit8374cfe647a1f360be3228b949dd6d753c55c19c (patch)
treed7e5cf67dc5b3ea62901556a2baf2be090a1dc6a /drivers/md/dm.c
parentc013d0af81f60cc7dbe357c4e2a925fb6738dbfe (diff)
parent9dd1cd3220eca534f2d47afad7ce85f4c40118d8 (diff)
downloadlinux-8374cfe647a1f360be3228b949dd6d753c55c19c.tar.bz2
Merge tag 'for-6.0/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer: - Refactor DM core's mempool allocation so that it clearer by not being split acorss files. - Improve DM core's BLK_STS_DM_REQUEUE and BLK_STS_AGAIN handling. - Optimize DM core's more common bio splitting by eliminating the use of bio cloning with bio_split+bio_chain. Shift that cloning cost to the relatively unlikely dm_io requeue case that only occurs during error handling. Introduces dm_io_rewind() that will clone a bio that reflects the subset of the original bio that must be requeued. - Remove DM core's dm_table_get_num_targets() wrapper and audit all dm_table_get_target() callers. - Fix potential for OOM with DM writecache target by setting a default MAX_WRITEBACK_JOBS (set to 256MiB or 1/16 of total system memory, whichever is smaller). - Fix DM writecache target's stats that are reported through DM-specific table info. - Fix use-after-free crash in dm_sm_register_threshold_callback(). - Refine DM core's Persistent Reservation handling in preparation for broader work Mike Christie is doing to add compatibility with Microsoft Windows Failover Cluster. - Fix various KASAN reported bugs in the DM raid target. - Fix DM raid target crash due to md_handle_request() bio splitting that recurses to block core without properly initializing the bio's bi_dev. - Fix some code comment typos and fix some Documentation formatting. * tag 'for-6.0/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (29 commits) dm: fix dm-raid crash if md_handle_request() splits bio dm raid: fix address sanitizer warning in raid_resume dm raid: fix address sanitizer warning in raid_status dm: Start pr_preempt from the same starting path dm: Fix PR release handling for non All Registrants dm: Start pr_reserve from the same starting path dm: Allow dm_call_pr to be used for path searches dm: return early from dm_pr_call() if DM device is suspended dm thin: fix use-after-free crash in dm_sm_register_threshold_callback dm writecache: count number of blocks discarded, not number of discard bios dm writecache: count number of blocks written, not number of write bios dm writecache: count number of blocks read, not number of read bios dm writecache: return void from functions dm kcopyd: use __GFP_HIGHMEM when allocating pages dm writecache: set a default MAX_WRITEBACK_JOBS Documentation: dm writecache: Render status list as list Documentation: dm writecache: add blank line before optional parameters dm snapshot: fix typo in snapshot_map() comment dm raid: remove redundant "the" in parse_raid_params() comment dm cache: fix typo in 2 comment blocks ...
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--drivers/md/dm.c462
1 files changed, 291 insertions, 171 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 54c2a23f4e55..99642f69bfa7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -88,10 +88,6 @@ struct clone_info {
bool submit_as_polled:1;
};
-#define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone))
-#define DM_IO_BIO_OFFSET \
- (offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))
-
static inline struct dm_target_io *clone_to_tio(struct bio *clone)
{
return container_of(clone, struct dm_target_io, clone);
@@ -415,7 +411,7 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
struct block_device **bdev)
{
- struct dm_target *tgt;
+ struct dm_target *ti;
struct dm_table *map;
int r;
@@ -426,17 +422,17 @@ retry:
return r;
/* We only support devices that have a single target */
- if (dm_table_get_num_targets(map) != 1)
+ if (map->num_targets != 1)
return r;
- tgt = dm_table_get_target(map, 0);
- if (!tgt->type->prepare_ioctl)
+ ti = dm_table_get_target(map, 0);
+ if (!ti->type->prepare_ioctl)
return r;
if (dm_suspended_md(md))
return -EAGAIN;
- r = tgt->type->prepare_ioctl(tgt, bdev);
+ r = ti->type->prepare_ioctl(ti, bdev);
if (r == -ENOTCONN && !fatal_signal_pending(current)) {
dm_put_live_table(md, *srcu_idx);
msleep(10);
@@ -578,9 +574,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
struct bio *clone;
clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
- /* Set default bdev, but target must bio_set_dev() before issuing IO */
- clone->bi_bdev = md->disk->part0;
-
tio = clone_to_tio(clone);
tio->flags = 0;
dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
@@ -594,7 +587,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
atomic_set(&io->io_count, 2);
this_cpu_inc(*md->pending_io);
io->orig_bio = bio;
- io->split_bio = NULL;
io->md = md;
spin_lock_init(&io->lock);
io->start_time = jiffies;
@@ -614,6 +606,7 @@ static void free_io(struct dm_io *io)
static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask)
{
+ struct mapped_device *md = ci->io->md;
struct dm_target_io *tio;
struct bio *clone;
@@ -623,14 +616,10 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
/* alloc_io() already initialized embedded clone */
clone = &tio->clone;
} else {
- struct mapped_device *md = ci->io->md;
-
clone = bio_alloc_clone(NULL, ci->bio, gfp_mask,
&md->mempools->bs);
if (!clone)
return NULL;
- /* Set default bdev, but target must bio_set_dev() before issuing IO */
- clone->bi_bdev = md->disk->part0;
/* REQ_DM_POLL_LIST shouldn't be inherited */
clone->bi_opf &= ~REQ_DM_POLL_LIST;
@@ -646,6 +635,11 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
tio->len_ptr = len;
tio->old_sector = 0;
+ /* Set default bdev, but target must bio_set_dev() before issuing IO */
+ clone->bi_bdev = md->disk->part0;
+ if (unlikely(ti->needs_bio_set_dev))
+ bio_set_dev(clone, md->disk->part0);
+
if (len) {
clone->bi_iter.bi_size = to_bytes(*len);
if (bio_integrity(clone))
@@ -884,22 +878,63 @@ static int __noflush_suspending(struct mapped_device *md)
return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}
-static void dm_io_complete(struct dm_io *io)
+static void dm_requeue_add_io(struct dm_io *io, bool first_stage)
{
- blk_status_t io_error;
struct mapped_device *md = io->md;
- struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
- if (io->status == BLK_STS_DM_REQUEUE) {
+ if (first_stage) {
+ struct dm_io *next = md->requeue_list;
+
+ md->requeue_list = io;
+ io->next = next;
+ } else {
+ bio_list_add_head(&md->deferred, io->orig_bio);
+ }
+}
+
+static void dm_kick_requeue(struct mapped_device *md, bool first_stage)
+{
+ if (first_stage)
+ queue_work(md->wq, &md->requeue_work);
+ else
+ queue_work(md->wq, &md->work);
+}
+
+/*
+ * Return true if the dm_io's original bio is requeued.
+ * io->status is updated with error if requeue disallowed.
+ */
+static bool dm_handle_requeue(struct dm_io *io, bool first_stage)
+{
+ struct bio *bio = io->orig_bio;
+ bool handle_requeue = (io->status == BLK_STS_DM_REQUEUE);
+ bool handle_polled_eagain = ((io->status == BLK_STS_AGAIN) &&
+ (bio->bi_opf & REQ_POLLED));
+ struct mapped_device *md = io->md;
+ bool requeued = false;
+
+ if (handle_requeue || handle_polled_eagain) {
unsigned long flags;
+
+ if (bio->bi_opf & REQ_POLLED) {
+ /*
+ * Upper layer won't help us poll split bio
+ * (io->orig_bio may only reflect a subset of the
+ * pre-split original) so clear REQ_POLLED.
+ */
+ bio_clear_polled(bio);
+ }
+
/*
- * Target requested pushing back the I/O.
+ * Target requested pushing back the I/O or
+ * polled IO hit BLK_STS_AGAIN.
*/
spin_lock_irqsave(&md->deferred_lock, flags);
- if (__noflush_suspending(md) &&
- !WARN_ON_ONCE(dm_is_zone_write(md, bio))) {
- /* NOTE early return due to BLK_STS_DM_REQUEUE below */
- bio_list_add_head(&md->deferred, bio);
+ if ((__noflush_suspending(md) &&
+ !WARN_ON_ONCE(dm_is_zone_write(md, bio))) ||
+ handle_polled_eagain || first_stage) {
+ dm_requeue_add_io(io, first_stage);
+ requeued = true;
} else {
/*
* noflush suspend was interrupted or this is
@@ -910,6 +945,23 @@ static void dm_io_complete(struct dm_io *io)
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
+ if (requeued)
+ dm_kick_requeue(md, first_stage);
+
+ return requeued;
+}
+
+static void __dm_io_complete(struct dm_io *io, bool first_stage)
+{
+ struct bio *bio = io->orig_bio;
+ struct mapped_device *md = io->md;
+ blk_status_t io_error;
+ bool requeued;
+
+ requeued = dm_handle_requeue(io, first_stage);
+ if (requeued && first_stage)
+ return;
+
io_error = io->status;
if (dm_io_flagged(io, DM_IO_ACCOUNTED))
dm_end_io_acct(io);
@@ -929,23 +981,9 @@ static void dm_io_complete(struct dm_io *io)
if (unlikely(wq_has_sleeper(&md->wait)))
wake_up(&md->wait);
- if (io_error == BLK_STS_DM_REQUEUE || io_error == BLK_STS_AGAIN) {
- if (bio->bi_opf & REQ_POLLED) {
- /*
- * Upper layer won't help us poll split bio (io->orig_bio
- * may only reflect a subset of the pre-split original)
- * so clear REQ_POLLED in case of requeue.
- */
- bio_clear_polled(bio);
- if (io_error == BLK_STS_AGAIN) {
- /* io_uring doesn't handle BLK_STS_AGAIN (yet) */
- queue_io(md, bio);
- return;
- }
- }
- if (io_error == BLK_STS_DM_REQUEUE)
- return;
- }
+ /* Return early if the original bio was requeued */
+ if (requeued)
+ return;
if (bio_is_flush_with_data(bio)) {
/*
@@ -962,6 +1000,58 @@ static void dm_io_complete(struct dm_io *io)
}
}
+static void dm_wq_requeue_work(struct work_struct *work)
+{
+ struct mapped_device *md = container_of(work, struct mapped_device,
+ requeue_work);
+ unsigned long flags;
+ struct dm_io *io;
+
+ /* reuse deferred lock to simplify dm_handle_requeue */
+ spin_lock_irqsave(&md->deferred_lock, flags);
+ io = md->requeue_list;
+ md->requeue_list = NULL;
+ spin_unlock_irqrestore(&md->deferred_lock, flags);
+
+ while (io) {
+ struct dm_io *next = io->next;
+
+ dm_io_rewind(io, &md->queue->bio_split);
+
+ io->next = NULL;
+ __dm_io_complete(io, false);
+ io = next;
+ }
+}
+
+/*
+ * Two staged requeue:
+ *
+ * 1) io->orig_bio points to the real original bio, and the part mapped to
+ * this io must be requeued, instead of other parts of the original bio.
+ *
+ * 2) io->orig_bio points to new cloned bio which matches the requeued dm_io.
+ */
+static void dm_io_complete(struct dm_io *io)
+{
+ bool first_requeue;
+
+ /*
+ * Only dm_io that has been split needs two stage requeue, otherwise
+ * we may run into long bio clone chain during suspend and OOM could
+ * be triggered.
+ *
+ * Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so they
+ * also aren't handled via the first stage requeue.
+ */
+ if (dm_io_flagged(io, DM_IO_WAS_SPLIT))
+ first_requeue = true;
+ else
+ first_requeue = false;
+
+ __dm_io_complete(io, first_requeue);
+}
+
/*
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
@@ -1240,6 +1330,7 @@ out:
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
struct dm_target_io *tio = clone_to_tio(bio);
+ struct dm_io *io = tio->io;
unsigned bio_sectors = bio_sectors(bio);
BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
@@ -1255,8 +1346,9 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
* __split_and_process_bio() may have already saved mapped part
* for accounting but it is being reduced so update accordingly.
*/
- dm_io_set_flag(tio->io, DM_IO_WAS_SPLIT);
- tio->io->sectors = n_sectors;
+ dm_io_set_flag(io, DM_IO_WAS_SPLIT);
+ io->sectors = n_sectors;
+ io->sector_offset = bio_sectors(io->orig_bio);
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
@@ -1379,17 +1471,7 @@ static void setup_split_accounting(struct clone_info *ci, unsigned len)
*/
dm_io_set_flag(io, DM_IO_WAS_SPLIT);
io->sectors = len;
- }
-
- if (static_branch_unlikely(&stats_enabled) &&
- unlikely(dm_stats_used(&io->md->stats))) {
- /*
- * Save bi_sector in terms of its offset from end of
- * original bio, only needed for DM-stats' benefit.
- * - saved regardless of whether split needed so that
- * dm_accept_partial_bio() doesn't need to.
- */
- io->sector_offset = bio_end_sector(ci->bio) - ci->sector;
+ io->sector_offset = bio_sectors(ci->bio);
}
}
@@ -1423,11 +1505,11 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
}
static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
- unsigned num_bios, unsigned *len)
+ unsigned int num_bios, unsigned *len)
{
struct bio_list blist = BIO_EMPTY_LIST;
struct bio *clone;
- int ret = 0;
+ unsigned int ret = 0;
switch (num_bios) {
case 0:
@@ -1455,8 +1537,7 @@ static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
static void __send_empty_flush(struct clone_info *ci)
{
- unsigned target_nr = 0;
- struct dm_target *ti;
+ struct dm_table *t = ci->map;
struct bio flush_bio;
/*
@@ -1471,8 +1552,9 @@ static void __send_empty_flush(struct clone_info *ci)
ci->sector_count = 0;
ci->io->tio.clone.bi_iter.bi_size = 0;
- while ((ti = dm_table_get_target(ci->map, target_nr++))) {
- int bios;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ unsigned int bios;
+ struct dm_target *ti = dm_table_get_target(t, i);
atomic_add(ti->num_flush_bios, &ci->io->io_count);
bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
@@ -1492,7 +1574,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target
unsigned num_bios)
{
unsigned len;
- int bios;
+ unsigned int bios;
len = min_t(sector_t, ci->sector_count,
max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector)));
@@ -1691,11 +1773,9 @@ static void dm_split_and_process_bio(struct mapped_device *md,
* Remainder must be passed to submit_bio_noacct() so it gets handled
* *after* bios already submitted have been completely processed.
*/
- WARN_ON_ONCE(!dm_io_flagged(io, DM_IO_WAS_SPLIT));
- io->split_bio = bio_split(bio, io->sectors, GFP_NOIO,
- &md->queue->bio_split);
- bio_chain(io->split_bio, bio);
- trace_block_split(io->split_bio, bio->bi_iter.bi_sector);
+ bio_trim(bio, io->sectors, ci.sector_count);
+ trace_block_split(bio, bio->bi_iter.bi_sector);
+ bio_inc_remaining(bio);
submit_bio_noacct(bio);
out:
/*
@@ -1971,9 +2051,11 @@ static struct mapped_device *alloc_dev(int minor)
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
+ INIT_WORK(&md->requeue_work, dm_wq_requeue_work);
init_waitqueue_head(&md->eventq);
init_completion(&md->kobj_holder.completion);
+ md->requeue_list = NULL;
md->swap_bios = get_swap_bios();
sema_init(&md->swap_bios_semaphore, md->swap_bios);
mutex_init(&md->swap_bios_lock);
@@ -2980,54 +3062,6 @@ int dm_noflush_suspending(struct dm_target *ti)
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
- unsigned per_io_data_size, unsigned min_pool_size,
- bool integrity, bool poll)
-{
- struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
- unsigned int pool_size = 0;
- unsigned int front_pad, io_front_pad;
- int ret;
-
- if (!pools)
- return NULL;
-
- switch (type) {
- case DM_TYPE_BIO_BASED:
- case DM_TYPE_DAX_BIO_BASED:
- pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
- front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;
- io_front_pad = roundup(per_io_data_size, __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
- ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, poll ? BIOSET_PERCPU_CACHE : 0);
- if (ret)
- goto out;
- if (integrity && bioset_integrity_create(&pools->io_bs, pool_size))
- goto out;
- break;
- case DM_TYPE_REQUEST_BASED:
- pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size);
- front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
- /* per_io_data_size is used for blk-mq pdu at queue allocation */
- break;
- default:
- BUG();
- }
-
- ret = bioset_init(&pools->bs, pool_size, front_pad, 0);
- if (ret)
- goto out;
-
- if (integrity && bioset_integrity_create(&pools->bs, pool_size))
- goto out;
-
- return pools;
-
-out:
- dm_free_md_mempools(pools);
-
- return NULL;
-}
-
void dm_free_md_mempools(struct dm_md_mempools *pools)
{
if (!pools)
@@ -3043,11 +3077,14 @@ struct dm_pr {
u64 old_key;
u64 new_key;
u32 flags;
+ bool abort;
bool fail_early;
+ int ret;
+ enum pr_type type;
};
static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
- void *data)
+ struct dm_pr *pr)
{
struct mapped_device *md = bdev->bd_disk->private_data;
struct dm_table *table;
@@ -3059,15 +3096,21 @@ static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
goto out;
/* We only support devices that have a single target */
- if (dm_table_get_num_targets(table) != 1)
+ if (table->num_targets != 1)
goto out;
ti = dm_table_get_target(table, 0);
+ if (dm_suspended_md(md)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
ret = -EINVAL;
if (!ti->type->iterate_devices)
goto out;
- ret = ti->type->iterate_devices(ti, fn, data);
+ ti->type->iterate_devices(ti, fn, pr);
+ ret = 0;
out:
dm_put_live_table(md, srcu_idx);
return ret;
@@ -3081,10 +3124,24 @@ static int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev,
{
struct dm_pr *pr = data;
const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+ int ret;
+
+ if (!ops || !ops->pr_register) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
- if (!ops || !ops->pr_register)
- return -EOPNOTSUPP;
- return ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
+ ret = ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
+ if (!ret)
+ return 0;
+
+ if (!pr->ret)
+ pr->ret = ret;
+
+ if (pr->fail_early)
+ return -1;
+
+ return 0;
}
static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
@@ -3095,82 +3152,145 @@ static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
.new_key = new_key,
.flags = flags,
.fail_early = true,
+ .ret = 0,
};
int ret;
ret = dm_call_pr(bdev, __dm_pr_register, &pr);
- if (ret && new_key) {
- /* unregister all paths if we failed to register any path */
- pr.old_key = new_key;
- pr.new_key = 0;
- pr.flags = 0;
- pr.fail_early = false;
- dm_call_pr(bdev, __dm_pr_register, &pr);
+ if (ret) {
+ /* Didn't even get to register a path */
+ return ret;
}
+ if (!pr.ret)
+ return 0;
+ ret = pr.ret;
+
+ if (!new_key)
+ return ret;
+
+ /* unregister all paths if we failed to register any path */
+ pr.old_key = new_key;
+ pr.new_key = 0;
+ pr.flags = 0;
+ pr.fail_early = false;
+ (void) dm_call_pr(bdev, __dm_pr_register, &pr);
return ret;
}
+
+static int __dm_pr_reserve(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct dm_pr *pr = data;
+ const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+
+ if (!ops || !ops->pr_reserve) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
+
+ pr->ret = ops->pr_reserve(dev->bdev, pr->old_key, pr->type, pr->flags);
+ if (!pr->ret)
+ return -1;
+
+ return 0;
+}
+
static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
u32 flags)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
- const struct pr_ops *ops;
- int r, srcu_idx;
+ struct dm_pr pr = {
+ .old_key = key,
+ .flags = flags,
+ .type = type,
+ .fail_early = false,
+ .ret = 0,
+ };
+ int ret;
- r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
- if (r < 0)
- goto out;
+ ret = dm_call_pr(bdev, __dm_pr_reserve, &pr);
+ if (ret)
+ return ret;
- ops = bdev->bd_disk->fops->pr_ops;
- if (ops && ops->pr_reserve)
- r = ops->pr_reserve(bdev, key, type, flags);
- else
- r = -EOPNOTSUPP;
-out:
- dm_unprepare_ioctl(md, srcu_idx);
- return r;
+ return pr.ret;
+}
+
+/*
+ * If there is a non-All Registrants type of reservation, the release must be
+ * sent down the holding path. For the cases where there is no reservation or
+ * the path is not the holder the device will also return success, so we must
+ * try each path to make sure we got the correct path.
+ */
+static int __dm_pr_release(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct dm_pr *pr = data;
+ const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+
+ if (!ops || !ops->pr_release) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
+
+ pr->ret = ops->pr_release(dev->bdev, pr->old_key, pr->type);
+ if (pr->ret)
+ return -1;
+
+ return 0;
}
static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
- const struct pr_ops *ops;
- int r, srcu_idx;
+ struct dm_pr pr = {
+ .old_key = key,
+ .type = type,
+ .fail_early = false,
+ };
+ int ret;
- r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
- if (r < 0)
- goto out;
+ ret = dm_call_pr(bdev, __dm_pr_release, &pr);
+ if (ret)
+ return ret;
- ops = bdev->bd_disk->fops->pr_ops;
- if (ops && ops->pr_release)
- r = ops->pr_release(bdev, key, type);
- else
- r = -EOPNOTSUPP;
-out:
- dm_unprepare_ioctl(md, srcu_idx);
- return r;
+ return pr.ret;
+}
+
+static int __dm_pr_preempt(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct dm_pr *pr = data;
+ const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
+
+ if (!ops || !ops->pr_preempt) {
+ pr->ret = -EOPNOTSUPP;
+ return -1;
+ }
+
+ pr->ret = ops->pr_preempt(dev->bdev, pr->old_key, pr->new_key, pr->type,
+ pr->abort);
+ if (!pr->ret)
+ return -1;
+
+ return 0;
}
static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
enum pr_type type, bool abort)
{
- struct mapped_device *md = bdev->bd_disk->private_data;
- const struct pr_ops *ops;
- int r, srcu_idx;
+ struct dm_pr pr = {
+ .new_key = new_key,
+ .old_key = old_key,
+ .type = type,
+ .fail_early = false,
+ };
+ int ret;
- r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
- if (r < 0)
- goto out;
+ ret = dm_call_pr(bdev, __dm_pr_preempt, &pr);
+ if (ret)
+ return ret;
- ops = bdev->bd_disk->fops->pr_ops;
- if (ops && ops->pr_preempt)
- r = ops->pr_preempt(bdev, old_key, new_key, type, abort);
- else
- r = -EOPNOTSUPP;
-out:
- dm_unprepare_ioctl(md, srcu_idx);
- return r;
+ return pr.ret;
}
static int dm_pr_clear(struct block_device *bdev, u64 key)