diff options
Diffstat (limited to 'drivers/md/dm-thin.c')
-rw-r--r-- | drivers/md/dm-thin.c | 612 |
1 files changed, 445 insertions, 167 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e852602c0091..c33f61a4cc28 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -111,22 +111,30 @@ DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, /* * Key building. */ -static void build_data_key(struct dm_thin_device *td, - dm_block_t b, struct dm_cell_key *key) +enum lock_space { + VIRTUAL, + PHYSICAL +}; + +static void build_key(struct dm_thin_device *td, enum lock_space ls, + dm_block_t b, dm_block_t e, struct dm_cell_key *key) { - key->virtual = 0; + key->virtual = (ls == VIRTUAL); key->dev = dm_thin_dev_id(td); key->block_begin = b; - key->block_end = b + 1ULL; + key->block_end = e; +} + +static void build_data_key(struct dm_thin_device *td, dm_block_t b, + struct dm_cell_key *key) +{ + build_key(td, PHYSICAL, b, b + 1llu, key); } static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, struct dm_cell_key *key) { - key->virtual = 1; - key->dev = dm_thin_dev_id(td); - key->block_begin = b; - key->block_end = b + 1ULL; + build_key(td, VIRTUAL, b, b + 1llu, key); } /*----------------------------------------------------------------*/ @@ -312,6 +320,138 @@ struct thin_c { /*----------------------------------------------------------------*/ +/** + * __blkdev_issue_discard_async - queue a discard with async completion + * @bdev: blockdev to issue discard for + * @sector: start sector + * @nr_sects: number of sectors to discard + * @gfp_mask: memory allocation flags (for bio_alloc) + * @flags: BLKDEV_IFL_* flags to control behaviour + * @parent_bio: parent discard bio that all sub discards get chained to + * + * Description: + * Asynchronously issue a discard request for the sectors in question. + * NOTE: this variant of blk-core's blkdev_issue_discard() is a stop-gap + * that is being kept local to DM thinp until the block changes to allow + * late bio splitting land upstream. + */ +static int __blkdev_issue_discard_async(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, unsigned long flags, + struct bio *parent_bio) +{ + struct request_queue *q = bdev_get_queue(bdev); + int type = REQ_WRITE | REQ_DISCARD; + unsigned int max_discard_sectors, granularity; + int alignment; + struct bio *bio; + int ret = 0; + struct blk_plug plug; + + if (!q) + return -ENXIO; + + if (!blk_queue_discard(q)) + return -EOPNOTSUPP; + + /* Zero-sector (unknown) and one-sector granularities are the same. */ + granularity = max(q->limits.discard_granularity >> 9, 1U); + alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; + + /* + * Ensure that max_discard_sectors is of the proper + * granularity, so that requests stay aligned after a split. + */ + max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); + max_discard_sectors -= max_discard_sectors % granularity; + if (unlikely(!max_discard_sectors)) { + /* Avoid infinite loop below. Being cautious never hurts. */ + return -EOPNOTSUPP; + } + + if (flags & BLKDEV_DISCARD_SECURE) { + if (!blk_queue_secdiscard(q)) + return -EOPNOTSUPP; + type |= REQ_SECURE; + } + + blk_start_plug(&plug); + while (nr_sects) { + unsigned int req_sects; + sector_t end_sect, tmp; + + /* + * Required bio_put occurs in bio_endio thanks to bio_chain below + */ + bio = bio_alloc(gfp_mask, 1); + if (!bio) { + ret = -ENOMEM; + break; + } + + req_sects = min_t(sector_t, nr_sects, max_discard_sectors); + + /* + * If splitting a request, and the next starting sector would be + * misaligned, stop the discard at the previous aligned sector. + */ + end_sect = sector + req_sects; + tmp = end_sect; + if (req_sects < nr_sects && + sector_div(tmp, granularity) != alignment) { + end_sect = end_sect - alignment; + sector_div(end_sect, granularity); + end_sect = end_sect * granularity + alignment; + req_sects = end_sect - sector; + } + + bio_chain(bio, parent_bio); + + bio->bi_iter.bi_sector = sector; + bio->bi_bdev = bdev; + + bio->bi_iter.bi_size = req_sects << 9; + nr_sects -= req_sects; + sector = end_sect; + + submit_bio(type, bio); + + /* + * We can loop for a long time in here, if someone does + * full device discards (like mkfs). Be nice and allow + * us to schedule out to avoid softlocking if preempt + * is disabled. + */ + cond_resched(); + } + blk_finish_plug(&plug); + + return ret; +} + +static bool block_size_is_power_of_two(struct pool *pool) +{ + return pool->sectors_per_block_shift >= 0; +} + +static sector_t block_to_sectors(struct pool *pool, dm_block_t b) +{ + return block_size_is_power_of_two(pool) ? + (b << pool->sectors_per_block_shift) : + (b * pool->sectors_per_block); +} + +static int issue_discard(struct thin_c *tc, dm_block_t data_b, dm_block_t data_e, + struct bio *parent_bio) +{ + sector_t s = block_to_sectors(tc->pool, data_b); + sector_t len = block_to_sectors(tc->pool, data_e - data_b); + + return __blkdev_issue_discard_async(tc->pool_dev->bdev, s, len, + GFP_NOWAIT, 0, parent_bio); +} + +/*----------------------------------------------------------------*/ + /* * wake_worker() is used when new work is queued and when pool_resume is * ready to continue deferred IO processing. @@ -461,6 +601,7 @@ struct dm_thin_endio_hook { struct dm_deferred_entry *all_io_entry; struct dm_thin_new_mapping *overwrite_mapping; struct rb_node rb_node; + struct dm_bio_prison_cell *cell; }; static void __merge_bio_list(struct bio_list *bios, struct bio_list *master) @@ -541,11 +682,6 @@ static void error_retry_list(struct pool *pool) * target. */ -static bool block_size_is_power_of_two(struct pool *pool) -{ - return pool->sectors_per_block_shift >= 0; -} - static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) { struct pool *pool = tc->pool; @@ -559,6 +695,34 @@ static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) return block_nr; } +/* + * Returns the _complete_ blocks that this bio covers. + */ +static void get_bio_block_range(struct thin_c *tc, struct bio *bio, + dm_block_t *begin, dm_block_t *end) +{ + struct pool *pool = tc->pool; + sector_t b = bio->bi_iter.bi_sector; + sector_t e = b + (bio->bi_iter.bi_size >> SECTOR_SHIFT); + + b += pool->sectors_per_block - 1ull; /* so we round up */ + + if (block_size_is_power_of_two(pool)) { + b >>= pool->sectors_per_block_shift; + e >>= pool->sectors_per_block_shift; + } else { + (void) sector_div(b, pool->sectors_per_block); + (void) sector_div(e, pool->sectors_per_block); + } + + if (e < b) + /* Can happen if the bio is within a single block. */ + e = b; + + *begin = b; + *end = e; +} + static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) { struct pool *pool = tc->pool; @@ -647,7 +811,7 @@ struct dm_thin_new_mapping { struct list_head list; bool pass_discard:1; - bool definitely_not_shared:1; + bool maybe_shared:1; /* * Track quiescing, copying and zeroing preparation actions. When this @@ -658,9 +822,9 @@ struct dm_thin_new_mapping { int err; struct thin_c *tc; - dm_block_t virt_block; + dm_block_t virt_begin, virt_end; dm_block_t data_block; - struct dm_bio_prison_cell *cell, *cell2; + struct dm_bio_prison_cell *cell; /* * If the bio covers the whole area of a block then we can avoid @@ -705,6 +869,8 @@ static void overwrite_endio(struct bio *bio, int err) struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct dm_thin_new_mapping *m = h->overwrite_mapping; + bio->bi_end_io = m->saved_bi_end_io; + m->err = err; complete_mapping_preparation(m); } @@ -793,9 +959,6 @@ static void inc_remap_and_issue_cell(struct thin_c *tc, static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) { - if (m->bio) - m->bio->bi_end_io = m->saved_bi_end_io; - cell_error(m->tc->pool, m->cell); list_del(&m->list); mempool_free(m, m->tc->pool->mapping_pool); @@ -805,13 +968,9 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; struct pool *pool = tc->pool; - struct bio *bio; + struct bio *bio = m->bio; int r; - bio = m->bio; - if (bio) - bio->bi_end_io = m->saved_bi_end_io; - if (m->err) { cell_error(pool, m->cell); goto out; @@ -822,7 +981,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) * Any I/O for this block arriving after this point will get * remapped to it directly. */ - r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); + r = dm_thin_insert_block(tc->td, m->virt_begin, m->data_block); if (r) { metadata_operation_failed(pool, "dm_thin_insert_block", r); cell_error(pool, m->cell); @@ -849,50 +1008,112 @@ out: mempool_free(m, pool->mapping_pool); } -static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) +/*----------------------------------------------------------------*/ + +static void free_discard_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; + if (m->cell) + cell_defer_no_holder(tc, m->cell); + mempool_free(m, tc->pool->mapping_pool); +} +static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) +{ bio_io_error(m->bio); + free_discard_mapping(m); +} + +static void process_prepared_discard_success(struct dm_thin_new_mapping *m) +{ + bio_endio(m->bio, 0); + free_discard_mapping(m); +} + +static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m) +{ + int r; + struct thin_c *tc = m->tc; + + r = dm_thin_remove_range(tc->td, m->cell->key.block_begin, m->cell->key.block_end); + if (r) { + metadata_operation_failed(tc->pool, "dm_thin_remove_range", r); + bio_io_error(m->bio); + } else + bio_endio(m->bio, 0); + cell_defer_no_holder(tc, m->cell); - cell_defer_no_holder(tc, m->cell2); mempool_free(m, tc->pool->mapping_pool); } -static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) +static int passdown_double_checking_shared_status(struct dm_thin_new_mapping *m) { + /* + * We've already unmapped this range of blocks, but before we + * passdown we have to check that these blocks are now unused. + */ + int r; + bool used = true; struct thin_c *tc = m->tc; + struct pool *pool = tc->pool; + dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin; - inc_all_io_entry(tc->pool, m->bio); - cell_defer_no_holder(tc, m->cell); - cell_defer_no_holder(tc, m->cell2); + while (b != end) { + /* find start of unmapped run */ + for (; b < end; b++) { + r = dm_pool_block_is_used(pool->pmd, b, &used); + if (r) + return r; - if (m->pass_discard) - if (m->definitely_not_shared) - remap_and_issue(tc, m->bio, m->data_block); - else { - bool used = false; - if (dm_pool_block_is_used(tc->pool->pmd, m->data_block, &used) || used) - bio_endio(m->bio, 0); - else - remap_and_issue(tc, m->bio, m->data_block); + if (!used) + break; } - else - bio_endio(m->bio, 0); - mempool_free(m, tc->pool->mapping_pool); + if (b == end) + break; + + /* find end of run */ + for (e = b + 1; e != end; e++) { + r = dm_pool_block_is_used(pool->pmd, e, &used); + if (r) + return r; + + if (used) + break; + } + + r = issue_discard(tc, b, e, m->bio); + if (r) + return r; + + b = e; + } + + return 0; } -static void process_prepared_discard(struct dm_thin_new_mapping *m) +static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) { int r; struct thin_c *tc = m->tc; + struct pool *pool = tc->pool; - r = dm_thin_remove_block(tc->td, m->virt_block); + r = dm_thin_remove_range(tc->td, m->virt_begin, m->virt_end); if (r) - DMERR_LIMIT("dm_thin_remove_block() failed"); + metadata_operation_failed(pool, "dm_thin_remove_range", r); + + else if (m->maybe_shared) + r = passdown_double_checking_shared_status(m); + else + r = issue_discard(tc, m->data_block, m->data_block + (m->virt_end - m->virt_begin), m->bio); - process_prepared_discard_passdown(m); + /* + * Even if r is set, there could be sub discards in flight that we + * need to wait for. + */ + bio_endio(m->bio, r); + cell_defer_no_holder(tc, m->cell); + mempool_free(m, pool->mapping_pool); } static void process_prepared(struct pool *pool, struct list_head *head, @@ -976,7 +1197,7 @@ static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m, } static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio, - dm_block_t data_block, + dm_block_t data_begin, struct dm_thin_new_mapping *m) { struct pool *pool = tc->pool; @@ -986,7 +1207,7 @@ static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio, m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); inc_all_io_entry(pool, bio); - remap_and_issue(tc, bio, data_block); + remap_and_issue(tc, bio, data_begin); } /* @@ -1003,7 +1224,8 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, struct dm_thin_new_mapping *m = get_next_mapping(pool); m->tc = tc; - m->virt_block = virt_block; + m->virt_begin = virt_block; + m->virt_end = virt_block + 1u; m->data_block = data_dest; m->cell = cell; @@ -1082,7 +1304,8 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, atomic_set(&m->prepare_actions, 1); /* no need to quiesce */ m->tc = tc; - m->virt_block = virt_block; + m->virt_begin = virt_block; + m->virt_end = virt_block + 1u; m->data_block = data_block; m->cell = cell; @@ -1091,16 +1314,14 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, * zeroing pre-existing data, we can issue the bio immediately. * Otherwise we use kcopyd to zero the data first. */ - if (!pool->pf.zero_new_blocks) + if (pool->pf.zero_new_blocks) { + if (io_overwrites_block(pool, bio)) + remap_and_issue_overwrite(tc, bio, data_block, m); + else + ll_zero(tc, m, data_block * pool->sectors_per_block, + (data_block + 1) * pool->sectors_per_block); + } else process_prepared_mapping(m); - - else if (io_overwrites_block(pool, bio)) - remap_and_issue_overwrite(tc, bio, data_block, m); - - else - ll_zero(tc, m, - data_block * pool->sectors_per_block, - (data_block + 1) * pool->sectors_per_block); } static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, @@ -1291,99 +1512,149 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c retry_on_resume(bio); } -static void process_discard_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell) +static void process_discard_cell_no_passdown(struct thin_c *tc, + struct dm_bio_prison_cell *virt_cell) { - int r; - struct bio *bio = cell->holder; struct pool *pool = tc->pool; - struct dm_bio_prison_cell *cell2; - struct dm_cell_key key2; - dm_block_t block = get_bio_block(tc, bio); - struct dm_thin_lookup_result lookup_result; - struct dm_thin_new_mapping *m; + struct dm_thin_new_mapping *m = get_next_mapping(pool); - if (tc->requeue_mode) { - cell_requeue(pool, cell); - return; - } + /* + * We don't need to lock the data blocks, since there's no + * passdown. We only lock data blocks for allocation and breaking sharing. + */ + m->tc = tc; + m->virt_begin = virt_cell->key.block_begin; + m->virt_end = virt_cell->key.block_end; + m->cell = virt_cell; + m->bio = virt_cell->holder; - r = dm_thin_find_block(tc->td, block, 1, &lookup_result); - switch (r) { - case 0: - /* - * Check nobody is fiddling with this pool block. This can - * happen if someone's in the process of breaking sharing - * on this block. - */ - build_data_key(tc->td, lookup_result.block, &key2); - if (bio_detain(tc->pool, &key2, bio, &cell2)) { - cell_defer_no_holder(tc, cell); - break; - } + if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) + pool->process_prepared_discard(m); +} - if (io_overlaps_block(pool, bio)) { - /* - * IO may still be going to the destination block. We must - * quiesce before we can do the removal. - */ - m = get_next_mapping(pool); - m->tc = tc; - m->pass_discard = pool->pf.discard_passdown; - m->definitely_not_shared = !lookup_result.shared; - m->virt_block = block; - m->data_block = lookup_result.block; - m->cell = cell; - m->cell2 = cell2; - m->bio = bio; - - if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) - pool->process_prepared_discard(m); +/* + * FIXME: DM local hack to defer parent bios's end_io until we + * _know_ all chained sub range discard bios have completed. + * Will go away once late bio splitting lands upstream! + */ +static inline void __bio_inc_remaining(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_CHAIN); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_remaining); +} - } else { - inc_all_io_entry(pool, bio); - cell_defer_no_holder(tc, cell); - cell_defer_no_holder(tc, cell2); +static void break_up_discard_bio(struct thin_c *tc, dm_block_t begin, dm_block_t end, + struct bio *bio) +{ + struct pool *pool = tc->pool; + int r; + bool maybe_shared; + struct dm_cell_key data_key; + struct dm_bio_prison_cell *data_cell; + struct dm_thin_new_mapping *m; + dm_block_t virt_begin, virt_end, data_begin; + + while (begin != end) { + r = ensure_next_mapping(pool); + if (r) + /* we did our best */ + return; + + r = dm_thin_find_mapped_range(tc->td, begin, end, &virt_begin, &virt_end, + &data_begin, &maybe_shared); + if (r) /* - * The DM core makes sure that the discard doesn't span - * a block boundary. So we submit the discard of a - * partial block appropriately. + * Silently fail, letting any mappings we've + * created complete. */ - if ((!lookup_result.shared) && pool->pf.discard_passdown) - remap_and_issue(tc, bio, lookup_result.block); - else - bio_endio(bio, 0); + break; + + build_key(tc->td, PHYSICAL, data_begin, data_begin + (virt_end - virt_begin), &data_key); + if (bio_detain(tc->pool, &data_key, NULL, &data_cell)) { + /* contention, we'll give up with this range */ + begin = virt_end; + continue; } - break; - case -ENODATA: /* - * It isn't provisioned, just forget it. + * IO may still be going to the destination block. We must + * quiesce before we can do the removal. */ - cell_defer_no_holder(tc, cell); - bio_endio(bio, 0); - break; + m = get_next_mapping(pool); + m->tc = tc; + m->maybe_shared = maybe_shared; + m->virt_begin = virt_begin; + m->virt_end = virt_end; + m->data_block = data_begin; + m->cell = data_cell; + m->bio = bio; - default: - DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", - __func__, r); - cell_defer_no_holder(tc, cell); - bio_io_error(bio); - break; + /* + * The parent bio must not complete before sub discard bios are + * chained to it (see __blkdev_issue_discard_async's bio_chain)! + * + * This per-mapping bi_remaining increment is paired with + * the implicit decrement that occurs via bio_endio() in + * process_prepared_discard_{passdown,no_passdown}. + */ + __bio_inc_remaining(bio); + if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) + pool->process_prepared_discard(m); + + begin = virt_end; } } +static void process_discard_cell_passdown(struct thin_c *tc, struct dm_bio_prison_cell *virt_cell) +{ + struct bio *bio = virt_cell->holder; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); + + /* + * The virt_cell will only get freed once the origin bio completes. + * This means it will remain locked while all the individual + * passdown bios are in flight. + */ + h->cell = virt_cell; + break_up_discard_bio(tc, virt_cell->key.block_begin, virt_cell->key.block_end, bio); + + /* + * We complete the bio now, knowing that the bi_remaining field + * will prevent completion until the sub range discards have + * completed. + */ + bio_endio(bio, 0); +} + static void process_discard_bio(struct thin_c *tc, struct bio *bio) { - struct dm_bio_prison_cell *cell; - struct dm_cell_key key; - dm_block_t block = get_bio_block(tc, bio); + dm_block_t begin, end; + struct dm_cell_key virt_key; + struct dm_bio_prison_cell *virt_cell; - build_virtual_key(tc->td, block, &key); - if (bio_detain(tc->pool, &key, bio, &cell)) + get_bio_block_range(tc, bio, &begin, &end); + if (begin == end) { + /* + * The discard covers less than a block. + */ + bio_endio(bio, 0); + return; + } + + build_key(tc->td, VIRTUAL, begin, end, &virt_key); + if (bio_detain(tc->pool, &virt_key, bio, &virt_cell)) + /* + * Potential starvation issue: We're relying on the + * fs/application being well behaved, and not trying to + * send IO to a region at the same time as discarding it. + * If they do this persistently then it's possible this + * cell will never be granted. + */ return; - process_discard_cell(tc, cell); + tc->pool->process_discard_cell(tc, virt_cell); } static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, @@ -2099,6 +2370,24 @@ static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode) dm_device_name(pool->pool_md), new_mode); } +static bool passdown_enabled(struct pool_c *pt) +{ + return pt->adjusted_pf.discard_passdown; +} + +static void set_discard_callbacks(struct pool *pool) +{ + struct pool_c *pt = pool->ti->private; + + if (passdown_enabled(pt)) { + pool->process_discard_cell = process_discard_cell_passdown; + pool->process_prepared_discard = process_prepared_discard_passdown; + } else { + pool->process_discard_cell = process_discard_cell_no_passdown; + pool->process_prepared_discard = process_prepared_discard_no_passdown; + } +} + static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) { struct pool_c *pt = pool->ti->private; @@ -2150,7 +2439,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) pool->process_cell = process_cell_read_only; pool->process_discard_cell = process_cell_success; pool->process_prepared_mapping = process_prepared_mapping_fail; - pool->process_prepared_discard = process_prepared_discard_passdown; + pool->process_prepared_discard = process_prepared_discard_success; error_retry_list(pool); break; @@ -2169,9 +2458,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) pool->process_bio = process_bio_read_only; pool->process_discard = process_discard_bio; pool->process_cell = process_cell_read_only; - pool->process_discard_cell = process_discard_cell; pool->process_prepared_mapping = process_prepared_mapping; - pool->process_prepared_discard = process_prepared_discard; + set_discard_callbacks(pool); if (!pool->pf.error_if_no_space && no_space_timeout) queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout); @@ -2184,9 +2472,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) pool->process_bio = process_bio; pool->process_discard = process_discard_bio; pool->process_cell = process_cell; - pool->process_discard_cell = process_discard_cell; pool->process_prepared_mapping = process_prepared_mapping; - pool->process_prepared_discard = process_prepared_discard; + set_discard_callbacks(pool); break; } @@ -2275,6 +2562,7 @@ static void thin_hook_bio(struct thin_c *tc, struct bio *bio) h->shared_read_entry = NULL; h->all_io_entry = NULL; h->overwrite_mapping = NULL; + h->cell = NULL; } /* @@ -2422,7 +2710,6 @@ static void disable_passdown_if_not_supported(struct pool_c *pt) struct pool *pool = pt->pool; struct block_device *data_bdev = pt->data_dev->bdev; struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits; - sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT; const char *reason = NULL; char buf[BDEVNAME_SIZE]; @@ -2435,12 +2722,6 @@ static void disable_passdown_if_not_supported(struct pool_c *pt) else if (data_limits->max_discard_sectors < pool->sectors_per_block) reason = "max discard sectors smaller than a block"; - else if (data_limits->discard_granularity > block_size) - reason = "discard granularity larger than a block"; - - else if (!is_factor(block_size, data_limits->discard_granularity)) - reason = "discard granularity not a factor of block size"; - if (reason) { DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason); pt->adjusted_pf.discard_passdown = false; @@ -3375,7 +3656,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) if (get_pool_mode(pool) >= PM_READ_ONLY) { DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode", dm_device_name(pool->pool_md)); - return -EINVAL; + return -EOPNOTSUPP; } if (!strcasecmp(argv[0], "create_thin")) @@ -3573,24 +3854,6 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } -static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) -{ - struct pool *pool = pt->pool; - struct queue_limits *data_limits; - - limits->max_discard_sectors = pool->sectors_per_block; - - /* - * discard_granularity is just a hint, and not enforced. - */ - if (pt->adjusted_pf.discard_passdown) { - data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; - limits->discard_granularity = max(data_limits->discard_granularity, - pool->sectors_per_block << SECTOR_SHIFT); - } else - limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; -} - static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct pool_c *pt = ti->private; @@ -3645,14 +3908,17 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) disable_passdown_if_not_supported(pt); - set_discard_limits(pt, limits); + /* + * The pool uses the same discard limits as the underlying data + * device. DM core has already set this up. + */ } static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 14, 0}, + .version = {1, 15, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -3811,8 +4077,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) if (tc->pool->pf.discard_enabled) { ti->discards_supported = true; ti->num_discard_bios = 1; - /* Discard bios must be split on a block boundary */ - ti->split_discard_bios = true; + ti->split_discard_bios = false; } mutex_unlock(&dm_thin_pool_table.mutex); @@ -3899,6 +4164,9 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err) } } + if (h->cell) + cell_defer_no_holder(h->tc, h->cell); + return 0; } @@ -4026,9 +4294,18 @@ static int thin_iterate_devices(struct dm_target *ti, return 0; } +static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + struct thin_c *tc = ti->private; + struct pool *pool = tc->pool; + + limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; + limits->max_discard_sectors = 2048 * 1024 * 16; /* 16G */ +} + static struct target_type thin_target = { .name = "thin", - .version = {1, 14, 0}, + .version = {1, 15, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, @@ -4040,6 +4317,7 @@ static struct target_type thin_target = { .status = thin_status, .merge = thin_merge, .iterate_devices = thin_iterate_devices, + .io_hints = thin_io_hints, }; /*----------------------------------------------------------------*/ |