From 2e8ed71102ff8fe3919dd3a2d73ac4da72686efc Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 19 Nov 2015 13:50:32 +0000 Subject: dm block manager: make block locking optional The block manager's locking is useful for catching cycles that may result from certain btree metadata corruption. But in general it serves as a developer tool to catch bugs in code. Unless you're finding that DM thin provisioning is hanging due to infinite loops within the block manager's access to btree nodes you can safely disable this feature. Signed-off-by: Joe Thornber Signed-off-by: Arnd Bergmann # do/while(0) macro fix Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 10 +++++++++- drivers/md/persistent-data/dm-block-manager.c | 19 ++++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 02a5345a44a6..b7767da50c26 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -240,9 +240,17 @@ config DM_BUFIO as a cache, holding recently-read blocks in memory and performing delayed writes. +config DM_DEBUG_BLOCK_MANAGER_LOCKING + bool "Block manager locking" + depends on DM_BUFIO + ---help--- + Block manager locking can catch various metadata corruption issues. + + If unsure, say N. + config DM_DEBUG_BLOCK_STACK_TRACING bool "Keep stack trace of persistent data block lock holders" - depends on STACKTRACE_SUPPORT && DM_BUFIO + depends on STACKTRACE_SUPPORT && DM_DEBUG_BLOCK_MANAGER_LOCKING select STACKTRACE ---help--- Enable this for messages that may help debug problems with the diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 1e33dd51c21f..a6dde7cab458 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -18,6 +18,8 @@ /*----------------------------------------------------------------*/ +#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING + /* * This is a read/write semaphore with a couple of differences. * @@ -302,6 +304,18 @@ static void report_recursive_bug(dm_block_t b, int r) (unsigned long long) b); } +#else /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */ + +#define bl_init(x) do { } while (0) +#define bl_down_read(x) 0 +#define bl_down_read_nonblock(x) 0 +#define bl_up_read(x) do { } while (0) +#define bl_down_write(x) 0 +#define bl_up_write(x) do { } while (0) +#define report_recursive_bug(x, y) do { } while (0) + +#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */ + /*----------------------------------------------------------------*/ /* @@ -330,8 +344,11 @@ EXPORT_SYMBOL_GPL(dm_block_data); struct buffer_aux { struct dm_block_validator *validator; - struct block_lock lock; int write_locked; + +#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING + struct block_lock lock; +#endif }; static void dm_block_manager_alloc_callback(struct dm_buffer *buf) -- cgit v1.2.3 From d15bb3a6467e102e60d954aadda5fb19ce6fd8ec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 11 Nov 2016 17:05:27 -0800 Subject: dm rq: fix a race condition in rq_completed() It is required to hold the queue lock when calling blk_run_queue_async() to avoid that a race between blk_run_queue_async() and blk_cleanup_queue() is triggered. 
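
For readability, the core of the change below restated as a minimal sketch (it assumes a legacy, non-blk-mq struct request_queue *q and a local flags variable):

        unsigned long flags;

        /* blk_run_queue_async() expects q->queue_lock to be held */
        spin_lock_irqsave(q->queue_lock, flags);
        blk_run_queue_async(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
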
Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 1d0d2adc050a..31a89c8832c0 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -226,6 +226,9 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig) */ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) { + struct request_queue *q = md->queue; + unsigned long flags; + atomic_dec(&md->pending[rw]); /* nudge anyone waiting on suspend queue */ @@ -238,8 +241,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) * back into ->request_fn() could deadlock attempting to grab the * queue lock again. */ - if (!md->queue->mq_ops && run_queue) - blk_run_queue_async(md->queue); + if (!q->mq_ops && run_queue) { + spin_lock_irqsave(q->queue_lock, flags); + blk_run_queue_async(q); + spin_unlock_irqrestore(q->queue_lock, flags); + } /* * dm_put() must be at the end of this function. See the comment above -- cgit v1.2.3 From 4f9c74c6043891d415730bcb153c579be35c352f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Nov 2016 20:05:36 +0800 Subject: dm rq: replace 'bio->bi_vcnt == 1' with !bio_multiple_segments Avoid accessing .bi_vcnt directly, because the bio can be split from block layer and .bi_vcnt should never have been used here. Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 31a89c8832c0..54b081588b55 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -825,7 +825,7 @@ static void dm_old_request_fn(struct request_queue *q) pos = blk_rq_pos(rq); if ((dm_old_request_peeked_before_merge_deadline(md) && - md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 && + md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) && md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) || (ti->type->busy && ti->type->busy(ti))) { blk_delay_queue(q, 10); -- cgit v1.2.3 From cacc7b0556739bd6018252731c0237c071ba51da Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Nov 2016 20:05:35 +0800 Subject: dm io: use bvec iterator helpers to implement .get_page and .next_page Firstly we have mature bvec/bio iterator helper for iterate each page in one bio, not necessary to reinvent a wheel to do that. Secondly the coming multipage bvecs requires this patch. Also add comments about the direct access to bvec table. 
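
As background for the patch below, a minimal sketch of walking a bio one page at a time with the bvec iterator helpers (it assumes a populated struct bio *bio; the in-kernel bio_for_each_segment() macro expresses the same loop):

        struct bvec_iter iter = bio->bi_iter;
        struct bio_vec bv;

        while (iter.bi_size) {
                bv = bvec_iter_bvec(bio->bi_io_vec, iter);
                /* bv.bv_page, bv.bv_offset and bv.bv_len describe one segment */
                bvec_iter_advance(bio->bi_io_vec, &iter, bv.bv_len);
        }
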
Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm-io.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 0bf1a12e35fe..03940bf36f6c 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -162,7 +162,10 @@ struct dpages { struct page **p, unsigned long *len, unsigned *offset); void (*next_page)(struct dpages *dp); - unsigned context_u; + union { + unsigned context_u; + struct bvec_iter context_bi; + }; void *context_ptr; void *vma_invalidate_address; @@ -204,25 +207,36 @@ static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offse static void bio_get_page(struct dpages *dp, struct page **p, unsigned long *len, unsigned *offset) { - struct bio_vec *bvec = dp->context_ptr; - *p = bvec->bv_page; - *len = bvec->bv_len - dp->context_u; - *offset = bvec->bv_offset + dp->context_u; + struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr, + dp->context_bi); + + *p = bvec.bv_page; + *len = bvec.bv_len; + *offset = bvec.bv_offset; + + /* avoid figuring it out again in bio_next_page() */ + dp->context_bi.bi_sector = (sector_t)bvec.bv_len; } static void bio_next_page(struct dpages *dp) { - struct bio_vec *bvec = dp->context_ptr; - dp->context_ptr = bvec + 1; - dp->context_u = 0; + unsigned int len = (unsigned int)dp->context_bi.bi_sector; + + bvec_iter_advance((struct bio_vec *)dp->context_ptr, + &dp->context_bi, len); } static void bio_dp_init(struct dpages *dp, struct bio *bio) { dp->get_page = bio_get_page; dp->next_page = bio_next_page; - dp->context_ptr = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); - dp->context_u = bio->bi_iter.bi_bvec_done; + + /* + * We just use bvec iterator to retrieve pages, so it is ok to + * access the bvec table directly here + */ + dp->context_ptr = bio->bi_io_vec; + dp->context_bi = bio->bi_iter; } /* -- cgit v1.2.3 From 0dae7fe59749ba3a3644f717315097422bea9680 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 29 Oct 2016 16:08:06 +0800 Subject: dm crypt: use bio_add_page() Use bio_add_page(), the standard interface for adding a page to a bio, rather than open-coding the same. It should be noted that the 'clone' bio that is allocated using bio_alloc_bioset(), in crypt_alloc_buffer(), does _not_ set the bio's BIO_CLONED flag. As such, bio_add_page()'s early return for true bio clones (those with BIO_CLONED set) isn't applicable. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index a2768835d394..4999c7497f95 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -994,7 +994,6 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM; unsigned i, len, remaining_size; struct page *page; - struct bio_vec *bvec; retry: if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) @@ -1019,12 +1018,7 @@ retry: len = (remaining_size > PAGE_SIZE) ? 
PAGE_SIZE : remaining_size; - bvec = &clone->bi_io_vec[clone->bi_vcnt++]; - bvec->bv_page = page; - bvec->bv_len = len; - bvec->bv_offset = 0; - - clone->bi_iter.bi_size += len; + bio_add_page(clone, page, len, 0); remaining_size -= len; } -- cgit v1.2.3 From 265e9098bac02bc5e36cda21fdbad34cb5b2f48d Mon Sep 17 00:00:00 2001 From: Ondrej Kozina Date: Wed, 2 Nov 2016 15:02:08 +0100 Subject: dm crypt: mark key as invalid until properly loaded In crypt_set_key(), if a failure occurs while replacing the old key (e.g. tfm->setkey() fails) the key must not have DM_CRYPT_KEY_VALID flag set. Otherwise, the crypto layer would have an invalid key that still has DM_CRYPT_KEY_VALID flag set. Cc: stable@vger.kernel.org Signed-off-by: Ondrej Kozina Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 4999c7497f95..01fdaec5df60 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1497,12 +1497,15 @@ static int crypt_set_key(struct crypt_config *cc, char *key) if (!cc->key_size && strcmp(key, "-")) goto out; + /* clear the flag since following operations may invalidate previously valid key */ + clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); + if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) goto out; - set_bit(DM_CRYPT_KEY_VALID, &cc->flags); - r = crypt_setkey_allcpus(cc); + if (!r) + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); out: /* Hex key string not needed after here, so wipe it. */ -- cgit v1.2.3 From 671ea6b4574e28c5f5f51a4261b9492639507bbc Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 25 Aug 2016 07:12:54 -0400 Subject: dm crypt: rename crypt_setkey_allcpus to crypt_setkey In the past, dm-crypt used per-cpu crypto context. This has been removed in the kernel 3.15 and the crypto context is shared between all cpus. This patch renames the function crypt_setkey_allcpus to crypt_setkey, because there is really no activity that is done for all cpus. 
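
Taken together with the previous patch, the key-change path now follows this ordering (a condensed sketch of crypt_set_key() using the new function name):

        /* invalidate first: a failing setkey must not leave the flag set */
        clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);

        if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0)
                goto out;

        r = crypt_setkey(cc);
        if (!r)
                set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
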
Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 01fdaec5df60..3b8eab985947 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1465,7 +1465,7 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) return 0; } -static int crypt_setkey_allcpus(struct crypt_config *cc) +static int crypt_setkey(struct crypt_config *cc) { unsigned subkey_size; int err = 0, i, r; @@ -1503,7 +1503,7 @@ static int crypt_set_key(struct crypt_config *cc, char *key) if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) goto out; - r = crypt_setkey_allcpus(cc); + r = crypt_setkey(cc); if (!r) set_bit(DM_CRYPT_KEY_VALID, &cc->flags); @@ -1519,7 +1519,7 @@ static int crypt_wipe_key(struct crypt_config *cc) clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); memset(&cc->key, 0, cc->key_size * sizeof(u8)); - return crypt_setkey_allcpus(cc); + return crypt_setkey(cc); } static void crypt_dtr(struct dm_target *ti) -- cgit v1.2.3 From 21ffe552e9cd9f5ce8f214d8a8f07c8fe9a9fc8b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Sep 2016 12:21:28 -0700 Subject: dm verity: fix incorrect error message Signed-off-by: Eric Biggers Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 0aba34a7b3b3..7335d8a3fc47 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -868,7 +868,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev); if (r) { - ti->error = "Data device lookup failed"; + ti->error = "Hash device lookup failed"; goto bad; } -- cgit v1.2.3 From 07d938822a1a105ae806f9fc263bc0b86d71935a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 3 Oct 2016 11:56:22 -0400 Subject: dm cache metadata: remove an extra newline in DMERR and code Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-metadata.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 695577812cf6..624fe4319b24 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -383,7 +383,6 @@ static int __format_metadata(struct dm_cache_metadata *cmd) goto bad; dm_disk_bitset_init(cmd->tm, &cmd->discard_info); - r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root); if (r < 0) goto bad; @@ -789,7 +788,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size) { if (cmd->data_block_size != data_block_size) { - DMERR("data_block_size (%llu) different from that in metadata (%llu)\n", + DMERR("data_block_size (%llu) different from that in metadata (%llu)", (unsigned long long) data_block_size, (unsigned long long) cmd->data_block_size); return false; -- cgit v1.2.3 From 23cab26dfca10d453866e89f0dcbf2168de5b3fe Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 4 Oct 2016 12:04:08 -0400 Subject: dm cache: add missing cache device name to DMERR in set_cache_mode() Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git 
a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 59b2c50562e4..e04c61e0839e 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -989,7 +989,8 @@ static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mod enum cache_metadata_mode old_mode = get_cache_mode(cache); if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) { - DMERR("unable to read needs_check flag, setting failure mode"); + DMERR("%s: unable to read needs_check flag, setting failure mode.", + cache_device_name(cache)); new_mode = CM_FAIL; } -- cgit v1.2.3 From 453c2a8967e582212eaf68c80fa8256d41b94fc9 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 18 Oct 2016 17:46:45 +0200 Subject: dm raid: correct error messages on old metadata validation When target 1.9.1 gets takeover/reshape requests on devices with old superblock format not supporting such conversions and rejects them in super_init_validation(), it logs bogus error message (e.g. Reshape when a takeover is requested). Whilst on it, add messages for disk adding/removing and stripe sectors reshape requests, use the newer rs_{takeover,reshape}_requested() API, address a raid10 false positive in checking array positions and remove rs_set_new() because device members are already set proper. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 71 +++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 32 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 6d53810963f7..698b03e9d955 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2050,16 +2050,17 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) mddev->reshape_position = MaxSector; + mddev->raid_disks = le32_to_cpu(sb->num_devices); + mddev->level = le32_to_cpu(sb->level); + mddev->layout = le32_to_cpu(sb->layout); + mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors); + /* * Reshaping is supported, e.g. reshape_position is valid * in superblock and superblock content is authoritative. */ if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) { /* Superblock is authoritative wrt given raid set layout! */ - mddev->raid_disks = le32_to_cpu(sb->num_devices); - mddev->level = le32_to_cpu(sb->level); - mddev->layout = le32_to_cpu(sb->layout); - mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors); mddev->new_level = le32_to_cpu(sb->new_level); mddev->new_layout = le32_to_cpu(sb->new_layout); mddev->new_chunk_sectors = le32_to_cpu(sb->new_stripe_sectors); @@ -2087,38 +2088,44 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) /* * No takeover/reshaping, because we don't have the extended v1.9.0 metadata */ - if (le32_to_cpu(sb->level) != mddev->new_level) { - DMERR("Reshaping/takeover raid sets not yet supported. (raid level/stripes/size change)"); - return -EINVAL; - } - if (le32_to_cpu(sb->layout) != mddev->new_layout) { - DMERR("Reshaping raid sets not yet supported. 
(raid layout change)"); - DMERR(" 0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout); - DMERR(" Old layout: %s w/ %d copies", - raid10_md_layout_to_format(le32_to_cpu(sb->layout)), - raid10_md_layout_to_copies(le32_to_cpu(sb->layout))); - DMERR(" New layout: %s w/ %d copies", - raid10_md_layout_to_format(mddev->layout), - raid10_md_layout_to_copies(mddev->layout)); - return -EINVAL; - } - if (le32_to_cpu(sb->stripe_sectors) != mddev->new_chunk_sectors) { - DMERR("Reshaping raid sets not yet supported. (stripe sectors change)"); - return -EINVAL; - } + struct raid_type *rt_cur = get_raid_type_by_ll(mddev->level, mddev->layout); + struct raid_type *rt_new = get_raid_type_by_ll(mddev->new_level, mddev->new_layout); - /* We can only change the number of devices in raid1 with old (i.e. pre 1.0.7) metadata */ - if (!rt_is_raid1(rs->raid_type) && - (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { - DMERR("Reshaping raid sets not yet supported. (device count change from %u to %u)", - sb->num_devices, mddev->raid_disks); + if (rs_takeover_requested(rs)) { + if (rt_cur && rt_new) + DMERR("Takeover raid sets from %s to %s not yet supported by metadata. (raid level change)", + rt_cur->name, rt_new->name); + else + DMERR("Takeover raid sets not yet supported by metadata. (raid level change)"); + return -EINVAL; + } else if (rs_reshape_requested(rs)) { + DMERR("Reshaping raid sets not yet supported by metadata. (raid layout change keeping level)"); + if (mddev->layout != mddev->new_layout) { + if (rt_cur && rt_new) + DMERR(" current layout %s vs new layout %s", + rt_cur->name, rt_new->name); + else + DMERR(" current layout 0x%X vs new layout 0x%X", + le32_to_cpu(sb->layout), mddev->new_layout); + } + if (mddev->chunk_sectors != mddev->new_chunk_sectors) + DMERR(" current stripe sectors %u vs new stripe sectors %u", + mddev->chunk_sectors, mddev->new_chunk_sectors); + if (rs->delta_disks) + DMERR(" current %u disks vs new %u disks", + mddev->raid_disks, mddev->raid_disks + rs->delta_disks); + if (rs_is_raid10(rs)) { + DMERR(" Old layout: %s w/ %u copies", + raid10_md_layout_to_format(mddev->layout), + raid10_md_layout_to_copies(mddev->layout)); + DMERR(" New layout: %s w/ %u copies", + raid10_md_layout_to_format(mddev->new_layout), + raid10_md_layout_to_copies(mddev->new_layout)); + } return -EINVAL; } DMINFO("Discovered old metadata format; upgrading to extended metadata format"); - - /* Table line is checked vs. authoritative superblock */ - rs_set_new(rs); } if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) @@ -2211,7 +2218,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) continue; if (role != r->raid_disk) { - if (__is_raid10_near(mddev->layout)) { + if (rs_is_raid10(rs) && __is_raid10_near(mddev->layout)) { if (mddev->raid_disks % __raid10_near_copies(mddev->layout) || rs->raid_disks % rs->raid10_copies) { rs->ti->error = -- cgit v1.2.3 From 1b1b58f54fdb1d28a85ab9e1b6d3d6b9b37f4ac7 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 29 Nov 2015 14:09:19 +0100 Subject: dm crypt: constify crypt_iv_operations structures The crypt_iv_operations are never modified, so declare them as const. Done with the help of Coccinelle. 
Signed-off-by: Julia Lawall Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3b8eab985947..590d7c4e1083 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -141,7 +141,7 @@ struct crypt_config { char *cipher; char *cipher_string; - struct crypt_iv_operations *iv_gen_ops; + const struct crypt_iv_operations *iv_gen_ops; union { struct iv_essiv_private essiv; struct iv_benbi_private benbi; @@ -758,15 +758,15 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv, return r; } -static struct crypt_iv_operations crypt_iv_plain_ops = { +static const struct crypt_iv_operations crypt_iv_plain_ops = { .generator = crypt_iv_plain_gen }; -static struct crypt_iv_operations crypt_iv_plain64_ops = { +static const struct crypt_iv_operations crypt_iv_plain64_ops = { .generator = crypt_iv_plain64_gen }; -static struct crypt_iv_operations crypt_iv_essiv_ops = { +static const struct crypt_iv_operations crypt_iv_essiv_ops = { .ctr = crypt_iv_essiv_ctr, .dtr = crypt_iv_essiv_dtr, .init = crypt_iv_essiv_init, @@ -774,17 +774,17 @@ static struct crypt_iv_operations crypt_iv_essiv_ops = { .generator = crypt_iv_essiv_gen }; -static struct crypt_iv_operations crypt_iv_benbi_ops = { +static const struct crypt_iv_operations crypt_iv_benbi_ops = { .ctr = crypt_iv_benbi_ctr, .dtr = crypt_iv_benbi_dtr, .generator = crypt_iv_benbi_gen }; -static struct crypt_iv_operations crypt_iv_null_ops = { +static const struct crypt_iv_operations crypt_iv_null_ops = { .generator = crypt_iv_null_gen }; -static struct crypt_iv_operations crypt_iv_lmk_ops = { +static const struct crypt_iv_operations crypt_iv_lmk_ops = { .ctr = crypt_iv_lmk_ctr, .dtr = crypt_iv_lmk_dtr, .init = crypt_iv_lmk_init, @@ -793,7 +793,7 @@ static struct crypt_iv_operations crypt_iv_lmk_ops = { .post = crypt_iv_lmk_post }; -static struct crypt_iv_operations crypt_iv_tcw_ops = { +static const struct crypt_iv_operations crypt_iv_tcw_ops = { .ctr = crypt_iv_tcw_ctr, .dtr = crypt_iv_tcw_dtr, .init = crypt_iv_tcw_init, -- cgit v1.2.3 From bff7e067ee518f9ed7e1cbc63e4c9e01670d0b71 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Aug 2016 14:09:27 +0000 Subject: dm flakey: return -EINVAL on interval bounds error in flakey_ctr() Fix to return error code -EINVAL instead of 0, as is done elsewhere in this function. 
Fixes: e80d1c805a3b ("dm: do not override error code returned from dm_get_device()") Cc: stable@vger.kernel.org # 4.3+ Signed-off-by: Wei Yongjun Signed-off-by: Mike Snitzer --- drivers/md/dm-flakey.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 6a2e8dd44a1b..3643cba71351 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -200,11 +200,13 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (!(fc->up_interval + fc->down_interval)) { ti->error = "Total (up + down) interval is zero"; + r = -EINVAL; goto bad; } if (fc->up_interval + fc->down_interval < fc->up_interval) { ti->error = "Interval overflow"; + r = -EINVAL; goto bad; } -- cgit v1.2.3 From f97dc421280e16b8eb0c8f685610ba007ec11b79 Mon Sep 17 00:00:00 2001 From: "tang.junhui" Date: Fri, 28 Oct 2016 17:04:46 +0800 Subject: dm mpath: add m->hw_handler_name NULL pointer check in parse_hw_handler() Avoids false positive of no hardware handler being specified (which is implied by a NULL m->hw_handler_name). Signed-off-by: tang.junhui Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e477af8596e2..d234b6c33646 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1002,6 +1002,8 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) } m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); + if (!m->hw_handler_name) + return -EINVAL; if (hw_argc > 1) { char *p; -- cgit v1.2.3 From cc5bd925f194c8dc7e2a4eee2c81a4f148018b08 Mon Sep 17 00:00:00 2001 From: "tang.junhui" Date: Fri, 4 Nov 2016 12:37:09 +0800 Subject: dm mpath: add checks for priority group count to avoid invalid memory access This avoids the potential for invalid memory access, if/when there are no priority groups, in response to invalid arguments being sent by the user via DM message (e.g. "switch_group", "disable_group" or "enable_group"). Signed-off-by: tang.junhui Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d234b6c33646..f8369697af12 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1364,7 +1364,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr) char dummy; if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || - (pgnum > m->nr_priority_groups)) { + !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) { DMWARN("invalid PG number supplied to switch_pg_num"); return -EINVAL; } @@ -1396,7 +1396,7 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed) char dummy; if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || - (pgnum > m->nr_priority_groups)) { + !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) { DMWARN("invalid PG number supplied to bypass_pg"); return -EINVAL; } -- cgit v1.2.3 From 4813577f932050f33c29e2a35a01431814437700 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 15 Nov 2016 15:34:09 -0800 Subject: dm mpath: change return type of pg_init_all_paths() from int to void None of the callers of pg_init_all_paths() check its return value. 
Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index f8369697af12..373e5e3eec78 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -372,16 +372,13 @@ static int __pg_init_all_paths(struct multipath *m) return atomic_read(&m->pg_init_in_progress); } -static int pg_init_all_paths(struct multipath *m) +static void pg_init_all_paths(struct multipath *m) { - int r; unsigned long flags; spin_lock_irqsave(&m->lock, flags); - r = __pg_init_all_paths(m); + __pg_init_all_paths(m); spin_unlock_irqrestore(&m->lock, flags); - - return r; } static void __switch_pg(struct multipath *m, struct priority_group *pg) -- cgit v1.2.3 From 6599c84e4ce6e428180f9958c7c40aa4202d8072 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 15 Nov 2016 15:34:32 -0800 Subject: dm mpath: do not modify *__clone if blk_mq_alloc_request() fails Purely cleanup, avoids potential for strange coding bugs. But in reality if __multipath_map() fails the caller has no business looking at *__clone. Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 373e5e3eec78..0caab4bf3515 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -580,16 +580,17 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, * .request_fn stacked on blk-mq path(s) and * blk-mq stacked on blk-mq path(s). */ - *__clone = blk_mq_alloc_request(bdev_get_queue(bdev), - rq_data_dir(rq), BLK_MQ_REQ_NOWAIT); - if (IS_ERR(*__clone)) { - /* ENOMEM, requeue */ + clone = blk_mq_alloc_request(bdev_get_queue(bdev), + rq_data_dir(rq), BLK_MQ_REQ_NOWAIT); + if (IS_ERR(clone)) { + /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ clear_request_fn_mpio(m, map_context); return r; } - (*__clone)->bio = (*__clone)->biotail = NULL; - (*__clone)->rq_disk = bdev->bd_disk; - (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT; + clone->bio = clone->biotail = NULL; + clone->rq_disk = bdev->bd_disk; + clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; + *__clone = clone; } if (pgpath->pg->ps.type->start_io) -- cgit v1.2.3 From 6936c12cf809850180b24947271b8f068fdb15e9 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 23 Nov 2016 13:51:09 -0500 Subject: dm table: fix 'all_blk_mq' inconsistency when an empty table is loaded An earlier DM multipath table could have been build ontop of underlying devices that were all using blk-mq. In that case, if that active multipath table is replaced with an empty DM multipath table (that reflects all paths have failed) then it is important that the 'all_blk_mq' state of the active table is transfered to the new empty DM table. Otherwise dm-rq.c:dm_old_prep_tio() will incorrectly clone a request that isn't needed by the DM multipath target when it is to issue IO to an underlying blk-mq device. 
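
The heart of the fix, shown unflattened for readability: an empty table inherits type and all_blk_mq from the live table, which is consulted under the usual device-mapper SRCU protection:

        int srcu_idx;
        struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);

        if (live_table) {
                t->type = live_table->type;
                t->all_blk_mq = live_table->all_blk_mq;
        }
        dm_put_live_table(t->md, srcu_idx);
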
Fixes: e83068a5 ("dm mpath: add optional "queue_mode" feature") Cc: stable@vger.kernel.org # 4.8+ Reported-by: Bart Van Assche Tested-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index c4b53b332607..53b817b29134 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -924,12 +924,6 @@ static int dm_table_determine_type(struct dm_table *t) BUG_ON(!request_based); /* No targets in this table */ - if (list_empty(devices) && __table_type_request_based(live_md_type)) { - /* inherit live MD type */ - t->type = live_md_type; - return 0; - } - /* * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by * having a compatible target use dm_table_set_type. @@ -948,6 +942,19 @@ verify_rq_based: return -EINVAL; } + if (list_empty(devices)) { + int srcu_idx; + struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx); + + /* inherit live table's type and all_blk_mq */ + if (live_table) { + t->type = live_table->type; + t->all_blk_mq = live_table->all_blk_mq; + } + dm_put_live_table(t->md, srcu_idx); + return 0; + } + /* Non-request-stackable devices can't be used for request-based dm */ list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev); -- cgit v1.2.3 From 301fc3f5efb98633115bd887655b19f42c6dfaa8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Dec 2016 16:56:06 -0800 Subject: dm table: an 'all_blk_mq' table must be loaded for a blk-mq DM device When dm_table_set_type() is used by a target to establish a DM table's type (e.g. DM_TYPE_MQ_REQUEST_BASED in the case of DM multipath) the DM core must go on to verify that the devices in the table are compatible with the established type. Fixes: e83068a5 ("dm mpath: add optional "queue_mode" feature") Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 53b817b29134..5ac239d0f787 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -981,6 +981,11 @@ verify_rq_based: t->all_blk_mq = true; } + if (t->type == DM_TYPE_MQ_REQUEST_BASED && !t->all_blk_mq) { + DMERR("table load rejected: all devices are not blk-mq request-stackable"); + return -EINVAL; + } + return 0; } -- cgit v1.2.3 From 5b8c01f74cf03b1ec013fcf358b384706233f2f0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 15 Nov 2016 15:33:16 -0800 Subject: dm table: simplify dm_table_determine_type() Use a single loop instead of two loops to determine whether or not all_blk_mq has to be set. 
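
The resulting single loop, condensed from the patch below: count single-queue and blk-mq devices in one pass, reject mixed tables, and derive all_blk_mq from the counts:

        unsigned sq_count = 0, mq_count = 0;
        struct dm_dev_internal *dd;

        list_for_each_entry(dd, devices, list) {
                struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);

                if (q->mq_ops)
                        mq_count++;
                else
                        sq_count++;
        }
        if (sq_count && mq_count) {
                DMERR("table load rejected: not all devices are blk-mq request-stackable");
                return -EINVAL;
        }
        t->all_blk_mq = mq_count > 0;
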
Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5ac239d0f787..0a427de23ed2 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -871,7 +871,7 @@ static int dm_table_determine_type(struct dm_table *t) { unsigned i; unsigned bio_based = 0, request_based = 0, hybrid = 0; - bool verify_blk_mq = false; + unsigned sq_count = 0, mq_count = 0; struct dm_target *tgt; struct dm_dev_internal *dd; struct list_head *devices = dm_table_get_devices(t); @@ -966,20 +966,15 @@ verify_rq_based: } if (q->mq_ops) - verify_blk_mq = true; + mq_count++; + else + sq_count++; } - - if (verify_blk_mq) { - /* verify _all_ devices in the table are blk-mq devices */ - list_for_each_entry(dd, devices, list) - if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) { - DMERR("table load rejected: not all devices" - " are blk-mq request-stackable"); - return -EINVAL; - } - - t->all_blk_mq = true; + if (sq_count && mq_count) { + DMERR("table load rejected: not all devices are blk-mq request-stackable"); + return -EINVAL; } + t->all_blk_mq = mq_count > 0; if (t->type == DM_TYPE_MQ_REQUEST_BASED && !t->all_blk_mq) { DMERR("table load rejected: all devices are not blk-mq request-stackable"); -- cgit v1.2.3 From 9ea61cac0b1ad0c09022f39fd97e9b99a2cfc2dc Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 17 Nov 2016 11:24:20 -0800 Subject: dm bufio: avoid sleeping while holding the dm_bufio lock We've seen in-field reports showing _lots_ (18 in one case, 41 in another) of tasks all sitting there blocked on: mutex_lock+0x4c/0x68 dm_bufio_shrink_count+0x38/0x78 shrink_slab.part.54.constprop.65+0x100/0x464 shrink_zone+0xa8/0x198 In the two cases analyzed, we see one task that looks like this: Workqueue: kverityd verity_prefetch_io __switch_to+0x9c/0xa8 __schedule+0x440/0x6d8 schedule+0x94/0xb4 schedule_timeout+0x204/0x27c schedule_timeout_uninterruptible+0x44/0x50 wait_iff_congested+0x9c/0x1f0 shrink_inactive_list+0x3a0/0x4cc shrink_lruvec+0x418/0x5cc shrink_zone+0x88/0x198 try_to_free_pages+0x51c/0x588 __alloc_pages_nodemask+0x648/0xa88 __get_free_pages+0x34/0x7c alloc_buffer+0xa4/0x144 __bufio_new+0x84/0x278 dm_bufio_prefetch+0x9c/0x154 verity_prefetch_io+0xe8/0x10c process_one_work+0x240/0x424 worker_thread+0x2fc/0x424 kthread+0x10c/0x114 ...and that looks to be the one holding the mutex. The problem has been reproduced on fairly easily: 0. Be running Chrome OS w/ verity enabled on the root filesystem 1. Pick test patch: http://crosreview.com/412360 2. Install launchBalloons.sh and balloon.arm from http://crbug.com/468342 ...that's just a memory stress test app. 3. On a 4GB rk3399 machine, run nice ./launchBalloons.sh 4 900 100000 ...that tries to eat 4 * 900 MB of memory and keep accessing. 4. Login to the Chrome web browser and restore many tabs With that, I've seen printouts like: DOUG: long bufio 90758 ms ...and stack trace always show's we're in dm_bufio_prefetch(). The problem is that we try to allocate memory with GFP_NOIO while we're holding the dm_bufio lock. Instead we should be using GFP_NOWAIT. Using GFP_NOIO can cause us to sleep while holding the lock and that causes the above problems. The current behavior explained by David Rientjes: It will still try reclaim initially because __GFP_WAIT (or __GFP_KSWAPD_RECLAIM) is set by GFP_NOIO. 
This is the cause of contention on dm_bufio_lock() that the thread holds. You want to pass GFP_NOWAIT instead of GFP_NOIO to alloc_buffer() when holding a mutex that can be contended by a concurrent slab shrinker (if count_objects didn't use a trylock, this pattern would trivially deadlock). This change significantly increases responsiveness of the system while in this state. It makes a real difference because it unblocks kswapd. In the bug report analyzed, kswapd was hung: kswapd0 D ffffffc000204fd8 0 72 2 0x00000000 Call trace: [] __switch_to+0x9c/0xa8 [] __schedule+0x440/0x6d8 [] schedule+0x94/0xb4 [] schedule_preempt_disabled+0x28/0x44 [] __mutex_lock_slowpath+0x120/0x1ac [] mutex_lock+0x4c/0x68 [] dm_bufio_shrink_count+0x38/0x78 [] shrink_slab.part.54.constprop.65+0x100/0x464 [] shrink_zone+0xa8/0x198 [] balance_pgdat+0x328/0x508 [] kswapd+0x424/0x51c [] kthread+0x10c/0x114 [] ret_from_fork+0x10/0x40 By unblocking kswapd memory pressure should be reduced. Suggested-by: David Rientjes Reviewed-by: Guenter Roeck Signed-off-by: Douglas Anderson Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 125aedc3875f..b5d3428d109e 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -827,7 +827,8 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client * dm-bufio is resistant to allocation failures (it just keeps * one buffer reserved in cases all the allocations fail). * So set flags to not try too hard: - * GFP_NOIO: don't recurse into the I/O layer + * GFP_NOWAIT: don't wait; if we need to sleep we'll release our + * mutex and wait ourselves. * __GFP_NORETRY: don't retry and rather return failure * __GFP_NOMEMALLOC: don't use emergency reserves * __GFP_NOWARN: don't print a warning in case of failure @@ -837,7 +838,7 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client */ while (1) { if (dm_bufio_cache_size_latch != 1) { - b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); if (b) return b; } -- cgit v1.2.3 From d12067f428c037b4575aaeb2be00847fc214c24a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 23 Nov 2016 16:52:01 -0500 Subject: dm bufio: don't take the lock in dm_bufio_shrink_count dm_bufio_shrink_count() is called from do_shrink_slab to find out how many freeable objects are there. The reported value doesn't have to be precise, so we don't need to take the dm-bufio lock. 
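
The resulting count callback, shown unflattened: an approximate, lock-free answer is acceptable for a shrinker's count_objects:

        static unsigned long
        dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
        {
                struct dm_bufio_client *c =
                        container_of(shrink, struct dm_bufio_client, shrinker);

                return ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) +
                       ACCESS_ONCE(c->n_buffers[LIST_DIRTY]);
        }
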
Suggested-by: David Rientjes Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index b5d3428d109e..9ef88f0e2382 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1588,18 +1588,9 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - struct dm_bufio_client *c; - unsigned long count; - - c = container_of(shrink, struct dm_bufio_client, shrinker); - if (sc->gfp_mask & __GFP_FS) - dm_bufio_lock(c); - else if (!dm_bufio_trylock(c)) - return 0; + struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); - count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; - dm_bufio_unlock(c); - return count; + return ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) + ACCESS_ONCE(c->n_buffers[LIST_DIRTY]); } /* -- cgit v1.2.3 From 41c73a49df31151f4ff868f28fe4f129f113fa2c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 23 Nov 2016 17:04:00 -0500 Subject: dm bufio: drop the lock when doing GFP_NOIO allocation If the first allocation attempt using GFP_NOWAIT fails, drop the lock and retry using GFP_NOIO allocation (lock is dropped because the allocation can take some time). Note that we won't do GFP_NOIO allocation when we loop for the second time, because the lock shouldn't be dropped between __wait_for_free_buffer and __get_unclaimed_buffer. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9ef88f0e2382..1c2e1dd7ca16 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -822,6 +822,7 @@ enum new_flag { static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf) { struct dm_buffer *b; + bool tried_noio_alloc = false; /* * dm-bufio is resistant to allocation failures (it just keeps @@ -846,6 +847,15 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client if (nf == NF_PREFETCH) return NULL; + if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) { + dm_bufio_unlock(c); + b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + dm_bufio_lock(c); + if (b) + return b; + tried_noio_alloc = true; + } + if (!list_empty(&c->reserved_buffers)) { b = list_entry(c->reserved_buffers.next, struct dm_buffer, lru_list); -- cgit v1.2.3 From 2e91c3694181dc500faffec16c5aaa0ac5e15449 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Nov 2016 14:26:47 -0800 Subject: dm: use blk_set_queue_dying() in __dm_destroy() After QUEUE_FLAG_DYING has been set any code that is waiting in get_request() should be woken up. But to get this behaviour blk_set_queue_dying() must be used instead of only setting QUEUE_FLAG_DYING. 
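
In short, the open-coded flag manipulation (before) becomes a single call that also wakes anyone sleeping in get_request() (after):

        /* before: the flag is set but waiters are never woken */
        spin_lock_irq(q->queue_lock);
        queue_flag_set(QUEUE_FLAG_DYING, q);
        spin_unlock_irq(q->queue_lock);

        /* after */
        blk_set_queue_dying(q);
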
Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ef7bf1dd6900..091eaa635645 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1886,9 +1886,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait) set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); - spin_lock_irq(q->queue_lock); - queue_flag_set(QUEUE_FLAG_DYING, q); - spin_unlock_irq(q->queue_lock); + blk_set_queue_dying(q); if (dm_request_based(md) && md->kworker_task) kthread_flush_worker(&md->kworker); -- cgit v1.2.3 From b23df0d048e5f137ad5c2a116ec0849a98d43b96 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Nov 2016 14:27:42 -0800 Subject: dm rq: simplify use_blk_mq initialization Use a single statement to declare and initialize 'use_blk_mq' instead of two statements. Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 54b081588b55..87ca5dbbe45d 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -23,11 +23,7 @@ static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH; #define RESERVED_REQUEST_BASED_IOS 256 static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; -#ifdef CONFIG_DM_MQ_DEFAULT -static bool use_blk_mq = true; -#else -static bool use_blk_mq = false; -#endif +static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT); bool dm_use_blk_mq_default(void) { -- cgit v1.2.3 From 6080758d441acd7765314f3e9b6ca843e18e9a68 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Nov 2016 14:28:00 -0800 Subject: dm ioctl: use offsetof() instead of open-coding it Subtracting sizes is a fragile approach because the result is only correct if the compiler has not added any padding at the end of the structure. Hence use offsetof() instead of size subtraction. An additional advantage of offsetof() is that it makes the intent more clear. Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 966eb4b61aed..c72a77048b73 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1697,7 +1697,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern { struct dm_ioctl *dmi; int secure_data; - const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data); + const size_t minimum_data_size = offsetof(struct dm_ioctl, data); if (copy_from_user(param_kernel, user, minimum_data_size)) return -EFAULT; -- cgit v1.2.3 From 0637018dff106e2591c1baa628e27a24a37ccf44 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Nov 2016 14:28:32 -0800 Subject: dm array: remove a dead assignment in populate_ablock_with_values() A value is assigned to 'nr_entries' but is never used, remove it. 
Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-array.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index e83047cbb2da..7938cd21fa4c 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c @@ -700,13 +700,11 @@ static int populate_ablock_with_values(struct dm_array_info *info, struct array_ { int r; unsigned i; - uint32_t nr_entries; struct dm_btree_value_type *vt = &info->value_type; BUG_ON(le32_to_cpu(ab->nr_entries)); BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); - nr_entries = le32_to_cpu(ab->nr_entries); for (i = 0; i < new_nr; i++) { r = fn(base + i, element_at(info, ab, i), context); if (r) -- cgit v1.2.3 From c538f6ec9f56996677c58cfd1f7f8108b0a944cb Mon Sep 17 00:00:00 2001 From: Ondrej Kozina Date: Mon, 21 Nov 2016 15:58:51 +0100 Subject: dm crypt: add ability to use keys from the kernel key retention service The kernel key service is a generic way to store keys for the use of other subsystems. Currently there is no way to use kernel keys in dm-crypt. This patch aims to fix that. Instead of key userspace may pass a key description with preceding ':'. So message that constructs encryption mapping now looks like this: [|:] [<#opt_params> ] where is in format: :: Currently we only support two elementary key types: 'user' and 'logon'. Keys may be loaded in dm-crypt either via or using classical method and pass the key in hex representation directly. dm-crypt device initialised with a key passed in hex representation may be replaced with key passed in key_string format and vice versa. (Based on original work by Andrey Ryabinin) Signed-off-by: Ondrej Kozina Reviewed-by: David Howells Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-crypt.txt | 25 ++++- drivers/md/dm-crypt.c | 159 ++++++++++++++++++++++++++++--- 2 files changed, 170 insertions(+), 14 deletions(-) (limited to 'drivers/md') diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt index 692171fe9da0..6f15fcea9566 100644 --- a/Documentation/device-mapper/dm-crypt.txt +++ b/Documentation/device-mapper/dm-crypt.txt @@ -21,13 +21,30 @@ Parameters: \ /proc/crypto contains supported crypto modes - Key used for encryption. It is encoded as a hexadecimal number. + Key used for encryption. It is encoded either as a hexadecimal number + or it can be passed as prefixed with single colon + character (':') for keys residing in kernel keyring service. You can only use key sizes that are valid for the selected cipher in combination with the selected iv mode. Note that for some iv modes the key string can contain additional keys (for example IV seed) so the key contains more parts concatenated into a single string. + + The kernel keyring key is identified by string in following format: + ::. + + + The encryption key size in bytes. The kernel key payload size must match + the value passed in . + + + Either 'logon' or 'user' kernel key type. + + + The kernel keyring key description crypt target should look for + when loading key of . + Multi-key compatibility mode. 
You can define keys and then sectors are encrypted according to their offsets (sector 0 uses key0; @@ -88,6 +105,12 @@ https://gitlab.com/cryptsetup/cryptsetup dmsetup create crypt1 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0" ]] +[[ +#!/bin/sh +# Create a crypt device using dmsetup when encryption key is stored in keyring service +dmsetup create crypt2 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 :32:logon:my_prefix:my_key 0 $1 0" +]] + [[ #!/bin/sh # Create a crypt device using cryptsetup and LUKS header with default cipher diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 590d7c4e1083..da0b2e05fdf1 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #include @@ -140,6 +142,7 @@ struct crypt_config { char *cipher; char *cipher_string; + char *key_string; const struct crypt_iv_operations *iv_gen_ops; union { @@ -1484,22 +1487,134 @@ static int crypt_setkey(struct crypt_config *cc) return err; } +#ifdef CONFIG_KEYS + +static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string) +{ + char *new_key_string, *key_desc; + int ret; + struct key *key; + const struct user_key_payload *ukp; + + /* look for next ':' separating key_type from key_description */ + key_desc = strpbrk(key_string, ":"); + if (!key_desc || key_desc == key_string || !strlen(key_desc + 1)) + return -EINVAL; + + if (strncmp(key_string, "logon:", key_desc - key_string + 1) && + strncmp(key_string, "user:", key_desc - key_string + 1)) + return -EINVAL; + + new_key_string = kstrdup(key_string, GFP_KERNEL); + if (!new_key_string) + return -ENOMEM; + + key = request_key(key_string[0] == 'l' ? &key_type_logon : &key_type_user, + key_desc + 1, NULL); + if (IS_ERR(key)) { + kzfree(new_key_string); + return PTR_ERR(key); + } + + rcu_read_lock(); + + ukp = user_key_payload(key); + if (!ukp) { + rcu_read_unlock(); + key_put(key); + kzfree(new_key_string); + return -EKEYREVOKED; + } + + if (cc->key_size != ukp->datalen) { + rcu_read_unlock(); + key_put(key); + kzfree(new_key_string); + return -EINVAL; + } + + memcpy(cc->key, ukp->data, cc->key_size); + + rcu_read_unlock(); + key_put(key); + + /* clear the flag since following operations may invalidate previously valid key */ + clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); + + ret = crypt_setkey(cc); + + /* wipe the kernel key payload copy in each case */ + memset(cc->key, 0, cc->key_size * sizeof(u8)); + + if (!ret) { + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); + kzfree(cc->key_string); + cc->key_string = new_key_string; + } else + kzfree(new_key_string); + + return ret; +} + +static int get_key_size(char **key_string) +{ + char *colon, dummy; + int ret; + + if (*key_string[0] != ':') + return strlen(*key_string) >> 1; + + /* look for next ':' in key string */ + colon = strpbrk(*key_string + 1, ":"); + if (!colon) + return -EINVAL; + + if (sscanf(*key_string + 1, "%u%c", &ret, &dummy) != 2 || dummy != ':') + return -EINVAL; + + *key_string = colon; + + /* remaining key string should be :: */ + + return ret; +} + +#else + +static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string) +{ + return -EINVAL; +} + +static int get_key_size(char **key_string) +{ + return (*key_string[0] == ':') ? 
-EINVAL : strlen(*key_string) >> 1; +} + +#endif + static int crypt_set_key(struct crypt_config *cc, char *key) { int r = -EINVAL; int key_string_len = strlen(key); - /* The key size may not be changed. */ - if (cc->key_size != (key_string_len >> 1)) - goto out; - /* Hyphen (which gives a key_size of zero) means there is no key. */ if (!cc->key_size && strcmp(key, "-")) goto out; + /* ':' means the key is in kernel keyring, short-circuit normal key processing */ + if (key[0] == ':') { + r = crypt_set_keyring_key(cc, key + 1); + goto out; + } + /* clear the flag since following operations may invalidate previously valid key */ clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); + /* wipe references to any kernel keyring key */ + kzfree(cc->key_string); + cc->key_string = NULL; + if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) goto out; @@ -1518,6 +1633,8 @@ static int crypt_wipe_key(struct crypt_config *cc) { clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); memset(&cc->key, 0, cc->key_size * sizeof(u8)); + kzfree(cc->key_string); + cc->key_string = NULL; return crypt_setkey(cc); } @@ -1555,6 +1672,7 @@ static void crypt_dtr(struct dm_target *ti) kzfree(cc->cipher); kzfree(cc->cipher_string); + kzfree(cc->key_string); /* Must zero key material before freeing */ kzfree(cc); @@ -1723,12 +1841,13 @@ bad_mem: /* * Construct an encryption mapping: - * + * [|:::] */ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct crypt_config *cc; - unsigned int key_size, opt_params; + int key_size; + unsigned int opt_params; unsigned long long tmpll; int ret; size_t iv_size_padding; @@ -1745,7 +1864,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - key_size = strlen(argv[1]) >> 1; + key_size = get_key_size(&argv[1]); + if (key_size < 0) { + ti->error = "Cannot parse key size"; + return -EINVAL; + } cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL); if (!cc) { @@ -1952,10 +2075,13 @@ static void crypt_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: DMEMIT("%s ", cc->cipher_string); - if (cc->key_size > 0) - for (i = 0; i < cc->key_size; i++) - DMEMIT("%02x", cc->key[i]); - else + if (cc->key_size > 0) { + if (cc->key_string) + DMEMIT(":%u:%s", cc->key_size, cc->key_string); + else + for (i = 0; i < cc->key_size; i++) + DMEMIT("%02x", cc->key[i]); + } else DMEMIT("-"); DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, @@ -2011,7 +2137,7 @@ static void crypt_resume(struct dm_target *ti) static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) { struct crypt_config *cc = ti->private; - int ret = -EINVAL; + int key_size, ret = -EINVAL; if (argc < 2) goto error; @@ -2022,6 +2148,13 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) return -EINVAL; } if (argc == 3 && !strcasecmp(argv[1], "set")) { + /* The key size may not be changed. 
*/ + key_size = get_key_size(&argv[2]); + if (key_size < 0 || cc->key_size != key_size) { + memset(argv[2], '0', strlen(argv[2])); + return -EINVAL; + } + ret = crypt_set_key(cc, argv[2]); if (ret) return ret; @@ -2065,7 +2198,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 14, 1}, + .version = {1, 15, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, -- cgit v1.2.3 From 54cd640d20de46bb54747286ca19f3995be921f2 Mon Sep 17 00:00:00 2001 From: "tang.junhui" Date: Thu, 24 Nov 2016 15:11:48 +0800 Subject: dm mpath: use hw_handler_params if attached hw_handler is same as requested Let the requested m->hw_handler_params be used if the attached hardware handler is the same handler as requested with m->hw_handler_name. Signed-off-by: tang.junhui Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 0caab4bf3515..6400cffb986d 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -849,19 +849,23 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps retain: attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); if (attached_handler_name) { + /* + * Clear any hw_handler_params associated with a + * handler that isn't already attached. + */ + if (m->hw_handler_name && strcmp(attached_handler_name, m->hw_handler_name)) { + kfree(m->hw_handler_params); + m->hw_handler_params = NULL; + } + /* * Reset hw_handler_name to match the attached handler - * and clear any hw_handler_params associated with the - * ignored handler. * * NB. This modifies the table line to show the actual * handler instead of the original table passed in. */ kfree(m->hw_handler_name); m->hw_handler_name = attached_handler_name; - - kfree(m->hw_handler_params); - m->hw_handler_params = NULL; } } -- cgit v1.2.3 From affa9d28f7e9a802c8f497ed54fe79a5689d95e9 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 24 Nov 2016 18:53:44 +0100 Subject: dm raid: don't allow "write behind" with raid4/5/6 Remove CTR_FLAG_MAX_WRITE_BEHIND from raid4/5/6's valid ctr flags. Only the md raid1 personality supports setting a maximum number of "write behind" write IOs on any legs set to "write mostly". "write mostly" enhances throughput with slow links/disks. Technically the "write behind" value is a write intent bitmap property only being respected by the raid1 personality. It allows a maximum number of "write behind" writes to any "write mostly" raid1 mirror legs to be delayed and avoids reads from such legs. No other MD personalities supported via dm-raid make use of "write behind", thus setting this property is superfluous; it wouldn't cause harm but it is correct to reject it. 
Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 698b03e9d955..9d5c6bb9e35b 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -160,7 +160,6 @@ struct raid_dev { CTR_FLAG_DAEMON_SLEEP | \ CTR_FLAG_MIN_RECOVERY_RATE | \ CTR_FLAG_MAX_RECOVERY_RATE | \ - CTR_FLAG_MAX_WRITE_BEHIND | \ CTR_FLAG_STRIPE_CACHE | \ CTR_FLAG_REGION_SIZE | \ CTR_FLAG_DELTA_DISKS | \ @@ -171,7 +170,6 @@ struct raid_dev { CTR_FLAG_DAEMON_SLEEP | \ CTR_FLAG_MIN_RECOVERY_RATE | \ CTR_FLAG_MAX_RECOVERY_RATE | \ - CTR_FLAG_MAX_WRITE_BEHIND | \ CTR_FLAG_STRIPE_CACHE | \ CTR_FLAG_REGION_SIZE | \ CTR_FLAG_DELTA_DISKS | \ -- cgit v1.2.3 From 11e2968478edc07a75ee1efb45011b3033c621c2 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 29 Nov 2016 22:37:30 +0100 Subject: dm raid: fix discard support regression Commit ecbfb9f118 ("dm raid: add raid level takeover support") moved the configure_discard_support() call from raid_ctr() to raid_preresume(). Enabling/disabling discard _must_ happen during table load (through the .ctr hook). Fix this regression by moving the configure_discard_support() call back to raid_ctr(). Fixes: ecbfb9f118 ("dm raid: add raid level takeover support") Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 9d5c6bb9e35b..b49b12242651 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2999,6 +2999,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) } } + /* Disable/enable discard support on raid set. */ + configure_discard_support(rs); + mddev_unlock(&rs->md); return 0; @@ -3585,12 +3588,6 @@ static int raid_preresume(struct dm_target *ti) if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags)) rs_update_sbs(rs); - /* - * Disable/enable discard support on raid set after any - * conversion, because devices can have been added - */ - configure_discard_support(rs); - /* Load the bitmap from disk unless raid0 */ r = __load_dirty_region_bitmap(rs); if (r) -- cgit v1.2.3 From 314c25c56c1ee5026cf99c570bdfe01847927acb Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Wed, 30 Nov 2016 17:56:14 -0600 Subject: dm space map metadata: fix 'struct sm_metadata' leak on failed create In dm_sm_metadata_create() we temporarily change the dm_space_map operations from 'ops' (whose .destroy function deallocates the sm_metadata) to 'bootstrap_ops' (whose .destroy function doesn't). If dm_sm_metadata_create() fails in sm_ll_new_metadata() or sm_ll_extend(), it exits back to dm_tm_create_internal(), which calls dm_sm_destroy() with the intention of freeing the sm_metadata, but it doesn't (because the dm_space_map operations is still set to 'bootstrap_ops'). Fix this by setting the dm_space_map operations back to 'ops' if dm_sm_metadata_create() fails when it is set to 'bootstrap_ops'. 
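The failure pattern generalizes to any object that temporarily swaps in a stripped-down ops table. A minimal userspace sketch of the idea (hypothetical names, dm interfaces stubbed out; this is not the drivers/md code itself):

/* sketch.c - illustrative only, hypothetical names */
#include <stdlib.h>

struct sm;
struct sm_ops { void (*destroy)(struct sm *sm); };
struct sm { const struct sm_ops *ops; };

static void real_destroy(struct sm *sm) { free(sm); }
static void noop_destroy(struct sm *sm) { (void)sm; }   /* bootstrap: frees nothing */

static const struct sm_ops ops = { .destroy = real_destroy };
static const struct sm_ops bootstrap_ops = { .destroy = noop_destroy };

/* Mirrors the bug: on failure we return with bootstrap_ops still installed,
 * so the caller's sm->ops->destroy(sm) silently leaks sm. */
static int sm_create(struct sm *sm, int fail)
{
    sm->ops = &bootstrap_ops;
    if (fail)
        return -1;      /* buggy: should also restore sm->ops = &ops here */
    sm->ops = &ops;
    return 0;
}

int main(void)
{
    struct sm *sm = malloc(sizeof(*sm));
    if (!sm)
        return 1;
    if (sm_create(sm, 1))
        sm->ops->destroy(sm);   /* leaks: noop_destroy() does not free */
    return 0;
}

The patch below is the equivalent of restoring the real ops table on the error path, so the caller's destroy can actually free the object.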
Signed-off-by: Benjamin Marzinski Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/persistent-data/dm-space-map-metadata.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 7e44005595c1..20557e2c60c6 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -775,17 +775,15 @@ int dm_sm_metadata_create(struct dm_space_map *sm, memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); r = sm_ll_new_metadata(&smm->ll, tm); + if (!r) { + if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) + nr_blocks = DM_SM_METADATA_MAX_BLOCKS; + r = sm_ll_extend(&smm->ll, nr_blocks); + } + memcpy(&smm->sm, &ops, sizeof(smm->sm)); if (r) return r; - if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) - nr_blocks = DM_SM_METADATA_MAX_BLOCKS; - r = sm_ll_extend(&smm->ll, nr_blocks); - if (r) - return r; - - memcpy(&smm->sm, &ops, sizeof(smm->sm)); - /* * Now we need to update the newly created data structures with the * allocated blocks that they were built from. -- cgit v1.2.3 From 0c79ce0b75304fbbe42282a7d40af4367e31efc6 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Fri, 2 Dec 2016 15:14:17 -0600 Subject: dm space map metadata: skip useless memcpy in metadata_ll_init_index() When metadata_ll_init_index() is called by sm_ll_new_metadata(), ll->mi_le hasn't been initialized yet. So, when metadata_ll_init_index() copies the contents of ll->mi_le into the newly allocated bitmap_root, it is just copying garbage. ll->mi_le will be allocated later in sm_ll_extend() and copied into the bitmap_root, in sm_ll_commit(). Signed-off-by: Benjamin Marzinski Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-space-map-common.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 306d2e4502c4..10978ba1d2c1 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -547,7 +547,6 @@ static int metadata_ll_init_index(struct ll_disk *ll) if (r < 0) return r; - memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le)); ll->bitmap_root = dm_block_location(b); dm_tm_unlock(ll->tm, b); -- cgit v1.2.3 From b446396b7482938c859bfaa42320026d158616ae Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Sun, 4 Dec 2016 23:26:38 -0600 Subject: dm space map: always set ev if sm_ll_mutate() succeeds If no block was allocated or freed, sm_ll_mutate() wasn't setting *ev, leaving the variable uninitialized. sm_ll_insert(), sm_disk_inc_block(), and sm_disk_new_block() all check ev to see if there was an allocation event in sm_ll_mutate(), possibly reading uninitialized data. If no allocation event occurred, sm_ll_mutate() should set *ev to SM_NONE.
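This class of bug is easy to reproduce outside the kernel. A small self-contained sketch (hypothetical names; not the dm-space-map code itself) of an out-parameter that is only written on some paths:

/* sketch.c - illustrative only, hypothetical names */
#include <stdio.h>

enum ev { EV_NONE, EV_ALLOC, EV_FREE };

/* Before the fix, the "count unchanged" branch returned success without
 * writing *ev, so callers compared against stack garbage. */
static int mutate(int old_count, int new_count, enum ev *ev)
{
    if (new_count > old_count)
        *ev = EV_ALLOC;
    else if (new_count < old_count)
        *ev = EV_FREE;
    else
        *ev = EV_NONE;  /* the fix: every successful path sets the out-parameter */
    return 0;
}

int main(void)
{
    enum ev ev; /* deliberately not initialized, like the callers' locals */

    if (!mutate(3, 3, &ev) && ev == EV_ALLOC)
        puts("would wrongly treat a no-op as an allocation");
    return 0;
}

With the out-parameter always written, the callers' checks for an allocation event are well defined on every successful return.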
Signed-off-by: Benjamin Marzinski Acked-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-space-map-common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 10978ba1d2c1..4c28608a0c94 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -464,7 +464,8 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, ll->nr_allocated--; le32_add_cpu(&ie_disk.nr_free, 1); ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit)); - } + } else + *ev = SM_NONE; return ll->save_ie(ll, index, &ie_disk); } -- cgit v1.2.3 From 027c431ccfcc89f7398d9f3e9bc2eb60e6cc57ad Mon Sep 17 00:00:00 2001 From: Ondrej Kozina Date: Thu, 1 Dec 2016 18:20:52 +0100 Subject: dm crypt: reject key strings containing whitespace chars Unfortunately key_string may theoretically contain whitespace even after it's processed by dm_split_args(). The reason for this is DM core supports escaping of almost all chars including any whitespace. If userspace passes a key to the kernel in format ":32:logon:my_prefix:my\ key" dm-crypt will look up key "my_prefix:my key" in kernel keyring service. So far everything's fine. Unfortunately if userspace later calls DM_TABLE_STATUS ioctl, it will not receive back the expected ":32:logon:my_prefix:my\ key" but the unescaped version instead. Also userspace (most notably cryptsetup) is not ready to parse a single target argument containing (even escaped) whitespace chars and any whitespace is simply taken as a delimiter of another argument. This effect is mitigated by the fact libdevmapper currently performs double escaping of '\' char. Any user input in format "x\ x" is transformed into "x\\ x" before being passed to the kernel. Nonetheless dm-crypt may be used without libdevmapper. Therefore the near-term solution to this is to reject any key string containing whitespace. Signed-off-by: Ondrej Kozina Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index da0b2e05fdf1..9b99ee9a8690 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1489,6 +1490,14 @@ static int crypt_setkey(struct crypt_config *cc) #ifdef CONFIG_KEYS +static bool contains_whitespace(const char *str) +{ + while (*str) + if (isspace(*str++)) + return true; + return false; +} + static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string) { char *new_key_string, *key_desc; @@ -1496,6 +1505,15 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string struct key *key; const struct user_key_payload *ukp; + /* + * Reject key_string with whitespace. dm core currently lacks code for + * proper whitespace escaping in arguments on DM_TABLE_STATUS path.
+ */ + if (contains_whitespace(key_string)) { + DMERR("whitespace chars not allowed in key string"); + return -EINVAL; + } + /* look for next ':' separating key_type from key_description */ key_desc = strpbrk(key_string, ":"); if (!key_desc || key_desc == key_string || !strlen(key_desc + 1)) -- cgit v1.2.3 From e99dda8fc4567db6e39ec25b60bbcd68182dfd7b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 8 Dec 2016 19:42:37 -0500 Subject: dm cache policy smq: use hash_32() instead of hash_32_generic() Switch to using hash_32() because hash_32_generic() should only be used by the kernel's selftests. Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-policy-smq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index c33f4a6e1d7d..f19c6930a67c 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1361,7 +1361,7 @@ static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) static unsigned random_level(dm_cblock_t cblock) { - return hash_32_generic(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1); + return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1); } static int smq_load_mapping(struct dm_cache_policy *p, -- cgit v1.2.3 From ef548c551e72dbbdcc6d9ed7c7b3b01083fea8e2 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 13 Dec 2016 14:54:50 -0500 Subject: dm flakey: introduce "error_writes" feature Recent dm-flakey fixes, to have reads error out during the "down" interval, made it so that the previous read behaviour is no longer available. It is useful to have reads complete like normal but have writes error out, so make it possible again with a new "error_writes" feature. Signed-off-by: Mike Snitzer --- drivers/md/dm-flakey.c | 51 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 9 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 3643cba71351..13305a182611 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -36,7 +36,8 @@ struct flakey_c { }; enum feature_flag_bits { - DROP_WRITES + DROP_WRITES, + ERROR_WRITES }; struct per_bio_data { @@ -76,6 +77,25 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, if (test_and_set_bit(DROP_WRITES, &fc->flags)) { ti->error = "Feature drop_writes duplicated"; return -EINVAL; + } else if (test_bit(ERROR_WRITES, &fc->flags)) { + ti->error = "Feature drop_writes conflicts with feature error_writes"; + return -EINVAL; + } + + continue; + } + + /* + * error_writes + */ + if (!strcasecmp(arg_name, "error_writes")) { + if (test_and_set_bit(ERROR_WRITES, &fc->flags)) { + ti->error = "Feature error_writes duplicated"; + return -EINVAL; + + } else if (test_bit(DROP_WRITES, &fc->flags)) { + ti->error = "Feature error_writes conflicts with feature drop_writes"; + return -EINVAL; } continue; @@ -135,6 +155,10 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) { ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set"; return -EINVAL; + + } else if (test_bit(ERROR_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) { + ti->error = "error_writes is incompatible with corrupt_bio_byte with the WRITE flag set"; + return -EINVAL; } return 0; @@ -291,22 +315,27 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) pb->bio_submitted = true; /* - 
* Error reads if neither corrupt_bio_byte or drop_writes are set. + * Error reads if neither corrupt_bio_byte or drop_writes or error_writes are set. * Otherwise, flakey_end_io() will decide if the reads should be modified. */ if (bio_data_dir(bio) == READ) { - if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags)) + if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) && + !test_bit(ERROR_WRITES, &fc->flags)) return -EIO; goto map_bio; } /* - * Drop writes? + * Drop or error writes? */ if (test_bit(DROP_WRITES, &fc->flags)) { bio_endio(bio); return DM_MAPIO_SUBMITTED; } + else if (test_bit(ERROR_WRITES, &fc->flags)) { + bio_io_error(bio); + return DM_MAPIO_SUBMITTED; + } /* * Corrupt matching writes. @@ -342,10 +371,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) */ corrupt_bio_data(bio, fc); - } else if (!test_bit(DROP_WRITES, &fc->flags)) { + } else if (!test_bit(DROP_WRITES, &fc->flags) && + !test_bit(ERROR_WRITES, &fc->flags)) { /* * Error read during the down_interval if drop_writes - * wasn't configured. + * and error_writes were not configured. */ return -EIO; } @@ -359,7 +389,7 @@ static void flakey_status(struct dm_target *ti, status_type_t type, { unsigned sz = 0; struct flakey_c *fc = ti->private; - unsigned drop_writes; + unsigned drop_writes, error_writes; switch (type) { case STATUSTYPE_INFO: @@ -372,10 +402,13 @@ static void flakey_status(struct dm_target *ti, status_type_t type, fc->down_interval); drop_writes = test_bit(DROP_WRITES, &fc->flags); - DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5); + error_writes = test_bit(ERROR_WRITES, &fc->flags); + DMEMIT("%u ", drop_writes + error_writes + (fc->corrupt_bio_byte > 0) * 5); if (drop_writes) DMEMIT("drop_writes "); + else if (error_writes) + DMEMIT("error_writes "); if (fc->corrupt_bio_byte) DMEMIT("corrupt_bio_byte %u %c %u %u ", @@ -412,7 +445,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 3, 1}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, -- cgit v1.2.3
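The new behaviour is selected through the flakey target's feature list in the table line. A hypothetical example (device path and sector count are placeholders), loadable with dmsetup, that fails every write with -EIO during each 5 second down interval while reads keep completing normally:

    0 2097152 flakey /dev/sdb 0 5 5 1 error_writes

The trailing "1 error_writes" is the feature count followed by the feature name; drop_writes and error_writes are mutually exclusive, as enforced in parse_features() above.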