From 816b0acf3deb6d6be5d0519b286fdd4bafade905 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 21 Mar 2016 19:18:32 +0800 Subject: md:raid1: fix a dead loop when read from a WriteMostly disk If first_bad == this_sector when we get the WriteMostly disk in read_balance(), valid disk will be returned with zero max_sectors. It'll lead to a dead loop in make_request(), and OOM will happen because of endless allocation of struct bio. Since we can't get data from this disk in this case, so continue for another disk. Signed-off-by: Wei Fang Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 39fb21e048e6..a7f2b9c9f8a0 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -570,7 +570,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect if (best_dist_disk < 0) { if (is_badblock(rdev, this_sector, sectors, &first_bad, &bad_sectors)) { - if (first_bad < this_sector) + if (first_bad <= this_sector) /* Cannot use this */ continue; best_good_sectors = first_bad - this_sector; -- cgit v1.2.3 From 466ad292235bd7f23d5cf04c7b507d146424d019 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 21 Mar 2016 19:19:30 +0800 Subject: md: fix a trivial typo in comments Fix a trivial typo in md_ioctl(). Signed-off-by: Wei Fang Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index c068f171b4eb..32ac6faf9d6c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6883,7 +6883,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, case ADD_NEW_DISK: /* We can support ADD_NEW_DISK on read-only arrays - * on if we are re-adding a preexisting device. + * only if we are re-adding a preexisting device. * So require mddev->pers and MD_DISK_SYNC. */ if (mddev->pers) { -- cgit v1.2.3 From ed3b98c71cd91fa05954df52a79dcd6be08dd730 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 29 Mar 2016 14:00:19 -0700 Subject: MD: add rdev reference for super write Xiao Ni reported below crash: [26396.335146] BUG: unable to handle kernel NULL pointer dereference at 00000000000002a8 [26396.342990] IP: [] super_written+0x20/0x80 [md_mod] [26396.349449] PGD 0 [26396.351468] Oops: 0002 [#1] SMP [26396.354898] Modules linked in: ext4 mbcache jbd2 raid456 async_raid6_recov async_memcpy async_pq async_xor xor async_td [26396.408404] CPU: 5 PID: 3261 Comm: loop0 Not tainted 4.5.0 #1 [26396.414140] Hardware name: Dell Inc. PowerEdge R715/0G2DP3, BIOS 3.2.2 09/15/2014 [26396.421608] task: ffff8808339be680 ti: ffff8808365f4000 task.ti: ffff8808365f4000 [26396.429074] RIP: 0010:[] [] super_written+0x20/0x80 [md_mod] [26396.437952] RSP: 0018:ffff8808365f7c38 EFLAGS: 00010046 [26396.443252] RAX: ffffffffa0425ae0 RBX: ffff8804336a7900 RCX: ffffe8f9f7b41198 [26396.450371] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8804336a7900 [26396.457489] RBP: ffff8808365f7c50 R08: 0000000000000005 R09: 00001801e02ce3d7 [26396.464608] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 [26396.471728] R13: ffff8808338d9a00 R14: 0000000000000000 R15: ffff880833f9fe00 [26396.478849] FS: 00007f9e5066d740(0000) GS:ffff880237b40000(0000) knlGS:0000000000000000 [26396.486922] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [26396.492656] CR2: 00000000000002a8 CR3: 00000000019ea000 CR4: 00000000000006e0 [26396.499775] Stack: [26396.501781] ffff8804336a7900 0000000000000000 0000000000000000 ffff8808365f7c68 [26396.509199] ffffffff81308cd0 ffff8804336a7900 ffff8808365f7ca8 ffffffff81310637 [26396.516618] 00000000a0233a00 ffff880833f9fe00 0000000000000000 ffff880833fb0000 [26396.524038] Call Trace: [26396.526485] [] bio_endio+0x40/0x60 [26396.531529] [] blk_update_request+0x87/0x320 [26396.537439] [] blk_mq_end_request+0x1a/0x70 [26396.543261] [] blk_flush_complete_seq+0xd9/0x2a0 [26396.549517] [] flush_end_io+0x15f/0x240 [26396.554993] [] blk_mq_end_request+0x3a/0x70 [26396.560815] [] __blk_mq_complete_request+0xb4/0xe0 [26396.567246] [] blk_mq_complete_request+0x1c/0x20 [26396.573506] [] loop_queue_work+0x6f/0x72c [loop] [26396.579764] [] ? __schedule+0x2b4/0x8f0 [26396.585242] [] kthread_worker_fn+0x52/0x170 [26396.591065] [] ? kthread_create_on_node+0x1a0/0x1a0 [26396.597582] [] kthread+0xd8/0xf0 [26396.602453] [] ? kthread_park+0x60/0x60 [26396.607929] [] ret_from_fork+0x3f/0x70 [26396.613319] [] ? kthread_park+0x60/0x60 md_super_write() and corresponding md_super_wait() generally are called with reconfig_mutex locked, which prevents disk disappears. There is one case this rule is broken. write_sb_page of bitmap.c doesn't hold the mutex. next_active_rdev does increase rdev reference, but it decreases the reference too early (eg, before IO finish). disk can disappear at the window. We unconditionally increase rdev reference in md_super_write() to avoid the race. Reported-and-tested-by: Xiao Ni Reviewed-by: Neil Brown Signed-off-by: Shaohua Li --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 32ac6faf9d6c..194580fba7fd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -718,6 +718,7 @@ static void super_written(struct bio *bio) if (atomic_dec_and_test(&mddev->pending_writes)) wake_up(&mddev->sb_wait); + rdev_dec_pending(rdev, mddev); bio_put(bio); } @@ -732,6 +733,8 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, */ struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); + atomic_inc(&rdev->nr_pending); + bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev; bio->bi_iter.bi_sector = sector; bio_add_page(bio, page, size, 0); -- cgit v1.2.3 From f9a67b1182e5abfcfcec24762ea95a77332f035e Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 1 Apr 2016 17:08:49 +0800 Subject: md/bitmap: clear bitmap if bitmap_create failed If bitmap_create returns an error, we need to call either bitmap_destroy or bitmap_free to do clean up, and the selection is based on mddev->bitmap is set or not. And the sysfs_put(bitmap->sysfs_can_clear) is moved from bitmap_destroy to bitmap_free, and the comment of bitmap_create is changed as well. Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/bitmap.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 7df6b4f1548a..2a0362fc2107 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1673,6 +1673,9 @@ static void bitmap_free(struct bitmap *bitmap) if (!bitmap) /* there was no bitmap */ return; + if (bitmap->sysfs_can_clear) + sysfs_put(bitmap->sysfs_can_clear); + if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info && bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev)) md_cluster_stop(bitmap->mddev); @@ -1712,15 +1715,13 @@ void bitmap_destroy(struct mddev *mddev) if (mddev->thread) mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - if (bitmap->sysfs_can_clear) - sysfs_put(bitmap->sysfs_can_clear); - bitmap_free(bitmap); } /* * initialize the bitmap structure * if this returns an error, bitmap_destroy must be called to do clean up + * once mddev->bitmap is set */ struct bitmap *bitmap_create(struct mddev *mddev, int slot) { @@ -1865,8 +1866,10 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot, struct bitmap_counts *counts; struct bitmap *bitmap = bitmap_create(mddev, slot); - if (IS_ERR(bitmap)) + if (IS_ERR(bitmap)) { + bitmap_free(bitmap); return PTR_ERR(bitmap); + } rv = bitmap_init_from_disk(bitmap, 0); if (rv) @@ -2170,14 +2173,14 @@ location_store(struct mddev *mddev, const char *buf, size_t len) else { mddev->bitmap = bitmap; rv = bitmap_load(mddev); - if (rv) { - bitmap_destroy(mddev); + if (rv) mddev->bitmap_info.offset = 0; - } } mddev->pers->quiesce(mddev, 0); - if (rv) + if (rv) { + bitmap_destroy(mddev); return rv; + } } } } -- cgit v1.2.3