From cc27b0c78c79680d128dbac79de0d40556d041bb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 5 Jun 2017 16:49:39 +1000 Subject: md: fix deadlock between mddev_suspend() and md_write_start() If mddev_suspend() races with md_write_start() we can deadlock with mddev_suspend() waiting for the request that is currently in md_write_start() to complete the ->make_request() call, and md_write_start() waiting for the metadata to be updated to mark the array as 'dirty'. As metadata updates done by md_check_recovery() only happen then the mddev_lock() can be claimed, and as mddev_suspend() is often called with the lock held, these threads wait indefinitely for each other. We fix this by having md_write_start() abort if mddev_suspend() is happening, and ->make_request() aborts if md_write_start() aborted. md_make_request() can detect this abort, decrease the ->active_io count, and wait for mddev_suspend(). Reported-by: Nix Fix: 68866e425be2(MD: no sync IO while suspended) Cc: stable@vger.kernel.org Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/md/raid10.c') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 797ed60abd5e..52acffa7a06a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1303,8 +1303,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, sector_t sectors; int max_sectors; - md_write_start(mddev, bio); - /* * Register the new request and wait if the reconstruction * thread has put up a bar for new requests. @@ -1525,7 +1523,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) raid10_write_request(mddev, bio, r10_bio); } -static void raid10_make_request(struct mddev *mddev, struct bio *bio) +static bool raid10_make_request(struct mddev *mddev, struct bio *bio) { struct r10conf *conf = mddev->private; sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask); @@ -1534,9 +1532,12 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { md_flush_request(mddev, bio); - return; + return true; } + if (!md_write_start(mddev, bio)) + return false; + /* * If this request crosses a chunk boundary, we need to split * it. @@ -1553,6 +1554,7 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) /* In case raid10d snuck in to freeze_array */ wake_up(&conf->wait_barrier); + return true; } static void raid10_status(struct seq_file *seq, struct mddev *mddev) -- cgit v1.2.3 From 1cdd1257949c85c5ddff8313fe3b1e39c5bee8b8 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Tue, 13 Jun 2017 11:16:08 +0800 Subject: md/raid10: fix FailFast test for wrong device We need to test FailFast flag for replacement device here since the set up for writing is for the replacement, so we need fix it like: - if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) + if (test_bit(FailFast, &conf->mirrors[d].replacement->flags)) Since commit f90145f317ef ("md/raid10: add rcu protection to rdev access in raid10_sync_request.") had added the rcu protection for the part, so let's extend the range protected by rcu and use rdev directly. Fixes: 1919cbb ("md/raid10: add failfast handling for writes.") Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/md/raid10.c') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 52acffa7a06a..305b0db5a293 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3295,7 +3295,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, biolist = bio; bio->bi_end_io = end_sync_read; bio_set_op_attrs(bio, REQ_OP_READ, 0); - if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) + if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; bio->bi_iter.bi_sector = sector + rdev->data_offset; bio->bi_bdev = rdev->bdev; @@ -3307,7 +3307,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, continue; } atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); /* Need to set up for writing to the replacement */ bio = r10_bio->devs[i].repl_bio; @@ -3318,11 +3317,12 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, biolist = bio; bio->bi_end_io = end_sync_write; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) + if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; bio->bi_iter.bi_sector = sector + rdev->data_offset; bio->bi_bdev = rdev->bdev; count++; + rcu_read_unlock(); } if (count < 2) { -- cgit v1.2.3