diff options
-rw-r--r-- | drivers/md/raid5.c | 69 |
1 files changed, 62 insertions, 7 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 516baf49a1fa..b443cd2459df 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -532,13 +532,21 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) bi->bi_end_io = raid5_end_read_request; rcu_read_lock(); - rdev = rcu_dereference(conf->disks[i].rdev); rrdev = rcu_dereference(conf->disks[i].replacement); + smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */ + rdev = rcu_dereference(conf->disks[i].rdev); + if (!rdev) { + rdev = rrdev; + rrdev = NULL; + } if (rw & WRITE) { if (replace_only) rdev = NULL; + if (rdev == rrdev) + /* We raced and saw duplicates */ + rrdev = NULL; } else { - if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) + if (test_bit(R5_ReadRepl, &sh->dev[i].flags) && rrdev) rdev = rrdev; rrdev = NULL; } @@ -1640,7 +1648,7 @@ static void raid5_end_read_request(struct bio * bi, int error) int disks = sh->disks, i; int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); char b[BDEVNAME_SIZE]; - struct md_rdev *rdev; + struct md_rdev *rdev = NULL; for (i=0 ; i<disks; i++) @@ -1655,8 +1663,13 @@ static void raid5_end_read_request(struct bio * bi, int error) return; } if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) + /* If replacement finished while this request was outstanding, + * 'replacement' might be NULL already. + * In that case it moved down to 'rdev'. + * rdev is not removed until all requests are finished. + */ rdev = conf->disks[i].replacement; - else + if (!rdev) rdev = conf->disks[i].rdev; if (uptodate) { @@ -1753,7 +1766,14 @@ static void raid5_end_write_request(struct bio *bi, int error) } if (bi == &sh->dev[i].rreq) { rdev = conf->disks[i].replacement; - replacement = 1; + if (rdev) + replacement = 1; + else + /* rdev was removed and 'replacement' + * replaced it. rdev is not removed + * until all requests are finished. 
+ */ + rdev = conf->disks[i].rdev; break; } } @@ -3539,6 +3559,9 @@ finish: } if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { rdev = conf->disks[i].replacement; + if (!rdev) + /* rdev has been moved down */ + rdev = conf->disks[i].rdev; rdev_clear_badblocks(rdev, sh->sector, STRIPE_SECTORS); rdev_dec_pending(rdev, conf->mddev); @@ -5204,7 +5227,25 @@ static int raid5_spare_active(struct mddev *mddev) for (i = 0; i < conf->raid_disks; i++) { tmp = conf->disks + i; - if (tmp->rdev + if (tmp->replacement + && tmp->replacement->recovery_offset == MaxSector + && !test_bit(Faulty, &tmp->replacement->flags) + && !test_and_set_bit(In_sync, &tmp->replacement->flags)) { + /* Replacement has just become active. */ + if (!tmp->rdev + || !test_and_clear_bit(In_sync, &tmp->rdev->flags)) + count++; + if (tmp->rdev) { + /* Replaced device not technically faulty, + * but we need to be sure it gets removed + * and never re-added. + */ + set_bit(Faulty, &tmp->rdev->flags); + sysfs_notify_dirent_safe( + tmp->rdev->sysfs_state); + } + sysfs_notify_dirent_safe(tmp->replacement->sysfs_state); + } else if (tmp->rdev && tmp->rdev->recovery_offset == MaxSector && !test_bit(Faulty, &tmp->rdev->flags) && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { @@ -5250,6 +5291,7 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) if (!test_bit(Faulty, &rdev->flags) && mddev->recovery_disabled != conf->recovery_disabled && !has_failed(conf) && + (!p->replacement || p->replacement == rdev) && number < conf->raid_disks) { err = -EBUSY; goto abort; @@ -5260,7 +5302,20 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) /* lost the race, try later */ err = -EBUSY; *rdevp = rdev; - } + } else if (p->replacement) { + /* We must have just cleared 'rdev' */ + p->rdev = p->replacement; + clear_bit(Replacement, &p->replacement->flags); + smp_mb(); /* Make sure other CPUs may see both as identical + * but will never see neither - if they are careful + */ 
p->replacement = NULL; + clear_bit(WantReplacement, &rdev->flags); + } else + /* We might have just removed the Replacement as faulty- + * clear the bit just in case + */ + clear_bit(WantReplacement, &rdev->flags); abort: print_raid5_conf(conf); |