diff options
-rw-r--r-- | drivers/md/raid5-cache.c | 11 | ||||
-rw-r--r-- | drivers/md/raid5-log.h | 3 | ||||
-rw-r--r-- | drivers/md/raid5.c | 29 |
3 files changed, 34 insertions, 9 deletions
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index a6a62e212cd3..cc3f8442f11f 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -24,6 +24,7 @@ #include "md.h" #include "raid5.h" #include "bitmap.h" +#include "raid5-log.h" /* * metadata/data stored in disk with 4k size unit (a block) regardless @@ -680,6 +681,11 @@ static void r5c_disable_writeback_async(struct work_struct *work) return; pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n", mdname(mddev)); + + /* wait superblock change before suspend */ + wait_event(mddev->sb_wait, + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); + mddev_suspend(mddev); log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; mddev_resume(mddev); @@ -2983,7 +2989,7 @@ ioerr: return ret; } -void r5c_update_on_rdev_error(struct mddev *mddev) +void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev) { struct r5conf *conf = mddev->private; struct r5l_log *log = conf->log; @@ -2991,7 +2997,8 @@ void r5c_update_on_rdev_error(struct mddev *mddev) if (!log) return; - if (raid5_calc_degraded(conf) > 0 && + if ((raid5_calc_degraded(conf) > 0 || + test_bit(Journal, &rdev->flags)) && conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK) schedule_work(&log->disable_writeback_work); } diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h index 27097101ccca..328d67aedda4 100644 --- a/drivers/md/raid5-log.h +++ b/drivers/md/raid5-log.h @@ -28,7 +28,8 @@ extern void r5c_flush_cache(struct r5conf *conf, int num); extern void r5c_check_stripe_cache_usage(struct r5conf *conf); extern void r5c_check_cached_full_stripe(struct r5conf *conf); extern struct md_sysfs_entry r5c_journal_mode; -extern void r5c_update_on_rdev_error(struct mddev *mddev); +extern void r5c_update_on_rdev_error(struct mddev *mddev, + struct md_rdev *rdev); extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect); extern struct dma_async_tx_descriptor * diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f8055a7abb4b..0ac57a925606 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2689,7 +2689,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev) bdevname(rdev->bdev, b), mdname(mddev), conf->raid_disks - mddev->degraded); - r5c_update_on_rdev_error(mddev); + r5c_update_on_rdev_error(mddev, rdev); } /* @@ -3050,6 +3050,11 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous) * When LOG_CRITICAL, stripes with injournal == 0 will be sent to * no_space_stripes list. * + * 3. during journal failure + * In journal failure, we try to flush all cached data to raid disks + * based on data in stripe cache. The array is read-only to upper + * layers, so we would skip all pending writes. + * */ static inline bool delay_towrite(struct r5conf *conf, struct r5dev *dev, @@ -3063,6 +3068,9 @@ static inline bool delay_towrite(struct r5conf *conf, if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) && s->injournal > 0) return true; + /* case 3 above */ + if (s->log_failed && s->injournal) + return true; return false; } @@ -4696,10 +4704,15 @@ static void handle_stripe(struct stripe_head *sh) " to_write=%d failed=%d failed_num=%d,%d\n", s.locked, s.uptodate, s.to_read, s.to_write, s.failed, s.failed_num[0], s.failed_num[1]); - /* check if the array has lost more than max_degraded devices and, + /* + * check if the array has lost more than max_degraded devices and, * if so, some requests might need to be failed. + * + * When journal device failed (log_failed), we will only process + * the stripe if there is data need write to raid disks */ - if (s.failed > conf->max_degraded || s.log_failed) { + if (s.failed > conf->max_degraded || + (s.log_failed && s.injournal == 0)) { sh->check_state = 0; sh->reconstruct_state = 0; break_stripe_batch_list(sh, 0); @@ -5272,8 +5285,10 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group) struct stripe_head *sh, *tmp; struct list_head *handle_list = NULL; struct r5worker_group *wg; - bool second_try = !r5c_is_writeback(conf->log); - bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state); + bool second_try = !r5c_is_writeback(conf->log) && + !r5l_log_disk_error(conf); + bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) || + r5l_log_disk_error(conf); again: wg = NULL; @@ -7521,7 +7536,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) * neilb: there is no locking about new writes here, * so this cannot be safe. */ - if (atomic_read(&conf->active_stripes)) { + if (atomic_read(&conf->active_stripes) || + atomic_read(&conf->r5c_cached_full_stripes) || + atomic_read(&conf->r5c_cached_partial_stripes)) { return -EBUSY; } log_exit(conf); |