diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 414 |
1 files changed, 320 insertions, 94 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index f6ae1d67bcd0..82f798be964f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -65,6 +65,8 @@ #include <linux/raid/md_p.h> #include <linux/raid/md_u.h> #include <linux/slab.h> +#include <linux/percpu-refcount.h> + #include <trace/events/block.h> #include "md.h" #include "bitmap.h" @@ -172,6 +174,16 @@ static const struct block_device_operations md_fops; static int start_readonly; +/* + * The original mechanism for creating an md device is to create + * a device node in /dev and to open it. This causes races with device-close. + * The preferred method is to write to the "new_array" module parameter. + * This can avoid races. + * Setting create_on_open to false disables the original mechanism + * so all the races disappear. + */ +static bool create_on_open = true; + /* bio_clone_mddev * like bio_clone, but with a local bio set */ @@ -1507,6 +1519,12 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ } else if (sb->bblog_offset != 0) rdev->badblocks.shift = 0; + if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) { + rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset); + rdev->ppl.size = le16_to_cpu(sb->ppl.size); + rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset; + } + if (!refdev) { ret = 1; } else { @@ -1619,6 +1637,13 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) set_bit(MD_HAS_JOURNAL, &mddev->flags); + + if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) { + if (le32_to_cpu(sb->feature_map) & + (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL)) + return -EINVAL; + set_bit(MD_HAS_PPL, &mddev->flags); + } } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling, except for * spares (which don't need an event count) */ @@ -1832,6 +1857,12 @@ retry: if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL); + if 
(test_bit(MD_HAS_PPL, &mddev->flags)) { + sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL); + sb->ppl.offset = cpu_to_le16(rdev->ppl.offset); + sb->ppl.size = cpu_to_le16(rdev->ppl.size); + } + rdev_for_each(rdev2, mddev) { i = rdev2->desc_nr; if (test_bit(Faulty, &rdev2->flags)) @@ -2072,6 +2103,10 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) if (find_rdev(mddev, rdev->bdev->bd_dev)) return -EEXIST; + if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) && + mddev->pers) + return -EROFS; + /* make sure rdev->sectors exceeds mddev->dev_sectors */ if (!test_bit(Journal, &rdev->flags) && rdev->sectors && @@ -2233,6 +2268,33 @@ static void export_array(struct mddev *mddev) mddev->major_version = 0; } +static bool set_in_sync(struct mddev *mddev) +{ + WARN_ON_ONCE(!spin_is_locked(&mddev->lock)); + if (!mddev->in_sync) { + mddev->sync_checkers++; + spin_unlock(&mddev->lock); + percpu_ref_switch_to_atomic_sync(&mddev->writes_pending); + spin_lock(&mddev->lock); + if (!mddev->in_sync && + percpu_ref_is_zero(&mddev->writes_pending)) { + mddev->in_sync = 1; + /* + * Ensure ->in_sync is visible before we clear + * ->sync_checkers. 
+ */ + smp_mb(); + set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); + sysfs_notify_dirent_safe(mddev->sysfs_state); + } + if (--mddev->sync_checkers == 0) + percpu_ref_switch_to_percpu(&mddev->writes_pending); + } + if (mddev->safemode == 1) + mddev->safemode = 0; + return mddev->in_sync; +} + static void sync_sbs(struct mddev *mddev, int nospares) { /* Update each superblock (in-memory image), but @@ -3131,6 +3193,78 @@ static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len) static struct rdev_sysfs_entry rdev_unack_bad_blocks = __ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store); +static ssize_t +ppl_sector_show(struct md_rdev *rdev, char *page) +{ + return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector); +} + +static ssize_t +ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len) +{ + unsigned long long sector; + + if (kstrtoull(buf, 10, &sector) < 0) + return -EINVAL; + if (sector != (sector_t)sector) + return -EINVAL; + + if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) && + rdev->raid_disk >= 0) + return -EBUSY; + + if (rdev->mddev->persistent) { + if (rdev->mddev->major_version == 0) + return -EINVAL; + if ((sector > rdev->sb_start && + sector - rdev->sb_start > S16_MAX) || + (sector < rdev->sb_start && + rdev->sb_start - sector > -S16_MIN)) + return -EINVAL; + rdev->ppl.offset = sector - rdev->sb_start; + } else if (!rdev->mddev->external) { + return -EBUSY; + } + rdev->ppl.sector = sector; + return len; +} + +static struct rdev_sysfs_entry rdev_ppl_sector = +__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store); + +static ssize_t +ppl_size_show(struct md_rdev *rdev, char *page) +{ + return sprintf(page, "%u\n", rdev->ppl.size); +} + +static ssize_t +ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len) +{ + unsigned int size; + + if (kstrtouint(buf, 10, &size) < 0) + return -EINVAL; + + if (rdev->mddev->pers && test_bit(MD_HAS_PPL, 
&rdev->mddev->flags) && + rdev->raid_disk >= 0) + return -EBUSY; + + if (rdev->mddev->persistent) { + if (rdev->mddev->major_version == 0) + return -EINVAL; + if (size > U16_MAX) + return -EINVAL; + } else if (!rdev->mddev->external) { + return -EBUSY; + } + rdev->ppl.size = size; + return len; +} + +static struct rdev_sysfs_entry rdev_ppl_size = +__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store); + static struct attribute *rdev_default_attrs[] = { &rdev_state.attr, &rdev_errors.attr, @@ -3141,6 +3275,8 @@ static struct attribute *rdev_default_attrs[] = { &rdev_recovery_start.attr, &rdev_bad_blocks.attr, &rdev_unack_bad_blocks.attr, + &rdev_ppl_sector.attr, + &rdev_ppl_size.attr, NULL, }; static ssize_t @@ -3903,6 +4039,7 @@ array_state_show(struct mddev *mddev, char *page) st = read_auto; break; case 0: + spin_lock(&mddev->lock); if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) st = write_pending; else if (mddev->in_sync) @@ -3911,6 +4048,7 @@ array_state_show(struct mddev *mddev, char *page) st = active_idle; else st = active; + spin_unlock(&mddev->lock); } else { if (list_empty(&mddev->disks) && @@ -3931,7 +4069,7 @@ static int restart_array(struct mddev *mddev); static ssize_t array_state_store(struct mddev *mddev, const char *buf, size_t len) { - int err; + int err = 0; enum array_state st = match_word(buf, array_states); if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) { @@ -3944,18 +4082,9 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags); md_wakeup_thread(mddev->thread); wake_up(&mddev->sb_wait); - err = 0; } else /* st == clean */ { restart_array(mddev); - if (atomic_read(&mddev->writes_pending) == 0) { - if (mddev->in_sync == 0) { - mddev->in_sync = 1; - if (mddev->safemode == 1) - mddev->safemode = 0; - set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); - } - err = 0; - } else + if (!set_in_sync(mddev)) err = -EBUSY; } if (!err) @@ -4013,15 +4142,7 
@@ array_state_store(struct mddev *mddev, const char *buf, size_t len) if (err) break; spin_lock(&mddev->lock); - if (atomic_read(&mddev->writes_pending) == 0) { - if (mddev->in_sync == 0) { - mddev->in_sync = 1; - if (mddev->safemode == 1) - mddev->safemode = 0; - set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); - } - err = 0; - } else + if (!set_in_sync(mddev)) err = -EBUSY; spin_unlock(&mddev->lock); } else @@ -4843,8 +4964,10 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len) return err; /* cluster raid doesn't support change array_sectors */ - if (mddev_is_clustered(mddev)) + if (mddev_is_clustered(mddev)) { + mddev_unlock(mddev); return -EINVAL; + } if (strncmp(buf, "default", 7) == 0) { if (mddev->pers) @@ -4877,6 +5000,52 @@ static struct md_sysfs_entry md_array_size = __ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show, array_size_store); +static ssize_t +consistency_policy_show(struct mddev *mddev, char *page) +{ + int ret; + + if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { + ret = sprintf(page, "journal\n"); + } else if (test_bit(MD_HAS_PPL, &mddev->flags)) { + ret = sprintf(page, "ppl\n"); + } else if (mddev->bitmap) { + ret = sprintf(page, "bitmap\n"); + } else if (mddev->pers) { + if (mddev->pers->sync_request) + ret = sprintf(page, "resync\n"); + else + ret = sprintf(page, "none\n"); + } else { + ret = sprintf(page, "unknown\n"); + } + + return ret; +} + +static ssize_t +consistency_policy_store(struct mddev *mddev, const char *buf, size_t len) +{ + int err = 0; + + if (mddev->pers) { + if (mddev->pers->change_consistency_policy) + err = mddev->pers->change_consistency_policy(mddev, buf); + else + err = -EBUSY; + } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) { + set_bit(MD_HAS_PPL, &mddev->flags); + } else { + err = -EINVAL; + } + + return err ? 
err : len; +} + +static struct md_sysfs_entry md_consistency_policy = +__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show, + consistency_policy_store); + static struct attribute *md_default_attrs[] = { &md_level.attr, &md_layout.attr, @@ -4892,6 +5061,7 @@ static struct attribute *md_default_attrs[] = { &md_reshape_direction.attr, &md_array_size.attr, &max_corr_read_errors.attr, + &md_consistency_policy.attr, NULL, }; @@ -4976,6 +5146,7 @@ static void md_free(struct kobject *ko) del_gendisk(mddev->gendisk); put_disk(mddev->gendisk); } + percpu_ref_exit(&mddev->writes_pending); kfree(mddev); } @@ -5001,8 +5172,19 @@ static void mddev_delayed_delete(struct work_struct *ws) kobject_put(&mddev->kobj); } +static void no_op(struct percpu_ref *r) {} + static int md_alloc(dev_t dev, char *name) { + /* + * If dev is zero, name is the name of a device to allocate with + * an arbitrary minor number. It will be "md_???" + * If dev is non-zero it must be a device number with a MAJOR of + * MD_MAJOR or mdp_major. In this case, if "name" is NULL, then + * the device is being created by opening a node in /dev. + * If "name" is not NULL, the device is being created by + * writing to /sys/module/md_mod/parameters/new_array. + */ static DEFINE_MUTEX(disks_mutex); struct mddev *mddev = mddev_find(dev); struct gendisk *disk; @@ -5028,7 +5210,7 @@ static int md_alloc(dev_t dev, char *name) if (mddev->gendisk) goto abort; - if (name) { + if (name && !dev) { /* Need to ensure that 'name' is not a duplicate. */ struct mddev *mddev2; @@ -5042,6 +5224,11 @@ static int md_alloc(dev_t dev, char *name) } spin_unlock(&all_mddevs_lock); } + if (name && dev) + /* + * Creating /dev/mdNNN via "newarray", so adjust hold_active. 
+ */ + mddev->hold_active = UNTIL_STOP; error = -ENOMEM; mddev->queue = blk_alloc_queue(GFP_KERNEL); @@ -5052,6 +5239,10 @@ static int md_alloc(dev_t dev, char *name) blk_queue_make_request(mddev->queue, md_make_request); blk_set_stacking_limits(&mddev->queue->limits); + if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0) + goto abort; + /* We want to start with the refcount at zero */ + percpu_ref_put(&mddev->writes_pending); disk = alloc_disk(1 << shift); if (!disk) { blk_cleanup_queue(mddev->queue); @@ -5108,38 +5299,48 @@ static int md_alloc(dev_t dev, char *name) static struct kobject *md_probe(dev_t dev, int *part, void *data) { - md_alloc(dev, NULL); + if (create_on_open) + md_alloc(dev, NULL); return NULL; } static int add_named_array(const char *val, struct kernel_param *kp) { - /* val must be "md_*" where * is not all digits. - * We allocate an array with a large free minor number, and + /* + * val must be "md_*" or "mdNNN". + * For "md_*" we allocate an array with a large free minor number, and * set the name to val. val must not already be an active name. + * For "mdNNN" we allocate an array with the minor number NNN + * which must not already be in use. 
*/ int len = strlen(val); char buf[DISK_NAME_LEN]; + unsigned long devnum; while (len && val[len-1] == '\n') len--; if (len >= DISK_NAME_LEN) return -E2BIG; strlcpy(buf, val, len+1); - if (strncmp(buf, "md_", 3) != 0) - return -EINVAL; - return md_alloc(0, buf); + if (strncmp(buf, "md_", 3) == 0) + return md_alloc(0, buf); + if (strncmp(buf, "md", 2) == 0 && + isdigit(buf[2]) && + kstrtoul(buf+2, 10, &devnum) == 0 && + devnum <= MINORMASK) + return md_alloc(MKDEV(MD_MAJOR, devnum), NULL); + + return -EINVAL; } static void md_safemode_timeout(unsigned long data) { struct mddev *mddev = (struct mddev *) data; - if (!atomic_read(&mddev->writes_pending)) { - mddev->safemode = 1; - if (mddev->external) - sysfs_notify_dirent_safe(mddev->sysfs_state); - } + mddev->safemode = 1; + if (mddev->external) + sysfs_notify_dirent_safe(mddev->sysfs_state); + md_wakeup_thread(mddev->thread); } @@ -5185,6 +5386,13 @@ int md_run(struct mddev *mddev) continue; sync_blockdev(rdev->bdev); invalidate_bdev(rdev->bdev); + if (mddev->ro != 1 && + (bdev_read_only(rdev->bdev) || + bdev_read_only(rdev->meta_bdev))) { + mddev->ro = 1; + if (mddev->gendisk) + set_disk_ro(mddev->gendisk, 1); + } /* perform some consistency tests on the device. 
* We don't want the data to overlap the metadata, @@ -5344,7 +5552,6 @@ int md_run(struct mddev *mddev) } else if (mddev->ro == 2) /* auto-readonly not meaningful */ mddev->ro = 0; - atomic_set(&mddev->writes_pending,0); atomic_set(&mddev->max_corr_read_errors, MD_DEFAULT_MAX_CORRECTED_READ_ERRORS); mddev->safemode = 0; @@ -5410,6 +5617,9 @@ out: static int restart_array(struct mddev *mddev) { struct gendisk *disk = mddev->gendisk; + struct md_rdev *rdev; + bool has_journal = false; + bool has_readonly = false; /* Complain if it has no devices */ if (list_empty(&mddev->disks)) @@ -5418,24 +5628,21 @@ static int restart_array(struct mddev *mddev) return -EINVAL; if (!mddev->ro) return -EBUSY; - if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { - struct md_rdev *rdev; - bool has_journal = false; - - rcu_read_lock(); - rdev_for_each_rcu(rdev, mddev) { - if (test_bit(Journal, &rdev->flags) && - !test_bit(Faulty, &rdev->flags)) { - has_journal = true; - break; - } - } - rcu_read_unlock(); + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { + if (test_bit(Journal, &rdev->flags) && + !test_bit(Faulty, &rdev->flags)) + has_journal = true; + if (bdev_read_only(rdev->bdev)) + has_readonly = true; + } + rcu_read_unlock(); + if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal) /* Don't restart rw with journal missing/faulty */ - if (!has_journal) return -EINVAL; - } + if (has_readonly) + return -EROFS; mddev->safemode = 0; mddev->ro = 0; @@ -5535,15 +5742,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes); static void mddev_detach(struct mddev *mddev) { - struct bitmap *bitmap = mddev->bitmap; - /* wait for behind writes to complete */ - if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { - pr_debug("md:%s: behind writes in progress - waiting to stop.\n", - mdname(mddev)); - /* need to kick something here to make sure I/O goes? 
*/ - wait_event(bitmap->behind_wait, - atomic_read(&bitmap->behind_writes) == 0); - } + bitmap_wait_behind_writes(mddev); if (mddev->pers && mddev->pers->quiesce) { mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); @@ -5556,6 +5755,7 @@ static void mddev_detach(struct mddev *mddev) static void __md_stop(struct mddev *mddev) { struct md_personality *pers = mddev->pers; + bitmap_destroy(mddev); mddev_detach(mddev); /* Ensure ->event_work is done */ flush_workqueue(md_misc_wq); @@ -5576,7 +5776,6 @@ void md_stop(struct mddev *mddev) * This is called from dm-raid */ __md_stop(mddev); - bitmap_destroy(mddev); if (mddev->bio_set) bioset_free(mddev->bio_set); } @@ -5714,7 +5913,6 @@ static int do_md_stop(struct mddev *mddev, int mode, if (mode == 0) { pr_info("md: %s stopped.\n", mdname(mddev)); - bitmap_destroy(mddev); if (mddev->bitmap_info.file) { struct file *f = mddev->bitmap_info.file; spin_lock(&mddev->lock); @@ -6493,10 +6691,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) struct md_rdev *rdev; int rv; int fit = (num_sectors == 0); - - /* cluster raid doesn't support update size */ - if (mddev_is_clustered(mddev)) - return -EINVAL; + sector_t old_dev_sectors = mddev->dev_sectors; if (mddev->pers->resize == NULL) return -EINVAL; @@ -6525,7 +6720,9 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) } rv = mddev->pers->resize(mddev, num_sectors); if (!rv) { - if (mddev->queue) { + if (mddev_is_clustered(mddev)) + md_cluster_ops->update_size(mddev, old_dev_sectors); + else if (mddev->queue) { set_capacity(mddev->gendisk, mddev->array_sectors); revalidate_disk(mddev->gendisk); } @@ -6776,6 +6973,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, void __user *argp = (void __user *)arg; struct mddev *mddev = NULL; int ro; + bool did_set_md_closing = false; if (!md_ioctl_valid(cmd)) return -ENOTTY; @@ -6865,7 +7063,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto out; 
} + WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags)); set_bit(MD_CLOSING, &mddev->flags); + did_set_md_closing = true; mutex_unlock(&mddev->open_mutex); sync_blockdev(bdev); } @@ -7058,6 +7258,8 @@ unlock: mddev->hold_active = 0; mddev_unlock(mddev); out: + if(did_set_md_closing) + clear_bit(MD_CLOSING, &mddev->flags); return err; } #ifdef CONFIG_COMPAT @@ -7208,8 +7410,8 @@ void md_wakeup_thread(struct md_thread *thread) { if (thread) { pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm); - set_bit(THREAD_WAKEUP, &thread->flags); - wake_up(&thread->wqueue); + if (!test_and_set_bit(THREAD_WAKEUP, &thread->flags)) + wake_up(&thread->wqueue); } } EXPORT_SYMBOL(md_wakeup_thread); @@ -7756,10 +7958,13 @@ void md_write_start(struct mddev *mddev, struct bio *bi) md_wakeup_thread(mddev->sync_thread); did_change = 1; } - atomic_inc(&mddev->writes_pending); + rcu_read_lock(); + percpu_ref_get(&mddev->writes_pending); + smp_mb(); /* Match smp_mb in set_in_sync() */ if (mddev->safemode == 1) mddev->safemode = 0; - if (mddev->in_sync) { + /* sync_checkers is always 0 when writes_pending is in per-cpu mode */ + if (mddev->in_sync || !mddev->sync_checkers) { spin_lock(&mddev->lock); if (mddev->in_sync) { mddev->in_sync = 0; @@ -7770,6 +7975,7 @@ void md_write_start(struct mddev *mddev, struct bio *bi) } spin_unlock(&mddev->lock); } + rcu_read_unlock(); if (did_change) sysfs_notify_dirent_safe(mddev->sysfs_state); wait_event(mddev->sb_wait, @@ -7777,15 +7983,38 @@ void md_write_start(struct mddev *mddev, struct bio *bi) } EXPORT_SYMBOL(md_write_start); +/* md_write_inc can only be called when md_write_start() has + * already been called at least once of the current request. + * It increments the counter and is useful when a single request + * is split into several parts. Each part causes an increment and + * so needs a matching md_write_end(). + * Unlike md_write_start(), it is safe to call md_write_inc() inside + * a spinlocked region. 
+ */ +void md_write_inc(struct mddev *mddev, struct bio *bi) +{ + if (bio_data_dir(bi) != WRITE) + return; + WARN_ON_ONCE(mddev->in_sync || mddev->ro); + percpu_ref_get(&mddev->writes_pending); +} +EXPORT_SYMBOL(md_write_inc); + void md_write_end(struct mddev *mddev) { - if (atomic_dec_and_test(&mddev->writes_pending)) { - if (mddev->safemode == 2) - md_wakeup_thread(mddev->thread); - else if (mddev->safemode_delay) - mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay); - } + percpu_ref_put(&mddev->writes_pending); + + if (mddev->safemode == 2) + md_wakeup_thread(mddev->thread); + else if (mddev->safemode_delay) + /* The roundup() ensures this only performs locking once + * every ->safemode_delay jiffies + */ + mod_timer(&mddev->safemode_timer, + roundup(jiffies, mddev->safemode_delay) + + mddev->safemode_delay); } + EXPORT_SYMBOL(md_write_end); /* md_allow_write(mddev) @@ -8385,9 +8614,8 @@ void md_check_recovery(struct mddev *mddev) (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || test_bit(MD_RECOVERY_DONE, &mddev->recovery) || - test_bit(MD_RELOAD_SB, &mddev->flags) || (mddev->external == 0 && mddev->safemode == 1) || - (mddev->safemode == 2 && ! 
atomic_read(&mddev->writes_pending) + (mddev->safemode == 2 && !mddev->in_sync && mddev->recovery_cp == MaxSector) )) return; @@ -8434,27 +8662,12 @@ void md_check_recovery(struct mddev *mddev) rdev->raid_disk < 0) md_kick_rdev_from_array(rdev); } - - if (test_and_clear_bit(MD_RELOAD_SB, &mddev->flags)) - md_reload_sb(mddev, mddev->good_device_nr); } - if (!mddev->external) { - int did_change = 0; + if (!mddev->external && !mddev->in_sync) { spin_lock(&mddev->lock); - if (mddev->safemode && - !atomic_read(&mddev->writes_pending) && - !mddev->in_sync && - mddev->recovery_cp == MaxSector) { - mddev->in_sync = 1; - did_change = 1; - set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); - } - if (mddev->safemode == 1) - mddev->safemode = 0; + set_in_sync(mddev); spin_unlock(&mddev->lock); - if (did_change) - sysfs_notify_dirent_safe(mddev->sysfs_state); } if (mddev->sb_flags) @@ -8747,6 +8960,18 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) int role, ret; char b[BDEVNAME_SIZE]; + /* + * If size is changed in another node then we need to + * do resize as well. + */ + if (mddev->dev_sectors != le64_to_cpu(sb->size)) { + ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size)); + if (ret) + pr_info("md-cluster: resize failed\n"); + else + bitmap_update_sb(mddev->bitmap); + } + /* Check for change of roles in the active devices */ rdev_for_each(rdev2, mddev) { if (test_bit(Faulty, &rdev2->flags)) @@ -8997,6 +9222,7 @@ static int set_ro(const char *val, struct kernel_param *kp) module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR); +module_param(create_on_open, bool, S_IRUSR|S_IWUSR); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD RAID framework"); |