summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-03-16 11:43:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-03-16 11:43:48 -0700
commit3009b303b0f9868295205d5f750a2d088e3965ce (patch)
tree2fa619ad6efb1dfdd53bb9ba54b29b4176edfa64
parent69eea5a4ab9c705496e912b55a9d312325de19e6 (diff)
parent11353b9d10392e79e32603d2178e75feb25eaf0d (diff)
downloadlinux-3009b303b0f9868295205d5f750a2d088e3965ce.tar.bz2
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD fixes from Shaohua Li: - fix a parity calculation bug of raid5 cache by Song - fix a potential deadlock issue by me - fix two endian issues by Jason - fix a disk limitation issue by Neil - other small fixes and cleanup * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: md/raid1: fix a trivial typo in comments md/r5cache: fix set_syndrome_sources() for data in cache md: fix incorrect use of lexx_to_cpu in does_sb_need_changing md: fix super_offset endianness in super_1_rdev_size_change md/raid1/10: fix potential deadlock md: don't impose the MD_SB_DISKS limit on arrays without metadata. md: move funcs from pers->resize to update_size md-cluster: remove useless memset from gather_all_resync_info md-cluster: free md_cluster_info if node leave cluster md: delete dead code md/raid10: submit bio directly to replacement disk
-rw-r--r--drivers/md/md-cluster.c2
-rw-r--r--drivers/md/md.c27
-rw-r--r--drivers/md/md.h6
-rw-r--r--drivers/md/raid1.c29
-rw-r--r--drivers/md/raid10.c41
-rw-r--r--drivers/md/raid5.c5
6 files changed, 72 insertions, 38 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 2b13117fb918..321ecac23027 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -777,7 +777,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
bm_lockres->flags |= DLM_LKF_NOQUEUE;
ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
if (ret == -EAGAIN) {
- memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
s = read_resync_info(mddev, bm_lockres);
if (s) {
pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
@@ -974,6 +973,7 @@ static int leave(struct mddev *mddev)
lockres_free(cinfo->bitmap_lockres);
unlock_all_bitmaps(mddev);
dlm_release_lockspace(cinfo->lockspace, 2);
+ kfree(cinfo);
return 0;
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 548d1b8014f8..f6ae1d67bcd0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
}
EXPORT_SYMBOL(md_flush_request);
-void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
-{
- struct mddev *mddev = cb->data;
- md_wakeup_thread(mddev->thread);
- kfree(cb);
-}
-EXPORT_SYMBOL(md_unplug);
-
static inline struct mddev *mddev_get(struct mddev *mddev)
{
atomic_inc(&mddev->active);
@@ -1887,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
}
sb = page_address(rdev->sb_page);
sb->data_size = cpu_to_le64(num_sectors);
- sb->super_offset = rdev->sb_start;
+ sb->super_offset = cpu_to_le64(rdev->sb_start);
sb->sb_csum = calc_sb_1_csum(sb);
do {
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
@@ -2295,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev)
/* Check if any mddev parameters have changed */
if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
(mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
- (mddev->layout != le64_to_cpu(sb->layout)) ||
+ (mddev->layout != le32_to_cpu(sb->layout)) ||
(mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
(mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
return true;
@@ -6458,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->layout = info->layout;
mddev->chunk_sectors = info->chunk_size >> 9;
- mddev->max_disks = MD_SB_DISKS;
-
if (mddev->persistent) {
- mddev->flags = 0;
- mddev->sb_flags = 0;
+ mddev->max_disks = MD_SB_DISKS;
+ mddev->flags = 0;
+ mddev->sb_flags = 0;
}
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
@@ -6533,8 +6524,12 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
return -ENOSPC;
}
rv = mddev->pers->resize(mddev, num_sectors);
- if (!rv)
- revalidate_disk(mddev->gendisk);
+ if (!rv) {
+ if (mddev->queue) {
+ set_capacity(mddev->gendisk, mddev->array_sectors);
+ revalidate_disk(mddev->gendisk);
+ }
+ }
return rv;
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index b8859cbf84b6..dde8ecb760c8 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev);
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
struct mddev *mddev);
-extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev * rdev);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
-static inline int mddev_check_plugged(struct mddev *mddev)
-{
- return !!blk_check_plugged(md_unplug, mddev,
- sizeof(struct blk_plug_cb));
-}
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fbc2d7851b49..a34f58772022 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf)
static void freeze_array(struct r1conf *conf, int extra)
{
/* Stop sync I/O and normal I/O and wait for everything to
- * go quite.
+ * go quiet.
* This is called in two situations:
* 1) management command handlers (reshape, remove disk, quiesce).
* 2) one normal I/O request failed.
@@ -1587,9 +1587,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
split = bio;
}
- if (bio_data_dir(split) == READ)
+ if (bio_data_dir(split) == READ) {
raid1_read_request(mddev, split);
- else
+
+ /*
+ * If a bio is splitted, the first part of bio will
+ * pass barrier but the bio is queued in
+ * current->bio_list (see generic_make_request). If
+ * there is a raise_barrier() called here, the second
+ * part of bio can't pass barrier. But since the first
+ * part bio isn't dispatched to underlaying disks yet,
+ * the barrier is never released, hence raise_barrier
+ * will alays wait. We have a deadlock.
+ * Note, this only happens in read path. For write
+ * path, the first part of bio is dispatched in a
+ * schedule() call (because of blk plug) or offloaded
+ * to raid10d.
+ * Quitting from the function immediately can change
+ * the bio order queued in bio_list and avoid the deadlock.
+ */
+ if (split != bio) {
+ generic_make_request(bio);
+ break;
+ }
+ } else
raid1_write_request(mddev, split);
} while (split != bio);
}
@@ -3246,8 +3267,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
return ret;
}
md_set_array_sectors(mddev, newsize);
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > mddev->dev_sectors) {
mddev->recovery_cp = mddev->dev_sectors;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0536658c9d40..e89a8d78a9ed 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1478,11 +1478,24 @@ retry_write:
mbio->bi_bdev = (void*)rdev;
atomic_inc(&r10_bio->remaining);
+
+ cb = blk_check_plugged(raid10_unplug, mddev,
+ sizeof(*plug));
+ if (cb)
+ plug = container_of(cb, struct raid10_plug_cb,
+ cb);
+ else
+ plug = NULL;
spin_lock_irqsave(&conf->device_lock, flags);
- bio_list_add(&conf->pending_bio_list, mbio);
- conf->pending_count++;
+ if (plug) {
+ bio_list_add(&plug->pending, mbio);
+ plug->pending_cnt++;
+ } else {
+ bio_list_add(&conf->pending_bio_list, mbio);
+ conf->pending_count++;
+ }
spin_unlock_irqrestore(&conf->device_lock, flags);
- if (!mddev_check_plugged(mddev))
+ if (!plug)
md_wakeup_thread(mddev->thread);
}
}
@@ -1572,7 +1585,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
split = bio;
}
+ /*
+ * If a bio is splitted, the first part of bio will pass
+ * barrier but the bio is queued in current->bio_list (see
+ * generic_make_request). If there is a raise_barrier() called
+ * here, the second part of bio can't pass barrier. But since
+ * the first part bio isn't dispatched to underlaying disks
+ * yet, the barrier is never released, hence raise_barrier will
+ * alays wait. We have a deadlock.
+ * Note, this only happens in read path. For write path, the
+ * first part of bio is dispatched in a schedule() call
+ * (because of blk plug) or offloaded to raid10d.
+ * Quitting from the function immediately can change the bio
+ * order queued in bio_list and avoid the deadlock.
+ */
__make_request(mddev, split);
+ if (split != bio && bio_data_dir(bio) == READ) {
+ generic_make_request(bio);
+ break;
+ }
} while (split != bio);
/* In case raid10d snuck in to freeze_array */
@@ -3944,10 +3975,6 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
return ret;
}
md_set_array_sectors(mddev, size);
- if (mddev->queue) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
- }
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > oldsize) {
mddev->recovery_cp = oldsize;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4fb09b3fcb41..ed5cd705b985 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs,
(test_bit(R5_Wantdrain, &dev->flags) ||
test_bit(R5_InJournal, &dev->flags))) ||
(srctype == SYNDROME_SRC_WRITTEN &&
- dev->written)) {
+ (dev->written ||
+ test_bit(R5_InJournal, &dev->flags)))) {
if (test_bit(R5_InJournal, &dev->flags))
srcs[slot] = sh->dev[i].orig_page;
else
@@ -7605,8 +7606,6 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
return ret;
}
md_set_array_sectors(mddev, newsize);
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > mddev->dev_sectors) {
mddev->recovery_cp = mddev->dev_sectors;