Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-linear.c      |  16
-rw-r--r--  drivers/md/dm-log-writes.c  |  15
-rw-r--r--  drivers/md/dm-raid.c        |   2
-rw-r--r--  drivers/md/dm-stripe.c      |  21
-rw-r--r--  drivers/md/dm-table.c       |  10
-rw-r--r--  drivers/md/dm-verity-fec.c  |   2
-rw-r--r--  drivers/md/dm.c             |  25
-rw-r--r--  drivers/md/md.c             | 212
-rw-r--r--  drivers/md/md.h             |  22
-rw-r--r--  drivers/md/raid1.c          |   4
-rw-r--r--  drivers/md/raid10.c         |  10
-rw-r--r--  drivers/md/raid5.c          |  12
-rw-r--r--  drivers/md/raid5.h          |   1
13 files changed, 245 insertions(+), 107 deletions(-)
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 775c06d953b7..d10964d41fd7 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -185,9 +185,24 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
+static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ struct linear_c *lc = ti->private;
+ struct block_device *bdev = lc->dev->bdev;
+ struct dax_device *dax_dev = lc->dev->dax_dev;
+ sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+
+ dev_sector = linear_map_sector(ti, sector);
+ if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
+ return 0;
+ return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
+}
+
#else
#define linear_dax_direct_access NULL
#define linear_dax_copy_from_iter NULL
+#define linear_dax_copy_to_iter NULL
#endif
static struct target_type linear_target = {
@@ -204,6 +219,7 @@ static struct target_type linear_target = {
.iterate_devices = linear_iterate_devices,
.direct_access = linear_dax_direct_access,
.dax_copy_from_iter = linear_dax_copy_from_iter,
+ .dax_copy_to_iter = linear_dax_copy_to_iter,
};
int __init dm_linear_init(void)
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index c90c7c08a77f..9ea2b0291f20 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -962,9 +962,23 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
dax_copy:
return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
}
+
+static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
+ pgoff_t pgoff, void *addr, size_t bytes,
+ struct iov_iter *i)
+{
+ struct log_writes_c *lc = ti->private;
+ sector_t sector = pgoff * PAGE_SECTORS;
+
+ if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
+ return 0;
+ return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
+}
+
#else
#define log_writes_dax_direct_access NULL
#define log_writes_dax_copy_from_iter NULL
+#define log_writes_dax_copy_to_iter NULL
#endif
static struct target_type log_writes_target = {
@@ -982,6 +996,7 @@ static struct target_type log_writes_target = {
.io_hints = log_writes_io_hints,
.direct_access = log_writes_dax_direct_access,
.dax_copy_from_iter = log_writes_dax_copy_from_iter,
+ .dax_copy_to_iter = log_writes_dax_copy_to_iter,
};
static int __init dm_log_writes_init(void)
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 6f823f44b4aa..ab13fcec3fca 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -756,7 +756,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
return ERR_PTR(-EINVAL);
}
- rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL);
+ rs = kzalloc(struct_size(rs, dev, raid_devs), GFP_KERNEL);
if (!rs) {
ti->error = "Cannot allocate raid context";
return ERR_PTR(-ENOMEM);
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index fe7fb9b1aec3..8547d7594338 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -354,9 +354,29 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
+static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+ struct stripe_c *sc = ti->private;
+ struct dax_device *dax_dev;
+ struct block_device *bdev;
+ uint32_t stripe;
+
+ stripe_map_sector(sc, sector, &stripe, &dev_sector);
+ dev_sector += sc->stripe[stripe].physical_start;
+ dax_dev = sc->stripe[stripe].dev->dax_dev;
+ bdev = sc->stripe[stripe].dev->bdev;
+
+ if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
+ return 0;
+ return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
+}
+
#else
#define stripe_dax_direct_access NULL
#define stripe_dax_copy_from_iter NULL
+#define stripe_dax_copy_to_iter NULL
#endif
/*
@@ -478,6 +498,7 @@ static struct target_type stripe_target = {
.io_hints = stripe_io_hints,
.direct_access = stripe_dax_direct_access,
.dax_copy_from_iter = stripe_dax_copy_from_iter,
+ .dax_copy_to_iter = stripe_dax_copy_to_iter,
};
int __init dm_stripe_init(void)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 0589a4da12bb..caa51dd351b6 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -548,14 +548,14 @@ static int adjoin(struct dm_table *table, struct dm_target *ti)
* On the other hand, dm-switch needs to process bulk data using messages and
* excessive use of GFP_NOIO could cause trouble.
*/
-static char **realloc_argv(unsigned *array_size, char **old_argv)
+static char **realloc_argv(unsigned *size, char **old_argv)
{
char **argv;
unsigned new_size;
gfp_t gfp;
- if (*array_size) {
- new_size = *array_size * 2;
+ if (*size) {
+ new_size = *size * 2;
gfp = GFP_KERNEL;
} else {
new_size = 8;
@@ -563,8 +563,8 @@ static char **realloc_argv(unsigned *array_size, char **old_argv)
}
argv = kmalloc(new_size * sizeof(*argv), gfp);
if (argv) {
- memcpy(argv, old_argv, *array_size * sizeof(*argv));
- *array_size = new_size;
+ memcpy(argv, old_argv, *size * sizeof(*argv));
+ *size = new_size;
}
kfree(old_argv);
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 86405869f1af..684af08d0747 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -570,7 +570,7 @@ static void *fec_rs_alloc(gfp_t gfp_mask, void *pool_data)
{
struct dm_verity *v = (struct dm_verity *)pool_data;
- return init_rs(8, 0x11d, 0, 1, v->fec->roots);
+ return init_rs_gfp(8, 0x11d, 0, 1, v->fec->roots, gfp_mask);
}
static void fec_rs_free(void *element, void *pool_data)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 20a8d63754bf..e65429a29c06 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1089,6 +1089,30 @@ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
return ret;
}
+static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ struct mapped_device *md = dax_get_private(dax_dev);
+ sector_t sector = pgoff * PAGE_SECTORS;
+ struct dm_target *ti;
+ long ret = 0;
+ int srcu_idx;
+
+ ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+
+ if (!ti)
+ goto out;
+ if (!ti->type->dax_copy_to_iter) {
+ ret = copy_to_iter(addr, bytes, i);
+ goto out;
+ }
+ ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i);
+ out:
+ dm_put_live_table(md, srcu_idx);
+
+ return ret;
+}
+
/*
* A target may call dm_accept_partial_bio only from the map routine. It is
* allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET.
@@ -3137,6 +3161,7 @@ static const struct block_device_operations dm_blk_dops = {
static const struct dax_operations dm_dax_ops = {
.direct_access = dm_dax_direct_access,
.copy_from_iter = dm_dax_copy_from_iter,
+ .copy_to_iter = dm_dax_copy_to_iter,
};
/*
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fc692b7128bb..29b0cd9ec951 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -84,6 +84,8 @@ static void autostart_arrays(int part);
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);
+static struct kobj_type md_ktype;
+
struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
struct module *md_cluster_mod;
@@ -130,6 +132,24 @@ static inline int speed_max(struct mddev *mddev)
mddev->sync_speed_max : sysctl_speed_limit_max;
}
+static void * flush_info_alloc(gfp_t gfp_flags, void *data)
+{
+ return kzalloc(sizeof(struct flush_info), gfp_flags);
+}
+static void flush_info_free(void *flush_info, void *data)
+{
+ kfree(flush_info);
+}
+
+static void * flush_bio_alloc(gfp_t gfp_flags, void *data)
+{
+ return kzalloc(sizeof(struct flush_bio), gfp_flags);
+}
+static void flush_bio_free(void *flush_bio, void *data)
+{
+ kfree(flush_bio);
+}
+
static struct ctl_table_header *raid_table_header;
static struct ctl_table raid_table[] = {
@@ -412,30 +432,53 @@ static int md_congested(void *data, int bits)
/*
* Generic flush handling for md
*/
+static void submit_flushes(struct work_struct *ws)
+{
+ struct flush_info *fi = container_of(ws, struct flush_info, flush_work);
+ struct mddev *mddev = fi->mddev;
+ struct bio *bio = fi->bio;
+
+ bio->bi_opf &= ~REQ_PREFLUSH;
+ md_handle_request(mddev, bio);
+
+ mempool_free(fi, mddev->flush_pool);
+}
-static void md_end_flush(struct bio *bio)
+static void md_end_flush(struct bio *fbio)
{
- struct md_rdev *rdev = bio->bi_private;
- struct mddev *mddev = rdev->mddev;
+ struct flush_bio *fb = fbio->bi_private;
+ struct md_rdev *rdev = fb->rdev;
+ struct flush_info *fi = fb->fi;
+ struct bio *bio = fi->bio;
+ struct mddev *mddev = fi->mddev;
rdev_dec_pending(rdev, mddev);
- if (atomic_dec_and_test(&mddev->flush_pending)) {
- /* The pre-request flush has finished */
- queue_work(md_wq, &mddev->flush_work);
+ if (atomic_dec_and_test(&fi->flush_pending)) {
+ if (bio->bi_iter.bi_size == 0)
+ /* an empty barrier - all done */
+ bio_endio(bio);
+ else {
+ INIT_WORK(&fi->flush_work, submit_flushes);
+ queue_work(md_wq, &fi->flush_work);
+ }
}
- bio_put(bio);
-}
-static void md_submit_flush_data(struct work_struct *ws);
+ mempool_free(fb, mddev->flush_bio_pool);
+ bio_put(fbio);
+}
-static void submit_flushes(struct work_struct *ws)
+void md_flush_request(struct mddev *mddev, struct bio *bio)
{
- struct mddev *mddev = container_of(ws, struct mddev, flush_work);
struct md_rdev *rdev;
+ struct flush_info *fi;
+
+ fi = mempool_alloc(mddev->flush_pool, GFP_NOIO);
+
+ fi->bio = bio;
+ fi->mddev = mddev;
+ atomic_set(&fi->flush_pending, 1);
- INIT_WORK(&mddev->flush_work, md_submit_flush_data);
- atomic_set(&mddev->flush_pending, 1);
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev)
if (rdev->raid_disk >= 0 &&
@@ -445,59 +488,39 @@ static void submit_flushes(struct work_struct *ws)
* we reclaim rcu_read_lock
*/
struct bio *bi;
+ struct flush_bio *fb;
atomic_inc(&rdev->nr_pending);
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
+
+ fb = mempool_alloc(mddev->flush_bio_pool, GFP_NOIO);
+ fb->fi = fi;
+ fb->rdev = rdev;
+
bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
- bi->bi_end_io = md_end_flush;
- bi->bi_private = rdev;
bio_set_dev(bi, rdev->bdev);
+ bi->bi_end_io = md_end_flush;
+ bi->bi_private = fb;
bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
- atomic_inc(&mddev->flush_pending);
+
+ atomic_inc(&fi->flush_pending);
submit_bio(bi);
+
rcu_read_lock();
rdev_dec_pending(rdev, mddev);
}
rcu_read_unlock();
- if (atomic_dec_and_test(&mddev->flush_pending))
- queue_work(md_wq, &mddev->flush_work);
-}
-
-static void md_submit_flush_data(struct work_struct *ws)
-{
- struct mddev *mddev = container_of(ws, struct mddev, flush_work);
- struct bio *bio = mddev->flush_bio;
- /*
- * must reset flush_bio before calling into md_handle_request to avoid a
- * deadlock, because other bios passed md_handle_request suspend check
- * could wait for this and below md_handle_request could wait for those
- * bios because of suspend check
- */
- mddev->flush_bio = NULL;
- wake_up(&mddev->sb_wait);
-
- if (bio->bi_iter.bi_size == 0)
- /* an empty barrier - all done */
- bio_endio(bio);
- else {
- bio->bi_opf &= ~REQ_PREFLUSH;
- md_handle_request(mddev, bio);
+ if (atomic_dec_and_test(&fi->flush_pending)) {
+ if (bio->bi_iter.bi_size == 0)
+ /* an empty barrier - all done */
+ bio_endio(bio);
+ else {
+ INIT_WORK(&fi->flush_work, submit_flushes);
+ queue_work(md_wq, &fi->flush_work);
+ }
}
}
-
-void md_flush_request(struct mddev *mddev, struct bio *bio)
-{
- spin_lock_irq(&mddev->lock);
- wait_event_lock_irq(mddev->sb_wait,
- !mddev->flush_bio,
- mddev->lock);
- mddev->flush_bio = bio;
- spin_unlock_irq(&mddev->lock);
-
- INIT_WORK(&mddev->flush_work, submit_flushes);
- queue_work(md_wq, &mddev->flush_work);
-}
EXPORT_SYMBOL(md_flush_request);
static inline struct mddev *mddev_get(struct mddev *mddev)
@@ -510,11 +533,6 @@ static void mddev_delayed_delete(struct work_struct *ws);
static void mddev_put(struct mddev *mddev)
{
- struct bio_set bs, sync_bs;
-
- memset(&bs, 0, sizeof(bs));
- memset(&sync_bs, 0, sizeof(sync_bs));
-
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
return;
if (!mddev->raid_disks && list_empty(&mddev->disks) &&
@@ -522,30 +540,23 @@ static void mddev_put(struct mddev *mddev)
/* Array is not configured at all, and not held active,
* so destroy it */
list_del_init(&mddev->all_mddevs);
- bs = mddev->bio_set;
- sync_bs = mddev->sync_set;
- memset(&mddev->bio_set, 0, sizeof(mddev->bio_set));
- memset(&mddev->sync_set, 0, sizeof(mddev->sync_set));
- if (mddev->gendisk) {
- /* We did a probe so need to clean up. Call
- * queue_work inside the spinlock so that
- * flush_workqueue() after mddev_find will
- * succeed in waiting for the work to be done.
- */
- INIT_WORK(&mddev->del_work, mddev_delayed_delete);
- queue_work(md_misc_wq, &mddev->del_work);
- } else
- kfree(mddev);
+
+ /*
+ * Call queue_work inside the spinlock so that
+ * flush_workqueue() after mddev_find will succeed in waiting
+ * for the work to be done.
+ */
+ INIT_WORK(&mddev->del_work, mddev_delayed_delete);
+ queue_work(md_misc_wq, &mddev->del_work);
}
spin_unlock(&all_mddevs_lock);
- bioset_exit(&bs);
- bioset_exit(&sync_bs);
}
static void md_safemode_timeout(struct timer_list *t);
void mddev_init(struct mddev *mddev)
{
+ kobject_init(&mddev->kobj, &md_ktype);
mutex_init(&mddev->open_mutex);
mutex_init(&mddev->reconfig_mutex);
mutex_init(&mddev->bitmap_info.mutex);
@@ -556,7 +567,6 @@ void mddev_init(struct mddev *mddev)
atomic_set(&mddev->openers, 0);
atomic_set(&mddev->active_io, 0);
spin_lock_init(&mddev->lock);
- atomic_set(&mddev->flush_pending, 0);
init_waitqueue_head(&mddev->sb_wait);
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
@@ -2854,7 +2864,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
err = 0;
}
} else if (cmd_match(buf, "re-add")) {
- if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
+ if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
+ rdev->saved_raid_disk >= 0) {
/* clear_bit is performed _after_ all the devices
* have their local Faulty bit cleared. If any writes
* happen in the meantime in the local node, they
@@ -5215,6 +5226,8 @@ static void md_free(struct kobject *ko)
put_disk(mddev->gendisk);
percpu_ref_exit(&mddev->writes_pending);
+ bioset_exit(&mddev->bio_set);
+ bioset_exit(&mddev->sync_set);
kfree(mddev);
}
@@ -5348,8 +5361,7 @@ static int md_alloc(dev_t dev, char *name)
mutex_lock(&mddev->open_mutex);
add_disk(disk);
- error = kobject_init_and_add(&mddev->kobj, &md_ktype,
- &disk_to_dev(disk)->kobj, "%s", "md");
+ error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
if (error) {
/* This isn't possible, but as kobject_init_and_add is marked
* __must_check, we must do something with the result
@@ -5506,7 +5518,23 @@ int md_run(struct mddev *mddev)
if (!bioset_initialized(&mddev->sync_set)) {
err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (err)
+ return err;
+ }
+ if (mddev->flush_pool == NULL) {
+ mddev->flush_pool = mempool_create(NR_FLUSH_INFOS, flush_info_alloc,
+ flush_info_free, mddev);
+ if (!mddev->flush_pool) {
+ err = -ENOMEM;
goto abort;
+ }
+ }
+ if (mddev->flush_bio_pool == NULL) {
+ mddev->flush_bio_pool = mempool_create(NR_FLUSH_BIOS, flush_bio_alloc,
+ flush_bio_free, mddev);
+ if (!mddev->flush_bio_pool) {
+ err = -ENOMEM;
+ goto abort;
+ }
}
spin_lock(&pers_lock);
@@ -5519,8 +5547,7 @@ int md_run(struct mddev *mddev)
else
pr_warn("md: personality for level %s is not loaded!\n",
mddev->clevel);
- err = -EINVAL;
- goto abort;
+ return -EINVAL;
}
spin_unlock(&pers_lock);
if (mddev->level != pers->level) {
@@ -5533,8 +5560,7 @@ int md_run(struct mddev *mddev)
pers->start_reshape == NULL) {
/* This personality cannot handle reshaping... */
module_put(pers->owner);
- err = -EINVAL;
- goto abort;
+ return -EINVAL;
}
if (pers->sync_request) {
@@ -5603,7 +5629,7 @@ int md_run(struct mddev *mddev)
mddev->private = NULL;
module_put(pers->owner);
bitmap_destroy(mddev);
- goto abort;
+ return err;
}
if (mddev->queue) {
bool nonrot = true;
@@ -5667,8 +5693,14 @@ int md_run(struct mddev *mddev)
return 0;
abort:
- bioset_exit(&mddev->bio_set);
- bioset_exit(&mddev->sync_set);
+ if (mddev->flush_bio_pool) {
+ mempool_destroy(mddev->flush_bio_pool);
+ mddev->flush_bio_pool = NULL;
+ }
+ if (mddev->flush_pool) {
+ mempool_destroy(mddev->flush_pool);
+ mddev->flush_pool = NULL;
+ }
return err;
}
@@ -5881,6 +5913,14 @@ void md_stop(struct mddev *mddev)
* This is called from dm-raid
*/
__md_stop(mddev);
+ if (mddev->flush_bio_pool) {
+ mempool_destroy(mddev->flush_bio_pool);
+ mddev->flush_bio_pool = NULL;
+ }
+ if (mddev->flush_pool) {
+ mempool_destroy(mddev->flush_pool);
+ mddev->flush_pool = NULL;
+ }
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
}
@@ -6511,6 +6551,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
char b[BDEVNAME_SIZE];
struct md_rdev *rdev;
+ if (!mddev->pers)
+ return -ENODEV;
+
rdev = find_rdev(mddev, dev);
if (!rdev)
return -ENXIO;
@@ -8628,6 +8671,7 @@ static int remove_and_add_spares(struct mddev *mddev,
if (mddev->pers->hot_remove_disk(
mddev, rdev) == 0) {
sysfs_unlink_rdev(mddev, rdev);
+ rdev->saved_raid_disk = rdev->raid_disk;
rdev->raid_disk = -1;
removed++;
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 3507cab22cb6..2d148bdaba74 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -252,6 +252,19 @@ enum mddev_sb_flags {
MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
};
+#define NR_FLUSH_INFOS 8
+#define NR_FLUSH_BIOS 64
+struct flush_info {
+ struct bio *bio;
+ struct mddev *mddev;
+ struct work_struct flush_work;
+ atomic_t flush_pending;
+};
+struct flush_bio {
+ struct flush_info *fi;
+ struct md_rdev *rdev;
+};
+
struct mddev {
void *private;
struct md_personality *pers;
@@ -457,13 +470,8 @@ struct mddev {
* metadata and bitmap writes
*/
- /* Generic flush handling.
- * The last to finish preflush schedules a worker to submit
- * the rest of the request (without the REQ_PREFLUSH flag).
- */
- struct bio *flush_bio;
- atomic_t flush_pending;
- struct work_struct flush_work;
+ mempool_t *flush_pool;
+ mempool_t *flush_bio_pool;
struct work_struct event_work; /* used by dm to report failure event */
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index bad28520719b..0b344d087581 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2449,7 +2449,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
struct mddev *mddev = conf->mddev;
struct bio *bio;
struct md_rdev *rdev;
- sector_t bio_sector;
clear_bit(R1BIO_ReadError, &r1_bio->state);
/* we got a read error. Maybe the drive is bad. Maybe just
@@ -2462,7 +2461,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
*/
bio = r1_bio->bios[r1_bio->read_disk];
- bio_sector = conf->mirrors[r1_bio->read_disk].rdev->data_offset + r1_bio->sector;
bio_put(bio);
r1_bio->bios[r1_bio->read_disk] = NULL;
@@ -2473,6 +2471,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
fix_read_error(conf, r1_bio->read_disk,
r1_bio->sector, r1_bio->sectors);
unfreeze_array(conf);
+ } else if (mddev->ro == 0 && test_bit(FailFast, &rdev->flags)) {
+ md_error(mddev, rdev);
} else {
r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED;
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 37d4b236b81b..1147ae59e3b6 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -255,9 +255,11 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
for (j = conf->copies; j--; ) {
struct bio *bio = r10bio->devs[j].bio;
- rp = get_resync_pages(bio);
- resync_free_pages(rp);
- bio_put(bio);
+ if (bio) {
+ rp = get_resync_pages(bio);
+ resync_free_pages(rp);
+ bio_put(bio);
+ }
bio = r10bio->devs[j].repl_bio;
if (bio)
@@ -2362,7 +2364,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
{
int sect = 0; /* Offset from r10_bio->sector */
int sectors = r10_bio->sectors;
- struct md_rdev*rdev;
+ struct md_rdev *rdev;
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[r10_bio->read_slot].devnum;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a2e64989b01f..73489446bbcb 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1139,6 +1139,9 @@ again:
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_iter.bi_size = STRIPE_SIZE;
+ bi->bi_write_hint = sh->dev[i].write_hint;
+ if (!rrdev)
+ sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
/*
* If this is discard request, set bi_vcnt 0. We don't
* want to confuse SCSI because SCSI will replace payload
@@ -1190,6 +1193,8 @@ again:
rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
rbi->bi_io_vec[0].bv_offset = 0;
rbi->bi_iter.bi_size = STRIPE_SIZE;
+ rbi->bi_write_hint = sh->dev[i].write_hint;
+ sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
/*
* If this is discard request, set bi_vcnt 0. We don't
* want to confuse SCSI because SCSI will replace payload
@@ -3204,6 +3209,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
(unsigned long long)sh->sector);
spin_lock_irq(&sh->stripe_lock);
+ sh->dev[dd_idx].write_hint = bi->bi_write_hint;
/* Don't allow new IO added to stripes in batch list */
if (sh->batch_head)
goto overlap;
@@ -4614,15 +4620,15 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
sh->check_state = head_sh->check_state;
sh->reconstruct_state = head_sh->reconstruct_state;
+ spin_lock_irq(&sh->stripe_lock);
+ sh->batch_head = NULL;
+ spin_unlock_irq(&sh->stripe_lock);
for (i = 0; i < sh->disks; i++) {
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
do_wakeup = 1;
sh->dev[i].flags = head_sh->dev[i].flags &
(~((1 << R5_WriteError) | (1 << R5_Overlap)));
}
- spin_lock_irq(&sh->stripe_lock);
- sh->batch_head = NULL;
- spin_unlock_irq(&sh->stripe_lock);
if (handle_flags == 0 ||
sh->state & handle_flags)
set_bit(STRIPE_HANDLE, &sh->state);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 72e75ba6abf0..8474c224127b 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -257,6 +257,7 @@ struct stripe_head {
sector_t sector; /* sector of this page */
unsigned long flags;
u32 log_checksum;
+ unsigned short write_hint;
} dev[1]; /* allocated with extra space depending of RAID geometry */
};