diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-02 13:10:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-02 13:10:25 -0700 |
commit | 1081230b748de8f03f37f80c53dfa89feda9b8de (patch) | |
tree | 7238d60e01f0843bad8f03b5d84e4220fbba5e76 /drivers/md | |
parent | df910390e2db07a76c87f258475f6c96253cee6c (diff) | |
parent | 2ca495ac27d245513c11fed70591b1838250e240 (diff) | |
download | linux-1081230b748de8f03f37f80c53dfa89feda9b8de.tar.bz2 |
Merge branch 'for-4.3/core' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:
"This first core part of the block IO changes contains:
- Cleanup of the bio IO error signaling from Christoph. We used to
rely on the uptodate bit and passing around of an error, now we
store the error in the bio itself.
- Improvement of the above from myself, by shrinking the bio size
down again to fit in two cachelines on x86-64.
- Revert of the earlier removal of the max_hw_sectors cap, from Jeff
Moyer. Removing the cap caused performance regressions in various
tests. Reinstate the limit, and bump it to a more reasonable size
instead.
- Make /sys/block/<dev>/queue/discard_max_bytes writeable, by me.
Most devices have huge trim limits, which can cause nasty latencies
when deleting files. Enable the admin to configure the size down.
We will look into having a more sane default instead of UINT_MAX
sectors.
- Improvement of the SG gaps logic from Keith Busch.
- Enable the block core to handle arbitrarily sized bios, which
enables a nice simplification of bio_add_page() (which is an IO hot
path). From Kent.
- Improvements to the partition io stats accounting, making it
faster. From Ming Lei.
- Also from Ming Lei, a basic fix for a buffer overflow when reading
the sysfs 'pending' file in blk-mq, as well as a fix for a blk-mq
timeout race condition.
- Ming Lin has been carrying Kent's above-mentioned patches forward
for a while, and testing them. Ming also did a few fixes around
that.
- Sasha Levin found and fixed a use-after-free problem introduced by
the bio->bi_error changes from Christoph.
- Small blk cgroup cleanup from Viresh Kumar"
* 'for-4.3/core' of git://git.kernel.dk/linux-block: (26 commits)
blk: Fix bio_io_vec index when checking bvec gaps
block: Replace SG_GAPS with new queue limits mask
block: bump BLK_DEF_MAX_SECTORS to 2560
Revert "block: remove artifical max_hw_sectors cap"
blk-mq: fix race between timeout and freeing request
blk-mq: fix buffer overflow when reading sysfs file of 'pending'
Documentation: update notes in biovecs about arbitrarily sized bios
block: remove bio_get_nr_vecs()
fs: use helper bio_add_page() instead of open coding on bi_io_vec
block: kill merge_bvec_fn() completely
md/raid5: get rid of bio_fits_rdev()
md/raid5: split bio for chunk_aligned_read
block: remove split code in blkdev_issue_{discard,write_same}
btrfs: remove bio splitting and merge_bvec_fn() calls
bcache: remove driver private bio splitting code
block: simplify bio_add_page()
block: make generic_make_request handle arbitrarily sized bios
blk-cgroup: Drop unlikely before IS_ERR(_OR_NULL)
block: don't access bio->bi_error after bio_put()
block: shrink struct bio down to 2 cache lines again
...
Diffstat (limited to 'drivers/md')
39 files changed, 306 insertions, 1132 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 04f7bc28ef83..6b420a55c745 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -243,19 +243,6 @@ struct keybuf { DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR); }; -struct bio_split_pool { - struct bio_set *bio_split; - mempool_t *bio_split_hook; -}; - -struct bio_split_hook { - struct closure cl; - struct bio_split_pool *p; - struct bio *bio; - bio_end_io_t *bi_end_io; - void *bi_private; -}; - struct bcache_device { struct closure cl; @@ -288,8 +275,6 @@ struct bcache_device { int (*cache_miss)(struct btree *, struct search *, struct bio *, unsigned); int (*ioctl) (struct bcache_device *, fmode_t, unsigned, unsigned long); - - struct bio_split_pool bio_split_hook; }; struct io { @@ -454,8 +439,6 @@ struct cache { atomic_long_t meta_sectors_written; atomic_long_t btree_sectors_written; atomic_long_t sectors_written; - - struct bio_split_pool bio_split_hook; }; struct gc_stat { @@ -873,7 +856,6 @@ void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *); void bch_bbio_free(struct bio *, struct cache_set *); struct bio *bch_bbio_alloc(struct cache_set *); -void bch_generic_make_request(struct bio *, struct bio_split_pool *); void __bch_submit_bbio(struct bio *, struct cache_set *); void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 00cde40db572..83392f856dfd 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -278,7 +278,7 @@ err: goto out; } -static void btree_node_read_endio(struct bio *bio, int error) +static void btree_node_read_endio(struct bio *bio) { struct closure *cl = bio->bi_private; closure_put(cl); @@ -305,7 +305,7 @@ static void bch_btree_node_read(struct btree *b) bch_submit_bbio(bio, b->c, &b->key, 0); closure_sync(&cl); - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (bio->bi_error) 
set_btree_node_io_error(b); bch_bbio_free(bio, b->c); @@ -371,15 +371,15 @@ static void btree_node_write_done(struct closure *cl) __btree_node_write_done(cl); } -static void btree_node_write_endio(struct bio *bio, int error) +static void btree_node_write_endio(struct bio *bio) { struct closure *cl = bio->bi_private; struct btree *b = container_of(cl, struct btree, io); - if (error) + if (bio->bi_error) set_btree_node_io_error(b); - bch_bbio_count_io_errors(b->c, bio, error, "writing btree"); + bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree"); closure_put(cl); } diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 79a6d63e8ed3..782cc2c8a185 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -38,7 +38,7 @@ * they are running owned by the thread that is running them. Otherwise, suppose * you submit some bios and wish to have a function run when they all complete: * - * foo_endio(struct bio *bio, int error) + * foo_endio(struct bio *bio) * { * closure_put(cl); * } diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index bf6a9ca18403..86a0bb87124e 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -11,105 +11,6 @@ #include <linux/blkdev.h> -static unsigned bch_bio_max_sectors(struct bio *bio) -{ - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - struct bio_vec bv; - struct bvec_iter iter; - unsigned ret = 0, seg = 0; - - if (bio->bi_rw & REQ_DISCARD) - return min(bio_sectors(bio), q->limits.max_discard_sectors); - - bio_for_each_segment(bv, bio, iter) { - struct bvec_merge_data bvm = { - .bi_bdev = bio->bi_bdev, - .bi_sector = bio->bi_iter.bi_sector, - .bi_size = ret << 9, - .bi_rw = bio->bi_rw, - }; - - if (seg == min_t(unsigned, BIO_MAX_PAGES, - queue_max_segments(q))) - break; - - if (q->merge_bvec_fn && - q->merge_bvec_fn(q, &bvm, &bv) < (int) bv.bv_len) - break; - - seg++; - ret += bv.bv_len >> 9; - } - - ret = min(ret, queue_max_sectors(q)); - - WARN_ON(!ret); 
- ret = max_t(int, ret, bio_iovec(bio).bv_len >> 9); - - return ret; -} - -static void bch_bio_submit_split_done(struct closure *cl) -{ - struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); - - s->bio->bi_end_io = s->bi_end_io; - s->bio->bi_private = s->bi_private; - bio_endio(s->bio, 0); - - closure_debug_destroy(&s->cl); - mempool_free(s, s->p->bio_split_hook); -} - -static void bch_bio_submit_split_endio(struct bio *bio, int error) -{ - struct closure *cl = bio->bi_private; - struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); - - if (error) - clear_bit(BIO_UPTODATE, &s->bio->bi_flags); - - bio_put(bio); - closure_put(cl); -} - -void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) -{ - struct bio_split_hook *s; - struct bio *n; - - if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD)) - goto submit; - - if (bio_sectors(bio) <= bch_bio_max_sectors(bio)) - goto submit; - - s = mempool_alloc(p->bio_split_hook, GFP_NOIO); - closure_init(&s->cl, NULL); - - s->bio = bio; - s->p = p; - s->bi_end_io = bio->bi_end_io; - s->bi_private = bio->bi_private; - bio_get(bio); - - do { - n = bio_next_split(bio, bch_bio_max_sectors(bio), - GFP_NOIO, s->p->bio_split); - - n->bi_end_io = bch_bio_submit_split_endio; - n->bi_private = &s->cl; - - closure_get(&s->cl); - generic_make_request(n); - } while (n != bio); - - continue_at(&s->cl, bch_bio_submit_split_done, NULL); - return; -submit: - generic_make_request(bio); -} - /* Bios with headers */ void bch_bbio_free(struct bio *bio, struct cache_set *c) @@ -139,7 +40,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c) bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev; b->submit_time_us = local_clock_us(); - closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0)); + closure_bio_submit(bio, bio->bi_private); } void bch_submit_bbio(struct bio *bio, struct cache_set *c, diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 
418607a6ba33..29eba7219b01 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -24,7 +24,7 @@ * bit. */ -static void journal_read_endio(struct bio *bio, int error) +static void journal_read_endio(struct bio *bio) { struct closure *cl = bio->bi_private; closure_put(cl); @@ -61,7 +61,7 @@ reread: left = ca->sb.bucket_size - offset; bio->bi_private = &cl; bch_bio_map(bio, data); - closure_bio_submit(bio, &cl, ca); + closure_bio_submit(bio, &cl); closure_sync(&cl); /* This function could be simpler now since we no longer write @@ -401,7 +401,7 @@ retry: #define last_seq(j) ((j)->seq - fifo_used(&(j)->pin) + 1) -static void journal_discard_endio(struct bio *bio, int error) +static void journal_discard_endio(struct bio *bio) { struct journal_device *ja = container_of(bio, struct journal_device, discard_bio); @@ -547,11 +547,11 @@ void bch_journal_next(struct journal *j) pr_debug("journal_pin full (%zu)", fifo_used(&j->pin)); } -static void journal_write_endio(struct bio *bio, int error) +static void journal_write_endio(struct bio *bio) { struct journal_write *w = bio->bi_private; - cache_set_err_on(error, w->c, "journal io error"); + cache_set_err_on(bio->bi_error, w->c, "journal io error"); closure_put(&w->c->journal.io); } @@ -648,7 +648,7 @@ static void journal_write_unlocked(struct closure *cl) spin_unlock(&c->journal.lock); while ((bio = bio_list_pop(&list))) - closure_bio_submit(bio, cl, c->cache[0]); + closure_bio_submit(bio, cl); continue_at(cl, journal_write_done, NULL); } diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index cd7490311e51..b929fc944e9c 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -60,20 +60,20 @@ static void write_moving_finish(struct closure *cl) closure_return_with_destructor(cl, moving_io_destructor); } -static void read_moving_endio(struct bio *bio, int error) +static void read_moving_endio(struct bio *bio) { struct bbio *b = container_of(bio, struct bbio, 
bio); struct moving_io *io = container_of(bio->bi_private, struct moving_io, cl); - if (error) - io->op.error = error; + if (bio->bi_error) + io->op.error = bio->bi_error; else if (!KEY_DIRTY(&b->key) && ptr_stale(io->op.c, &b->key, 0)) { io->op.error = -EINTR; } - bch_bbio_endio(io->op.c, bio, error, "reading data to move"); + bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move"); } static void moving_init(struct moving_io *io) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index f292790997d7..8e9877b04637 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -173,22 +173,22 @@ static void bch_data_insert_error(struct closure *cl) bch_data_insert_keys(cl); } -static void bch_data_insert_endio(struct bio *bio, int error) +static void bch_data_insert_endio(struct bio *bio) { struct closure *cl = bio->bi_private; struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); - if (error) { + if (bio->bi_error) { /* TODO: We could try to recover from this. */ if (op->writeback) - op->error = error; + op->error = bio->bi_error; else if (!op->replace) set_closure_fn(cl, bch_data_insert_error, op->wq); else set_closure_fn(cl, NULL, NULL); } - bch_bbio_endio(op->c, bio, error, "writing data to cache"); + bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache"); } static void bch_data_insert_start(struct closure *cl) @@ -477,7 +477,7 @@ struct search { struct data_insert_op iop; }; -static void bch_cache_read_endio(struct bio *bio, int error) +static void bch_cache_read_endio(struct bio *bio) { struct bbio *b = container_of(bio, struct bbio, bio); struct closure *cl = bio->bi_private; @@ -490,15 +490,15 @@ static void bch_cache_read_endio(struct bio *bio, int error) * from the backing device. 
*/ - if (error) - s->iop.error = error; + if (bio->bi_error) + s->iop.error = bio->bi_error; else if (!KEY_DIRTY(&b->key) && ptr_stale(s->iop.c, &b->key, 0)) { atomic_long_inc(&s->iop.c->cache_read_races); s->iop.error = -EINTR; } - bch_bbio_endio(s->iop.c, bio, error, "reading from cache"); + bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache"); } /* @@ -591,13 +591,13 @@ static void cache_lookup(struct closure *cl) /* Common code for the make_request functions */ -static void request_endio(struct bio *bio, int error) +static void request_endio(struct bio *bio) { struct closure *cl = bio->bi_private; - if (error) { + if (bio->bi_error) { struct search *s = container_of(cl, struct search, cl); - s->iop.error = error; + s->iop.error = bio->bi_error; /* Only cache read errors are recoverable */ s->recoverable = false; } @@ -613,7 +613,8 @@ static void bio_complete(struct search *s) &s->d->disk->part0, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); - bio_endio(s->orig_bio, s->iop.error); + s->orig_bio->bi_error = s->iop.error; + bio_endio(s->orig_bio); s->orig_bio = NULL; } } @@ -718,7 +719,7 @@ static void cached_dev_read_error(struct closure *cl) /* XXX: invalidate cache */ - closure_bio_submit(bio, cl, s->d); + closure_bio_submit(bio, cl); } continue_at(cl, cached_dev_cache_miss_done, NULL); @@ -841,7 +842,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, s->cache_miss = miss; s->iop.bio = cache_bio; bio_get(cache_bio); - closure_bio_submit(cache_bio, &s->cl, s->d); + closure_bio_submit(cache_bio, &s->cl); return ret; out_put: @@ -849,7 +850,7 @@ out_put: out_submit: miss->bi_end_io = request_endio; miss->bi_private = &s->cl; - closure_bio_submit(miss, &s->cl, s->d); + closure_bio_submit(miss, &s->cl); return ret; } @@ -914,7 +915,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) if (!(bio->bi_rw & REQ_DISCARD) || blk_queue_discard(bdev_get_queue(dc->bdev))) - closure_bio_submit(bio, cl, 
s->d); + closure_bio_submit(bio, cl); } else if (s->iop.writeback) { bch_writeback_add(dc); s->iop.bio = bio; @@ -929,12 +930,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) flush->bi_end_io = request_endio; flush->bi_private = cl; - closure_bio_submit(flush, cl, s->d); + closure_bio_submit(flush, cl); } } else { s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split); - closure_bio_submit(bio, cl, s->d); + closure_bio_submit(bio, cl); } closure_call(&s->iop.cl, bch_data_insert, NULL, cl); @@ -950,7 +951,7 @@ static void cached_dev_nodata(struct closure *cl) bch_journal_meta(s->iop.c, cl); /* If it's a flush, we send the flush to the backing device too */ - closure_bio_submit(bio, cl, s->d); + closure_bio_submit(bio, cl); continue_at(cl, cached_dev_bio_complete, NULL); } @@ -992,9 +993,9 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio) } else { if ((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(dc->bdev))) - bio_endio(bio, 0); + bio_endio(bio); else - bch_generic_make_request(bio, &d->bio_split_hook); + generic_make_request(bio); } } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 94980bfca434..679a093a3bf6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -59,29 +59,6 @@ struct workqueue_struct *bcache_wq; #define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE) -static void bio_split_pool_free(struct bio_split_pool *p) -{ - if (p->bio_split_hook) - mempool_destroy(p->bio_split_hook); - - if (p->bio_split) - bioset_free(p->bio_split); -} - -static int bio_split_pool_init(struct bio_split_pool *p) -{ - p->bio_split = bioset_create(4, 0); - if (!p->bio_split) - return -ENOMEM; - - p->bio_split_hook = mempool_create_kmalloc_pool(4, - sizeof(struct bio_split_hook)); - if (!p->bio_split_hook) - return -ENOMEM; - - return 0; -} - /* Superblock */ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, @@ -221,7 +198,7 @@ err: 
return err; } -static void write_bdev_super_endio(struct bio *bio, int error) +static void write_bdev_super_endio(struct bio *bio) { struct cached_dev *dc = bio->bi_private; /* XXX: error checking */ @@ -290,11 +267,11 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) closure_return_with_destructor(cl, bch_write_bdev_super_unlock); } -static void write_super_endio(struct bio *bio, int error) +static void write_super_endio(struct bio *bio) { struct cache *ca = bio->bi_private; - bch_count_io_errors(ca, error, "writing superblock"); + bch_count_io_errors(ca, bio->bi_error, "writing superblock"); closure_put(&ca->set->sb_write); } @@ -339,12 +316,12 @@ void bcache_write_super(struct cache_set *c) /* UUID io */ -static void uuid_endio(struct bio *bio, int error) +static void uuid_endio(struct bio *bio) { struct closure *cl = bio->bi_private; struct cache_set *c = container_of(cl, struct cache_set, uuid_write); - cache_set_err_on(error, c, "accessing uuids"); + cache_set_err_on(bio->bi_error, c, "accessing uuids"); bch_bbio_free(bio, c); closure_put(cl); } @@ -512,11 +489,11 @@ static struct uuid_entry *uuid_find_empty(struct cache_set *c) * disk. 
*/ -static void prio_endio(struct bio *bio, int error) +static void prio_endio(struct bio *bio) { struct cache *ca = bio->bi_private; - cache_set_err_on(error, ca->set, "accessing priorities"); + cache_set_err_on(bio->bi_error, ca->set, "accessing priorities"); bch_bbio_free(bio, ca->set); closure_put(&ca->prio); } @@ -537,7 +514,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw) bio->bi_private = ca; bch_bio_map(bio, ca->disk_buckets); - closure_bio_submit(bio, &ca->prio, ca); + closure_bio_submit(bio, &ca->prio); closure_sync(cl); } @@ -757,7 +734,6 @@ static void bcache_device_free(struct bcache_device *d) put_disk(d->disk); } - bio_split_pool_free(&d->bio_split_hook); if (d->bio_split) bioset_free(d->bio_split); kvfree(d->full_dirty_stripes); @@ -804,7 +780,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, return minor; if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || - bio_split_pool_init(&d->bio_split_hook) || !(d->disk = alloc_disk(1))) { ida_simple_remove(&bcache_minor, minor); return -ENOMEM; @@ -830,7 +805,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, q->limits.max_sectors = UINT_MAX; q->limits.max_segment_size = UINT_MAX; q->limits.max_segments = BIO_MAX_PAGES; - q->limits.max_discard_sectors = UINT_MAX; + blk_queue_max_discard_sectors(q, UINT_MAX); q->limits.discard_granularity = 512; q->limits.io_min = block_size; q->limits.logical_block_size = block_size; @@ -1793,8 +1768,6 @@ void bch_cache_release(struct kobject *kobj) ca->set->cache[ca->sb.nr_this_dev] = NULL; } - bio_split_pool_free(&ca->bio_split_hook); - free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca))); kfree(ca->prio_buckets); vfree(ca->buckets); @@ -1839,8 +1812,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) ca->sb.nbuckets)) || !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * 2, GFP_KERNEL)) || - !(ca->disk_buckets = 
alloc_bucket_pages(GFP_KERNEL, ca)) || - bio_split_pool_init(&ca->bio_split_hook)) + !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca))) return -ENOMEM; ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 1d04c4859c70..cf2cbc211d83 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -4,6 +4,7 @@ #include <linux/blkdev.h> #include <linux/errno.h> +#include <linux/blkdev.h> #include <linux/kernel.h> #include <linux/llist.h> #include <linux/ratelimit.h> @@ -570,10 +571,10 @@ static inline sector_t bdev_sectors(struct block_device *bdev) return bdev->bd_inode->i_size >> 9; } -#define closure_bio_submit(bio, cl, dev) \ +#define closure_bio_submit(bio, cl) \ do { \ closure_get(cl); \ - bch_generic_make_request(bio, &(dev)->bio_split_hook); \ + generic_make_request(bio); \ } while (0) uint64_t bch_crc64_update(uint64_t, const void *, size_t); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index f1986bcd1bf0..b23f88d9f18c 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -166,12 +166,12 @@ static void write_dirty_finish(struct closure *cl) closure_return_with_destructor(cl, dirty_io_destructor); } -static void dirty_endio(struct bio *bio, int error) +static void dirty_endio(struct bio *bio) { struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; - if (error) + if (bio->bi_error) SET_KEY_DIRTY(&w->key, false); closure_put(&io->cl); @@ -188,27 +188,27 @@ static void write_dirty(struct closure *cl) io->bio.bi_bdev = io->dc->bdev; io->bio.bi_end_io = dirty_endio; - closure_bio_submit(&io->bio, cl, &io->dc->disk); + closure_bio_submit(&io->bio, cl); continue_at(cl, write_dirty_finish, system_wq); } -static void read_dirty_endio(struct bio *bio, int error) +static void read_dirty_endio(struct bio *bio) { struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; 
bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0), - error, "reading dirty data from cache"); + bio->bi_error, "reading dirty data from cache"); - dirty_endio(bio, error); + dirty_endio(bio); } static void read_dirty_submit(struct closure *cl) { struct dirty_io *io = container_of(cl, struct dirty_io, cl); - closure_bio_submit(&io->bio, cl, &io->dc->disk); + closure_bio_submit(&io->bio, cl); continue_at(cl, write_dirty, system_wq); } diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index cd6d1d21e057..03af174485d3 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -236,8 +236,10 @@ void dm_cell_error(struct dm_bio_prison *prison, bio_list_init(&bios); dm_cell_release(prison, cell, &bios); - while ((bio = bio_list_pop(&bios))) - bio_endio(bio, error); + while ((bio = bio_list_pop(&bios))) { + bio->bi_error = error; + bio_endio(bio); + } } EXPORT_SYMBOL_GPL(dm_cell_error); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 86dbbc737402..83cc52eaf56d 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -545,7 +545,8 @@ static void dmio_complete(unsigned long error, void *context) { struct dm_buffer *b = context; - b->bio.bi_end_io(&b->bio, error ? -EIO : 0); + b->bio.bi_error = error ? 
-EIO : 0; + b->bio.bi_end_io(&b->bio); } static void use_dmio(struct dm_buffer *b, int rw, sector_t block, @@ -575,13 +576,16 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block, b->bio.bi_end_io = end_io; r = dm_io(&io_req, 1, ®ion, NULL); - if (r) - end_io(&b->bio, r); + if (r) { + b->bio.bi_error = r; + end_io(&b->bio); + } } -static void inline_endio(struct bio *bio, int error) +static void inline_endio(struct bio *bio) { bio_end_io_t *end_fn = bio->bi_private; + int error = bio->bi_error; /* * Reset the bio to free any attached resources @@ -589,7 +593,8 @@ static void inline_endio(struct bio *bio, int error) */ bio_reset(bio); - end_fn(bio, error); + bio->bi_error = error; + end_fn(bio); } static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, @@ -661,13 +666,14 @@ static void submit_io(struct dm_buffer *b, int rw, sector_t block, * Set the error, clear B_WRITING bit and wake anyone who was waiting on * it. */ -static void write_endio(struct bio *bio, int error) +static void write_endio(struct bio *bio) { struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); - b->write_error = error; - if (unlikely(error)) { + b->write_error = bio->bi_error; + if (unlikely(bio->bi_error)) { struct dm_bufio_client *c = b->c; + int error = bio->bi_error; (void)cmpxchg(&c->async_write_error, 0, error); } @@ -1026,11 +1032,11 @@ found_buffer: * The endio routine for reading: set the error, clear the bit and wake up * anyone waiting on the buffer. 
*/ -static void read_endio(struct bio *bio, int error) +static void read_endio(struct bio *bio) { struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); - b->read_error = error; + b->read_error = bio->bi_error; BUG_ON(!test_bit(B_READING, &b->state)); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1fe93cfea7d3..7245071778db 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -919,14 +919,14 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio) wake_worker(cache); } -static void writethrough_endio(struct bio *bio, int err) +static void writethrough_endio(struct bio *bio) { struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); dm_unhook_bio(&pb->hook_info, bio); - if (err) { - bio_endio(bio, err); + if (bio->bi_error) { + bio_endio(bio); return; } @@ -1231,7 +1231,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) * The block was promoted via an overwrite, so it's dirty. 
*/ set_dirty(cache, mg->new_oblock, mg->cblock); - bio_endio(mg->new_ocell->holder, 0); + bio_endio(mg->new_ocell->holder); cell_defer(cache, mg->new_ocell, false); } free_io_migration(mg); @@ -1284,7 +1284,7 @@ static void issue_copy(struct dm_cache_migration *mg) } } -static void overwrite_endio(struct bio *bio, int err) +static void overwrite_endio(struct bio *bio) { struct dm_cache_migration *mg = bio->bi_private; struct cache *cache = mg->cache; @@ -1294,7 +1294,7 @@ static void overwrite_endio(struct bio *bio, int err) dm_unhook_bio(&pb->hook_info, bio); - if (err) + if (bio->bi_error) mg->err = true; mg->requeue_holder = false; @@ -1358,7 +1358,7 @@ static void issue_discard(struct dm_cache_migration *mg) b = to_dblock(from_dblock(b) + 1); } - bio_endio(bio, 0); + bio_endio(bio); cell_defer(mg->cache, mg->new_ocell, false); free_migration(mg); } @@ -1631,7 +1631,7 @@ static void process_discard_bio(struct cache *cache, struct prealloc *structs, calc_discard_block_range(cache, bio, &b, &e); if (b == e) { - bio_endio(bio, 0); + bio_endio(bio); return; } @@ -2217,8 +2217,10 @@ static void requeue_deferred_bios(struct cache *cache) bio_list_merge(&bios, &cache->deferred_bios); bio_list_init(&cache->deferred_bios); - while ((bio = bio_list_pop(&bios))) - bio_endio(bio, DM_ENDIO_REQUEUE); + while ((bio = bio_list_pop(&bios))) { + bio->bi_error = DM_ENDIO_REQUEUE; + bio_endio(bio); + } } static int more_work(struct cache *cache) @@ -3123,7 +3125,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio) * This is a duplicate writethrough io that is no * longer needed because the block has been demoted. */ - bio_endio(bio, 0); + bio_endio(bio); // FIXME: remap everything as a miss cell_defer(cache, cell, false); r = DM_MAPIO_SUBMITTED; @@ -3778,26 +3780,6 @@ static int cache_iterate_devices(struct dm_target *ti, return r; } -/* - * We assume I/O is going to the origin (which is the volume - * more likely to have restrictions e.g. by being striped). 
- * (Looking up the exact location of the data would be expensive - * and could always be out of date by the time the bio is submitted.) - */ -static int cache_bvec_merge(struct dm_target *ti, - struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct cache *cache = ti->private; - struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = cache->origin_dev->bdev; - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - static void set_discard_limits(struct cache *cache, struct queue_limits *limits) { /* @@ -3841,7 +3823,6 @@ static struct target_type cache_target = { .status = cache_status, .message = cache_message, .iterate_devices = cache_iterate_devices, - .merge = cache_bvec_merge, .io_hints = cache_io_hints, }; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0f48fed44a17..ba5c2105f4e6 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1076,7 +1076,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (io->ctx.req) crypt_free_req(cc, io->ctx.req, base_bio); - bio_endio(base_bio, error); + base_bio->bi_error = error; + bio_endio(base_bio); } /* @@ -1096,14 +1097,12 @@ static void crypt_dec_pending(struct dm_crypt_io *io) * The work is done per CPU global for all dm-crypt instances. * They should not depend on each other and do not block. 
*/ -static void crypt_endio(struct bio *clone, int error) +static void crypt_endio(struct bio *clone) { struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; unsigned rw = bio_data_dir(clone); - - if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error)) - error = -EIO; + int error; /* * free the processed pages @@ -1111,6 +1110,7 @@ static void crypt_endio(struct bio *clone, int error) if (rw == WRITE) crypt_free_buffer_pages(cc, clone); + error = clone->bi_error; bio_put(clone); if (rw == READ && !error) { @@ -2035,21 +2035,6 @@ error: return -EINVAL; } -static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct crypt_config *cc = ti->private; - struct request_queue *q = bdev_get_queue(cc->dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = cc->dev->bdev; - bvm->bi_sector = cc->start + dm_target_offset(ti, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - static int crypt_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { @@ -2070,7 +2055,6 @@ static struct target_type crypt_target = { .preresume = crypt_preresume, .resume = crypt_resume, .message = crypt_message, - .merge = crypt_merge, .iterate_devices = crypt_iterate_devices, }; diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index ad913cd4aded..0119ebfb3d49 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1673,20 +1673,6 @@ static int era_iterate_devices(struct dm_target *ti, return fn(ti, era->origin_dev, 0, get_dev_size(era->origin_dev), data); } -static int era_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct era *era = ti->private; - struct request_queue *q = bdev_get_queue(era->origin_dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = era->origin_dev->bdev; - - return min(max_size, 
q->merge_bvec_fn(q, bvm, biovec)); -} - static void era_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct era *era = ti->private; @@ -1717,7 +1703,6 @@ static struct target_type era_target = { .status = era_status, .message = era_message, .iterate_devices = era_iterate_devices, - .merge = era_merge, .io_hints = era_io_hints }; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index b257e46876d3..afab13bd683e 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -296,7 +296,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) * Drop writes? */ if (test_bit(DROP_WRITES, &fc->flags)) { - bio_endio(bio, 0); + bio_endio(bio); return DM_MAPIO_SUBMITTED; } @@ -387,21 +387,6 @@ static int flakey_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long ar return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg); } -static int flakey_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct flakey_c *fc = ti->private; - struct request_queue *q = bdev_get_queue(fc->dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = fc->dev->bdev; - bvm->bi_sector = flakey_map_sector(ti, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { struct flakey_c *fc = ti->private; @@ -419,7 +404,6 @@ static struct target_type flakey_target = { .end_io = flakey_end_io, .status = flakey_status, .ioctl = flakey_ioctl, - .merge = flakey_merge, .iterate_devices = flakey_iterate_devices, }; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 74adcd2c967e..6f8e83b2a6f8 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -134,12 +134,13 @@ static void dec_count(struct io *io, unsigned int region, int error) complete_io(io); } -static void endio(struct bio *bio, int error) +static void endio(struct bio *bio) { struct io 
*io; unsigned region; + int error; - if (error && bio_data_dir(bio) == READ) + if (bio->bi_error && bio_data_dir(bio) == READ) zero_fill_bio(bio); /* @@ -147,6 +148,7 @@ static void endio(struct bio *bio, int error) */ retrieve_io_and_region_from_bio(bio, &io, &region); + error = bio->bi_error; bio_put(bio); dec_count(io, region, error); @@ -314,7 +316,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME)) num_bvecs = 1; else - num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), + num_bvecs = min_t(int, BIO_MAX_PAGES, dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT))); bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 53e848c10939..7dd5fc8e3eea 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -130,21 +130,6 @@ static int linear_ioctl(struct dm_target *ti, unsigned int cmd, return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg); } -static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct linear_c *lc = ti->private; - struct request_queue *q = bdev_get_queue(lc->dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = lc->dev->bdev; - bvm->bi_sector = linear_map_sector(ti, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - static int linear_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { @@ -162,7 +147,6 @@ static struct target_type linear_target = { .map = linear_map, .status = linear_status, .ioctl = linear_ioctl, - .merge = linear_merge, .iterate_devices = linear_iterate_devices, }; diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index ad1b049ae2ab..316cc3fb741f 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -146,16 +146,16 @@ static void put_io_block(struct log_writes_c *lc) 
} } -static void log_end_io(struct bio *bio, int err) +static void log_end_io(struct bio *bio) { struct log_writes_c *lc = bio->bi_private; struct bio_vec *bvec; int i; - if (err) { + if (bio->bi_error) { unsigned long flags; - DMERR("Error writing log block, error=%d", err); + DMERR("Error writing log block, error=%d", bio->bi_error); spin_lock_irqsave(&lc->blocks_lock, flags); lc->logging_enabled = false; spin_unlock_irqrestore(&lc->blocks_lock, flags); @@ -205,7 +205,6 @@ static int write_metadata(struct log_writes_c *lc, void *entry, bio->bi_bdev = lc->logdev->bdev; bio->bi_end_io = log_end_io; bio->bi_private = lc; - set_bit(BIO_UPTODATE, &bio->bi_flags); page = alloc_page(GFP_KERNEL); if (!page) { @@ -270,7 +269,6 @@ static int log_one_block(struct log_writes_c *lc, bio->bi_bdev = lc->logdev->bdev; bio->bi_end_io = log_end_io; bio->bi_private = lc; - set_bit(BIO_UPTODATE, &bio->bi_flags); for (i = 0; i < block->vec_cnt; i++) { /* @@ -292,7 +290,6 @@ static int log_one_block(struct log_writes_c *lc, bio->bi_bdev = lc->logdev->bdev; bio->bi_end_io = log_end_io; bio->bi_private = lc; - set_bit(BIO_UPTODATE, &bio->bi_flags); ret = bio_add_page(bio, block->vecs[i].bv_page, block->vecs[i].bv_len, 0); @@ -606,7 +603,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) WARN_ON(flush_bio || fua_bio); if (lc->device_supports_discard) goto map_bio; - bio_endio(bio, 0); + bio_endio(bio); return DM_MAPIO_SUBMITTED; } @@ -728,21 +725,6 @@ static int log_writes_ioctl(struct dm_target *ti, unsigned int cmd, return r ? 
: __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg); } -static int log_writes_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct log_writes_c *lc = ti->private; - struct request_queue *q = bdev_get_queue(lc->dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = lc->dev->bdev; - bvm->bi_sector = dm_target_offset(ti, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - static int log_writes_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) @@ -796,7 +778,6 @@ static struct target_type log_writes_target = { .end_io = normal_end_io, .status = log_writes_status, .ioctl = log_writes_ioctl, - .merge = log_writes_merge, .message = log_writes_message, .iterate_devices = log_writes_iterate_devices, .io_hints = log_writes_io_hints, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 2daa67793511..97e165183e79 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1717,24 +1717,6 @@ static void raid_resume(struct dm_target *ti) mddev_resume(&rs->md); } -static int raid_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct raid_set *rs = ti->private; - struct md_personality *pers = rs->md.pers; - - if (pers && pers->mergeable_bvec) - return min(max_size, pers->mergeable_bvec(&rs->md, bvm, biovec)); - - /* - * In case we can't request the personality because - * the raid set is not running yet - * - * -> return safe minimum - */ - return rs->md.chunk_sectors; -} - static struct target_type raid_target = { .name = "raid", .version = {1, 7, 0}, @@ -1749,7 +1731,6 @@ static struct target_type raid_target = { .presuspend = raid_presuspend, .postsuspend = raid_postsuspend, .resume = raid_resume, - .merge = raid_merge, }; static int __init dm_raid_init(void) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d83696bf403b..e1eabfb2f52d 100644 --- 
a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -490,9 +490,11 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio) * If device is suspended, complete the bio. */ if (dm_noflush_suspending(ms->ti)) - bio_endio(bio, DM_ENDIO_REQUEUE); + bio->bi_error = DM_ENDIO_REQUEUE; else - bio_endio(bio, -EIO); + bio->bi_error = -EIO; + + bio_endio(bio); return; } @@ -515,7 +517,7 @@ static void read_callback(unsigned long error, void *context) bio_set_m(bio, NULL); if (likely(!error)) { - bio_endio(bio, 0); + bio_endio(bio); return; } @@ -531,7 +533,7 @@ static void read_callback(unsigned long error, void *context) DMERR_LIMIT("Read failure on mirror device %s. Failing I/O.", m->dev->name); - bio_endio(bio, -EIO); + bio_io_error(bio); } /* Asynchronous read. */ @@ -580,7 +582,7 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads) if (likely(m)) read_async_bio(m, bio); else - bio_endio(bio, -EIO); + bio_io_error(bio); } } @@ -598,7 +600,7 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads) static void write_callback(unsigned long error, void *context) { - unsigned i, ret = 0; + unsigned i; struct bio *bio = (struct bio *) context; struct mirror_set *ms; int should_wake = 0; @@ -614,7 +616,7 @@ static void write_callback(unsigned long error, void *context) * regions with the same code. */ if (likely(!error)) { - bio_endio(bio, ret); + bio_endio(bio); return; } @@ -623,7 +625,8 @@ static void write_callback(unsigned long error, void *context) * degrade the array. */ if (bio->bi_rw & REQ_DISCARD) { - bio_endio(bio, -EOPNOTSUPP); + bio->bi_error = -EOPNOTSUPP; + bio_endio(bio); return; } @@ -828,13 +831,12 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures) * be wrong if the failed leg returned after reboot and * got replicated back to the good legs.) 
*/ - if (unlikely(!get_valid_mirror(ms) || (keep_log(ms) && ms->log_failure))) - bio_endio(bio, -EIO); + bio_io_error(bio); else if (errors_handled(ms) && !keep_log(ms)) hold_bio(ms, bio); else - bio_endio(bio, 0); + bio_endio(bio); } } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 7c82d3ccce87..d10b6876018e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1490,7 +1490,7 @@ out: error_bios(snapshot_bios); } else { if (full_bio) - bio_endio(full_bio, 0); + bio_endio(full_bio); flush_bios(snapshot_bios); } @@ -1580,11 +1580,11 @@ static void start_copy(struct dm_snap_pending_exception *pe) dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } -static void full_bio_end_io(struct bio *bio, int error) +static void full_bio_end_io(struct bio *bio) { void *callback_data = bio->bi_private; - dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0); + dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0); } static void start_full_bio(struct dm_snap_pending_exception *pe, @@ -2330,20 +2330,6 @@ static void origin_status(struct dm_target *ti, status_type_t type, } } -static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct dm_origin *o = ti->private; - struct request_queue *q = bdev_get_queue(o->dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = o->dev->bdev; - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - static int origin_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { @@ -2362,7 +2348,6 @@ static struct target_type origin_target = { .resume = origin_resume, .postsuspend = origin_postsuspend, .status = origin_status, - .merge = origin_merge, .iterate_devices = origin_iterate_devices, }; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index a672a1502c14..484029db8cba 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -273,7 +273,7 @@ static 
int stripe_map_range(struct stripe_c *sc, struct bio *bio, return DM_MAPIO_REMAPPED; } else { /* The range doesn't map to the target stripe */ - bio_endio(bio, 0); + bio_endio(bio); return DM_MAPIO_SUBMITTED; } } @@ -412,26 +412,6 @@ static void stripe_io_hints(struct dm_target *ti, blk_limits_io_opt(limits, chunk_size * sc->stripes); } -static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct stripe_c *sc = ti->private; - sector_t bvm_sector = bvm->bi_sector; - uint32_t stripe; - struct request_queue *q; - - stripe_map_sector(sc, bvm_sector, &stripe, &bvm_sector); - - q = bdev_get_queue(sc->stripe[stripe].dev->bdev); - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = sc->stripe[stripe].dev->bdev; - bvm->bi_sector = sc->stripe[stripe].physical_start + bvm_sector; - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - static struct target_type stripe_target = { .name = "striped", .version = {1, 5, 1}, @@ -443,7 +423,6 @@ static struct target_type stripe_target = { .status = stripe_status, .iterate_devices = stripe_iterate_devices, .io_hints = stripe_io_hints, - .merge = stripe_merge, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 16ba55ad7089..e76ed003769e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -440,14 +440,6 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, q->limits.alignment_offset, (unsigned long long) start << SECTOR_SHIFT); - /* - * Check if merge fn is supported. - * If not we'll force DM to use PAGE_SIZE or - * smaller I/O, just to be safe. 
- */ - if (dm_queue_merge_is_compulsory(q) && !ti->type->merge) - blk_limits_max_hw_sectors(limits, - (unsigned int) (PAGE_SIZE >> 9)); return 0; } @@ -1388,14 +1380,6 @@ static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev, return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags); } -static int queue_supports_sg_gaps(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - struct request_queue *q = bdev_get_queue(dev->bdev); - - return q && !test_bit(QUEUE_FLAG_SG_GAPS, &q->queue_flags); -} - static bool dm_table_all_devices_attribute(struct dm_table *t, iterate_devices_callout_fn func) { @@ -1516,11 +1500,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); - if (dm_table_all_devices_attribute(t, queue_supports_sg_gaps)) - queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, q); - else - queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, q); - dm_table_set_integrity(t); /* diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index d2bbe8cc1e97..271a66249363 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -615,8 +615,10 @@ static void error_bio_list(struct bio_list *bios, int error) { struct bio *bio; - while ((bio = bio_list_pop(bios))) - bio_endio(bio, error); + while ((bio = bio_list_pop(bios))) { + bio->bi_error = error; + bio_endio(bio); + } } static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error) @@ -870,14 +872,14 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) complete_mapping_preparation(m); } -static void overwrite_endio(struct bio *bio, int err) +static void overwrite_endio(struct bio *bio) { struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct dm_thin_new_mapping *m = h->overwrite_mapping; bio->bi_end_io = m->saved_bi_end_io; - m->err = err; + m->err = bio->bi_error; 
complete_mapping_preparation(m); } @@ -1002,7 +1004,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ if (bio) { inc_remap_and_issue_cell(tc, m->cell, m->data_block); - bio_endio(bio, 0); + bio_endio(bio); } else { inc_all_io_entry(tc->pool, m->cell->holder); remap_and_issue(tc, m->cell->holder, m->data_block); @@ -1032,7 +1034,7 @@ static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) static void process_prepared_discard_success(struct dm_thin_new_mapping *m) { - bio_endio(m->bio, 0); + bio_endio(m->bio); free_discard_mapping(m); } @@ -1046,7 +1048,7 @@ static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m) metadata_operation_failed(tc->pool, "dm_thin_remove_range", r); bio_io_error(m->bio); } else - bio_endio(m->bio, 0); + bio_endio(m->bio); cell_defer_no_holder(tc, m->cell); mempool_free(m, tc->pool->mapping_pool); @@ -1117,7 +1119,8 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) * Even if r is set, there could be sub discards in flight that we * need to wait for. */ - bio_endio(m->bio, r); + m->bio->bi_error = r; + bio_endio(m->bio); cell_defer_no_holder(tc, m->cell); mempool_free(m, pool->mapping_pool); } @@ -1493,9 +1496,10 @@ static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) { int error = should_error_unserviceable_bio(pool); - if (error) - bio_endio(bio, error); - else + if (error) { + bio->bi_error = error; + bio_endio(bio); + } else retry_on_resume(bio); } @@ -1631,7 +1635,7 @@ static void process_discard_cell_passdown(struct thin_c *tc, struct dm_bio_priso * will prevent completion until the sub range discards have * completed. */ - bio_endio(bio, 0); + bio_endio(bio); } static void process_discard_bio(struct thin_c *tc, struct bio *bio) @@ -1645,7 +1649,7 @@ static void process_discard_bio(struct thin_c *tc, struct bio *bio) /* * The discard covers less than a block. 
*/ - bio_endio(bio, 0); + bio_endio(bio); return; } @@ -1790,7 +1794,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block if (bio_data_dir(bio) == READ) { zero_fill_bio(bio); cell_defer_no_holder(tc, cell); - bio_endio(bio, 0); + bio_endio(bio); return; } @@ -1855,7 +1859,7 @@ static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell) } else { zero_fill_bio(bio); - bio_endio(bio, 0); + bio_endio(bio); } } else provision_block(tc, bio, block, cell); @@ -1926,7 +1930,7 @@ static void __process_bio_read_only(struct thin_c *tc, struct bio *bio, } zero_fill_bio(bio); - bio_endio(bio, 0); + bio_endio(bio); break; default: @@ -1951,7 +1955,7 @@ static void process_cell_read_only(struct thin_c *tc, struct dm_bio_prison_cell static void process_bio_success(struct thin_c *tc, struct bio *bio) { - bio_endio(bio, 0); + bio_endio(bio); } static void process_bio_fail(struct thin_c *tc, struct bio *bio) @@ -2600,7 +2604,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) thin_hook_bio(tc, bio); if (tc->requeue_mode) { - bio_endio(bio, DM_ENDIO_REQUEUE); + bio->bi_error = DM_ENDIO_REQUEUE; + bio_endio(bio); return DM_MAPIO_SUBMITTED; } @@ -3875,20 +3880,6 @@ static int pool_iterate_devices(struct dm_target *ti, return fn(ti, pt->data_dev, 0, ti->len, data); } -static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct pool_c *pt = ti->private; - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = pt->data_dev->bdev; - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct pool_c *pt = ti->private; @@ -3965,7 +3956,6 @@ static struct target_type pool_target = { .resume = pool_resume, .message = pool_message, .status = pool_status, - .merge = pool_merge, .iterate_devices = 
pool_iterate_devices, .io_hints = pool_io_hints, }; @@ -4292,21 +4282,6 @@ err: DMEMIT("Error"); } -static int thin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct thin_c *tc = ti->private; - struct request_queue *q = bdev_get_queue(tc->pool_dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = tc->pool_dev->bdev; - bvm->bi_sector = dm_target_offset(ti, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - static int thin_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { @@ -4350,7 +4325,6 @@ static struct target_type thin_target = { .presuspend = thin_presuspend, .postsuspend = thin_postsuspend, .status = thin_status, - .merge = thin_merge, .iterate_devices = thin_iterate_devices, .io_hints = thin_io_hints, }; diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index bb9c6a00e4b0..c137dcb147b8 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -458,8 +458,9 @@ static void verity_finish_io(struct dm_verity_io *io, int error) bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; + bio->bi_error = error; - bio_endio(bio, error); + bio_endio(bio); } static void verity_work(struct work_struct *w) @@ -469,12 +470,12 @@ static void verity_work(struct work_struct *w) verity_finish_io(io, verity_verify_io(io)); } -static void verity_end_io(struct bio *bio, int error) +static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; - if (error) { - verity_finish_io(io, error); + if (bio->bi_error) { + verity_finish_io(io, bio->bi_error); return; } @@ -648,21 +649,6 @@ static int verity_ioctl(struct dm_target *ti, unsigned cmd, cmd, arg); } -static int verity_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct dm_verity *v = ti->private; - struct request_queue *q = bdev_get_queue(v->data_dev->bdev); - - if 
(!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = v->data_dev->bdev; - bvm->bi_sector = verity_map_sector(v, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - static int verity_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { @@ -995,7 +981,6 @@ static struct target_type verity_target = { .map = verity_map, .status = verity_status, .ioctl = verity_ioctl, - .merge = verity_merge, .iterate_devices = verity_iterate_devices, .io_hints = verity_io_hints, }; diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index b9a64bbce304..766bc93006e6 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -47,7 +47,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio) break; } - bio_endio(bio, 0); + bio_endio(bio); /* accepted bio, don't make new request */ return DM_MAPIO_SUBMITTED; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0d7ab20c58df..6ffc01bb85f2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -124,9 +124,8 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); #define DMF_FREEING 3 #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 -#define DMF_MERGE_IS_OPTIONAL 6 -#define DMF_DEFERRED_REMOVE 7 -#define DMF_SUSPENDED_INTERNALLY 8 +#define DMF_DEFERRED_REMOVE 6 +#define DMF_SUSPENDED_INTERNALLY 7 /* * A dummy definition to make RCU happy. 
@@ -944,7 +943,8 @@ static void dec_pending(struct dm_io *io, int error) } else { /* done with normal IO or empty flush */ trace_block_bio_complete(md->queue, bio, io_error); - bio_endio(bio, io_error); + bio->bi_error = io_error; + bio_endio(bio); } } } @@ -957,17 +957,15 @@ static void disable_write_same(struct mapped_device *md) limits->max_write_same_sectors = 0; } -static void clone_endio(struct bio *bio, int error) +static void clone_endio(struct bio *bio) { + int error = bio->bi_error; int r = error; struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); struct dm_io *io = tio->io; struct mapped_device *md = tio->io->md; dm_endio_fn endio = tio->ti->type->end_io; - if (!bio_flagged(bio, BIO_UPTODATE) && !error) - error = -EIO; - if (endio) { r = endio(tio->ti, bio, error); if (r < 0 || r == DM_ENDIO_REQUEUE) @@ -996,7 +994,7 @@ static void clone_endio(struct bio *bio, int error) /* * Partial completion handling for request-based dm */ -static void end_clone_bio(struct bio *clone, int error) +static void end_clone_bio(struct bio *clone) { struct dm_rq_clone_bio_info *info = container_of(clone, struct dm_rq_clone_bio_info, clone); @@ -1013,13 +1011,13 @@ static void end_clone_bio(struct bio *clone, int error) * the remainder. */ return; - else if (error) { + else if (bio->bi_error) { /* * Don't notice the error to the upper layer yet. * The error handling decision is made by the target driver, * when the request is completed. 
*/ - tio->error = error; + tio->error = bio->bi_error; return; } @@ -1722,60 +1720,6 @@ static void __split_and_process_bio(struct mapped_device *md, * CRUD END *---------------------------------------------------------------*/ -static int dm_merge_bvec(struct request_queue *q, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct mapped_device *md = q->queuedata; - struct dm_table *map = dm_get_live_table_fast(md); - struct dm_target *ti; - sector_t max_sectors; - int max_size = 0; - - if (unlikely(!map)) - goto out; - - ti = dm_table_find_target(map, bvm->bi_sector); - if (!dm_target_is_valid(ti)) - goto out; - - /* - * Find maximum amount of I/O that won't need splitting - */ - max_sectors = min(max_io_len(bvm->bi_sector, ti), - (sector_t) BIO_MAX_SECTORS); - max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; - if (max_size < 0) - max_size = 0; - - /* - * merge_bvec_fn() returns number of bytes - * it can accept at this offset - * max is precomputed maximal io size - */ - if (max_size && ti->type->merge) - max_size = ti->type->merge(ti, bvm, biovec, max_size); - /* - * If the target doesn't support merge method and some of the devices - * provided their merge_bvec method (we know this by looking at - * queue_max_hw_sectors), then we can't allow bios with multiple vector - * entries. So always set max_size to 0, and the code below allows - * just one page. - */ - else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) - max_size = 0; - -out: - dm_put_live_table_fast(md); - /* - * Always allow an entire first page - */ - if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) - max_size = biovec->bv_len; - - return max_size; -} - /* * The request function that just remaps the bio built up by * dm_merge_bvec. 
@@ -1789,6 +1733,8 @@ static void dm_make_request(struct request_queue *q, struct bio *bio) map = dm_get_live_table(md, &srcu_idx); + blk_queue_split(q, &bio, q->bio_split); + generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0); /* if we're suspended, we have to queue this io for later */ @@ -2496,59 +2442,6 @@ static void __set_size(struct mapped_device *md, sector_t size) } /* - * Return 1 if the queue has a compulsory merge_bvec_fn function. - * - * If this function returns 0, then the device is either a non-dm - * device without a merge_bvec_fn, or it is a dm device that is - * able to split any bios it receives that are too big. - */ -int dm_queue_merge_is_compulsory(struct request_queue *q) -{ - struct mapped_device *dev_md; - - if (!q->merge_bvec_fn) - return 0; - - if (q->make_request_fn == dm_make_request) { - dev_md = q->queuedata; - if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) - return 0; - } - - return 1; -} - -static int dm_device_merge_is_compulsory(struct dm_target *ti, - struct dm_dev *dev, sector_t start, - sector_t len, void *data) -{ - struct block_device *bdev = dev->bdev; - struct request_queue *q = bdev_get_queue(bdev); - - return dm_queue_merge_is_compulsory(q); -} - -/* - * Return 1 if it is acceptable to ignore merge_bvec_fn based - * on the properties of the underlying devices. - */ -static int dm_table_merge_is_optional(struct dm_table *table) -{ - unsigned i = 0; - struct dm_target *ti; - - while (i < dm_table_get_num_targets(table)) { - ti = dm_table_get_target(table, i++); - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL)) - return 0; - } - - return 1; -} - -/* * Returns old map, which caller must destroy. 
*/ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, @@ -2557,7 +2450,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, struct dm_table *old_map; struct request_queue *q = md->queue; sector_t size; - int merge_is_optional; size = dm_table_get_size(t); @@ -2583,17 +2475,11 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, __bind_mempools(md, t); - merge_is_optional = dm_table_merge_is_optional(t); - old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); rcu_assign_pointer(md->map, t); md->immutable_target_type = dm_table_get_immutable_target_type(t); dm_table_set_restrictions(t, q, limits); - if (merge_is_optional) - set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); - else - clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); if (old_map) dm_sync_table(md); @@ -2874,7 +2760,6 @@ int dm_setup_md_queue(struct mapped_device *md) case DM_TYPE_BIO_BASED: dm_init_old_md_queue(md); blk_queue_make_request(md->queue, dm_make_request); - blk_queue_merge_bvec(md->queue, dm_merge_bvec); break; } diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 4e984993d40a..7edcf97dfa5a 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -78,8 +78,6 @@ bool dm_table_mq_request_based(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); -int dm_queue_merge_is_compulsory(struct request_queue *q); - void dm_lock_md_type(struct mapped_device *md); void dm_unlock_md_type(struct mapped_device *md); void dm_set_md_type(struct mapped_device *md, unsigned type); diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 1277eb26b58a..4a8e15058e8b 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -70,7 +70,7 @@ #include <linux/seq_file.h> -static void faulty_fail(struct bio *bio, int error) +static void faulty_fail(struct bio *bio) { struct bio *b = bio->bi_private; @@ -181,7 +181,7 @@ static 
void make_request(struct mddev *mddev, struct bio *bio) /* special case - don't decrement, don't generic_make_request, * just fail immediately */ - bio_endio(bio, -EIO); + bio_io_error(bio); return; } diff --git a/drivers/md/linear.c b/drivers/md/linear.c index fa7d577f3d12..b7fe7e9fc777 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -52,48 +52,6 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) return conf->disks + lo; } -/** - * linear_mergeable_bvec -- tell bio layer if two requests can be merged - * @q: request queue - * @bvm: properties of new bio - * @biovec: the request that could be merged to it. - * - * Return amount of bytes we can take at this offset - */ -static int linear_mergeable_bvec(struct mddev *mddev, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct dev_info *dev0; - unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; - sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); - int maxbytes = biovec->bv_len; - struct request_queue *subq; - - dev0 = which_dev(mddev, sector); - maxsectors = dev0->end_sector - sector; - subq = bdev_get_queue(dev0->rdev->bdev); - if (subq->merge_bvec_fn) { - bvm->bi_bdev = dev0->rdev->bdev; - bvm->bi_sector -= dev0->end_sector - dev0->rdev->sectors; - maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm, - biovec)); - } - - if (maxsectors < bio_sectors) - maxsectors = 0; - else - maxsectors -= bio_sectors; - - if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0) - return maxbytes; - - if (maxsectors > (maxbytes >> 9)) - return maxbytes; - else - return maxsectors << 9; -} - static int linear_congested(struct mddev *mddev, int bits) { struct linear_conf *conf; @@ -297,7 +255,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) if (unlikely((split->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { /* Just ignore it */ - bio_endio(split, 0); + bio_endio(split); } else 
generic_make_request(split); } while (split != bio); @@ -338,7 +296,6 @@ static struct md_personality linear_personality = .size = linear_size, .quiesce = linear_quiesce, .congested = linear_congested, - .mergeable_bvec = linear_mergeable_bvec, }; static int __init linear_init (void) diff --git a/drivers/md/md.c b/drivers/md/md.c index e25f00f0138a..40332625713b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -257,13 +257,17 @@ static void md_make_request(struct request_queue *q, struct bio *bio) unsigned int sectors; int cpu; + blk_queue_split(q, &bio, q->bio_split); + if (mddev == NULL || mddev->pers == NULL || !mddev->ready) { bio_io_error(bio); return; } if (mddev->ro == 1 && unlikely(rw == WRITE)) { - bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS); + if (bio_sectors(bio) != 0) + bio->bi_error = -EROFS; + bio_endio(bio); return; } smp_rmb(); /* Ensure implications of 'active' are visible */ @@ -350,34 +354,11 @@ static int md_congested(void *data, int bits) return mddev_congested(mddev, bits); } -static int md_mergeable_bvec(struct request_queue *q, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct mddev *mddev = q->queuedata; - int ret; - rcu_read_lock(); - if (mddev->suspended) { - /* Must always allow one vec */ - if (bvm->bi_size == 0) - ret = biovec->bv_len; - else - ret = 0; - } else { - struct md_personality *pers = mddev->pers; - if (pers && pers->mergeable_bvec) - ret = pers->mergeable_bvec(mddev, bvm, biovec); - else - ret = biovec->bv_len; - } - rcu_read_unlock(); - return ret; -} /* * Generic flush handling for md */ -static void md_end_flush(struct bio *bio, int err) +static void md_end_flush(struct bio *bio) { struct md_rdev *rdev = bio->bi_private; struct mddev *mddev = rdev->mddev; @@ -433,7 +414,7 @@ static void md_submit_flush_data(struct work_struct *ws) if (bio->bi_iter.bi_size == 0) /* an empty barrier - all done */ - bio_endio(bio, 0); + bio_endio(bio); else { bio->bi_rw &= ~REQ_FLUSH; 
mddev->pers->make_request(mddev, bio); @@ -728,15 +709,13 @@ void md_rdev_clear(struct md_rdev *rdev) } EXPORT_SYMBOL_GPL(md_rdev_clear); -static void super_written(struct bio *bio, int error) +static void super_written(struct bio *bio) { struct md_rdev *rdev = bio->bi_private; struct mddev *mddev = rdev->mddev; - if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) { - printk("md: super_written gets error=%d, uptodate=%d\n", - error, test_bit(BIO_UPTODATE, &bio->bi_flags)); - WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags)); + if (bio->bi_error) { + printk("md: super_written gets error=%d\n", bio->bi_error); md_error(mddev, rdev); } @@ -791,7 +770,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, bio_add_page(bio, page, size, 0); submit_bio_wait(rw, bio); - ret = test_bit(BIO_UPTODATE, &bio->bi_flags); + ret = !bio->bi_error; bio_put(bio); return ret; } @@ -5186,7 +5165,6 @@ int md_run(struct mddev *mddev) if (mddev->queue) { mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_fn = md_congested; - blk_queue_merge_bvec(mddev->queue, md_mergeable_bvec); } if (pers->sync_request) { if (mddev->kobj.sd && @@ -5315,7 +5293,6 @@ static void md_clean(struct mddev *mddev) mddev->degraded = 0; mddev->safemode = 0; mddev->private = NULL; - mddev->merge_check_needed = 0; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; mddev->bitmap_info.default_space = 0; @@ -5514,7 +5491,6 @@ static int do_md_stop(struct mddev *mddev, int mode, __md_stop_writes(mddev); __md_stop(mddev); - mddev->queue->merge_bvec_fn = NULL; mddev->queue->backing_dev_info.congested_fn = NULL; /* tell userspace to handle 'inactive' */ diff --git a/drivers/md/md.h b/drivers/md/md.h index 7da6e9c3cb53..ab339571e57f 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -134,10 +134,6 @@ enum flag_bits { Bitmap_sync, /* ..actually, not quite In_sync. 
Need a * bitmap-based recovery to get fully in sync */ - Unmerged, /* device is being added to array and should - * be considerred for bvec_merge_fn but not - * yet for actual IO - */ WriteMostly, /* Avoid reading if at all possible */ AutoDetected, /* added by auto-detect */ Blocked, /* An error occurred but has not yet @@ -374,10 +370,6 @@ struct mddev { int degraded; /* whether md should consider * adding a spare */ - int merge_check_needed; /* at least one - * member device - * has a - * merge_bvec_fn */ atomic_t recovery_active; /* blocks scheduled, but not written */ wait_queue_head_t recovery_wait; @@ -532,10 +524,6 @@ struct md_personality /* congested implements bdi.congested_fn(). * Will not be called while array is 'suspended' */ int (*congested)(struct mddev *mddev, int bits); - /* mergeable_bvec is use to implement ->merge_bvec_fn */ - int (*mergeable_bvec)(struct mddev *mddev, - struct bvec_merge_data *bvm, - struct bio_vec *biovec); }; struct md_sysfs_entry { diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index ac3ede2bd00e..d222522c52e0 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -77,18 +77,18 @@ static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err) struct bio *bio = mp_bh->master_bio; struct mpconf *conf = mp_bh->mddev->private; - bio_endio(bio, err); + bio->bi_error = err; + bio_endio(bio); mempool_free(mp_bh, conf->pool); } -static void multipath_end_request(struct bio *bio, int error) +static void multipath_end_request(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct multipath_bh *mp_bh = bio->bi_private; struct mpconf *conf = mp_bh->mddev->private; struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev; - if (uptodate) + if (!bio->bi_error) multipath_end_bh_io(mp_bh, 0); else if (!(bio->bi_rw & REQ_RAHEAD)) { /* @@ -101,7 +101,7 @@ static void multipath_end_request(struct bio *bio, int error) (unsigned long long)bio->bi_iter.bi_sector); 
multipath_reschedule_retry(mp_bh); } else - multipath_end_bh_io(mp_bh, error); + multipath_end_bh_io(mp_bh, bio->bi_error); rdev_dec_pending(rdev, conf->mddev); } @@ -123,7 +123,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { - bio_endio(bio, -EIO); + bio_io_error(bio); mempool_free(mp_bh, conf->pool); return; } @@ -257,18 +257,6 @@ static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_segments to one, lying - * within a single page. - * (Note: it is very unlikely that a device with - * merge_bvec_fn will be involved in multipath.) - */ - if (q->merge_bvec_fn) { - blk_queue_max_segments(mddev->queue, 1); - blk_queue_segment_boundary(mddev->queue, - PAGE_CACHE_SIZE - 1); - } - spin_lock_irq(&conf->device_lock); mddev->degraded--; rdev->raid_disk = path; @@ -432,15 +420,6 @@ static int multipath_run (struct mddev *mddev) disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, not that we ever expect a device with - * a merge_bvec_fn to be involved in multipath */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { - blk_queue_max_segments(mddev->queue, 1); - blk_queue_segment_boundary(mddev->queue, - PAGE_CACHE_SIZE - 1); - } - if (!test_bit(Faulty, &rdev->flags)) working_disks++; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index efb654eb5399..59cda501a224 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -192,9 +192,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) disk_stack_limits(mddev->gendisk, rdev1->bdev, rdev1->data_offset << 9); - if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) - conf->has_merge_bvec = 1; - if (!smallest || (rdev1->sectors < 
smallest->sectors)) smallest = rdev1; cnt++; @@ -351,58 +348,6 @@ static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, + sector_div(sector, zone->nb_dev)]; } -/** - * raid0_mergeable_bvec -- tell bio layer if two requests can be merged - * @mddev: the md device - * @bvm: properties of new bio - * @biovec: the request that could be merged to it. - * - * Return amount of bytes we can accept at this offset - */ -static int raid0_mergeable_bvec(struct mddev *mddev, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct r0conf *conf = mddev->private; - sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); - sector_t sector_offset = sector; - int max; - unsigned int chunk_sectors = mddev->chunk_sectors; - unsigned int bio_sectors = bvm->bi_size >> 9; - struct strip_zone *zone; - struct md_rdev *rdev; - struct request_queue *subq; - - if (is_power_of_2(chunk_sectors)) - max = (chunk_sectors - ((sector & (chunk_sectors-1)) - + bio_sectors)) << 9; - else - max = (chunk_sectors - (sector_div(sector, chunk_sectors) - + bio_sectors)) << 9; - if (max < 0) - max = 0; /* bio_add cannot handle a negative return */ - if (max <= biovec->bv_len && bio_sectors == 0) - return biovec->bv_len; - if (max < biovec->bv_len) - /* too small already, no need to check further */ - return max; - if (!conf->has_merge_bvec) - return max; - - /* May need to check subordinate device */ - sector = sector_offset; - zone = find_zone(mddev->private, &sector_offset); - rdev = map_sector(mddev, zone, sector, &sector_offset); - subq = bdev_get_queue(rdev->bdev); - if (subq->merge_bvec_fn) { - bvm->bi_bdev = rdev->bdev; - bvm->bi_sector = sector_offset + zone->dev_start + - rdev->data_offset; - return min(max, subq->merge_bvec_fn(subq, bvm, biovec)); - } else - return max; -} - static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks) { sector_t array_sectors = 0; @@ -543,7 +488,7 @@ static void raid0_make_request(struct mddev *mddev, 
struct bio *bio) if (unlikely((split->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { /* Just ignore it */ - bio_endio(split, 0); + bio_endio(split); } else generic_make_request(split); } while (split != bio); @@ -727,7 +672,6 @@ static struct md_personality raid0_personality= .takeover = raid0_takeover, .quiesce = raid0_quiesce, .congested = raid0_congested, - .mergeable_bvec = raid0_mergeable_bvec, }; static int __init raid0_init (void) diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 05539d9c97f0..7127a623f5da 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -12,8 +12,6 @@ struct r0conf { struct md_rdev **devlist; /* lists of rdevs, pointed to * by strip_zone->dev */ int nr_strip_zones; - int has_merge_bvec; /* at least one member has - * a merge_bvec_fn */ }; #endif diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 967a4ed73929..f39d69f884de 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -255,9 +255,10 @@ static void call_bio_endio(struct r1bio *r1_bio) done = 1; if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) - clear_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = -EIO; + if (done) { - bio_endio(bio, 0); + bio_endio(bio); /* * Wake up any possible resync thread that waits for the device * to go idle. 
@@ -312,9 +313,9 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio) return mirror; } -static void raid1_end_read_request(struct bio *bio, int error) +static void raid1_end_read_request(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = !bio->bi_error; struct r1bio *r1_bio = bio->bi_private; int mirror; struct r1conf *conf = r1_bio->mddev->private; @@ -397,9 +398,8 @@ static void r1_bio_write_done(struct r1bio *r1_bio) } } -static void raid1_end_write_request(struct bio *bio, int error) +static void raid1_end_write_request(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r1bio *r1_bio = bio->bi_private; int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); struct r1conf *conf = r1_bio->mddev->private; @@ -410,7 +410,7 @@ static void raid1_end_write_request(struct bio *bio, int error) /* * 'one mirror IO has finished' event handler: */ - if (!uptodate) { + if (bio->bi_error) { set_bit(WriteErrorSeen, &conf->mirrors[mirror].rdev->flags); if (!test_and_set_bit(WantReplacement, @@ -557,7 +557,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect rdev = rcu_dereference(conf->mirrors[disk].rdev); if (r1_bio->bios[disk] == IO_BLOCKED || rdev == NULL - || test_bit(Unmerged, &rdev->flags) || test_bit(Faulty, &rdev->flags)) continue; if (!test_bit(In_sync, &rdev->flags) && @@ -708,38 +707,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect return best_disk; } -static int raid1_mergeable_bvec(struct mddev *mddev, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct r1conf *conf = mddev->private; - sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); - int max = biovec->bv_len; - - if (mddev->merge_check_needed) { - int disk; - rcu_read_lock(); - for (disk = 0; disk < conf->raid_disks * 2; disk++) { - struct md_rdev *rdev = rcu_dereference( - conf->mirrors[disk].rdev); - if (rdev && 
!test_bit(Faulty, &rdev->flags)) { - struct request_queue *q = - bdev_get_queue(rdev->bdev); - if (q->merge_bvec_fn) { - bvm->bi_sector = sector + - rdev->data_offset; - bvm->bi_bdev = rdev->bdev; - max = min(max, q->merge_bvec_fn( - q, bvm, biovec)); - } - } - } - rcu_read_unlock(); - } - return max; - -} - static int raid1_congested(struct mddev *mddev, int bits) { struct r1conf *conf = mddev->private; @@ -793,7 +760,7 @@ static void flush_pending_writes(struct r1conf *conf) if (unlikely((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ - bio_endio(bio, 0); + bio_endio(bio); else generic_make_request(bio); bio = next; @@ -1068,7 +1035,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) if (unlikely((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ - bio_endio(bio, 0); + bio_endio(bio); else generic_make_request(bio); bio = next; @@ -1158,7 +1125,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) * non-zero, then it is the number of not-completed requests. 
*/ bio->bi_phys_segments = 0; - clear_bit(BIO_SEG_VALID, &bio->bi_flags); + bio_clear_flag(bio, BIO_SEG_VALID); if (rw == READ) { /* @@ -1269,8 +1236,7 @@ read_again: break; } r1_bio->bios[i] = NULL; - if (!rdev || test_bit(Faulty, &rdev->flags) - || test_bit(Unmerged, &rdev->flags)) { + if (!rdev || test_bit(Faulty, &rdev->flags)) { if (i < conf->raid_disks) set_bit(R1BIO_Degraded, &r1_bio->state); continue; @@ -1617,7 +1583,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) struct raid1_info *p; int first = 0; int last = conf->raid_disks - 1; - struct request_queue *q = bdev_get_queue(rdev->bdev); if (mddev->recovery_disabled == conf->recovery_disabled) return -EBUSY; @@ -1625,11 +1590,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; - if (q->merge_bvec_fn) { - set_bit(Unmerged, &rdev->flags); - mddev->merge_check_needed = 1; - } - for (mirror = first; mirror <= last; mirror++) { p = conf->mirrors+mirror; if (!p->rdev) { @@ -1661,19 +1621,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) break; } } - if (err == 0 && test_bit(Unmerged, &rdev->flags)) { - /* Some requests might not have seen this new - * merge_bvec_fn. We must wait for them to complete - * before merging the device fully. - * First we make sure any code which has tested - * our function has submitted the request, then - * we wait for all outstanding requests to complete. 
- */ - synchronize_sched(); - freeze_array(conf, 0); - unfreeze_array(conf); - clear_bit(Unmerged, &rdev->flags); - } md_integrity_add_rdev(rdev, mddev); if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); @@ -1737,7 +1684,7 @@ abort: return err; } -static void end_sync_read(struct bio *bio, int error) +static void end_sync_read(struct bio *bio) { struct r1bio *r1_bio = bio->bi_private; @@ -1748,16 +1695,16 @@ static void end_sync_read(struct bio *bio, int error) * or re-read if the read failed. * We don't do much here, just schedule handling by raid1d */ - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (!bio->bi_error) set_bit(R1BIO_Uptodate, &r1_bio->state); if (atomic_dec_and_test(&r1_bio->remaining)) reschedule_retry(r1_bio); } -static void end_sync_write(struct bio *bio, int error) +static void end_sync_write(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = !bio->bi_error; struct r1bio *r1_bio = bio->bi_private; struct mddev *mddev = r1_bio->mddev; struct r1conf *conf = mddev->private; @@ -1944,7 +1891,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) idx ++; } set_bit(R1BIO_Uptodate, &r1_bio->state); - set_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = 0; return 1; } @@ -1968,15 +1915,14 @@ static void process_checks(struct r1bio *r1_bio) for (i = 0; i < conf->raid_disks * 2; i++) { int j; int size; - int uptodate; + int error; struct bio *b = r1_bio->bios[i]; if (b->bi_end_io != end_sync_read) continue; - /* fixup the bio for reuse, but preserve BIO_UPTODATE */ - uptodate = test_bit(BIO_UPTODATE, &b->bi_flags); + /* fixup the bio for reuse, but preserve errno */ + error = b->bi_error; bio_reset(b); - if (!uptodate) - clear_bit(BIO_UPTODATE, &b->bi_flags); + b->bi_error = error; b->bi_vcnt = vcnt; b->bi_iter.bi_size = r1_bio->sectors << 9; b->bi_iter.bi_sector = r1_bio->sector + @@ -1999,7 +1945,7 @@ static void 
process_checks(struct r1bio *r1_bio) } for (primary = 0; primary < conf->raid_disks * 2; primary++) if (r1_bio->bios[primary]->bi_end_io == end_sync_read && - test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { + !r1_bio->bios[primary]->bi_error) { r1_bio->bios[primary]->bi_end_io = NULL; rdev_dec_pending(conf->mirrors[primary].rdev, mddev); break; @@ -2009,14 +1955,14 @@ static void process_checks(struct r1bio *r1_bio) int j; struct bio *pbio = r1_bio->bios[primary]; struct bio *sbio = r1_bio->bios[i]; - int uptodate = test_bit(BIO_UPTODATE, &sbio->bi_flags); + int error = sbio->bi_error; if (sbio->bi_end_io != end_sync_read) continue; - /* Now we can 'fixup' the BIO_UPTODATE flag */ - set_bit(BIO_UPTODATE, &sbio->bi_flags); + /* Now we can 'fixup' the error value */ + sbio->bi_error = 0; - if (uptodate) { + if (!error) { for (j = vcnt; j-- ; ) { struct page *p, *s; p = pbio->bi_io_vec[j].bv_page; @@ -2031,7 +1977,7 @@ static void process_checks(struct r1bio *r1_bio) if (j >= 0) atomic64_add(r1_bio->sectors, &mddev->resync_mismatches); if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) - && uptodate)) { + && !error)) { /* No need to write to this device. 
*/ sbio->bi_end_io = NULL; rdev_dec_pending(conf->mirrors[i].rdev, mddev); @@ -2272,11 +2218,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio struct bio *bio = r1_bio->bios[m]; if (bio->bi_end_io == NULL) continue; - if (test_bit(BIO_UPTODATE, &bio->bi_flags) && + if (!bio->bi_error && test_bit(R1BIO_MadeGood, &r1_bio->state)) { rdev_clear_badblocks(rdev, r1_bio->sector, s, 0); } - if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && + if (bio->bi_error && test_bit(R1BIO_WriteError, &r1_bio->state)) { if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0)) md_error(conf->mddev, rdev); @@ -2715,7 +2661,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp /* remove last page from this bio */ bio->bi_vcnt--; bio->bi_iter.bi_size -= len; - __clear_bit(BIO_SEG_VALID, &bio->bi_flags); + bio_clear_flag(bio, BIO_SEG_VALID); } goto bio_full; } @@ -2810,8 +2756,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) goto abort; disk->rdev = rdev; q = bdev_get_queue(rdev->bdev); - if (q->merge_bvec_fn) - mddev->merge_check_needed = 1; disk->head_position = 0; disk->seq_start = MaxSector; @@ -3176,7 +3120,6 @@ static struct md_personality raid1_personality = .quiesce = raid1_quiesce, .takeover = raid1_takeover, .congested = raid1_congested, - .mergeable_bvec = raid1_mergeable_bvec, }; static int __init raid_init(void) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 38c58e19cfce..b0fce2ebf7ad 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -101,7 +101,7 @@ static int _enough(struct r10conf *conf, int previous, int ignore); static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *skipped); static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio); -static void end_reshape_write(struct bio *bio, int error); +static void end_reshape_write(struct bio *bio); static void end_reshape(struct r10conf *conf); static void * r10bio_pool_alloc(gfp_t 
gfp_flags, void *data) @@ -307,9 +307,9 @@ static void raid_end_bio_io(struct r10bio *r10_bio) } else done = 1; if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) - clear_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = -EIO; if (done) { - bio_endio(bio, 0); + bio_endio(bio); /* * Wake up any possible resync thread that waits for the device * to go idle. @@ -358,9 +358,9 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, return r10_bio->devs[slot].devnum; } -static void raid10_end_read_request(struct bio *bio, int error) +static void raid10_end_read_request(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = !bio->bi_error; struct r10bio *r10_bio = bio->bi_private; int slot, dev; struct md_rdev *rdev; @@ -438,9 +438,8 @@ static void one_write_done(struct r10bio *r10_bio) } } -static void raid10_end_write_request(struct bio *bio, int error) +static void raid10_end_write_request(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r10bio *r10_bio = bio->bi_private; int dev; int dec_rdev = 1; @@ -460,7 +459,7 @@ static void raid10_end_write_request(struct bio *bio, int error) /* * this branch is our 'one mirror IO has finished' event handler: */ - if (!uptodate) { + if (bio->bi_error) { if (repl) /* Never record new bad blocks to replacement, * just fail it. @@ -672,93 +671,6 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) return (vchunk << geo->chunk_shift) + offset; } -/** - * raid10_mergeable_bvec -- tell bio layer if a two requests can be merged - * @mddev: the md device - * @bvm: properties of new bio - * @biovec: the request that could be merged to it. - * - * Return amount of bytes we can accept at this offset - * This requires checking for end-of-chunk if near_copies != raid_disks, - * and for subordinate merge_bvec_fns if merge_check_needed. 
- */ -static int raid10_mergeable_bvec(struct mddev *mddev, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct r10conf *conf = mddev->private; - sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); - int max; - unsigned int chunk_sectors; - unsigned int bio_sectors = bvm->bi_size >> 9; - struct geom *geo = &conf->geo; - - chunk_sectors = (conf->geo.chunk_mask & conf->prev.chunk_mask) + 1; - if (conf->reshape_progress != MaxSector && - ((sector >= conf->reshape_progress) != - conf->mddev->reshape_backwards)) - geo = &conf->prev; - - if (geo->near_copies < geo->raid_disks) { - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) - + bio_sectors)) << 9; - if (max < 0) - /* bio_add cannot handle a negative return */ - max = 0; - if (max <= biovec->bv_len && bio_sectors == 0) - return biovec->bv_len; - } else - max = biovec->bv_len; - - if (mddev->merge_check_needed) { - struct { - struct r10bio r10_bio; - struct r10dev devs[conf->copies]; - } on_stack; - struct r10bio *r10_bio = &on_stack.r10_bio; - int s; - if (conf->reshape_progress != MaxSector) { - /* Cannot give any guidance during reshape */ - if (max <= biovec->bv_len && bio_sectors == 0) - return biovec->bv_len; - return 0; - } - r10_bio->sector = sector; - raid10_find_phys(conf, r10_bio); - rcu_read_lock(); - for (s = 0; s < conf->copies; s++) { - int disk = r10_bio->devs[s].devnum; - struct md_rdev *rdev = rcu_dereference( - conf->mirrors[disk].rdev); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct request_queue *q = - bdev_get_queue(rdev->bdev); - if (q->merge_bvec_fn) { - bvm->bi_sector = r10_bio->devs[s].addr - + rdev->data_offset; - bvm->bi_bdev = rdev->bdev; - max = min(max, q->merge_bvec_fn( - q, bvm, biovec)); - } - } - rdev = rcu_dereference(conf->mirrors[disk].replacement); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct request_queue *q = - bdev_get_queue(rdev->bdev); - if (q->merge_bvec_fn) { - bvm->bi_sector = r10_bio->devs[s].addr - + 
rdev->data_offset; - bvm->bi_bdev = rdev->bdev; - max = min(max, q->merge_bvec_fn( - q, bvm, biovec)); - } - } - } - rcu_read_unlock(); - } - return max; -} - /* * This routine returns the disk from which the requested read should * be done. There is a per-array 'next expected sequential IO' sector @@ -821,12 +733,10 @@ retry: disk = r10_bio->devs[slot].devnum; rdev = rcu_dereference(conf->mirrors[disk].replacement); if (rdev == NULL || test_bit(Faulty, &rdev->flags) || - test_bit(Unmerged, &rdev->flags) || r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) rdev = rcu_dereference(conf->mirrors[disk].rdev); if (rdev == NULL || - test_bit(Faulty, &rdev->flags) || - test_bit(Unmerged, &rdev->flags)) + test_bit(Faulty, &rdev->flags)) continue; if (!test_bit(In_sync, &rdev->flags) && r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) @@ -957,7 +867,7 @@ static void flush_pending_writes(struct r10conf *conf) if (unlikely((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ - bio_endio(bio, 0); + bio_endio(bio); else generic_make_request(bio); bio = next; @@ -1133,7 +1043,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) if (unlikely((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ - bio_endio(bio, 0); + bio_endio(bio); else generic_make_request(bio); bio = next; @@ -1217,7 +1127,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio) * non-zero, then it is the number of not-completed requests. 
*/ bio->bi_phys_segments = 0; - clear_bit(BIO_SEG_VALID, &bio->bi_flags); + bio_clear_flag(bio, BIO_SEG_VALID); if (rw == READ) { /* @@ -1326,11 +1236,9 @@ retry_write: blocked_rdev = rrdev; break; } - if (rdev && (test_bit(Faulty, &rdev->flags) - || test_bit(Unmerged, &rdev->flags))) + if (rdev && (test_bit(Faulty, &rdev->flags))) rdev = NULL; - if (rrdev && (test_bit(Faulty, &rrdev->flags) - || test_bit(Unmerged, &rrdev->flags))) + if (rrdev && (test_bit(Faulty, &rrdev->flags))) rrdev = NULL; r10_bio->devs[i].bio = NULL; @@ -1777,7 +1685,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) int mirror; int first = 0; int last = conf->geo.raid_disks - 1; - struct request_queue *q = bdev_get_queue(rdev->bdev); if (mddev->recovery_cp < MaxSector) /* only hot-add to in-sync arrays, as recovery is @@ -1790,11 +1697,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; - if (q->merge_bvec_fn) { - set_bit(Unmerged, &rdev->flags); - mddev->merge_check_needed = 1; - } - if (rdev->saved_raid_disk >= first && conf->mirrors[rdev->saved_raid_disk].rdev == NULL) mirror = rdev->saved_raid_disk; @@ -1833,19 +1735,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) rcu_assign_pointer(p->rdev, rdev); break; } - if (err == 0 && test_bit(Unmerged, &rdev->flags)) { - /* Some requests might not have seen this new - * merge_bvec_fn. We must wait for them to complete - * before merging the device fully. - * First we make sure any code which has tested - * our function has submitted the request, then - * we wait for all outstanding requests to complete. 
- */ - synchronize_sched(); - freeze_array(conf, 0); - unfreeze_array(conf); - clear_bit(Unmerged, &rdev->flags); - } md_integrity_add_rdev(rdev, mddev); if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); @@ -1916,7 +1805,7 @@ abort: return err; } -static void end_sync_read(struct bio *bio, int error) +static void end_sync_read(struct bio *bio) { struct r10bio *r10_bio = bio->bi_private; struct r10conf *conf = r10_bio->mddev->private; @@ -1928,7 +1817,7 @@ static void end_sync_read(struct bio *bio, int error) } else d = find_bio_disk(conf, r10_bio, bio, NULL, NULL); - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (!bio->bi_error) set_bit(R10BIO_Uptodate, &r10_bio->state); else /* The write handler will notice the lack of @@ -1977,9 +1866,8 @@ static void end_sync_request(struct r10bio *r10_bio) } } -static void end_sync_write(struct bio *bio, int error) +static void end_sync_write(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r10bio *r10_bio = bio->bi_private; struct mddev *mddev = r10_bio->mddev; struct r10conf *conf = mddev->private; @@ -1996,7 +1884,7 @@ static void end_sync_write(struct bio *bio, int error) else rdev = conf->mirrors[d].rdev; - if (!uptodate) { + if (bio->bi_error) { if (repl) md_error(mddev, rdev); else { @@ -2044,7 +1932,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) /* find the first device with a block */ for (i=0; i<conf->copies; i++) - if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) + if (!r10_bio->devs[i].bio->bi_error) break; if (i == conf->copies) @@ -2064,7 +1952,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) continue; if (i == first) continue; - if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) { + if (!r10_bio->devs[i].bio->bi_error) { /* We know that the bi_io_vec layout is the same for * both 'first' and 'i', so we just compare 
them. * All vec entries are PAGE_SIZE; @@ -2394,7 +2282,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev && - !test_bit(Unmerged, &rdev->flags) && test_bit(In_sync, &rdev->flags) && is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, &first_bad, &bad_sectors) == 0) { @@ -2448,7 +2335,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); if (!rdev || - test_bit(Unmerged, &rdev->flags) || !test_bit(In_sync, &rdev->flags)) continue; @@ -2706,8 +2592,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev = conf->mirrors[dev].rdev; if (r10_bio->devs[m].bio == NULL) continue; - if (test_bit(BIO_UPTODATE, - &r10_bio->devs[m].bio->bi_flags)) { + if (!r10_bio->devs[m].bio->bi_error) { rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, @@ -2722,8 +2607,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev = conf->mirrors[dev].replacement; if (r10_bio->devs[m].repl_bio == NULL) continue; - if (test_bit(BIO_UPTODATE, - &r10_bio->devs[m].repl_bio->bi_flags)) { + + if (!r10_bio->devs[m].repl_bio->bi_error) { rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, @@ -2748,8 +2633,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) r10_bio->devs[m].addr, r10_bio->sectors, 0); rdev_dec_pending(rdev, conf->mddev); - } else if (bio != NULL && - !test_bit(BIO_UPTODATE, &bio->bi_flags)) { + } else if (bio != NULL && bio->bi_error) { if (!narrow_write_error(r10_bio, m)) { md_error(conf->mddev, rdev); set_bit(R10BIO_Degraded, @@ -3263,7 +3147,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, bio = r10_bio->devs[i].bio; bio_reset(bio); - clear_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = -EIO; if (conf->mirrors[d].rdev == NULL 
|| test_bit(Faulty, &conf->mirrors[d].rdev->flags)) continue; @@ -3300,7 +3184,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* Need to set up for writing to the replacement */ bio = r10_bio->devs[i].repl_bio; bio_reset(bio); - clear_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = -EIO; sector = r10_bio->devs[i].addr; atomic_inc(&conf->mirrors[d].rdev->nr_pending); @@ -3357,7 +3241,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* remove last page from this bio */ bio2->bi_vcnt--; bio2->bi_iter.bi_size -= len; - __clear_bit(BIO_SEG_VALID, &bio2->bi_flags); + bio_clear_flag(bio2, BIO_SEG_VALID); } goto bio_full; } @@ -3377,7 +3261,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, if (bio->bi_end_io == end_sync_read) { md_sync_acct(bio->bi_bdev, nr_sectors); - set_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = 0; generic_make_request(bio); } } @@ -3643,8 +3527,6 @@ static int run(struct mddev *mddev) disk->rdev = rdev; } q = bdev_get_queue(rdev->bdev); - if (q->merge_bvec_fn) - mddev->merge_check_needed = 1; diff = (rdev->new_data_offset - rdev->data_offset); if (!mddev->reshape_backwards) diff = -diff; @@ -4382,7 +4264,7 @@ read_more: read_bio->bi_end_io = end_sync_read; read_bio->bi_rw = READ; read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); - __set_bit(BIO_UPTODATE, &read_bio->bi_flags); + read_bio->bi_error = 0; read_bio->bi_vcnt = 0; read_bio->bi_iter.bi_size = 0; r10_bio->master_bio = read_bio; @@ -4439,7 +4321,7 @@ read_more: /* Remove last page from this bio */ bio2->bi_vcnt--; bio2->bi_iter.bi_size -= len; - __clear_bit(BIO_SEG_VALID, &bio2->bi_flags); + bio_clear_flag(bio2, BIO_SEG_VALID); } goto bio_full; } @@ -4604,9 +4486,8 @@ static int handle_reshape_read_error(struct mddev *mddev, return 0; } -static void end_reshape_write(struct bio *bio, int error) +static void end_reshape_write(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 
struct r10bio *r10_bio = bio->bi_private; struct mddev *mddev = r10_bio->mddev; struct r10conf *conf = mddev->private; @@ -4623,7 +4504,7 @@ static void end_reshape_write(struct bio *bio, int error) rdev = conf->mirrors[d].rdev; } - if (!uptodate) { + if (bio->bi_error) { /* FIXME should record badblock */ md_error(mddev, rdev); } @@ -4700,7 +4581,6 @@ static struct md_personality raid10_personality = .start_reshape = raid10_start_reshape, .finish_reshape = raid10_finish_reshape, .congested = raid10_congested, - .mergeable_bvec = raid10_mergeable_bvec, }; static int __init raid_init(void) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f757023fc458..b29e89cb815b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -233,7 +233,7 @@ static void return_io(struct bio *return_bi) bi->bi_iter.bi_size = 0; trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), bi, 0); - bio_endio(bi, 0); + bio_endio(bi); bi = return_bi; } } @@ -887,9 +887,9 @@ static int use_new_offset(struct r5conf *conf, struct stripe_head *sh) } static void -raid5_end_read_request(struct bio *bi, int error); +raid5_end_read_request(struct bio *bi); static void -raid5_end_write_request(struct bio *bi, int error); +raid5_end_write_request(struct bio *bi); static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) { @@ -2282,12 +2282,11 @@ static void shrink_stripes(struct r5conf *conf) conf->slab_cache = NULL; } -static void raid5_end_read_request(struct bio * bi, int error) +static void raid5_end_read_request(struct bio * bi) { struct stripe_head *sh = bi->bi_private; struct r5conf *conf = sh->raid_conf; int disks = sh->disks, i; - int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); char b[BDEVNAME_SIZE]; struct md_rdev *rdev = NULL; sector_t s; @@ -2296,9 +2295,9 @@ static void raid5_end_read_request(struct bio * bi, int error) if (bi == &sh->dev[i].req) break; - pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n", + pr_debug("end_read_request %llu/%d, 
count: %d, error %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), - uptodate); + bi->bi_error); if (i == disks) { BUG(); return; @@ -2317,7 +2316,7 @@ static void raid5_end_read_request(struct bio * bi, int error) s = sh->sector + rdev->new_data_offset; else s = sh->sector + rdev->data_offset; - if (uptodate) { + if (!bi->bi_error) { set_bit(R5_UPTODATE, &sh->dev[i].flags); if (test_bit(R5_ReadError, &sh->dev[i].flags)) { /* Note that this cannot happen on a @@ -2405,13 +2404,12 @@ static void raid5_end_read_request(struct bio * bi, int error) release_stripe(sh); } -static void raid5_end_write_request(struct bio *bi, int error) +static void raid5_end_write_request(struct bio *bi) { struct stripe_head *sh = bi->bi_private; struct r5conf *conf = sh->raid_conf; int disks = sh->disks, i; struct md_rdev *uninitialized_var(rdev); - int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); sector_t first_bad; int bad_sectors; int replacement = 0; @@ -2434,23 +2432,23 @@ static void raid5_end_write_request(struct bio *bi, int error) break; } } - pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n", + pr_debug("end_write_request %llu/%d, count %d, error: %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), - uptodate); + bi->bi_error); if (i == disks) { BUG(); return; } if (replacement) { - if (!uptodate) + if (bi->bi_error) md_error(conf->mddev, rdev); else if (is_badblock(rdev, sh->sector, STRIPE_SECTORS, &first_bad, &bad_sectors)) set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); } else { - if (!uptodate) { + if (bi->bi_error) { set_bit(STRIPE_DEGRADED, &sh->state); set_bit(WriteErrorSeen, &rdev->flags); set_bit(R5_WriteError, &sh->dev[i].flags); @@ -2471,7 +2469,7 @@ static void raid5_end_write_request(struct bio *bi, int error) } rdev_dec_pending(rdev, conf->mddev); - if (sh->batch_head && !uptodate && !replacement) + if (sh->batch_head && bi->bi_error && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); if 
(!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) @@ -3112,7 +3110,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, while (bi && bi->bi_iter.bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); + + bi->bi_error = -EIO; if (!raid5_dec_bi_active_stripes(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; @@ -3136,7 +3135,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, while (bi && bi->bi_iter.bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); + + bi->bi_error = -EIO; if (!raid5_dec_bi_active_stripes(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; @@ -3161,7 +3161,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); + + bi->bi_error = -EIO; if (!raid5_dec_bi_active_stripes(bi)) { bi->bi_next = *return_bi; *return_bi = bi; @@ -4669,35 +4670,6 @@ static int raid5_congested(struct mddev *mddev, int bits) return 0; } -/* We want read requests to align with chunks where possible, - * but write requests don't need to. - */ -static int raid5_mergeable_bvec(struct mddev *mddev, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); - int max; - unsigned int chunk_sectors = mddev->chunk_sectors; - unsigned int bio_sectors = bvm->bi_size >> 9; - - /* - * always allow writes to be mergeable, read as well if array - * is degraded as we'll go through stripe cache anyway. 
- */ - if ((bvm->bi_rw & 1) == WRITE || mddev->degraded) - return biovec->bv_len; - - if (mddev->new_chunk_sectors < mddev->chunk_sectors) - chunk_sectors = mddev->new_chunk_sectors; - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; - if (max < 0) max = 0; - if (max <= biovec->bv_len && bio_sectors == 0) - return biovec->bv_len; - else - return max; -} - static int in_chunk_boundary(struct mddev *mddev, struct bio *bio) { sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev); @@ -4756,13 +4728,13 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf) * first). * If the read failed.. */ -static void raid5_align_endio(struct bio *bi, int error) +static void raid5_align_endio(struct bio *bi) { struct bio* raid_bi = bi->bi_private; struct mddev *mddev; struct r5conf *conf; - int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); struct md_rdev *rdev; + int error = bi->bi_error; bio_put(bi); @@ -4773,10 +4745,10 @@ static void raid5_align_endio(struct bio *bi, int error) rdev_dec_pending(rdev, conf->mddev); - if (!error && uptodate) { + if (!error) { trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev), raid_bi, 0); - bio_endio(raid_bi, 0); + bio_endio(raid_bi); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_quiescent); return; @@ -4787,26 +4759,7 @@ static void raid5_align_endio(struct bio *bi, int error) add_bio_to_retry(raid_bi, conf); } -static int bio_fits_rdev(struct bio *bi) -{ - struct request_queue *q = bdev_get_queue(bi->bi_bdev); - - if (bio_sectors(bi) > queue_max_sectors(q)) - return 0; - blk_recount_segments(q, bi); - if (bi->bi_phys_segments > queue_max_segments(q)) - return 0; - - if (q->merge_bvec_fn) - /* it's too hard to apply the merge_bvec_fn at this stage, - * just just give up - */ - return 0; - - return 1; -} - -static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) +static int raid5_read_one_chunk(struct mddev *mddev, struct bio 
*raid_bio) { struct r5conf *conf = mddev->private; int dd_idx; @@ -4815,7 +4768,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) sector_t end_sector; if (!in_chunk_boundary(mddev, raid_bio)) { - pr_debug("chunk_aligned_read : non aligned\n"); + pr_debug("%s: non aligned\n", __func__); return 0; } /* @@ -4857,13 +4810,11 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) rcu_read_unlock(); raid_bio->bi_next = (void*)rdev; align_bi->bi_bdev = rdev->bdev; - __clear_bit(BIO_SEG_VALID, &align_bi->bi_flags); + bio_clear_flag(align_bi, BIO_SEG_VALID); - if (!bio_fits_rdev(align_bi) || - is_badblock(rdev, align_bi->bi_iter.bi_sector, + if (is_badblock(rdev, align_bi->bi_iter.bi_sector, bio_sectors(align_bi), &first_bad, &bad_sectors)) { - /* too big in some way, or has a known bad block */ bio_put(align_bi); rdev_dec_pending(rdev, mddev); return 0; @@ -4892,6 +4843,31 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) } } +static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio) +{ + struct bio *split; + + do { + sector_t sector = raid_bio->bi_iter.bi_sector; + unsigned chunk_sects = mddev->chunk_sectors; + unsigned sectors = chunk_sects - (sector & (chunk_sects-1)); + + if (sectors < bio_sectors(raid_bio)) { + split = bio_split(raid_bio, sectors, GFP_NOIO, fs_bio_set); + bio_chain(split, raid_bio); + } else + split = raid_bio; + + if (!raid5_read_one_chunk(mddev, split)) { + if (split != raid_bio) + generic_make_request(raid_bio); + return split; + } + } while (split != raid_bio); + + return NULL; +} + /* __get_priority_stripe - get the next stripe to process * * Full stripe writes are allowed to pass preread active stripes up until @@ -5140,7 +5116,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) remaining = raid5_dec_bi_active_stripes(bi); if (remaining == 0) { md_write_end(mddev); - bio_endio(bi, 0); + bio_endio(bi); } } @@ -5169,9 
+5145,11 @@ static void make_request(struct mddev *mddev, struct bio * bi) * data on failed drives. */ if (rw == READ && mddev->degraded == 0 && - mddev->reshape_position == MaxSector && - chunk_aligned_read(mddev,bi)) - return; + mddev->reshape_position == MaxSector) { + bi = chunk_aligned_read(mddev, bi); + if (!bi) + return; + } if (unlikely(bi->bi_rw & REQ_DISCARD)) { make_discard_request(mddev, bi); @@ -5304,7 +5282,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) release_stripe_plug(mddev, sh); } else { /* cannot get stripe for read-ahead, just give-up */ - clear_bit(BIO_UPTODATE, &bi->bi_flags); + bi->bi_error = -EIO; break; } } @@ -5318,7 +5296,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), bi, 0); - bio_endio(bi, 0); + bio_endio(bi); } } @@ -5714,7 +5692,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) if (remaining == 0) { trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev), raid_bio, 0); - bio_endio(raid_bio, 0); + bio_endio(raid_bio); } if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_quiescent); @@ -7779,7 +7757,6 @@ static struct md_personality raid6_personality = .quiesce = raid5_quiesce, .takeover = raid6_takeover, .congested = raid5_congested, - .mergeable_bvec = raid5_mergeable_bvec, }; static struct md_personality raid5_personality = { @@ -7803,7 +7780,6 @@ static struct md_personality raid5_personality = .quiesce = raid5_quiesce, .takeover = raid5_takeover, .congested = raid5_congested, - .mergeable_bvec = raid5_mergeable_bvec, }; static struct md_personality raid4_personality = @@ -7828,7 +7804,6 @@ static struct md_personality raid4_personality = .quiesce = raid5_quiesce, .takeover = raid4_takeover, .congested = raid5_congested, - .mergeable_bvec = raid5_mergeable_bvec, }; static int __init raid5_init(void) |