From 6aa8f1a6ca41c49721d2de4e048d3da8d06411f9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 18:04:21 -0700 Subject: bcache: Fix a dumb race In the far-too-complicated closure code - closures can have destructors, for probably dubious reasons; they get run after the closure is no longer waiting on anything but before dropping the parent ref, intended just for freeing whatever memory the closure is embedded in. Trouble is, when remaining goes to 0 and we've got nothing more to run - we also have to unlock the closure, setting remaining to -1. If there's a destructor, that unlock isn't doing anything - nobody could be trying to lock it if we're about to free it - but if the unlock _is needed... that check for a destructor was racy. Argh. Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/closure.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index bd05a9a8c7cf..9aba2017f0d1 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -66,16 +66,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) } else { struct closure *parent = cl->parent; struct closure_waitlist *wait = closure_waitlist(cl); + closure_fn *destructor = cl->fn; closure_debug_destroy(cl); + smp_mb(); atomic_set(&cl->remaining, -1); if (wait) closure_wake_up(wait); - if (cl->fn) - cl->fn(cl); + if (destructor) + destructor(cl); if (parent) closure_put(parent); -- cgit v1.2.3 From d2a65ce2ac224413b291307201c5dafb03aa90d7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Jul 2013 09:05:46 +0300 Subject: bcache: check for allocation failures There is a missing NULL check after the kzalloc(). Signed-off-by: Dan Carpenter --- drivers/md/bcache/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index dd3f00a42729..12a2c2846f99 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -232,6 +232,8 @@ STORE(__cached_dev) bch_uuid_write(dc->disk.c); } env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); + if (!env) + return -ENOMEM; add_uevent_var(env, "DRIVER=bcache"); add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid), add_uevent_var(env, "CACHED_LABEL=%s", buf); -- cgit v1.2.3 From 54d12f2b4fd0f218590d1490b41a18d0e2328a9a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 18:44:40 -0700 Subject: bcache: Advertise that flushes are supported Whoops - bcache's flush/FUA was mostly correct, but flushes get filtered out unless we say we support them... Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/request.c | 8 +++++++- drivers/md/bcache/super.c | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index b6e74d3c8faf..786a1a4f74d8 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -488,6 +488,12 @@ static void bch_insert_data_loop(struct closure *cl) bch_queue_gc(op->c); } + /* + * Journal writes are marked REQ_FLUSH; if the original write was a + * flush, it'll wait on the journal write. + */ + bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA); + do { unsigned i; struct bkey *k; @@ -710,7 +716,7 @@ static struct search *search_alloc(struct bio *bio, struct bcache_device *d) s->task = current; s->orig_bio = bio; s->write = (bio->bi_rw & REQ_WRITE) != 0; - s->op.flush_journal = (bio->bi_rw & REQ_FLUSH) != 0; + s->op.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; s->op.skip = (bio->bi_rw & REQ_DISCARD) != 0; s->recoverable = 1; s->start_time = jiffies; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index cff2d182dfb0..728fdc673f31 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -806,6 +806,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags); set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); + blk_queue_flush(q, REQ_FLUSH|REQ_FUA); + return 0; } -- cgit v1.2.3 From c9502ea4424b31728703d113fc6b30bfead14633 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 21:25:02 -0700 Subject: bcache: Fix a sysfs splat on shutdown If we stopped a bcache device when we were already detaching (or something like that), bcache_device_unlink() would try to remove a symlink from sysfs that was already gone because the bcache dev kobject had already been removed from sysfs. So keep track of whether we've removed stuff from sysfs. Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/super.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 342ba86c6e4f..68f1ded81ae0 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -434,6 +434,7 @@ struct bcache_device { /* If nonzero, we're detaching/unregistering from cache set */ atomic_t detaching; + int flush_done; uint64_t nr_stripes; unsigned stripe_size_bits; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 728fdc673f31..7a1dcdb2536e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -706,7 +706,8 @@ static void bcache_device_detach(struct bcache_device *d) atomic_set(&d->detaching, 0); } - bcache_device_unlink(d); + if (!d->flush_done) + bcache_device_unlink(d); d->c->devices[d->id] = NULL; closure_put(&d->c->caching); @@ -1055,6 +1056,14 @@ static void cached_dev_flush(struct closure *cl) struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); struct bcache_device *d = &dc->disk; + mutex_lock(&bch_register_lock); + d->flush_done = 1; + + if (d->c) + bcache_device_unlink(d); + + mutex_unlock(&bch_register_lock); + bch_cache_accounting_destroy(&dc->accounting); kobject_del(&d->kobj); -- cgit v1.2.3 From 5caa52afc5abd1396e4af720469abb5843a71eb8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 21:03:25 -0700 Subject: bcache: Shutdown fix Stopping a cache set is supposed to make it stop attached backing devices, but somewhere along the way that code got lost. Fixing this mainly has the effect of fixing our reboot notifier. Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/super.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 7a1dcdb2536e..f6a62174e8f6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1354,18 +1354,22 @@ static void cache_set_flush(struct closure *cl) static void __cache_set_unregister(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, caching); - struct cached_dev *dc, *t; + struct cached_dev *dc; size_t i; mutex_lock(&bch_register_lock); - if (test_bit(CACHE_SET_UNREGISTERING, &c->flags)) - list_for_each_entry_safe(dc, t, &c->cached_devs, list) - bch_cached_dev_detach(dc); - for (i = 0; i < c->nr_uuids; i++) - if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i])) - bcache_device_stop(c->devices[i]); + if (c->devices[i]) { + if (!UUID_FLASH_ONLY(&c->uuids[i]) && + test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { + dc = container_of(c->devices[i], + struct cached_dev, disk); + bch_cached_dev_detach(dc); + } else { + bcache_device_stop(c->devices[i]); + } + } mutex_unlock(&bch_register_lock); -- cgit v1.2.3 From faa5673617656ee58369a3cfe4a312cfcdc59c81 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Jul 2013 22:42:14 -0700 Subject: bcache: Journal replay fix The journal replay code starts by finding something that looks like a valid journal entry, then it does a binary search over the unchecked region of the journal for the journal entries with the highest sequence numbers. Trouble is, the logic was wrong - journal_read_bucket() returns true if it found journal entries we need, but if the range of journal entries we're looking for loops around the end of the journal - in that case journal_read_bucket() could return true when it hadn't found the highest sequence number we'd seen yet, and in that case the binary search did the wrong thing. Whoops. Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/journal.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 4b250667bb7f..ba95ab84b2be 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -184,9 +184,14 @@ bsearch: pr_debug("starting binary search, l %u r %u", l, r); while (l + 1 < r) { + seq = list_entry(list->prev, struct journal_replay, + list)->j.seq; + m = (l + r) >> 1; + read_bucket(m); - if (read_bucket(m)) + if (seq != list_entry(list->prev, struct journal_replay, + list)->j.seq) l = m; else r = m; -- cgit v1.2.3 From 29ebf465b9050f241c4433a796a32e6c896a9dcd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Jul 2013 19:43:21 -0700 Subject: bcache: Fix GC_SECTORS_USED() calculation Part of the job of garbage collection is to add up however many sectors of live data it finds in each bucket, but that doesn't work very well if it doesn't reset GC_SECTORS_USED() when it starts. Whoops. This wouldn't have broken anything horribly, but allocation tries to preferentially reclaim buckets that are mostly empty and that's not gonna work with an incorrect GC_SECTORS_USED() value. Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/btree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 15b58239c683..ee372884c405 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1410,8 +1410,10 @@ static void btree_gc_start(struct cache_set *c) for_each_cache(ca, c, i) for_each_bucket(b, ca) { b->gc_gen = b->gen; - if (!atomic_read(&b->pin)) + if (!atomic_read(&b->pin)) { SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + SET_GC_SECTORS_USED(b, 0); + } } mutex_unlock(&c->bucket_lock); -- cgit v1.2.3 From 79826c35eb99cd3c0873b8396f45fa26c87fb0b0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 18:31:58 -0700 Subject: bcache: Allocation kthread fixes The alloc kthread should've been using try_to_freeze() - and also there was the potential for the alloc kthread to get woken up after it had shut down, which would have been bad. Signed-off-by: Kent Overstreet --- drivers/md/bcache/alloc.c | 18 ++++++++---------- drivers/md/bcache/bcache.h | 4 ---- drivers/md/bcache/super.c | 11 +++++++---- 3 files changed, 15 insertions(+), 18 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index b54b73b9b2b7..e45f5575fd4d 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -63,6 +63,7 @@ #include "bcache.h" #include "btree.h" +#include #include #include #include @@ -363,11 +364,10 @@ do { \ break; \ \ mutex_unlock(&(ca)->set->bucket_lock); \ - if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) { \ - closure_put(&ca->set->cl); \ + if (kthread_should_stop()) \ return 0; \ - } \ \ + try_to_freeze(); \ schedule(); \ mutex_lock(&(ca)->set->bucket_lock); \ } \ @@ -547,14 +547,12 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, int bch_cache_allocator_start(struct cache *ca) { - ca->alloc_thread = kthread_create(bch_allocator_thread, - ca, "bcache_allocator"); - if (IS_ERR(ca->alloc_thread)) - return PTR_ERR(ca->alloc_thread); - - closure_get(&ca->set->cl); - wake_up_process(ca->alloc_thread); + struct task_struct *k = kthread_run(bch_allocator_thread, + ca, "bcache_allocator"); + if (IS_ERR(k)) + return PTR_ERR(k); + ca->alloc_thread = k; return 0; } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 68f1ded81ae0..b39f6f0b45f2 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -664,13 +664,9 @@ struct gc_stat { * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. * flushing dirty data). - * - * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down - * the allocation thread. */ #define CACHE_SET_UNREGISTERING 0 #define CACHE_SET_STOPPING 1 -#define CACHE_SET_STOPPING_2 2 struct cache_set { struct closure cl; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f6a62174e8f6..547c4c57b052 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -1329,11 +1330,9 @@ static void cache_set_free(struct closure *cl) static void cache_set_flush(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, caching); + struct cache *ca; struct btree *b; - - /* Shut down allocator threads */ - set_bit(CACHE_SET_STOPPING_2, &c->flags); - wake_up_allocators(c); + unsigned i; bch_cache_accounting_destroy(&c->accounting); @@ -1348,6 +1347,10 @@ static void cache_set_flush(struct closure *cl) if (btree_node_dirty(b)) bch_btree_node_write(b, NULL); + for_each_cache(ca, c, i) + if (ca->alloc_thread) + kthread_stop(ca->alloc_thread); + closure_return(cl); } -- cgit v1.2.3