From 6aa8f1a6ca41c49721d2de4e048d3da8d06411f9 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Wed, 10 Jul 2013 18:04:21 -0700
Subject: bcache: Fix a dumb race

In the far-too-complicated closure code - closures can have destructors,
for probably dubious reasons; they get run after the closure is no
longer waiting on anything but before dropping the parent ref, intended
just for freeing whatever memory the closure is embedded in.

Trouble is, when remaining goes to 0 and we've got nothing more to run -
we also have to unlock the closure, setting remaining to -1. If there's
a destructor, that unlock isn't doing anything - nobody could be trying
to lock it if we're about to free it - but if the unlock _is needed...
that check for a destructor was racy. Argh.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/closure.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index bd05a9a8c7cf..9aba2017f0d1 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -66,16 +66,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
 		} else {
 			struct closure *parent = cl->parent;
 			struct closure_waitlist *wait = closure_waitlist(cl);
+			closure_fn *destructor = cl->fn;
 
 			closure_debug_destroy(cl);
 
+			smp_mb();
 			atomic_set(&cl->remaining, -1);
 
 			if (wait)
 				closure_wake_up(wait);
 
-			if (cl->fn)
-				cl->fn(cl);
+			if (destructor)
+				destructor(cl);
 
 			if (parent)
 				closure_put(parent);
-- 
cgit v1.2.3


From d2a65ce2ac224413b291307201c5dafb03aa90d7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 5 Jul 2013 09:05:46 +0300
Subject: bcache: check for allocation failures

There is a missing NULL check after the kzalloc().

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
---
 drivers/md/bcache/sysfs.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/md')

diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index dd3f00a42729..12a2c2846f99 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -232,6 +232,8 @@ STORE(__cached_dev)
 			bch_uuid_write(dc->disk.c);
 		}
 		env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
+		if (!env)
+			return -ENOMEM;
 		add_uevent_var(env, "DRIVER=bcache");
 		add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid),
 		add_uevent_var(env, "CACHED_LABEL=%s", buf);
-- 
cgit v1.2.3


From 54d12f2b4fd0f218590d1490b41a18d0e2328a9a Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Wed, 10 Jul 2013 18:44:40 -0700
Subject: bcache: Advertise that flushes are supported

Whoops - bcache's flush/FUA was mostly correct, but flushes get filtered
out unless we say we support them...

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/request.c | 8 +++++++-
 drivers/md/bcache/super.c   | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index b6e74d3c8faf..786a1a4f74d8 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -488,6 +488,12 @@ static void bch_insert_data_loop(struct closure *cl)
 		bch_queue_gc(op->c);
 	}
 
+	/*
+	 * Journal writes are marked REQ_FLUSH; if the original write was a
+	 * flush, it'll wait on the journal write.
+	 */
+	bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA);
+
 	do {
 		unsigned i;
 		struct bkey *k;
@@ -710,7 +716,7 @@ static struct search *search_alloc(struct bio *bio, struct bcache_device *d)
 	s->task			= current;
 	s->orig_bio		= bio;
 	s->write		= (bio->bi_rw & REQ_WRITE) != 0;
-	s->op.flush_journal	= (bio->bi_rw & REQ_FLUSH) != 0;
+	s->op.flush_journal	= (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
 	s->op.skip		= (bio->bi_rw & REQ_DISCARD) != 0;
 	s->recoverable		= 1;
 	s->start_time		= jiffies;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index cff2d182dfb0..728fdc673f31 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -806,6 +806,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
 	set_bit(QUEUE_FLAG_NONROT,	&d->disk->queue->queue_flags);
 	set_bit(QUEUE_FLAG_DISCARD,	&d->disk->queue->queue_flags);
 
+	blk_queue_flush(q, REQ_FLUSH|REQ_FUA);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From c9502ea4424b31728703d113fc6b30bfead14633 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Wed, 10 Jul 2013 21:25:02 -0700
Subject: bcache: Fix a sysfs splat on shutdown

If we stopped a bcache device when we were already detaching (or
something like that), bcache_device_unlink() would try to remove a
symlink from sysfs that was already gone because the bcache dev kobject
had already been removed from sysfs.

So keep track of whether we've removed stuff from sysfs.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/bcache.h |  1 +
 drivers/md/bcache/super.c  | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 342ba86c6e4f..68f1ded81ae0 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -434,6 +434,7 @@ struct bcache_device {
 
 	/* If nonzero, we're detaching/unregistering from cache set */
 	atomic_t		detaching;
+	int			flush_done;
 
 	uint64_t		nr_stripes;
 	unsigned		stripe_size_bits;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 728fdc673f31..7a1dcdb2536e 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -706,7 +706,8 @@ static void bcache_device_detach(struct bcache_device *d)
 		atomic_set(&d->detaching, 0);
 	}
 
-	bcache_device_unlink(d);
+	if (!d->flush_done)
+		bcache_device_unlink(d);
 
 	d->c->devices[d->id] = NULL;
 	closure_put(&d->c->caching);
@@ -1055,6 +1056,14 @@ static void cached_dev_flush(struct closure *cl)
 	struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
 	struct bcache_device *d = &dc->disk;
 
+	mutex_lock(&bch_register_lock);
+	d->flush_done = 1;
+
+	if (d->c)
+		bcache_device_unlink(d);
+
+	mutex_unlock(&bch_register_lock);
+
 	bch_cache_accounting_destroy(&dc->accounting);
 	kobject_del(&d->kobj);
 
-- 
cgit v1.2.3


From 5caa52afc5abd1396e4af720469abb5843a71eb8 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Wed, 10 Jul 2013 21:03:25 -0700
Subject: bcache: Shutdown fix

Stopping a cache set is supposed to make it stop attached backing
devices, but somewhere along the way that code got lost. Fixing this
mainly has the effect of fixing our reboot notifier.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/super.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 7a1dcdb2536e..f6a62174e8f6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1354,18 +1354,22 @@ static void cache_set_flush(struct closure *cl)
 static void __cache_set_unregister(struct closure *cl)
 {
 	struct cache_set *c = container_of(cl, struct cache_set, caching);
-	struct cached_dev *dc, *t;
+	struct cached_dev *dc;
 	size_t i;
 
 	mutex_lock(&bch_register_lock);
 
-	if (test_bit(CACHE_SET_UNREGISTERING, &c->flags))
-		list_for_each_entry_safe(dc, t, &c->cached_devs, list)
-			bch_cached_dev_detach(dc);
-
 	for (i = 0; i < c->nr_uuids; i++)
-		if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i]))
-			bcache_device_stop(c->devices[i]);
+		if (c->devices[i]) {
+			if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
+			    test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
+				dc = container_of(c->devices[i],
+						  struct cached_dev, disk);
+				bch_cached_dev_detach(dc);
+			} else {
+				bcache_device_stop(c->devices[i]);
+			}
+		}
 
 	mutex_unlock(&bch_register_lock);
 
-- 
cgit v1.2.3


From faa5673617656ee58369a3cfe4a312cfcdc59c81 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Thu, 11 Jul 2013 22:42:14 -0700
Subject: bcache: Journal replay fix

The journal replay code starts by finding something that looks like a
valid journal entry, then it does a binary search over the unchecked
region of the journal for the journal entries with the highest sequence
numbers.

Trouble is, the logic was wrong - journal_read_bucket() returns true if
it found journal entries we need, but if the range of journal entries
we're looking for loops around the end of the journal - in that case
journal_read_bucket() could return true when it hadn't found the highest
sequence number we'd seen yet, and in that case the binary search did
the wrong thing. Whoops.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/journal.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 4b250667bb7f..ba95ab84b2be 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -184,9 +184,14 @@ bsearch:
 		pr_debug("starting binary search, l %u r %u", l, r);
 
 		while (l + 1 < r) {
+			seq = list_entry(list->prev, struct journal_replay,
+					 list)->j.seq;
+
 			m = (l + r) >> 1;
+			read_bucket(m);
 
-			if (read_bucket(m))
+			if (seq != list_entry(list->prev, struct journal_replay,
+					      list)->j.seq)
 				l = m;
 			else
 				r = m;
-- 
cgit v1.2.3


From 29ebf465b9050f241c4433a796a32e6c896a9dcd Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Thu, 11 Jul 2013 19:43:21 -0700
Subject: bcache: Fix GC_SECTORS_USED() calculation

Part of the job of garbage collection is to add up however many sectors
of live data it finds in each bucket, but that doesn't work very well if
it doesn't reset GC_SECTORS_USED() when it starts. Whoops.

This wouldn't have broken anything horribly, but allocation tries to
preferentially reclaim buckets that are mostly empty and that's not
gonna work with an incorrect GC_SECTORS_USED() value.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/btree.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers/md')

diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 15b58239c683..ee372884c405 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1410,8 +1410,10 @@ static void btree_gc_start(struct cache_set *c)
 	for_each_cache(ca, c, i)
 		for_each_bucket(b, ca) {
 			b->gc_gen = b->gen;
-			if (!atomic_read(&b->pin))
+			if (!atomic_read(&b->pin)) {
 				SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
+				SET_GC_SECTORS_USED(b, 0);
+			}
 		}
 
 	mutex_unlock(&c->bucket_lock);
-- 
cgit v1.2.3


From 79826c35eb99cd3c0873b8396f45fa26c87fb0b0 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Wed, 10 Jul 2013 18:31:58 -0700
Subject: bcache: Allocation kthread fixes

The alloc kthread should've been using try_to_freeze() - and also there
was the potential for the alloc kthread to get woken up after it had
shut down, which would have been bad.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
 drivers/md/bcache/alloc.c  | 18 ++++++++----------
 drivers/md/bcache/bcache.h |  4 ----
 drivers/md/bcache/super.c  | 11 +++++++----
 3 files changed, 15 insertions(+), 18 deletions(-)

(limited to 'drivers/md')

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index b54b73b9b2b7..e45f5575fd4d 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -63,6 +63,7 @@
 #include "bcache.h"
 #include "btree.h"
 
+#include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/random.h>
 #include <trace/events/bcache.h>
@@ -363,11 +364,10 @@ do {									\
 			break;						\
 									\
 		mutex_unlock(&(ca)->set->bucket_lock);			\
-		if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) {	\
-			closure_put(&ca->set->cl);			\
+		if (kthread_should_stop())				\
 			return 0;					\
-		}							\
 									\
+		try_to_freeze();					\
 		schedule();						\
 		mutex_lock(&(ca)->set->bucket_lock);			\
 	}								\
@@ -547,14 +547,12 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
 
 int bch_cache_allocator_start(struct cache *ca)
 {
-	ca->alloc_thread = kthread_create(bch_allocator_thread,
-					  ca, "bcache_allocator");
-	if (IS_ERR(ca->alloc_thread))
-		return PTR_ERR(ca->alloc_thread);
-
-	closure_get(&ca->set->cl);
-	wake_up_process(ca->alloc_thread);
+	struct task_struct *k = kthread_run(bch_allocator_thread,
+					    ca, "bcache_allocator");
+	if (IS_ERR(k))
+		return PTR_ERR(k);
 
+	ca->alloc_thread = k;
 	return 0;
 }
 
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 68f1ded81ae0..b39f6f0b45f2 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -664,13 +664,9 @@ struct gc_stat {
  * CACHE_SET_STOPPING always gets set first when we're closing down a cache set;
  * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e.
  * flushing dirty data).
- *
- * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down
- * the allocation thread.
  */
 #define CACHE_SET_UNREGISTERING		0
 #define	CACHE_SET_STOPPING		1
-#define	CACHE_SET_STOPPING_2		2
 
 struct cache_set {
 	struct closure		cl;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f6a62174e8f6..547c4c57b052 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -16,6 +16,7 @@
 #include <linux/buffer_head.h>
 #include <linux/debugfs.h>
 #include <linux/genhd.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/reboot.h>
@@ -1329,11 +1330,9 @@ static void cache_set_free(struct closure *cl)
 static void cache_set_flush(struct closure *cl)
 {
 	struct cache_set *c = container_of(cl, struct cache_set, caching);
+	struct cache *ca;
 	struct btree *b;
-
-	/* Shut down allocator threads */
-	set_bit(CACHE_SET_STOPPING_2, &c->flags);
-	wake_up_allocators(c);
+	unsigned i;
 
 	bch_cache_accounting_destroy(&c->accounting);
 
@@ -1348,6 +1347,10 @@ static void cache_set_flush(struct closure *cl)
 		if (btree_node_dirty(b))
 			bch_btree_node_write(b, NULL);
 
+	for_each_cache(ca, c, i)
+		if (ca->alloc_thread)
+			kthread_stop(ca->alloc_thread);
+
 	closure_return(cl);
 }
 
-- 
cgit v1.2.3