From 947174476701fbc84ea8c7ec9664270f9d80b076 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 28 Jan 2014 16:57:39 -0800
Subject: bcache: fix BUG_ON due to integer overflow with GC_SECTORS_USED

The BUG_ON at the end of __bch_btree_mark_key can be triggered due to
an integer overflow error:

BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13);
...
SET_GC_SECTORS_USED(g, min_t(unsigned,
	     GC_SECTORS_USED(g) + KEY_SIZE(k),
	     (1 << 14) - 1));
BUG_ON(!GC_SECTORS_USED(g));

In bcache.h, the SECTORS_USED bitfield is defined to be 13 bits wide.
While the SET_ code tries to ensure that the field doesn't overflow by
clamping it to (1<<14)-1 == 16383, this is incorrect because 16383
requires 14 bits.  Therefore, if GC_SECTORS_USED() + KEY_SIZE() =
8192, the SET_ statement tries to store 8192 into a 13-bit field.  In
a 13-bit field, 8192 becomes zero, thus triggering the BUG_ON.

Therefore, create a field width constant and a max value constant, and
use those to create the bitfield and check the inputs to
SET_GC_SECTORS_USED.  Arguably the BITMASK() template ought to have
BUG_ON checks for too-large values, but that's a separate patch.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 drivers/md/bcache/bcache.h | 4 +++-
 drivers/md/bcache/btree.c  | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 0c707e4f4eaf..a4c7306ff43d 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -210,7 +210,9 @@ BITMASK(GC_MARK,	 struct bucket, gc_mark, 0, 2);
 #define GC_MARK_RECLAIMABLE	0
 #define GC_MARK_DIRTY		1
 #define GC_MARK_METADATA	2
-BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13);
+#define GC_SECTORS_USED_SIZE	13
+#define MAX_GC_SECTORS_USED	(~(~0ULL << GC_SECTORS_USED_SIZE))
+BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
 BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
 
 #include "journal.h"
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 98cc0a810a36..7d421180b6d2 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1167,7 +1167,7 @@ uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
 		/* guard against overflow */
 		SET_GC_SECTORS_USED(g, min_t(unsigned,
 					     GC_SECTORS_USED(g) + KEY_SIZE(k),
-					     (1 << 14) - 1));
+					     MAX_GC_SECTORS_USED));
 
 		BUG_ON(!GC_SECTORS_USED(g));
 	}
-- 
cgit v1.2.3


From 3572324af0f4ef877545e5a17bd3e788551f166a Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Fri, 10 Jan 2014 18:53:02 -0800
Subject: bcache: Minor fixes from kbuild robot

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
 drivers/md/bcache/bset.c    | 7 +++++--
 drivers/md/bcache/btree.c   | 2 +-
 drivers/md/bcache/extents.c | 2 +-
 drivers/md/bcache/sysfs.c   | 2 +-
 4 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 4f6b5940e609..3f74b4b0747b 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -23,7 +23,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
 	for (k = i->start; k < bset_bkey_last(i); k = next) {
 		next = bkey_next(k);
 
-		printk(KERN_ERR "block %u key %zi/%u: ", set,
+		printk(KERN_ERR "block %u key %li/%u: ", set,
 		       (uint64_t *) k - i->d, i->keys);
 
 		if (b->ops->key_dump)
@@ -1185,9 +1185,12 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
 	struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
 						     order);
 	if (!out) {
+		struct page *outp;
+
 		BUG_ON(order > state->page_order);
 
-		out = page_address(mempool_alloc(state->pool, GFP_NOIO));
+		outp = mempool_alloc(state->pool, GFP_NOIO);
+		out = page_address(outp);
 		used_mempool = true;
 		order = state->page_order;
 	}
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 7d421180b6d2..5f9c2a665ca5 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1805,7 +1805,7 @@ static bool btree_insert_key(struct btree *b, struct bkey *k,
 
 static size_t insert_u64s_remaining(struct btree *b)
 {
-	ssize_t ret = bch_btree_keys_u64s_remaining(&b->keys);
+	long ret = bch_btree_keys_u64s_remaining(&b->keys);
 
 	/*
 	 * Might land in the middle of an existing extent and have to split it
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index c3ead586dc27..416d1a3e028e 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -194,7 +194,7 @@ err:
 	mutex_unlock(&b->c->bucket_lock);
 	bch_extent_to_text(buf, sizeof(buf), k);
 	btree_bug(b,
-"inconsistent btree pointer %s: bucket %li pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
+"inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
 		  buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
 		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
 	return true;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index c6ab69333a6d..d8458d477a12 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -416,7 +416,7 @@ static int btree_bset_stats(struct btree_op *b_op, struct btree *b)
 	return MAP_CONTINUE;
 }
 
-int bch_bset_print_stats(struct cache_set *c, char *buf)
+static int bch_bset_print_stats(struct cache_set *c, char *buf)
 {
 	struct bset_stats_op op;
 	int ret;
-- 
cgit v1.2.3


From e3b4825b85eab879b618af6ea18529ca7ab9a64f Mon Sep 17 00:00:00 2001
From: Nicholas Swenson <nks@daterainc.com>
Date: Thu, 12 Dec 2013 12:53:28 -0800
Subject: bcache: bugfix - gc thread now gets woken when cache is full

Signed-off-by: Nicholas Swenson <nks@daterainc.com>
---
 drivers/md/bcache/request.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index fcdb59f9ca91..675229b93718 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -354,14 +354,14 @@ static void bch_data_insert_start(struct closure *cl)
 	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 	struct bio *bio = op->bio, *n;
 
-	if (op->bypass)
-		return bch_data_invalidate(cl);
-
 	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) {
 		set_gc_sectors(op->c);
 		wake_up_gc(op->c);
 	}
 
+	if (op->bypass)
+		return bch_data_invalidate(cl);
+
 	/*
 	 * Journal writes are marked REQ_FLUSH; if the original write was a
 	 * flush, it'll wait on the journal write.
-- 
cgit v1.2.3


From 2ed22e3c3bec5b92b9aba4afdef0cc5e6d859a11 Mon Sep 17 00:00:00 2001
From: Matt Rushton <mrushton@amazon.com>
Date: Tue, 4 Feb 2014 11:26:12 +0100
Subject: xen-blkback: fix memory leak when persistent grants are used
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently shrink_free_pagepool() is called before the pages used for
persistent grants are released via free_persistent_gnts(). This
results in a memory leak when a VBD that uses persistent grants is
torn down.

Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: "Roger Pau Monné" <roger.pau@citrix.com>
Cc: Ian Campbell <Ian.Campbell@citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Cc: linux-kernel@vger.kernel.org
Cc: xen-devel@lists.xen.org
Cc: Anthony Liguori <aliguori@amazon.com>
Signed-off-by: Matt Rushton <mrushton@amazon.com>
Signed-off-by: Matt Wilson <msw@amazon.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 6620b73d0490..30ef7b390df5 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -625,9 +625,6 @@ purge_gnt_list:
 			print_stats(blkif);
 	}
 
-	/* Since we are shutting down remove all pages from the buffer */
-	shrink_free_pagepool(blkif, 0 /* All */);
-
 	/* Free all persistent grant pages */
 	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
 		free_persistent_gnts(blkif, &blkif->persistent_gnts,
@@ -636,6 +633,9 @@ purge_gnt_list:
 	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
 	blkif->persistent_gnt_c = 0;
 
+	/* Since we are shutting down remove all pages from the buffer */
+	shrink_free_pagepool(blkif, 0 /* All */);
+
 	if (log_stats)
 		print_stats(blkif);
 
-- 
cgit v1.2.3


From ef753411339eae46b9a3151906901f8bfd12b0f1 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Tue, 4 Feb 2014 11:26:13 +0100
Subject: xen-blkback: fix memory leaks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I've at least identified two possible memory leaks in blkback, both
related to the shutdown path of a VBD:

- blkback doesn't wait for any pending purge work to finish before
  cleaning the list of free_pages. The purge work will call
  put_free_pages and thus we might end up with pages being added to
  the free_pages list after we have emptied it. Fix this by making
  sure there's no pending purge work before exiting
  xen_blkif_schedule, and moving the free_page cleanup code to
  xen_blkif_free.
- blkback doesn't wait for pending requests to end before cleaning
  persistent grants and the list of free_pages. Again this can add
  pages to the free_pages list or persistent grants to the
  persistent_gnts red-black tree. Fixed by moving the persistent
  grants and free_pages cleanup code to xen_blkif_free.

Also, add some checks in xen_blkif_free to make sure we are cleaning
everything.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Tested-by: Matt Rushton <mrushton@amazon.com>
Reviewed-by: Matt Rushton <mrushton@amazon.com>
Cc: Matt Wilson <msw@amazon.com>
Cc: Ian Campbell <Ian.Campbell@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 27 ++++++++++++++++++---------
 drivers/block/xen-blkback/common.h  |  1 +
 drivers/block/xen-blkback/xenbus.c  | 12 ++++++++++++
 3 files changed, 31 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 30ef7b390df5..dcfe49fd3cb4 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -375,7 +375,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 
 	pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
 
-	INIT_LIST_HEAD(&blkif->persistent_purge_list);
+	BUG_ON(!list_empty(&blkif->persistent_purge_list));
 	root = &blkif->persistent_gnts;
 purge_list:
 	foreach_grant_safe(persistent_gnt, n, root, node) {
@@ -625,6 +625,23 @@ purge_gnt_list:
 			print_stats(blkif);
 	}
 
+	/* Drain pending purge work */
+	flush_work(&blkif->persistent_purge_work);
+
+	if (log_stats)
+		print_stats(blkif);
+
+	blkif->xenblkd = NULL;
+	xen_blkif_put(blkif);
+
+	return 0;
+}
+
+/*
+ * Remove persistent grants and empty the pool of free pages
+ */
+void xen_blkbk_free_caches(struct xen_blkif *blkif)
+{
 	/* Free all persistent grant pages */
 	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
 		free_persistent_gnts(blkif, &blkif->persistent_gnts,
@@ -635,14 +652,6 @@ purge_gnt_list:
 
 	/* Since we are shutting down remove all pages from the buffer */
 	shrink_free_pagepool(blkif, 0 /* All */);
-
-	if (log_stats)
-		print_stats(blkif);
-
-	blkif->xenblkd = NULL;
-	xen_blkif_put(blkif);
-
-	return 0;
 }
 
 /*
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 8d8807563d99..f733d7627120 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -376,6 +376,7 @@ int xen_blkif_xenbus_init(void);
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
 int xen_blkif_purge_persistent(void *arg);
+void xen_blkbk_free_caches(struct xen_blkif *blkif);
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 			      struct backend_info *be, int state);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index c2014a0aa206..8afef67fecdd 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -125,6 +125,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	blkif->persistent_gnts.rb_node = NULL;
 	spin_lock_init(&blkif->free_pages_lock);
 	INIT_LIST_HEAD(&blkif->free_pages);
+	INIT_LIST_HEAD(&blkif->persistent_purge_list);
 	blkif->free_pages_num = 0;
 	atomic_set(&blkif->persistent_gnt_in_use, 0);
 
@@ -259,6 +260,17 @@ static void xen_blkif_free(struct xen_blkif *blkif)
 	if (!atomic_dec_and_test(&blkif->refcnt))
 		BUG();
 
+	/* Remove all persistent grants and the cache of ballooned pages. */
+	xen_blkbk_free_caches(blkif);
+
+	/* Make sure everything is drained before shutting down */
+	BUG_ON(blkif->persistent_gnt_c != 0);
+	BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
+	BUG_ON(blkif->free_pages_num != 0);
+	BUG_ON(!list_empty(&blkif->persistent_purge_list));
+	BUG_ON(!list_empty(&blkif->free_pages));
+	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+
 	/* Check that there is no request in use */
 	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
 		list_del(&req->free_list);
-- 
cgit v1.2.3


From c05f3e3c85df1d89673e00cee7ece5ae4eb4c6ec Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Tue, 4 Feb 2014 11:26:14 +0100
Subject: xen-blkback: fix shutdown race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a new variable to keep track of the number of in-flight
requests. We need to make sure that when xen_blkif_put is called the
request has already been freed and we can safely free xen_blkif, which
was not the case before.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Tested-by: Matt Rushton <mrushton@amazon.com>
Reviewed-by: Matt Rushton <mrushton@amazon.com>
Cc: Matt Wilson <msw@amazon.com>
Cc: Ian Campbell <Ian.Campbell@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 32 ++++++++++++++++++++++----------
 drivers/block/xen-blkback/common.h  |  1 +
 drivers/block/xen-blkback/xenbus.c  |  1 +
 3 files changed, 24 insertions(+), 10 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index dcfe49fd3cb4..394fa2eabf87 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -943,9 +943,7 @@ static void xen_blk_drain_io(struct xen_blkif *blkif)
 {
 	atomic_set(&blkif->drain, 1);
 	do {
-		/* The initial value is one, and one refcnt taken at the
-		 * start of the xen_blkif_schedule thread. */
-		if (atomic_read(&blkif->refcnt) <= 2)
+		if (atomic_read(&blkif->inflight) == 0)
 			break;
 		wait_for_completion_interruptible_timeout(
 				&blkif->drain_complete, HZ);
@@ -985,17 +983,30 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 	 * the proper response on the ring.
 	 */
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
-		xen_blkbk_unmap(pending_req->blkif,
+		struct xen_blkif *blkif = pending_req->blkif;
+
+		xen_blkbk_unmap(blkif,
 		                pending_req->segments,
 		                pending_req->nr_pages);
-		make_response(pending_req->blkif, pending_req->id,
+		make_response(blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
-		xen_blkif_put(pending_req->blkif);
-		if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
-			if (atomic_read(&pending_req->blkif->drain))
-				complete(&pending_req->blkif->drain_complete);
+		free_req(blkif, pending_req);
+		/*
+		 * Make sure the request is freed before releasing blkif,
+		 * or there could be a race between free_req and the
+		 * cleanup done in xen_blkif_free during shutdown.
+		 *
+		 * NB: The fact that we might try to wake up pending_free_wq
+		 * before drain_complete (in case there's a drain going on)
+		 * it's not a problem with our current implementation
+		 * because we can assure there's no thread waiting on
+		 * pending_free_wq if there's a drain going on, but it has
+		 * to be taken into account if the current model is changed.
+		 */
+		if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
+			complete(&blkif->drain_complete);
 		}
-		free_req(pending_req->blkif, pending_req);
+		xen_blkif_put(blkif);
 	}
 }
 
@@ -1249,6 +1260,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
 	 */
 	xen_blkif_get(blkif);
+	atomic_inc(&blkif->inflight);
 
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index f733d7627120..e40326a7f707 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -278,6 +278,7 @@ struct xen_blkif {
 	/* for barrier (drain) requests */
 	struct completion	drain_complete;
 	atomic_t		drain;
+	atomic_t		inflight;
 	/* One thread per one blkif. */
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 8afef67fecdd..84973c6a856a 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -128,6 +128,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	INIT_LIST_HEAD(&blkif->persistent_purge_list);
 	blkif->free_pages_num = 0;
 	atomic_set(&blkif->persistent_gnt_in_use, 0);
+	atomic_set(&blkif->inflight, 0);
 
 	INIT_LIST_HEAD(&blkif->pending_free);
 
-- 
cgit v1.2.3


From 80bfa2f6e2e81049fc6cd3bfaeedcb64db3a9ba6 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Tue, 4 Feb 2014 11:26:15 +0100
Subject: xen-blkif: drop struct blkif_request_segment_aligned
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This was wrongly introduced in commit 402b27f9, the only difference
between blkif_request_segment_aligned and blkif_request_segment is
that the former has a named padding, while both share the same
memory layout.

Also correct a few minor glitches in the description, including for it
to no longer assume PAGE_SIZE == 4096.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
[Description fix by Jan Beulich]
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reported-by: Jan Beulich <jbeulich@suse.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Tested-by: Matt Rushton <mrushton@amazon.com>
Cc: Matt Wilson <msw@amazon.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c |  2 +-
 drivers/block/xen-blkback/common.h  |  2 +-
 drivers/block/xen-blkfront.c        |  6 +++---
 include/xen/interface/io/blkif.h    | 34 ++++++++++++++--------------------
 4 files changed, 19 insertions(+), 25 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 394fa2eabf87..e612627ae981 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -847,7 +847,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 	struct grant_page **pages = pending_req->indirect_pages;
 	struct xen_blkif *blkif = pending_req->blkif;
 	int indirect_grefs, rc, n, nseg, i;
-	struct blkif_request_segment_aligned *segments = NULL;
+	struct blkif_request_segment *segments = NULL;
 
 	nseg = pending_req->nr_pages;
 	indirect_grefs = INDIRECT_PAGES(nseg);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index e40326a7f707..9eb34e24b4fe 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -57,7 +57,7 @@
 #define MAX_INDIRECT_SEGMENTS 256
 
 #define SEGS_PER_INDIRECT_FRAME \
-	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+	(PAGE_SIZE/sizeof(struct blkif_request_segment))
 #define MAX_INDIRECT_PAGES \
 	((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 #define INDIRECT_PAGES(_segs) \
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index c4a4c9006288..7d09dfc9735c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -162,7 +162,7 @@ static DEFINE_SPINLOCK(minor_lock);
 #define DEV_NAME	"xvd"	/* name in /dev */
 
 #define SEGS_PER_INDIRECT_FRAME \
-	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+	(PAGE_SIZE/sizeof(struct blkif_request_segment))
 #define INDIRECT_GREFS(_segs) \
 	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 
@@ -393,7 +393,7 @@ static int blkif_queue_request(struct request *req)
 	unsigned long id;
 	unsigned int fsect, lsect;
 	int i, ref, n;
-	struct blkif_request_segment_aligned *segments = NULL;
+	struct blkif_request_segment *segments = NULL;
 
 	/*
 	 * Used to store if we are able to queue the request by just using
@@ -550,7 +550,7 @@ static int blkif_queue_request(struct request *req)
 			} else {
 				n = i % SEGS_PER_INDIRECT_FRAME;
 				segments[n] =
-					(struct blkif_request_segment_aligned) {
+					(struct blkif_request_segment) {
 							.gref       = ref,
 							.first_sect = fsect,
 							.last_sect  = lsect };
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index ae665ac59c36..32ec05a6572f 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -113,13 +113,13 @@ typedef uint64_t blkif_sector_t;
  * it's less than the number provided by the backend. The indirect_grefs field
  * in blkif_request_indirect should be filled by the frontend with the
  * grant references of the pages that are holding the indirect segments.
- * This pages are filled with an array of blkif_request_segment_aligned
- * that hold the information about the segments. The number of indirect
- * pages to use is determined by the maximum number of segments
- * a indirect request contains. Every indirect page can contain a maximum
- * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)),
- * so to calculate the number of indirect pages to use we have to do
- * ceil(indirect_segments/512).
+ * These pages are filled with an array of blkif_request_segment that hold the
+ * information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
  *
  * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
  * create the "feature-max-indirect-segments" node!
@@ -135,13 +135,12 @@ typedef uint64_t blkif_sector_t;
 
 #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
 
-struct blkif_request_segment_aligned {
-	grant_ref_t gref;        /* reference to I/O buffer frame        */
-	/* @first_sect: first sector in frame to transfer (inclusive).   */
-	/* @last_sect: last sector in frame to transfer (inclusive).     */
-	uint8_t     first_sect, last_sect;
-	uint16_t    _pad; /* padding to make it 8 bytes, so it's cache-aligned */
-} __attribute__((__packed__));
+struct blkif_request_segment {
+		grant_ref_t gref;        /* reference to I/O buffer frame        */
+		/* @first_sect: first sector in frame to transfer (inclusive).   */
+		/* @last_sect: last sector in frame to transfer (inclusive).     */
+		uint8_t     first_sect, last_sect;
+};
 
 struct blkif_request_rw {
 	uint8_t        nr_segments;  /* number of segments                   */
@@ -151,12 +150,7 @@ struct blkif_request_rw {
 #endif
 	uint64_t       id;           /* private guest value, echoed in resp  */
 	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-	struct blkif_request_segment {
-		grant_ref_t gref;        /* reference to I/O buffer frame        */
-		/* @first_sect: first sector in frame to transfer (inclusive).   */
-		/* @last_sect: last sector in frame to transfer (inclusive).     */
-		uint8_t     first_sect, last_sect;
-	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 } __attribute__((__packed__));
 
 struct blkif_request_discard {
-- 
cgit v1.2.3


From 3661371701e714f0cea4120f6a365340858fb4e4 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Tue, 4 Feb 2014 18:53:56 +0000
Subject: xen-blkfront: handle backend CLOSED without CLOSING

Backend drivers shouldn't transistion to CLOSED unless the frontend is
CLOSED.  If a backend does transition to CLOSED too soon then the
frontend may not see the CLOSING state and will not properly shutdown.

So, treat an unexpected backend CLOSED state the same as CLOSING.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: stable@vger.kernel.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkfront.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 7d09dfc9735c..76e5c0b265b1 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1904,13 +1904,16 @@ static void blkback_changed(struct xenbus_device *dev,
 	case XenbusStateReconfiguring:
 	case XenbusStateReconfigured:
 	case XenbusStateUnknown:
-	case XenbusStateClosed:
 		break;
 
 	case XenbusStateConnected:
 		blkfront_connect(info);
 		break;
 
+	case XenbusStateClosed:
+		if (dev->state == XenbusStateClosed)
+			break;
+		/* Missed the backend's Closing state -- fallthrough */
 	case XenbusStateClosing:
 		blkfront_closing(info);
 		break;
-- 
cgit v1.2.3


From d7790b928d42597b7da21a4e43080774903e3b5c Mon Sep 17 00:00:00 2001
From: Shlomo Pongratz <shlomop@mellanox.com>
Date: Thu, 6 Feb 2014 18:33:17 +0200
Subject: block/null_blk: Fix completion processing from LIFO to FIFO

The completion queue is implemented using lockless list.

The llist_add is adds the events to the list head which is a push operation.
The processing of the completion elements is done by disconnecting all the
pushed elements and iterating over the disconnected list. The problem is
that the processing is done in reverse order w.r.t order of the insertion
i.e. LIFO processing. By reversing the disconnected list which is done in
linear time the desired FIFO processing is achieved.

Signed-off-by: Shlomo Pongratz <shlomop@mellanox.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/null_blk.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 3107282a9741..cc801cde5cfa 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -195,6 +195,7 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
 	cq = &per_cpu(completion_queues, smp_processor_id());
 
 	while ((entry = llist_del_all(&cq->list)) != NULL) {
+		entry = llist_reverse_order(entry);
 		do {
 			cmd = container_of(entry, struct nullb_cmd, ll_list);
 			end_cmd(cmd);
@@ -235,6 +236,7 @@ static void null_ipi_cmd_end_io(void *data)
 	cq = &per_cpu(completion_queues, smp_processor_id());
 
 	entry = llist_del_all(&cq->list);
+	entry = llist_reverse_order(entry);
 
 	while (entry) {
 		next = entry->next;
-- 
cgit v1.2.3


From 5124c285797aa33d5fdb909fbe808a61bcc5eb9d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 10 Feb 2014 03:24:39 -0800
Subject: virtio_blk: use blk_mq_complete_request

Make sure to complete requests on the submitting CPU.  Previously this
was done in blk_mq_end_io, but the responsibility shifted to the drivers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/virtio_blk.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6a680d4de7f1..b1cb3f4c4db4 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -110,9 +110,9 @@ static int __virtblk_add_req(struct virtqueue *vq,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
-static inline void virtblk_request_done(struct virtblk_req *vbr)
+static inline void virtblk_request_done(struct request *req)
 {
-	struct request *req = vbr->req;
+	struct virtblk_req *vbr = req->special;
 	int error = virtblk_result(vbr);
 
 	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
@@ -138,7 +138,7 @@ static void virtblk_done(struct virtqueue *vq)
 	do {
 		virtqueue_disable_cb(vq);
 		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
-			virtblk_request_done(vbr);
+			blk_mq_complete_request(vbr->req);
 			req_done = true;
 		}
 		if (unlikely(virtqueue_is_broken(vq)))
@@ -479,6 +479,7 @@ static struct blk_mq_ops virtio_mq_ops = {
 	.map_queue	= blk_mq_map_queue,
 	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
 	.free_hctx	= blk_mq_free_single_hw_queue,
+	.complete	= virtblk_request_done,
 };
 
 static struct blk_mq_reg virtio_mq_reg = {
-- 
cgit v1.2.3


From ce2c350b2cfe5b5ca5023a6b1ec4d21821d39add Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 10 Feb 2014 03:24:40 -0800
Subject: null_blk: use blk_complete_request and blk_mq_complete_request

Use the block layer helpers for CPU-local completions instead of
reimplementing them locally.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/null_blk.c | 97 ++++++++++++++++--------------------------------
 1 file changed, 32 insertions(+), 65 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index cc801cde5cfa..091b9ea14feb 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -60,7 +60,9 @@ enum {
 	NULL_IRQ_NONE		= 0,
 	NULL_IRQ_SOFTIRQ	= 1,
 	NULL_IRQ_TIMER		= 2,
+};
 
+enum {
 	NULL_Q_BIO		= 0,
 	NULL_Q_RQ		= 1,
 	NULL_Q_MQ		= 2,
@@ -172,18 +174,20 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
 
 static void end_cmd(struct nullb_cmd *cmd)
 {
-	if (cmd->rq) {
-		if (queue_mode == NULL_Q_MQ)
-			blk_mq_end_io(cmd->rq, 0);
-		else {
-			INIT_LIST_HEAD(&cmd->rq->queuelist);
-			blk_end_request_all(cmd->rq, 0);
-		}
-	} else if (cmd->bio)
+	switch (queue_mode)  {
+	case NULL_Q_MQ:
+		blk_mq_end_io(cmd->rq, 0);
+		return;
+	case NULL_Q_RQ:
+		INIT_LIST_HEAD(&cmd->rq->queuelist);
+		blk_end_request_all(cmd->rq, 0);
+		break;
+	case NULL_Q_BIO:
 		bio_endio(cmd->bio, 0);
+		break;
+	}
 
-	if (queue_mode != NULL_Q_MQ)
-		free_cmd(cmd);
+	free_cmd(cmd);
 }
 
 static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
@@ -222,62 +226,31 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
 
 static void null_softirq_done_fn(struct request *rq)
 {
-	blk_end_request_all(rq, 0);
-}
-
-#ifdef CONFIG_SMP
-
-static void null_ipi_cmd_end_io(void *data)
-{
-	struct completion_queue *cq;
-	struct llist_node *entry, *next;
-	struct nullb_cmd *cmd;
-
-	cq = &per_cpu(completion_queues, smp_processor_id());
-
-	entry = llist_del_all(&cq->list);
-	entry = llist_reverse_order(entry);
-
-	while (entry) {
-		next = entry->next;
-		cmd = llist_entry(entry, struct nullb_cmd, ll_list);
-		end_cmd(cmd);
-		entry = next;
-	}
-}
-
-static void null_cmd_end_ipi(struct nullb_cmd *cmd)
-{
-	struct call_single_data *data = &cmd->csd;
-	int cpu = get_cpu();
-	struct completion_queue *cq = &per_cpu(completion_queues, cpu);
-
-	cmd->ll_list.next = NULL;
-
-	if (llist_add(&cmd->ll_list, &cq->list)) {
-		data->func = null_ipi_cmd_end_io;
-		data->flags = 0;
-		__smp_call_function_single(cpu, data, 0);
-	}
-
-	put_cpu();
+	end_cmd(rq->special);
 }
 
-#endif /* CONFIG_SMP */
-
 static inline void null_handle_cmd(struct nullb_cmd *cmd)
 {
 	/* Complete IO by inline, softirq or timer */
 	switch (irqmode) {
-	case NULL_IRQ_NONE:
-		end_cmd(cmd);
-		break;
 	case NULL_IRQ_SOFTIRQ:
-#ifdef CONFIG_SMP
-		null_cmd_end_ipi(cmd);
-#else
+		switch (queue_mode)  {
+		case NULL_Q_MQ:
+			blk_mq_complete_request(cmd->rq);
+			break;
+		case NULL_Q_RQ:
+			blk_complete_request(cmd->rq);
+			break;
+		case NULL_Q_BIO:
+			/*
+			 * XXX: no proper submitting cpu information available.
+			 */
+			end_cmd(cmd);
+			break;
+		}
+		break;
+	case NULL_IRQ_NONE:
 		end_cmd(cmd);
-#endif
 		break;
 	case NULL_IRQ_TIMER:
 		null_cmd_end_timer(cmd);
@@ -413,6 +386,7 @@ static struct blk_mq_ops null_mq_ops = {
 	.queue_rq       = null_queue_rq,
 	.map_queue      = blk_mq_map_queue,
 	.init_hctx	= null_init_hctx,
+	.complete	= null_softirq_done_fn,
 };
 
 static struct blk_mq_reg null_mq_reg = {
@@ -611,13 +585,6 @@ static int __init null_init(void)
 {
 	unsigned int i;
 
-#if !defined(CONFIG_SMP)
-	if (irqmode == NULL_IRQ_SOFTIRQ) {
-		pr_warn("null_blk: softirq completions not available.\n");
-		pr_warn("null_blk: using direct completions.\n");
-		irqmode = NULL_IRQ_NONE;
-	}
-#endif
 	if (bs > PAGE_SIZE) {
 		pr_warn("null_blk: invalid block size\n");
 		pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
-- 
cgit v1.2.3


From abb97b8c502a270d59c0c2e1ecea78ad752372ee Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Tue, 11 Feb 2014 20:34:03 -0700
Subject: xen-blkback: init persistent_purge_work work_struct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Initialize persistent_purge_work work_struct on xen_blkif_alloc (and
remove the previous initialization done in purge_persistent_gnt). This
prevents flush_work from complaining even if purge_persistent_gnt has
not been used.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/xen-blkback/blkback.c | 3 +--
 drivers/block/xen-blkback/common.h  | 1 +
 drivers/block/xen-blkback/xenbus.c  | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 765fc7348b66..64c60edcdfbc 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -299,7 +299,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
 	BUG_ON(num != 0);
 }
 
-static void unmap_purged_grants(struct work_struct *work)
+void xen_blkbk_unmap_purged_grants(struct work_struct *work)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -420,7 +420,6 @@ finished:
 	blkif->vbd.overflow_max_grants = 0;
 
 	/* We can defer this work */
-	INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants);
 	schedule_work(&blkif->persistent_purge_work);
 	pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total);
 	return;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 9eb34e24b4fe..be052773ad03 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -385,6 +385,7 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 int xen_blkbk_barrier(struct xenbus_transaction xbt,
 		      struct backend_info *be, int state);
 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
+void xen_blkbk_unmap_purged_grants(struct work_struct *work);
 
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 					struct blkif_x86_32_request *src)
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 84973c6a856a..9a547e6b6ebf 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -129,6 +129,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	blkif->free_pages_num = 0;
 	atomic_set(&blkif->persistent_gnt_in_use, 0);
 	atomic_set(&blkif->inflight, 0);
+	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
 
 	INIT_LIST_HEAD(&blkif->pending_free);
 
-- 
cgit v1.2.3