author     Linus Torvalds <torvalds@linux-foundation.org>  2013-07-11 13:03:24 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-07-11 13:03:24 -0700
commit     36805aaea5ae3cf1bb32f1643e0a800bb69f0d5b (patch)
tree       5565132549a0733772b3a2ac6b5cda516ea8cdce /block/blk-cgroup.c
parent     6d2fa9e141ea56a571ec842fd4f3a86bea44a203 (diff)
parent     d50235b7bc3ee0a0427984d763ea7534149531b4 (diff)
download   linux-36805aaea5ae3cf1bb32f1643e0a800bb69f0d5b.tar.bz2
Merge branch 'for-3.11/core' of git://git.kernel.dk/linux-block
Pull core block IO updates from Jens Axboe:
 "Here are the core IO block bits for 3.11. It contains:

   - A tweak to the reserved tag logic from Jan, for weirdo devices with
     just 3 free tags.  But for those it improves things substantially
     for random writes.

   - Periodic writeback fix from Jan.  Marked for stable as well.

   - Fix for a race condition in IO scheduler switching from Jianpeng.

   - The hierarchical blk-cgroup support from Tejun.  This is the grunt
     of the series.

   - blk-throttle fix from Vivek.

  Just a note that I'm in the middle of a relocation, whole family is
  flying out tomorrow.  Hence I will be AWOL the remainder of this week,
  but back at work again on Monday the 15th.

  CC'ing Tejun, since any potential "surprises" will most likely be from
  the blk-cgroup work.  But it's been brewing for a while and sitting in
  my tree and linux-next for a long time, so should be solid."

* 'for-3.11/core' of git://git.kernel.dk/linux-block: (36 commits)
  elevator: Fix a race in elevator switching
  block: Reserve only one queue tag for sync IO if only 3 tags are available
  writeback: Fix periodic writeback after fs mount
  blk-throttle: implement proper hierarchy support
  blk-throttle: implement throtl_grp->has_rules[]
  blk-throttle: Account for child group's start time in parent while bio climbs up
  blk-throttle: add throtl_qnode for dispatch fairness
  blk-throttle: make throtl_pending_timer_fn() ready for hierarchy
  blk-throttle: make tg_dispatch_one_bio() ready for hierarchy
  blk-throttle: make blk_throtl_bio() ready for hierarchy
  blk-throttle: make blk_throtl_drain() ready for hierarchy
  blk-throttle: dispatch from throtl_pending_timer_fn()
  blk-throttle: implement dispatch looping
  blk-throttle: separate out throtl_service_queue->pending_timer from throtl_data->dispatch_work
  blk-throttle: set REQ_THROTTLED from throtl_charge_bio() and gate stats update with it
  blk-throttle: implement sq_to_tg(), sq_to_td() and throtl_log()
  blk-throttle: add throtl_service_queue->parent_sq
  blk-throttle: generalize update_disptime optimization in blk_throtl_bio()
  blk-throttle: dispatch to throtl_data->service_queue.bio_lists[]
  blk-throttle: move bio_lists[] and friends to throtl_service_queue
  ...
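The hierarchical blk-throttle support called out above means a child group's IO is also accounted against every ancestor as bios climb up the tree, so a child can never exceed a limit set on its parent. The following is a minimal standalone C sketch of that accounting idea only; the types and names (struct tg, bytes_disp, charge_bio) are illustrative stand-ins and are not the kernel's throtl_grp code.

/*
 * Userspace sketch: charging a bio's size to a group and every ancestor,
 * mirroring how a bio "climbs up" the hierarchy.  All names are hypothetical.
 */
#include <stdio.h>

struct tg {
	const char *name;
	struct tg *parent;                 /* NULL for the root group */
	unsigned long long bytes_disp;     /* bytes charged so far */
	unsigned long long bps_limit;      /* configured bytes/sec limit */
};

/* Charge bio_bytes to @tg and to each of its ancestors. */
static void charge_bio(struct tg *tg, unsigned int bio_bytes)
{
	for (; tg; tg = tg->parent)
		tg->bytes_disp += bio_bytes;
}

int main(void)
{
	struct tg root  = { "root",  NULL,  0, 200ULL << 20 };
	struct tg child = { "child", &root, 0,  50ULL << 20 };

	charge_bio(&child, 4096);          /* both child and root see the 4 KiB */
	printf("%s=%llu %s=%llu\n", child.name, child.bytes_disp,
	       root.name, root.bytes_disp);
	return 0;
}

Built with any C compiler, the program prints the same 4 KiB charge on both the child and the root group, which is the behavior the hierarchy series establishes for throttling statistics.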
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--   block/blk-cgroup.c   105
1 file changed, 40 insertions(+), 65 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e8918ffaf96d..290792a13e3c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -32,26 +32,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);
static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
- struct request_queue *q, bool update_hint);
-
-/**
- * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
- * @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
- * @p_blkg: target blkg to walk descendants of
- *
- * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU
- * read locked. If called under either blkcg or queue lock, the iteration
- * is guaranteed to include all and only online blkgs. The caller may
- * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
- * subtree.
- */
-#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \
- cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
- if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
- (p_blkg)->q, false)))
-
static bool blkcg_policy_enabled(struct request_queue *q,
const struct blkcg_policy *pol)
{
@@ -71,18 +51,8 @@ static void blkg_free(struct blkcg_gq *blkg)
if (!blkg)
return;
- for (i = 0; i < BLKCG_MAX_POLS; i++) {
- struct blkcg_policy *pol = blkcg_policy[i];
- struct blkg_policy_data *pd = blkg->pd[i];
-
- if (!pd)
- continue;
-
- if (pol && pol->pd_exit_fn)
- pol->pd_exit_fn(blkg);
-
- kfree(pd);
- }
+ for (i = 0; i < BLKCG_MAX_POLS; i++)
+ kfree(blkg->pd[i]);
blk_exit_rl(&blkg->rl);
kfree(blkg);
@@ -134,10 +104,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
blkg->pd[i] = pd;
pd->blkg = blkg;
pd->plid = i;
-
- /* invoke per-policy init */
- if (pol->pd_init_fn)
- pol->pd_init_fn(blkg);
}
return blkg;
@@ -158,8 +124,8 @@ err_free:
* @q's bypass state. If @update_hint is %true, the caller should be
* holding @q->queue_lock and lookup hint is updated on success.
*/
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
- struct request_queue *q, bool update_hint)
+struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
+ bool update_hint)
{
struct blkcg_gq *blkg;
@@ -234,16 +200,25 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
}
blkg = new_blkg;
- /* link parent and insert */
+ /* link parent */
if (blkcg_parent(blkcg)) {
blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
if (WARN_ON_ONCE(!blkg->parent)) {
- blkg = ERR_PTR(-EINVAL);
+ ret = -EINVAL;
goto err_put_css;
}
blkg_get(blkg->parent);
}
+ /* invoke per-policy init */
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
+
+ if (blkg->pd[i] && pol->pd_init_fn)
+ pol->pd_init_fn(blkg);
+ }
+
+ /* insert */
spin_lock(&blkcg->lock);
ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
if (likely(!ret)) {
@@ -394,30 +369,38 @@ static void blkg_destroy_all(struct request_queue *q)
q->root_rl.blkg = NULL;
}
-static void blkg_rcu_free(struct rcu_head *rcu_head)
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid. For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
{
- blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
-}
+ struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+ int i;
+
+ /* tell policies that this one is being freed */
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
+
+ if (blkg->pd[i] && pol->pd_exit_fn)
+ pol->pd_exit_fn(blkg);
+ }
-void __blkg_release(struct blkcg_gq *blkg)
-{
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
- if (blkg->parent)
+ if (blkg->parent) {
+ spin_lock_irq(blkg->q->queue_lock);
blkg_put(blkg->parent);
+ spin_unlock_irq(blkg->q->queue_lock);
+ }
- /*
- * A group is freed in rcu manner. But having an rcu lock does not
- * mean that one can access all the fields of blkg and assume these
- * are valid. For example, don't try to follow throtl_data and
- * request queue links.
- *
- * Having a reference to blkg under an rcu allows acess to only
- * values local to groups like group stats and group rate limits
- */
- call_rcu(&blkg->rcu_head, blkg_rcu_free);
+ blkg_free(blkg);
}
-EXPORT_SYMBOL_GPL(__blkg_release);
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
/*
* The next function used by blk_queue_for_each_rl(). It's a bit tricky
@@ -928,14 +911,6 @@ struct cgroup_subsys blkio_subsys = {
.subsys_id = blkio_subsys_id,
.base_cftypes = blkcg_files,
.module = THIS_MODULE,
-
- /*
- * blkio subsystem is utterly broken in terms of hierarchy support.
- * It treats all cgroups equally regardless of where they're
- * located in the hierarchy - all cgroups are treated as if they're
- * right below the root. Fix it and remove the following.
- */
- .broken_hierarchy = true,
};
EXPORT_SYMBOL_GPL(blkio_subsys);
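The diff above reorders the per-policy lifecycle hooks: pd_init_fn() is now invoked from blkg_create() only after the parent blkg has been looked up and linked, and pd_exit_fn() is now called from __blkg_release_rcu() just before the group is freed. Below is a minimal standalone C sketch of that ordering under simplified, hypothetical types (struct policy, demo_init, demo_exit); it illustrates the sequencing only and is not the kernel implementation.

/*
 * Sketch of the init/exit ordering introduced by this merge.
 * Types and callbacks are stand-ins, not the kernel's.
 */
#include <stdio.h>
#include <stdlib.h>

struct blkg;

struct policy {
	void (*pd_init_fn)(struct blkg *blkg);
	void (*pd_exit_fn)(struct blkg *blkg);
};

struct blkg {
	struct blkg *parent;
	const struct policy *pol;
};

static void demo_init(struct blkg *blkg)
{
	/* The parent link is already valid here, unlike init at alloc time. */
	printf("init: parent %s\n", blkg->parent ? "linked" : "none");
}

static void demo_exit(struct blkg *blkg)
{
	(void)blkg;
	printf("exit: policy data still intact\n");
}

static const struct policy demo_pol = { demo_init, demo_exit };

static struct blkg *blkg_create(struct blkg *parent)
{
	struct blkg *blkg = calloc(1, sizeof(*blkg));

	if (!blkg)
		exit(1);
	blkg->parent = parent;            /* link the parent first ...      */
	blkg->pol = &demo_pol;
	blkg->pol->pd_init_fn(blkg);      /* ... then run per-policy init   */
	return blkg;
}

static void blkg_release(struct blkg *blkg)
{
	blkg->pol->pd_exit_fn(blkg);      /* notify the policy before freeing */
	free(blkg);
}

int main(void)
{
	struct blkg *root = blkg_create(NULL);
	struct blkg *child = blkg_create(root);

	blkg_release(child);
	blkg_release(root);
	return 0;
}

Running init after the parent pointer is set is what lets a policy's init callback reference blkg->parent, which the old blkg_alloc()-time init could not do; symmetrically, deferring exit to release time keeps the policy data valid for the whole life of the group.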