summaryrefslogtreecommitdiffstats
path: root/include/linux/blk-cgroup.h
diff options
context:
space:
mode:
authorDennis Zhou (Facebook) <dennisszhou@gmail.com>2018-09-11 14:41:26 -0400
committerJens Axboe <axboe@kernel.dk>2018-09-21 20:29:02 -0600
commit27e6fa996c534c32702aa4d32db0ffa383acd050 (patch)
treefbeed4501b887851cc4b48af3ddb9d24d8e2dd9f /include/linux/blk-cgroup.h
parent9ff01255a01c3d6ffc8670b358b3ac567d5646fc (diff)
downloadlinux-27e6fa996c534c32702aa4d32db0ffa383acd050.tar.bz2
blkcg: fix ref count issue with bio_blkcg using task_css
The accessor function bio_blkcg either returns the blkcg associated with the bio or finds one in the current context. This can cause an issue when trying to associate a bio with a blkcg. Particularly, it's the third case that is problematic: return css_to_blkcg(task_css(current, io_cgrp_id)); As the above may race against task migration and the cgroup exiting, it is not always ok to take a reference on the blkcg returned from bio_blkcg. This patch adds association ahead of calling bio_blkcg rather than after. This makes association a required and explicit step along the code paths for calling bio_blkcg. blk_get_rl is modified as well to get a reference to the blkcg it may use and blk_put_rl will always put the reference back. Association is also moved above the bio_blkcg call to ensure it will not return NULL in blk-iolatency. BFQ and CFQ utilize this flaw, but due to the complexity, I do not want to address this in this series. I've created a private version of the function with notes not to use it describing the flaw. Hopefully soon, that code can be cleaned up. Signed-off-by: Dennis Zhou <dennisszhou@gmail.com> Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include/linux/blk-cgroup.h')
-rw-r--r--include/linux/blk-cgroup.h101
1 files changed, 93 insertions, 8 deletions
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6d766a19f2bb..24067a1f8b36 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -230,22 +230,100 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static inline struct cgroup_subsys_state *blkcg_css(void)
+{
+ struct cgroup_subsys_state *css;
+
+ css = kthread_blkcg();
+ if (css)
+ return css;
+ return task_css(current, io_cgrp_id);
+}
+
+/**
+ * blkcg_get_css - find and get a reference to the css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This takes a reference on the blkcg which will need to be managed by the
+ * caller.
+ */
+static inline struct cgroup_subsys_state *blkcg_get_css(void)
+{
+ struct cgroup_subsys_state *css;
+
+ rcu_read_lock();
+
+ css = kthread_blkcg();
+ if (css) {
+ css_get(css);
+ } else {
+ /*
+ * This is a bit complicated. It is possible task_css is seeing
+ * an old css pointer here. This is caused by the current
+ * thread migrating away from this cgroup and this cgroup dying.
+ * css_tryget() will fail when trying to take a ref on a cgroup
+ * that's ref count has hit 0.
+ *
+ * Therefore, if it does fail, this means current must have
+ * been swapped away already and this is waiting for it to
+ * propagate on the polling cpu. Hence the use of cpu_relax().
+ */
+ while (true) {
+ css = task_css(current, io_cgrp_id);
+ if (likely(css_tryget(css)))
+ break;
+ cpu_relax();
+ }
+ }
+
+ rcu_read_unlock();
+
+ return css;
+}
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
return css ? container_of(css, struct blkcg, css) : NULL;
}
-static inline struct blkcg *bio_blkcg(struct bio *bio)
+/**
+ * __bio_blkcg - internal version of bio_blkcg for bfq and cfq
+ *
+ * DO NOT USE.
+ * There is a flaw using this version of the function. In particular, this was
+ * used in a broken paradigm where association was called on the given css. It
+ * is possible though that the returned css from task_css() is in the process
+ * of dying due to migration of the current task. So it is improper to assume
+ * *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will
+ * take additional work to handle more gracefully.
+ */
+static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
- struct cgroup_subsys_state *css;
+ if (bio && bio->bi_css)
+ return css_to_blkcg(bio->bi_css);
+ return css_to_blkcg(blkcg_css());
+}
+/**
+ * bio_blkcg - grab the blkcg associated with a bio
+ * @bio: target bio
+ *
+ * This returns the blkcg associated with a bio, NULL if not associated.
+ * Callers are expected to either handle NULL or know association has been
+ * done prior to calling this.
+ */
+static inline struct blkcg *bio_blkcg(struct bio *bio)
+{
if (bio && bio->bi_css)
return css_to_blkcg(bio->bi_css);
- css = kthread_blkcg();
- if (css)
- return css_to_blkcg(css);
- return css_to_blkcg(task_css(current, io_cgrp_id));
+ return NULL;
}
static inline bool blk_cgroup_congested(void)
@@ -534,6 +612,10 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
rcu_read_lock();
blkcg = bio_blkcg(bio);
+ if (blkcg)
+ css_get(&blkcg->css);
+ else
+ blkcg = css_to_blkcg(blkcg_get_css());
/* bypass blkg lookup and use @q->root_rl directly for root */
if (blkcg == &blkcg_root)
@@ -565,6 +647,8 @@ root_rl:
*/
static inline void blk_put_rl(struct request_list *rl)
{
+ /* an additional ref is always taken for rl */
+ css_put(&rl->blkg->blkcg->css);
if (rl->blkg->blkcg != &blkcg_root)
blkg_put(rl->blkg);
}
@@ -805,10 +889,10 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
bool throtl = false;
rcu_read_lock();
- blkcg = bio_blkcg(bio);
/* associate blkcg if bio hasn't attached one */
- bio_associate_blkcg(bio, &blkcg->css);
+ bio_associate_blkcg(bio, NULL);
+ blkcg = bio_blkcg(bio);
blkg = blkg_lookup(blkcg, q);
if (unlikely(!blkg)) {
@@ -930,6 +1014,7 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }
+static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,