From b5f2954d30c77649bce9c27e7a0a94299d9cfdf8 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Thu, 1 Nov 2018 17:24:10 -0400 Subject: blkcg: revert blkcg cleanups series This reverts a series committed earlier due to null pointer exception bug report in [1]. It seems there are edge case interactions that I did not consider and will need some time to understand what causes the adverse interactions. The original series can be found in [2] with a follow up series in [3]. [1] https://www.spinics.net/lists/cgroups/msg20719.html [2] https://lore.kernel.org/lkml/20180911184137.35897-1-dennisszhou@gmail.com/ [3] https://lore.kernel.org/lkml/20181020185612.51587-1-dennis@kernel.org/ This reverts the following commits: d459d853c2ed, b2c3fa546705, 101246ec02b5, b3b9f24f5fcc, e2b0989954ae, f0fcb3ec89f3, c839e7a03f92, bdc2491708c4, 74b7c02a9bc1, 5bf9a1f3b4ef, a7b39b4e961c, 07b05bcc3213, 49f4c2dc2b50, 27e6fa996c53 Signed-off-by: Dennis Zhou Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 4 +- block/bfq-iosched.c | 2 +- block/bio.c | 174 ++++++++++++++------------------------------------ block/blk-cgroup.c | 123 +++++++++++------------------------ block/blk-core.c | 1 - block/blk-iolatency.c | 26 +++++++- block/blk-throttle.c | 13 +++- block/bounce.c | 4 +- block/cfq-iosched.c | 4 +- 9 files changed, 127 insertions(+), 224 deletions(-) (limited to 'block') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index d9a7916ff0ab..9fe5952d117d 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) uint64_t serial_nr; rcu_read_lock(); - serial_nr = __bio_blkcg(bio)->css.serial_nr; + serial_nr = bio_blkcg(bio)->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger @@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) goto out; - bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); + bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio)); /* * Update blkg_path for bfq_log_* functions. We cache this * path, and update it here, for the following diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 6075100f03a5..3a27d31fcda6 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, rcu_read_lock(); - bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); + bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio)); if (!bfqg) { bfqq = &bfqd->oom_bfqq; goto out; diff --git a/block/bio.c b/block/bio.c index bbfeb4ee2892..4a5a036268fb 100644 --- a/block/bio.c +++ b/block/bio.c @@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_iter = bio_src->bi_iter; bio->bi_io_vec = bio_src->bi_io_vec; - bio_clone_blkg_association(bio, bio_src); - - blkcg_bio_issue_init(bio); + bio_clone_blkcg_association(bio, bio_src); } EXPORT_SYMBOL(__bio_clone_fast); @@ -1956,151 +1954,69 @@ EXPORT_SYMBOL(bioset_init_from_src); #ifdef CONFIG_BLK_CGROUP -/** - * bio_associate_blkg - associate a bio with the a blkg - * @bio: target bio - * @blkg: the blkg to associate - * - * This tries to associate @bio with the specified blkg. Association failure - * is handled by walking up the blkg tree. Therefore, the blkg associated can - * be anything between @blkg and the root_blkg. This situation only happens - * when a cgroup is dying and then the remaining bios will spill to the closest - * alive blkg. - * - * A reference will be taken on the @blkg and will be released when @bio is - * freed. - */ -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) -{ - if (unlikely(bio->bi_blkg)) - return -EBUSY; - bio->bi_blkg = blkg_tryget_closest(blkg); - return 0; -} - -/** - * __bio_associate_blkg_from_css - internal blkg association function - * - * This in the core association function that all association paths rely on. - * A blkg reference is taken which is released upon freeing of the bio. - */ -static int __bio_associate_blkg_from_css(struct bio *bio, - struct cgroup_subsys_state *css) -{ - struct request_queue *q = bio->bi_disk->queue; - struct blkcg_gq *blkg; - int ret; - - rcu_read_lock(); - - if (!css || !css->parent) - blkg = q->root_blkg; - else - blkg = blkg_lookup_create(css_to_blkcg(css), q); - - ret = bio_associate_blkg(bio, blkg); - - rcu_read_unlock(); - return ret; -} - -/** - * bio_associate_blkg_from_css - associate a bio with a specified css - * @bio: target bio - * @css: target css - * - * Associate @bio with the blkg found by combining the css's blkg and the - * request_queue of the @bio. This falls back to the queue's root_blkg if - * the association fails with the css. - */ -int bio_associate_blkg_from_css(struct bio *bio, - struct cgroup_subsys_state *css) -{ - if (unlikely(bio->bi_blkg)) - return -EBUSY; - return __bio_associate_blkg_from_css(bio, css); -} -EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); - #ifdef CONFIG_MEMCG /** - * bio_associate_blkg_from_page - associate a bio with the page's blkg + * bio_associate_blkcg_from_page - associate a bio with the page's blkcg * @bio: target bio * @page: the page to lookup the blkcg from * - * Associate @bio with the blkg from @page's owning memcg and the respective - * request_queue. If cgroup_e_css returns NULL, fall back to the queue's - * root_blkg. - * - * Note: this must be called after bio has an associated device. + * Associate @bio with the blkcg from @page's owning memcg. This works like + * every other associate function wrt references. */ -int bio_associate_blkg_from_page(struct bio *bio, struct page *page) +int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) { - struct cgroup_subsys_state *css; - int ret; + struct cgroup_subsys_state *blkcg_css; - if (unlikely(bio->bi_blkg)) + if (unlikely(bio->bi_css)) return -EBUSY; if (!page->mem_cgroup) return 0; - - rcu_read_lock(); - - css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); - - ret = __bio_associate_blkg_from_css(bio, css); - - rcu_read_unlock(); - return ret; + blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, + &io_cgrp_subsys); + bio->bi_css = blkcg_css; + return 0; } #endif /* CONFIG_MEMCG */ /** - * bio_associate_create_blkg - associate a bio with a blkg from q - * @q: request_queue where bio is going + * bio_associate_blkcg - associate a bio with the specified blkcg * @bio: target bio + * @blkcg_css: css of the blkcg to associate + * + * Associate @bio with the blkcg specified by @blkcg_css. Block layer will + * treat @bio as if it were issued by a task which belongs to the blkcg. * - * Associate @bio with the blkg found from the bio's css and the request_queue. - * If one is not found, bio_lookup_blkg creates the blkg. This falls back to - * the queue's root_blkg if association fails. + * This function takes an extra reference of @blkcg_css which will be put + * when @bio is released. The caller must own @bio and is responsible for + * synchronizing calls to this function. */ -int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) +int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) { - struct cgroup_subsys_state *css; - int ret = 0; - - /* someone has already associated this bio with a blkg */ - if (bio->bi_blkg) - return ret; - - rcu_read_lock(); - - css = blkcg_css(); - - ret = __bio_associate_blkg_from_css(bio, css); - - rcu_read_unlock(); - return ret; + if (unlikely(bio->bi_css)) + return -EBUSY; + css_get(blkcg_css); + bio->bi_css = blkcg_css; + return 0; } +EXPORT_SYMBOL_GPL(bio_associate_blkcg); /** - * bio_reassociate_blkg - reassociate a bio with a blkg from q - * @q: request_queue where bio is going + * bio_associate_blkg - associate a bio with the specified blkg * @bio: target bio + * @blkg: the blkg to associate * - * When submitting a bio, multiple recursive calls to make_request() may occur. - * This causes the initial associate done in blkcg_bio_issue_check() to be - * incorrect and reference the prior request_queue. This performs reassociation - * when this situation happens. + * Associate @bio with the blkg specified by @blkg. This is the queue specific + * blkcg information associated with the @bio, a reference will be taken on the + * @blkg and will be freed when the bio is freed. */ -int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) +int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) { - if (bio->bi_blkg) { - blkg_put(bio->bi_blkg); - bio->bi_blkg = NULL; - } - - return bio_associate_create_blkg(q, bio); + if (unlikely(bio->bi_blkg)) + return -EBUSY; + if (!blkg_try_get(blkg)) + return -ENODEV; + bio->bi_blkg = blkg; + return 0; } /** @@ -2113,6 +2029,10 @@ void bio_disassociate_task(struct bio *bio) put_io_context(bio->bi_ioc); bio->bi_ioc = NULL; } + if (bio->bi_css) { + css_put(bio->bi_css); + bio->bi_css = NULL; + } if (bio->bi_blkg) { blkg_put(bio->bi_blkg); bio->bi_blkg = NULL; @@ -2120,16 +2040,16 @@ void bio_disassociate_task(struct bio *bio) } /** - * bio_clone_blkg_association - clone blkg association from src to dst bio + * bio_clone_blkcg_association - clone blkcg association from src to dst bio * @dst: destination bio * @src: source bio */ -void bio_clone_blkg_association(struct bio *dst, struct bio *src) +void bio_clone_blkcg_association(struct bio *dst, struct bio *src) { - if (src->bi_blkg) - bio_associate_blkg(dst, src->bi_blkg); + if (src->bi_css) + WARN_ON(bio_associate_blkcg(dst, src->bi_css)); } -EXPORT_SYMBOL_GPL(bio_clone_blkg_association); +EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); #endif /* CONFIG_BLK_CGROUP */ static void __init biovec_init_slabs(void) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 992da5592c6e..c630e02836a8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg) kfree(blkg); } -static void __blkg_release(struct rcu_head *rcu) -{ - struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); - - percpu_ref_exit(&blkg->refcnt); - - /* release the blkcg and parent blkg refs this blkg has been holding */ - css_put(&blkg->blkcg->css); - if (blkg->parent) - blkg_put(blkg->parent); - - wb_congested_put(blkg->wb_congested); - - blkg_free(blkg); -} - -/* - * A group is RCU protected, but having an rcu lock does not mean that one - * can access all the fields of blkg and assume these are valid. For - * example, don't try to follow throtl_data and request queue links. - * - * Having a reference to blkg under an rcu allows accesses to only values - * local to groups like group stats and group rate limits. - */ -static void blkg_release(struct percpu_ref *ref) -{ - struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt); - - call_rcu(&blkg->rcu_head, __blkg_release); -} - /** * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with @@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; + atomic_set(&blkg->refcnt, 1); /* root blkg uses @q->root_rl, init rl only for !root blkgs */ if (blkcg != &blkcg_root) { @@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_get(blkg->parent); } - ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0, - GFP_NOWAIT | __GFP_NOWARN); - if (ret) - goto err_cancel_ref; - /* invoke per-policy init */ for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg_put(blkg); return ERR_PTR(ret); -err_cancel_ref: - percpu_ref_exit(&blkg->refcnt); err_put_congested: wb_congested_put(wb_congested); err_put_css: @@ -296,7 +259,7 @@ err_free_blkg: } /** - * __blkg_lookup_create - lookup blkg, try to create one if not there + * blkg_lookup_create - lookup blkg, try to create one if not there * @blkcg: blkcg of interest * @q: request_queue of interest * @@ -305,11 +268,12 @@ err_free_blkg: * that all non-root blkg's have access to the parent blkg. This function * should be called under RCU read lock and @q->queue_lock. * - * Returns the blkg or the closest blkg if blkg_create fails as it walks - * down from root. + * Returns pointer to the looked up or created blkg on success, ERR_PTR() + * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not + * dead and bypassing, returns ERR_PTR(-EBUSY). */ -struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q) +struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q) { struct blkcg_gq *blkg; @@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, * we shouldn't allow anything to go through for a bypassing queue. */ if (unlikely(blk_queue_bypass(q))) - return q->root_blkg; + return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY); blkg = __blkg_lookup(blkcg, q, true); if (blkg) @@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, /* * Create blkgs walking down from blkcg_root to @blkcg, so that all - * non-root blkgs have access to their parents. Returns the closest - * blkg to the intended blkg should blkg_create() fail. + * non-root blkgs have access to their parents. */ while (true) { struct blkcg *pos = blkcg; struct blkcg *parent = blkcg_parent(blkcg); - struct blkcg_gq *ret_blkg = q->root_blkg; - - while (parent) { - blkg = __blkg_lookup(parent, q, false); - if (blkg) { - /* remember closest blkg */ - ret_blkg = blkg; - break; - } + + while (parent && !__blkg_lookup(parent, q, false)) { pos = parent; parent = blkcg_parent(parent); } blkg = blkg_create(pos, q, NULL); - if (IS_ERR(blkg)) - return ret_blkg; - if (pos == blkcg) + if (pos == blkcg || IS_ERR(blkg)) return blkg; } } -/** - * blkg_lookup_create - find or create a blkg - * @blkcg: target block cgroup - * @q: target request_queue - * - * This looks up or creates the blkg representing the unique pair - * of the blkcg and the request_queue. - */ -struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q) -{ - struct blkcg_gq *blkg = blkg_lookup(blkcg, q); - unsigned long flags; - - if (unlikely(!blkg)) { - spin_lock_irqsave(q->queue_lock, flags); - - blkg = __blkg_lookup_create(blkcg, q); - - spin_unlock_irqrestore(q->queue_lock, flags); - } - - return blkg; -} - static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; @@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ - percpu_ref_kill(&blkg->refcnt); + blkg_put(blkg); } /** @@ -451,6 +380,29 @@ static void blkg_destroy_all(struct request_queue *q) q->root_rl.blkg = NULL; } +/* + * A group is RCU protected, but having an rcu lock does not mean that one + * can access all the fields of blkg and assume these are valid. For + * example, don't try to follow throtl_data and request queue links. + * + * Having a reference to blkg under an rcu allows accesses to only values + * local to groups like group stats and group rate limits. + */ +void __blkg_release_rcu(struct rcu_head *rcu_head) +{ + struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head); + + /* release the blkcg and parent blkg refs this blkg has been holding */ + css_put(&blkg->blkcg->css); + if (blkg->parent) + blkg_put(blkg->parent); + + wb_congested_put(blkg->wb_congested); + + blkg_free(blkg); +} +EXPORT_SYMBOL_GPL(__blkg_release_rcu); + /* * The next function used by blk_queue_for_each_rl(). It's a bit tricky * because the root blkg uses @q->root_rl instead of its own rl. @@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void) blkg = blkg_lookup(blkcg, q); if (!blkg) goto out; - if (!blkg_tryget(blkg)) + blkg = blkg_try_get(blkg); + if (!blkg) goto out; rcu_read_unlock(); diff --git a/block/blk-core.c b/block/blk-core.c index 26a5dac80ed9..ce12515f9b9b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2435,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio) if (q) blk_queue_exit(q); q = bio->bi_disk->queue; - bio_reassociate_blkg(q, bio); flags = 0; if (bio->bi_opf & REQ_NOWAIT) flags = BLK_MQ_REQ_NOWAIT; diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 35c48d7b8f78..bb240a0c1309 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -480,12 +480,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock) { struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); - struct blkcg_gq *blkg = bio->bi_blkg; + struct blkcg *blkcg; + struct blkcg_gq *blkg; + struct request_queue *q = rqos->q; bool issue_as_root = bio_issue_as_root_blkg(bio); if (!blk_iolatency_enabled(blkiolat)) return; + rcu_read_lock(); + blkcg = bio_blkcg(bio); + bio_associate_blkcg(bio, &blkcg->css); + blkg = blkg_lookup(blkcg, q); + if (unlikely(!blkg)) { + if (!lock) + spin_lock_irq(q->queue_lock); + blkg = blkg_lookup_create(blkcg, q); + if (IS_ERR(blkg)) + blkg = NULL; + if (!lock) + spin_unlock_irq(q->queue_lock); + } + if (!blkg) + goto out; + + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); + bio_associate_blkg(bio, blkg); +out: + rcu_read_unlock(); while (blkg && blkg->parent) { struct iolatency_grp *iolat = blkg_to_lat(blkg); if (!iolat) { @@ -706,7 +728,7 @@ static void blkiolatency_timer_fn(struct timer_list *t) * We could be exiting, don't access the pd unless we have a * ref on the blkg. */ - if (!blkg_tryget(blkg)) + if (!blkg_try_get(blkg)) continue; iolat = blkg_to_lat(blkg); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 4bda70e8db48..db1a3a2ae006 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) } #endif +static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) +{ +#ifdef CONFIG_BLK_DEV_THROTTLING_LOW + /* fallback to root_blkg if we fail to get a blkg ref */ + if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV)) + bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg); + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); +#endif +} + bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, struct bio *bio) { struct throtl_qnode *qn = NULL; - struct throtl_grp *tg = blkg_to_tg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg); struct throtl_service_queue *sq; bool rw = bio_data_dir(bio); bool throttled = false; @@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, if (unlikely(blk_queue_bypass(q))) goto out_unlock; + blk_throtl_assoc_bio(tg, bio); blk_throtl_update_idletime(tg); sq = &tg->service_queue; diff --git a/block/bounce.c b/block/bounce.c index ec0d99995f5f..418677dcec60 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, } } - bio_clone_blkg_association(bio, bio_src); - - blkcg_bio_issue_init(bio); + bio_clone_blkcg_association(bio, bio_src); return bio; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 6a3d87dd3c1a..ed41aa978c4a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) uint64_t serial_nr; rcu_read_lock(); - serial_nr = __bio_blkcg(bio)->css.serial_nr; + serial_nr = bio_blkcg(bio)->css.serial_nr; rcu_read_unlock(); /* @@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, struct cfq_group *cfqg; rcu_read_lock(); - cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio)); + cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio)); if (!cfqg) { cfqq = &cfqd->oom_cfqq; goto out; -- cgit v1.2.3