From 7f203bc89eb66d6afde7eae91347fc0352090cc3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 29 Jul 2022 13:10:16 -1000 Subject: cgroup: Replace cgroup->ancestor_ids[] with ->ancestors[] Every cgroup knows all its ancestors through its ->ancestor_ids[]. There's no advantage to remembering the IDs instead of the pointers directly and this makes the array useless for finding an actual ancestor cgroup forcing cgroup_ancestor() to iteratively walk up the hierarchy instead. Let's replace cgroup->ancestor_ids[] with ->ancestors[] and remove the walking-up from cgroup_ancestor(). While at it, improve comments around cgroup_root->cgrp_ancestor_storage. This patch shouldn't cause user-visible behavior differences. v2: Update cgroup_ancestor() to use ->ancestors[]. v3: cgroup_root->cgrp_ancestor_storage's type is updated to match cgroup->ancestors[]. Better comments. Signed-off-by: Tejun Heo Acked-by: Namhyung Kim --- include/linux/cgroup-defs.h | 16 ++++++++++------ include/linux/cgroup.h | 8 +++----- 2 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4bcf56b3491c..1283993d7ea8 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -384,7 +384,7 @@ struct cgroup { /* * The depth this cgroup is at. The root is at depth zero and each * step down the hierarchy increments the level. This along with - * ancestor_ids[] can determine whether a given cgroup is a + * ancestors[] can determine whether a given cgroup is a * descendant of another without traversing the hierarchy. */ int level; @@ -504,8 +504,8 @@ struct cgroup { /* Used to store internal freezer state */ struct cgroup_freezer_state freezer; - /* ids of the ancestors at each level including self */ - u64 ancestor_ids[]; + /* All ancestors including self */ + struct cgroup *ancestors[]; }; /* @@ -522,11 +522,15 @@ struct cgroup_root { /* Unique id for this hierarchy. */ int hierarchy_id; - /* The root cgroup. Root is destroyed on its release. */ + /* + * The root cgroup. The containing cgroup_root will be destroyed on its + * release. cgrp->ancestors[0] will be used overflowing into the + * following field. cgrp_ancestor_storage must immediately follow. + */ struct cgroup cgrp; - /* for cgrp->ancestor_ids[0] */ - u64 cgrp_ancestor_id_storage; + /* must follow cgrp for cgrp->ancestors[0], see above */ + struct cgroup *cgrp_ancestor_storage; /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ atomic_t nr_cgrps; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed53bfe7c46c..4d143729b246 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -574,7 +574,7 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, { if (cgrp->root != ancestor->root || cgrp->level < ancestor->level) return false; - return cgrp->ancestor_ids[ancestor->level] == cgroup_id(ancestor); + return cgrp->ancestors[ancestor->level] == ancestor; } /** @@ -591,11 +591,9 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp, int ancestor_level) { - if (cgrp->level < ancestor_level) + if (ancestor_level < 0 || ancestor_level > cgrp->level) return NULL; - while (cgrp && cgrp->level > ancestor_level) - cgrp = cgroup_parent(cgrp); - return cgrp; + return cgrp->ancestors[ancestor_level]; } /** -- cgit v1.2.3 From fa7e439cf90ba23ea473d0b7d85efd02ae6ccf94 Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Fri, 26 Aug 2022 18:52:37 +0200 Subject: cgroup: Homogenize cgroup_get_from_id() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cgroup id is user provided datum hence extend its return domain to include possible error reason (similar to cgroup_get_from_fd()). This change also fixes commit d4ccaf58a847 ("bpf: Introduce cgroup iter") that would use NULL instead of proper error handling in d4ccaf58a847 ("bpf: Introduce cgroup iter"). Additionally, neither of: fc_appid_store, bpf_iter_attach_cgroup, mem_cgroup_get_from_ino (callers of cgroup_get_from_fd) is built without CONFIG_CGROUPS (depends via CONFIG_BLK_CGROUP, direct, transitive CONFIG_MEMCG respectively) transitive, so drop the singular definition not needed with !CONFIG_CGROUPS. Fixes: d4ccaf58a847 ("bpf: Introduce cgroup iter") Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- block/blk-cgroup-fc-appid.c | 4 ++-- include/linux/cgroup.h | 5 ----- kernel/cgroup/cgroup.c | 4 ++-- mm/memcontrol.c | 4 ++-- 4 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/block/blk-cgroup-fc-appid.c b/block/blk-cgroup-fc-appid.c index 760a2e1878dd..842e5e1c0f3c 100644 --- a/block/blk-cgroup-fc-appid.c +++ b/block/blk-cgroup-fc-appid.c @@ -19,8 +19,8 @@ int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len) return -EINVAL; cgrp = cgroup_get_from_id(cgrp_id); - if (!cgrp) - return -ENOENT; + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); css = cgroup_get_e_css(cgrp, &io_cgrp_subsys); if (!css) { ret = -ENOENT; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4d143729b246..30ee8ce2665e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -750,11 +750,6 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) {} - -static inline struct cgroup *cgroup_get_from_id(u64 id) -{ - return NULL; -} #endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 3229f3558766..a8780ef3b769 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6005,7 +6005,7 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) /* * cgroup_get_from_id : get the cgroup associated with cgroup id * @id: cgroup id - * On success return the cgrp, on failure return NULL + * On success return the cgrp or ERR_PTR on failure * Only cgroups within current task's cgroup NS are valid. */ struct cgroup *cgroup_get_from_id(u64 id) @@ -6037,7 +6037,7 @@ struct cgroup *cgroup_get_from_id(u64 id) cgrp = NULL; } out: - return cgrp; + return cgrp ?: ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(cgroup_get_from_id); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b69979c9ced5..86f5ca8c6fa6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5110,8 +5110,8 @@ struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino) struct mem_cgroup *memcg; cgrp = cgroup_get_from_id(ino); - if (!cgrp) - return ERR_PTR(-ENOENT); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); css = cgroup_get_e_css(cgrp, &memory_cgrp_subsys); if (css) -- cgit v1.2.3 From 0083d27b21dd2a598df8275b98f89ced532e2e53 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 6 Sep 2022 09:38:42 -1000 Subject: cgroup: Improve cftype add/rm error handling Let's track whether a cftype is currently added or not using a new flag __CFTYPE_ADDED so that duplicate operations can be failed safely and consistently allow using empty cftypes. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 1 + kernel/cgroup/cgroup.c | 27 ++++++++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1283993d7ea8..9fa1652d0493 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -131,6 +131,7 @@ enum { /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ + __CFTYPE_ADDED = (1 << 18), }; /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e0b72eb5d283..bd9fe6e88320 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4198,19 +4198,26 @@ static void cgroup_exit_cftypes(struct cftype *cfts) cft->ss = NULL; /* revert flags set by cgroup core while adding @cfts */ - cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL); + cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL | + __CFTYPE_ADDED); } } static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { struct cftype *cft; + int ret = 0; for (cft = cfts; cft->name[0] != '\0'; cft++) { struct kernfs_ops *kf_ops; WARN_ON(cft->ss || cft->kf_ops); + if (cft->flags & __CFTYPE_ADDED) { + ret = -EBUSY; + break; + } + if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) continue; @@ -4226,26 +4233,26 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) { kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL); if (!kf_ops) { - cgroup_exit_cftypes(cfts); - return -ENOMEM; + ret = -ENOMEM; + break; } kf_ops->atomic_write_len = cft->max_write_len; } cft->kf_ops = kf_ops; cft->ss = ss; + cft->flags |= __CFTYPE_ADDED; } - return 0; + if (ret) + cgroup_exit_cftypes(cfts); + return ret; } static int cgroup_rm_cftypes_locked(struct cftype *cfts) { lockdep_assert_held(&cgroup_mutex); - if (!cfts || !cfts[0].ss) - return -ENOENT; - list_del(&cfts->node); cgroup_apply_cftypes(cfts, false); cgroup_exit_cftypes(cfts); @@ -4267,6 +4274,12 @@ int cgroup_rm_cftypes(struct cftype *cfts) { int ret; + if (!cfts || cfts[0].name[0] == '\0') + return 0; + + if (!(cfts[0].flags & __CFTYPE_ADDED)) + return -ENOENT; + mutex_lock(&cgroup_mutex); ret = cgroup_rm_cftypes_locked(cfts); mutex_unlock(&cgroup_mutex); -- cgit v1.2.3 From 8a693f7766f9e27c390c5fec8a5db1f9d1d8177e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 6 Sep 2022 09:38:55 -1000 Subject: cgroup: Remove CFTYPE_PRESSURE CFTYPE_PRESSURE is used to flag PSI related files so that they are not created if PSI is disabled during boot. It's a bit weird to use a generic flag to mark a specific file type. Let's instead move the PSI files into its own cftypes array and add/rm them conditionally. This is a bit more code but cleaner. No userland visible changes. Signed-off-by: Tejun Heo Cc: Johannes Weiner --- include/linux/cgroup-defs.h | 1 - kernel/cgroup/cgroup.c | 64 ++++++++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 9fa1652d0493..8f481d1b159a 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -126,7 +126,6 @@ enum { CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */ - CFTYPE_PRESSURE = (1 << 6), /* only if pressure feature is enabled */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index bd9fe6e88320..e24015877d3c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -217,6 +217,7 @@ struct cgroup_namespace init_cgroup_ns = { static struct file_system_type cgroup2_fs_type; static struct cftype cgroup_base_files[]; +static struct cftype cgroup_psi_files[]; /* cgroup optional features */ enum cgroup_opt_features { @@ -1689,12 +1690,16 @@ static void css_clear_dir(struct cgroup_subsys_state *css) css->flags &= ~CSS_VISIBLE; if (!css->ss) { - if (cgroup_on_dfl(cgrp)) - cfts = cgroup_base_files; - else - cfts = cgroup1_base_files; - - cgroup_addrm_files(css, cgrp, cfts, false); + if (cgroup_on_dfl(cgrp)) { + cgroup_addrm_files(css, cgrp, + cgroup_base_files, false); + if (cgroup_psi_enabled()) + cgroup_addrm_files(css, cgrp, + cgroup_psi_files, false); + } else { + cgroup_addrm_files(css, cgrp, + cgroup1_base_files, false); + } } else { list_for_each_entry(cfts, &css->ss->cfts, node) cgroup_addrm_files(css, cgrp, cfts, false); @@ -1717,14 +1722,22 @@ static int css_populate_dir(struct cgroup_subsys_state *css) return 0; if (!css->ss) { - if (cgroup_on_dfl(cgrp)) - cfts = cgroup_base_files; - else - cfts = cgroup1_base_files; - - ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true); - if (ret < 0) - return ret; + if (cgroup_on_dfl(cgrp)) { + ret = cgroup_addrm_files(&cgrp->self, cgrp, + cgroup_base_files, true); + if (ret < 0) + return ret; + + if (cgroup_psi_enabled()) { + ret = cgroup_addrm_files(&cgrp->self, cgrp, + cgroup_psi_files, true); + if (ret < 0) + return ret; + } + } else { + cgroup_addrm_files(css, cgrp, + cgroup1_base_files, true); + } } else { list_for_each_entry(cfts, &css->ss->cfts, node) { ret = cgroup_addrm_files(css, cgrp, cfts, true); @@ -4132,8 +4145,6 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css, restart: for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) { /* does cft->flags tell us to skip this file on @cgrp? */ - if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) - continue; if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp)) @@ -4218,9 +4229,6 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) break; } - if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) - continue; - if (cft->seq_start) kf_ops = &cgroup_kf_ops; else @@ -5144,10 +5152,13 @@ static struct cftype cgroup_base_files[] = { .name = "cpu.stat", .seq_show = cpu_stat_show, }, + { } /* terminate */ +}; + +static struct cftype cgroup_psi_files[] = { #ifdef CONFIG_PSI { .name = "io.pressure", - .flags = CFTYPE_PRESSURE, .seq_show = cgroup_io_pressure_show, .write = cgroup_io_pressure_write, .poll = cgroup_pressure_poll, @@ -5155,7 +5166,6 @@ static struct cftype cgroup_base_files[] = { }, { .name = "memory.pressure", - .flags = CFTYPE_PRESSURE, .seq_show = cgroup_memory_pressure_show, .write = cgroup_memory_pressure_write, .poll = cgroup_pressure_poll, @@ -5163,7 +5173,6 @@ static struct cftype cgroup_base_files[] = { }, { .name = "cpu.pressure", - .flags = CFTYPE_PRESSURE, .seq_show = cgroup_cpu_pressure_show, .write = cgroup_cpu_pressure_write, .poll = cgroup_pressure_poll, @@ -5930,6 +5939,7 @@ int __init cgroup_init(void) BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); + BUG_ON(cgroup_init_cftypes(NULL, cgroup_psi_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); cgroup_rstat_boot(); @@ -6821,9 +6831,6 @@ static ssize_t show_delegatable_files(struct cftype *files, char *buf, if (!(cft->flags & CFTYPE_NS_DELEGATABLE)) continue; - if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled()) - continue; - if (prefix) ret += snprintf(buf + ret, size - ret, "%s.", prefix); @@ -6843,8 +6850,11 @@ static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr, int ssid; ssize_t ret = 0; - ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret, - NULL); + ret = show_delegatable_files(cgroup_base_files, buf + ret, + PAGE_SIZE - ret, NULL); + if (cgroup_psi_enabled()) + ret += show_delegatable_files(cgroup_psi_files, buf + ret, + PAGE_SIZE - ret, NULL); for_each_subsys(ss, ssid) ret += show_delegatable_files(ss->dfl_cftypes, buf + ret, -- cgit v1.2.3