diff options
Diffstat (limited to 'kernel/cgroup')
-rw-r--r-- | kernel/cgroup/cgroup-internal.h | 4 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-v1.c | 38 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 81 | ||||
-rw-r--r-- | kernel/cgroup/rstat.c | 2 |
4 files changed, 66 insertions, 59 deletions
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 90d1710fef6c..bfbeabc17a9d 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -7,7 +7,7 @@ #include <linux/workqueue.h> #include <linux/list.h> #include <linux/refcount.h> -#include <linux/fs_context.h> +#include <linux/fs_parser.h> #define TRACE_CGROUP_PATH_LEN 1024 extern spinlock_t trace_cgroup_path_lock; @@ -265,7 +265,7 @@ extern const struct proc_ns_operations cgroupns_operations; */ extern struct cftype cgroup1_base_files[]; extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops; -extern const struct fs_parameter_description cgroup1_fs_parameters; +extern const struct fs_parameter_spec cgroup1_fs_parameters[]; int proc_cgroupstats_show(struct seq_file *m, void *v); bool cgroup1_ssid_disabled(int ssid); diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 09f3a413f6f8..f2d7cea86ffe 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -18,8 +18,6 @@ #include <trace/events/cgroup.h> -#define cg_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) - /* * pidlists linger the following amount before being destroyed. The goal * is avoiding frequent destruction in the middle of consecutive read calls @@ -473,6 +471,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) */ p++; if (p >= end) { + (*pos)++; return NULL; } else { *pos = *p; @@ -784,7 +783,7 @@ void cgroup1_release_agent(struct work_struct *work) pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); - if (!pathbuf || !agentbuf) + if (!pathbuf || !agentbuf || !strlen(agentbuf)) goto out; spin_lock_irq(&css_set_lock); @@ -887,7 +886,7 @@ enum cgroup1_param { Opt_xattr, }; -static const struct fs_parameter_spec cgroup1_param_specs[] = { +const struct fs_parameter_spec cgroup1_fs_parameters[] = { fsparam_flag ("all", Opt_all), fsparam_flag ("clone_children", Opt_clone_children), fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode), @@ -899,11 +898,6 @@ static const struct fs_parameter_spec cgroup1_param_specs[] = { {} }; -const struct fs_parameter_description cgroup1_fs_parameters = { - .name = "cgroup1", - .specs = cgroup1_param_specs, -}; - int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); @@ -911,7 +905,7 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) struct fs_parse_result result; int opt, i; - opt = fs_parse(fc, &cgroup1_fs_parameters, param, &result); + opt = fs_parse(fc, cgroup1_fs_parameters, param, &result); if (opt == -ENOPARAM) { if (strcmp(param->key, "source") == 0) { fc->source = param->string; @@ -924,7 +918,7 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->subsys_mask |= (1 << i); return 0; } - return cg_invalf(fc, "cgroup1: Unknown subsys name '%s'", param->key); + return invalfc(fc, "Unknown subsys name '%s'", param->key); } if (opt < 0) return opt; @@ -952,7 +946,7 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_release_agent: /* Specifying two release agents is forbidden */ if (ctx->release_agent) - return cg_invalf(fc, "cgroup1: release_agent respecified"); + return invalfc(fc, "release_agent respecified"); ctx->release_agent = param->string; param->string = NULL; break; @@ -962,9 +956,9 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) return -ENOENT; /* Can't specify an empty name */ if (!param->size) - return cg_invalf(fc, "cgroup1: Empty name"); + return invalfc(fc, "Empty name"); if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1) - return cg_invalf(fc, "cgroup1: Name too long"); + return invalfc(fc, "Name too long"); /* Must match [\w.-]+ */ for (i = 0; i < param->size; i++) { char c = param->string[i]; @@ -972,11 +966,11 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) continue; if ((c == '.') || (c == '-') || (c == '_')) continue; - return cg_invalf(fc, "cgroup1: Invalid name"); + return invalfc(fc, "Invalid name"); } /* Specifying two names is forbidden */ if (ctx->name) - return cg_invalf(fc, "cgroup1: name respecified"); + return invalfc(fc, "name respecified"); ctx->name = param->string; param->string = NULL; break; @@ -1011,7 +1005,7 @@ static int check_cgroupfs_options(struct fs_context *fc) if (ctx->all_ss) { /* Mutually exclusive option 'all' + subsystem name */ if (ctx->subsys_mask) - return cg_invalf(fc, "cgroup1: subsys name conflicts with all"); + return invalfc(fc, "subsys name conflicts with all"); /* 'all' => select all the subsystems */ ctx->subsys_mask = enabled; } @@ -1021,7 +1015,7 @@ static int check_cgroupfs_options(struct fs_context *fc) * empty hierarchies must have a name). */ if (!ctx->subsys_mask && !ctx->name) - return cg_invalf(fc, "cgroup1: Need name or subsystem set"); + return invalfc(fc, "Need name or subsystem set"); /* * Option noprefix was introduced just for backward compatibility @@ -1029,11 +1023,11 @@ static int check_cgroupfs_options(struct fs_context *fc) * the cpuset subsystem. */ if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask)) - return cg_invalf(fc, "cgroup1: noprefix used incorrectly"); + return invalfc(fc, "noprefix used incorrectly"); /* Can't specify "none" and some subsystems */ if (ctx->subsys_mask && ctx->none) - return cg_invalf(fc, "cgroup1: none used incorrectly"); + return invalfc(fc, "none used incorrectly"); return 0; } @@ -1063,7 +1057,7 @@ int cgroup1_reconfigure(struct fs_context *fc) /* Don't allow flags or name to change at remount */ if ((ctx->flags ^ root->flags) || (ctx->name && strcmp(ctx->name, root->name))) { - cg_invalf(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"", + errorfc(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"", ctx->flags, ctx->name ?: "", root->flags, root->name); ret = -EINVAL; goto out_unlock; @@ -1180,7 +1174,7 @@ static int cgroup1_root_to_use(struct fs_context *fc) * can't create new one without subsys specification. */ if (!ctx->subsys_mask && !ctx->none) - return cg_invalf(fc, "cgroup1: No subsys list or none specified"); + return invalfc(fc, "No subsys list or none specified"); /* Hierarchies may only be created in the initial cgroup namespace. */ if (ctx->ns != &init_cgroup_ns) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 735af8f15f95..7a39dc882095 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1816,24 +1816,19 @@ enum cgroup2_param { nr__cgroup2_params }; -static const struct fs_parameter_spec cgroup2_param_specs[] = { +static const struct fs_parameter_spec cgroup2_fs_parameters[] = { fsparam_flag("nsdelegate", Opt_nsdelegate), fsparam_flag("memory_localevents", Opt_memory_localevents), {} }; -static const struct fs_parameter_description cgroup2_fs_parameters = { - .name = "cgroup2", - .specs = cgroup2_param_specs, -}; - static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); struct fs_parse_result result; int opt; - opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result); + opt = fs_parse(fc, cgroup2_fs_parameters, param, &result); if (opt < 0) return opt; @@ -2156,7 +2151,7 @@ static void cgroup_kill_sb(struct super_block *sb) struct file_system_type cgroup_fs_type = { .name = "cgroup", .init_fs_context = cgroup_init_fs_context, - .parameters = &cgroup1_fs_parameters, + .parameters = cgroup1_fs_parameters, .kill_sb = cgroup_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; @@ -2164,7 +2159,7 @@ struct file_system_type cgroup_fs_type = { static struct file_system_type cgroup2_fs_type = { .name = "cgroup2", .init_fs_context = cgroup_init_fs_context, - .parameters = &cgroup2_fs_parameters, + .parameters = cgroup2_fs_parameters, .kill_sb = cgroup_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; @@ -3055,8 +3050,6 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); - WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt)); - if (!(cgroup_ss_mask(dsct) & (1 << ss->id))) continue; @@ -3066,6 +3059,8 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) return PTR_ERR(css); } + WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt)); + if (css_visible(css)) { ret = css_populate_dir(css); if (ret) @@ -3101,11 +3096,11 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); - WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt)); - if (!css) continue; + WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt)); + if (css->parent && !(cgroup_ss_mask(dsct) & (1 << ss->id))) { kill_css(css); @@ -3392,7 +3387,8 @@ static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf, if (strcmp(strstrip(buf), "threaded")) return -EINVAL; - cgrp = cgroup_kn_lock_live(of->kn, false); + /* drain dying csses before we re-apply (threaded) subtree control */ + cgrp = cgroup_kn_lock_live(of->kn, true); if (!cgrp) return -ENOENT; @@ -3546,21 +3542,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v) static int cgroup_io_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_IO); } static int cgroup_memory_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_MEM); } static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_CPU); } @@ -4404,12 +4400,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) } } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks)); - if (!list_empty(&cset->tasks)) + if (!list_empty(&cset->tasks)) { it->task_pos = cset->tasks.next; - else if (!list_empty(&cset->mg_tasks)) + it->cur_tasks_head = &cset->tasks; + } else if (!list_empty(&cset->mg_tasks)) { it->task_pos = cset->mg_tasks.next; - else + it->cur_tasks_head = &cset->mg_tasks; + } else { it->task_pos = cset->dying_tasks.next; + it->cur_tasks_head = &cset->dying_tasks; + } it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; @@ -4467,10 +4467,14 @@ repeat: else it->task_pos = it->task_pos->next; - if (it->task_pos == it->tasks_head) + if (it->task_pos == it->tasks_head) { it->task_pos = it->mg_tasks_head->next; - if (it->task_pos == it->mg_tasks_head) + it->cur_tasks_head = it->mg_tasks_head; + } + if (it->task_pos == it->mg_tasks_head) { it->task_pos = it->dying_tasks_head->next; + it->cur_tasks_head = it->dying_tasks_head; + } if (it->task_pos == it->dying_tasks_head) css_task_iter_advance_css_set(it); } else { @@ -4489,11 +4493,12 @@ repeat: goto repeat; /* and dying leaders w/o live member threads */ - if (!atomic_read(&task->signal->live)) + if (it->cur_tasks_head == it->dying_tasks_head && + !atomic_read(&task->signal->live)) goto repeat; } else { /* skip all dying ones */ - if (task->flags & PF_EXITING) + if (it->cur_tasks_head == it->dying_tasks_head) goto repeat; } } @@ -4599,6 +4604,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) struct kernfs_open_file *of = s->private; struct css_task_iter *it = of->priv; + if (pos) + (*pos)++; + return css_task_iter_next(it); } @@ -4614,7 +4622,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, * from position 0, so we can simply keep iterating on !0 *pos. */ if (!it) { - if (WARN_ON_ONCE((*pos)++)) + if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); it = kzalloc(sizeof(*it), GFP_KERNEL); @@ -4622,10 +4630,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, return ERR_PTR(-ENOMEM); of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); - } else if (!(*pos)++) { + } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); - } + } else + return it->cur_task; return cgroup_procs_next(s, NULL, NULL); } @@ -5931,11 +5940,14 @@ void cgroup_post_fork(struct task_struct *child) spin_lock_irq(&css_set_lock); - WARN_ON_ONCE(!list_empty(&child->cg_list)); - cset = task_css_set(current); /* current is @child's parent */ - get_css_set(cset); - cset->nr_tasks++; - css_set_move_task(child, NULL, cset, false); + /* init tasks are special, only link regular threads */ + if (likely(child->pid)) { + WARN_ON_ONCE(!list_empty(&child->cg_list)); + cset = task_css_set(current); /* current is @child's parent */ + get_css_set(cset); + cset->nr_tasks++; + css_set_move_task(child, NULL, cset, false); + } /* * If the cgroup has to be frozen, the new task has too. Let's set @@ -6288,12 +6300,13 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) #ifdef CONFIG_CGROUP_BPF int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags) + struct bpf_prog *replace_prog, enum bpf_attach_type type, + u32 flags) { int ret; mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_attach(cgrp, prog, type, flags); + ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, type, flags); mutex_unlock(&cgroup_mutex); return ret; } diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index b48b22d4deb6..6f87352f8219 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -33,7 +33,7 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) return; /* - * Paired with the one in cgroup_rstat_cpu_pop_upated(). Either we + * Paired with the one in cgroup_rstat_cpu_pop_updated(). Either we * see NULL updated_next or they see our updated stat. */ smp_mb(); |