From 7e47682ea555e7c1edef1d8fd96e2aa4c12abe59 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 9 Jun 2015 21:32:09 +1000 Subject: cgroup: allow a cgroup subsystem to reject a fork Add a new cgroup subsystem callback can_fork that conditionally states whether or not the fork is accepted or rejected by a cgroup policy. In addition, add a cancel_fork callback so that if an error occurs later in the forking process, any state modified by can_fork can be reverted. Allow for a private opaque pointer to be passed from cgroup_can_fork to cgroup_post_fork, allowing for the fork state to be stored by each subsystem separately. Also add a tagging system for cgroup_subsys.h to allow for CGROUP_ enumerations to be be defined and used. In addition, explicitly add a CGROUP_CANFORK_COUNT macro to make arrays easier to define. This is in preparation for implementing the pids cgroup subsystem. Signed-off-by: Aleksa Sarai Signed-off-by: Tejun Heo --- kernel/cgroup.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++-- kernel/cgroup_freezer.c | 2 +- kernel/fork.c | 17 ++++++++++-- kernel/sched/core.c | 2 +- 4 files changed, 88 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f89d9292eee6..a59dd1a6b74a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -186,6 +186,9 @@ static u64 css_serial_nr_next = 1; static unsigned long have_fork_callback __read_mostly; static unsigned long have_exit_callback __read_mostly; +/* Ditto for the can_fork callback. */ +static unsigned long have_canfork_callback __read_mostly; + static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; @@ -4955,6 +4958,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) have_fork_callback |= (bool)ss->fork << ss->id; have_exit_callback |= (bool)ss->exit << ss->id; + have_canfork_callback |= (bool)ss->can_fork << ss->id; /* At system boot, before all subsystems have been * registered, no tasks have been forked, so we don't @@ -5197,6 +5201,19 @@ static const struct file_operations proc_cgroupstats_operations = { .release = single_release, }; +static void **subsys_canfork_priv_p(void *ss_priv[CGROUP_CANFORK_COUNT], int i) +{ + if (CGROUP_CANFORK_START <= i && i < CGROUP_CANFORK_END) + return &ss_priv[i - CGROUP_CANFORK_START]; + return NULL; +} + +static void *subsys_canfork_priv(void *ss_priv[CGROUP_CANFORK_COUNT], int i) +{ + void **private = subsys_canfork_priv_p(ss_priv, i); + return private ? *private : NULL; +} + /** * cgroup_fork - initialize cgroup related fields during copy_process() * @child: pointer to task_struct of forking parent process. @@ -5211,6 +5228,57 @@ void cgroup_fork(struct task_struct *child) INIT_LIST_HEAD(&child->cg_list); } +/** + * cgroup_can_fork - called on a new task before the process is exposed + * @child: the task in question. + * + * This calls the subsystem can_fork() callbacks. If the can_fork() callback + * returns an error, the fork aborts with that error code. This allows for + * a cgroup subsystem to conditionally allow or deny new forks. + */ +int cgroup_can_fork(struct task_struct *child, + void *ss_priv[CGROUP_CANFORK_COUNT]) +{ + struct cgroup_subsys *ss; + int i, j, ret; + + for_each_subsys_which(ss, i, &have_canfork_callback) { + ret = ss->can_fork(child, subsys_canfork_priv_p(ss_priv, i)); + if (ret) + goto out_revert; + } + + return 0; + +out_revert: + for_each_subsys(ss, j) { + if (j >= i) + break; + if (ss->cancel_fork) + ss->cancel_fork(child, subsys_canfork_priv(ss_priv, j)); + } + + return ret; +} + +/** + * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork() + * @child: the task in question + * + * This calls the cancel_fork() callbacks if a fork failed *after* + * cgroup_can_fork() succeded. + */ +void cgroup_cancel_fork(struct task_struct *child, + void *ss_priv[CGROUP_CANFORK_COUNT]) +{ + struct cgroup_subsys *ss; + int i; + + for_each_subsys(ss, i) + if (ss->cancel_fork) + ss->cancel_fork(child, subsys_canfork_priv(ss_priv, i)); +} + /** * cgroup_post_fork - called on a new task after adding it to the task list * @child: the task in question @@ -5221,7 +5289,8 @@ void cgroup_fork(struct task_struct *child) * cgroup_task_iter_start() - to guarantee that the new task ends up on its * list. */ -void cgroup_post_fork(struct task_struct *child) +void cgroup_post_fork(struct task_struct *child, + void *old_ss_priv[CGROUP_CANFORK_COUNT]) { struct cgroup_subsys *ss; int i; @@ -5266,7 +5335,7 @@ void cgroup_post_fork(struct task_struct *child) * and addition to css_set. */ for_each_subsys_which(ss, i, &have_fork_callback) - ss->fork(child); + ss->fork(child, subsys_canfork_priv(old_ss_priv, i)); } /** diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 92b98cc0ee76..f1b30ad5dc6d 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -203,7 +203,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, * to do anything as freezer_attach() will put @task into the appropriate * state. */ -static void freezer_fork(struct task_struct *task) +static void freezer_fork(struct task_struct *task, void *private) { struct freezer *freezer; diff --git a/kernel/fork.c b/kernel/fork.c index 1bfefc6f96a4..40e3af12c55e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1239,6 +1239,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, { int retval; struct task_struct *p; + void *cgrp_ss_priv[CGROUP_CANFORK_COUNT] = {}; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -1512,6 +1513,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; + /* + * Ensure that the cgroup subsystem policies allow the new process to be + * forked. It should be noted the the new process's css_set can be changed + * between here and cgroup_post_fork() if an organisation operation is in + * progress. + */ + retval = cgroup_can_fork(p, cgrp_ss_priv); + if (retval) + goto bad_fork_free_pid; + /* * Make it visible to the rest of the system, but dont wake it up yet. * Need tasklist lock for parent etc handling! @@ -1548,7 +1559,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; - goto bad_fork_free_pid; + goto bad_fork_cancel_cgroup; } if (likely(p->pid)) { @@ -1590,7 +1601,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); - cgroup_post_fork(p); + cgroup_post_fork(p, cgrp_ss_priv); if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); perf_event_fork(p); @@ -1600,6 +1611,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, return p; +bad_fork_cancel_cgroup: + cgroup_cancel_fork(p, cgrp_ss_priv); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 78b4bad10081..d811652fe6f5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8068,7 +8068,7 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css) sched_offline_group(tg); } -static void cpu_cgroup_fork(struct task_struct *task) +static void cpu_cgroup_fork(struct task_struct *task, void *private) { sched_move_task(task); } -- cgit v1.2.3