diff options
author | Paul Menage <menage@google.com> | 2007-10-18 23:39:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-19 11:53:36 -0700 |
commit | 817929ec274bcfe771586d338bb31d1659615686 (patch) | |
tree | 5a96ed1afd308016e8720437a00bf2f114e907cb /include | |
parent | a424316ca154317367c7ddf89997d1c80e4a8051 (diff) | |
download | linux-817929ec274bcfe771586d338bb31d1659615686.tar.bz2 |
Task Control Groups: shared cgroup subsystem group arrays
Replace the struct css_set embedded in task_struct with a pointer; all tasks
that have the same set of memberships across all hierarchies will share a
css_set object, and will be linked via their cg_list field to the "tasks"
list_head in the css_set.
Assuming that many tasks share the same cgroup assignments, this reduces
overall space usage and keeps the size of the task_struct down (three pointers
added to task_struct compared to a non-cgroups kernel, no matter how many
subsystems are registered).
[akpm@linux-foundation.org: fix a printk]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Paul Menage <menage@google.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/cgroup.h | 89 | ||||
-rw-r--r-- | include/linux/sched.h | 33 |
2 files changed, 83 insertions, 39 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a9553568118f..836b3557bb76 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -27,10 +27,19 @@ extern void cgroup_lock(void); extern void cgroup_unlock(void); extern void cgroup_fork(struct task_struct *p); extern void cgroup_fork_callbacks(struct task_struct *p); +extern void cgroup_post_fork(struct task_struct *p); extern void cgroup_exit(struct task_struct *p, int run_callbacks); extern struct file_operations proc_cgroup_operations; +/* Define the enumeration of all cgroup subsystems */ +#define SUBSYS(_x) _x ## _subsys_id, +enum cgroup_subsys_id { +#include <linux/cgroup_subsys.h> + CGROUP_SUBSYS_COUNT +}; +#undef SUBSYS + /* Per-subsystem/per-cgroup state maintained by the system. */ struct cgroup_subsys_state { /* The cgroup that this subsystem is attached to. Useful @@ -97,6 +106,52 @@ struct cgroup { struct cgroupfs_root *root; struct cgroup *top_cgroup; + + /* + * List of cg_cgroup_links pointing at css_sets with + * tasks in this cgroup. Protected by css_set_lock + */ + struct list_head css_sets; +}; + +/* A css_set is a structure holding pointers to a set of + * cgroup_subsys_state objects. This saves space in the task struct + * object and speeds up fork()/exit(), since a single inc/dec and a + * list_add()/del() can bump the reference count on the entire + * cgroup set for a task. + */ + +struct css_set { + + /* Reference count */ + struct kref ref; + + /* + * List running through all cgroup groups. Protected by + * css_set_lock + */ + struct list_head list; + + /* + * List running through all tasks using this cgroup + * group. Protected by css_set_lock + */ + struct list_head tasks; + + /* + * List of cg_cgroup_link objects on link chains from + * cgroups referenced from this css_set. Protected by + * css_set_lock + */ + struct list_head cg_links; + + /* + * Set of subsystem states, one for each subsystem. 
This array + * is immutable after creation apart from the init_css_set + * during subsystem registration (at boot time). + */ + struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + }; /* struct cftype: @@ -157,15 +212,7 @@ int cgroup_is_removed(const struct cgroup *cont); int cgroup_path(const struct cgroup *cont, char *buf, int buflen); -int __cgroup_task_count(const struct cgroup *cont); -static inline int cgroup_task_count(const struct cgroup *cont) -{ - int task_count; - rcu_read_lock(); - task_count = __cgroup_task_count(cont); - rcu_read_unlock(); - return task_count; -} +int cgroup_task_count(const struct cgroup *cont); /* Return true if the cgroup is a descendant of the current cgroup */ int cgroup_is_descendant(const struct cgroup *cont); @@ -213,7 +260,7 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state( static inline struct cgroup_subsys_state *task_subsys_state( struct task_struct *task, int subsys_id) { - return rcu_dereference(task->cgroups.subsys[subsys_id]); + return rcu_dereference(task->cgroups->subsys[subsys_id]); } static inline struct cgroup* task_cgroup(struct task_struct *task, @@ -226,6 +273,27 @@ int cgroup_path(const struct cgroup *cont, char *buf, int buflen); int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss); +/* A cgroup_iter should be treated as an opaque object */ +struct cgroup_iter { + struct list_head *cg_link; + struct list_head *task; +}; + +/* To iterate across the tasks in a cgroup: + * + * 1) call cgroup_iter_start to initialize an iterator + * + * 2) call cgroup_iter_next() to retrieve member tasks until it + * returns NULL or until you want to end the iteration + * + * 3) call cgroup_iter_end() to destroy the iterator.
+ */ +void cgroup_iter_start(struct cgroup *cont, struct cgroup_iter *it); +struct task_struct *cgroup_iter_next(struct cgroup *cont, + struct cgroup_iter *it); +void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it); + + #else /* !CONFIG_CGROUPS */ static inline int cgroup_init_early(void) { return 0; } @@ -233,6 +301,7 @@ static inline int cgroup_init(void) { return 0; } static inline void cgroup_init_smp(void) {} static inline void cgroup_fork(struct task_struct *p) {} static inline void cgroup_fork_callbacks(struct task_struct *p) {} +static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_exit(struct task_struct *p, int callbacks) {} static inline void cgroup_lock(void) {} diff --git a/include/linux/sched.h b/include/linux/sched.h index af2ed4bae678..1aa1cfa63b37 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -894,34 +894,6 @@ struct sched_entity { #endif }; -#ifdef CONFIG_CGROUPS - -#define SUBSYS(_x) _x ## _subsys_id, -enum cgroup_subsys_id { -#include <linux/cgroup_subsys.h> - CGROUP_SUBSYS_COUNT -}; -#undef SUBSYS - -/* A css_set is a structure holding pointers to a set of - * cgroup_subsys_state objects. - */ - -struct css_set { - - /* Set of subsystem states, one for each subsystem. NULL for - * subsystems that aren't part of this hierarchy. These - * pointers reduce the number of dereferences required to get - * from a task to its state for a given cgroup, but result - * in increased space usage if tasks are in wildly different - * groupings across different hierarchies. 
This array is - * immutable after creation */ - struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; - -}; - -#endif /* CONFIG_CGROUPS */ - struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1159,7 +1131,10 @@ struct task_struct { int cpuset_mem_spread_rotor; #endif #ifdef CONFIG_CGROUPS - struct css_set cgroups; + /* Control Group info protected by css_set_lock */ + struct css_set *cgroups; + /* cg_list protected by css_set_lock and tsk->alloc_lock */ + struct list_head cg_list; #endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; |