diff options
-rw-r--r-- | fs/proc/base.c | 39 | ||||
-rw-r--r-- | include/linux/cgroup.h | 26 | ||||
-rw-r--r-- | include/linux/cpuset.h | 3 | ||||
-rw-r--r-- | kernel/cgroup.c | 184 | ||||
-rw-r--r-- | kernel/cpuset.c | 15 | ||||
-rw-r--r-- | kernel/events/core.c | 7 | ||||
-rw-r--r-- | mm/memory-failure.c | 2 |
7 files changed, 71 insertions, 205 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 4c542b907754..950100e326a1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -376,37 +376,6 @@ static const struct file_operations proc_lstats_operations = { #endif -#ifdef CONFIG_CGROUPS -static int cgroup_open(struct inode *inode, struct file *file) -{ - struct pid *pid = PROC_I(inode)->pid; - return single_open(file, proc_cgroup_show, pid); -} - -static const struct file_operations proc_cgroup_operations = { - .open = cgroup_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - -#ifdef CONFIG_PROC_PID_CPUSET - -static int cpuset_open(struct inode *inode, struct file *file) -{ - struct pid *pid = PROC_I(inode)->pid; - return single_open(file, proc_cpuset_show, pid); -} - -static const struct file_operations proc_cpuset_operations = { - .open = cpuset_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -2579,10 +2548,10 @@ static const struct pid_entry tgid_base_stuff[] = { REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, proc_cpuset_operations), + ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, proc_cgroup_operations), + ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), @@ -2925,10 +2894,10 @@ static const struct pid_entry tid_base_stuff[] = { REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, proc_cpuset_operations), + ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, proc_cgroup_operations), + ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b5223c570eba..1d5196889048 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -27,7 +27,6 @@ struct cgroup_root; struct cgroup_subsys; -struct inode; struct cgroup; extern int cgroup_init_early(void); @@ -38,7 +37,8 @@ extern void cgroup_exit(struct task_struct *p); extern int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); -extern int proc_cgroup_show(struct seq_file *, void *); +extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk); /* define the enumeration of all cgroup subsystems */ #define SUBSYS(_x) _x ## _cgrp_id, @@ -161,11 +161,6 @@ static inline void css_put(struct cgroup_subsys_state *css) /* bits in struct cgroup flags field */ enum { - /* - * Control Group has previously had a child cgroup or a task, - * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) - */ - CGRP_RELEASABLE, /* Control Group requires release notifications to userspace */ CGRP_NOTIFY_ON_RELEASE, /* @@ -235,13 +230,6 @@ struct cgroup { struct list_head e_csets[CGROUP_SUBSYS_COUNT]; /* - * Linked list running through all cgroups that can - * potentially be reaped by the release agent. Protected by - * release_list_lock - */ - struct list_head release_list; - - /* * list of pidlists, up to two for each namespace (one for procs, one * for tasks); created on demand. */ @@ -250,6 +238,9 @@ struct cgroup { /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; + + /* used to schedule release agent */ + struct work_struct release_agent_work; }; #define MAX_CGROUP_ROOT_NAMELEN 64 @@ -536,13 +527,10 @@ static inline bool cgroup_has_tasks(struct cgroup *cgrp) return !list_empty(&cgrp->cset_links); } -/* returns ino associated with a cgroup, 0 indicates unmounted root */ +/* returns ino associated with a cgroup */ static inline ino_t cgroup_ino(struct cgroup *cgrp) { - if (cgrp->kn) - return cgrp->kn->ino; - else - return 0; + return cgrp->kn->ino; } /* cft/css accessors for cftype->write() operation */ diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 6e39c9bb0dae..2f073db7392e 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -86,7 +86,8 @@ extern void __cpuset_memory_pressure_bump(void); extern void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task); -extern int proc_cpuset_show(struct seq_file *, void *); +extern int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk); extern int cpuset_mem_spread_node(void); extern int cpuset_slab_spread_node(void); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3a73f995a81e..cab7dc4284dc 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -185,7 +185,6 @@ static int need_forkexit_callback __read_mostly; static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; -static void cgroup_put(struct cgroup *cgrp); static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask); static int cgroup_destroy_locked(struct cgroup *cgrp); @@ -195,7 +194,6 @@ static void css_release(struct percpu_ref *ref); static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], bool is_add); -static void cgroup_pidlist_destroy_all(struct cgroup *cgrp); /* IDR wrappers which synchronize using cgroup_idr_lock */ static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end, @@ -331,14 +329,6 @@ bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor) return false; } -static int cgroup_is_releasable(const struct cgroup *cgrp) -{ - const int bits = - (1 << CGRP_RELEASABLE) | - (1 << CGRP_NOTIFY_ON_RELEASE); - return (cgrp->flags & bits) == bits; -} - static int notify_on_release(const struct cgroup *cgrp) { return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); @@ -394,12 +384,7 @@ static int notify_on_release(const struct cgroup *cgrp) ; \ else -/* the list of cgroups eligible for automatic release. Protected by - * release_list_lock */ -static LIST_HEAD(release_list); -static DEFINE_RAW_SPINLOCK(release_list_lock); static void cgroup_release_agent(struct work_struct *work); -static DECLARE_WORK(release_agent_work, cgroup_release_agent); static void check_for_release(struct cgroup *cgrp); /* @@ -498,7 +483,7 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) return key; } -static void put_css_set_locked(struct css_set *cset, bool taskexit) +static void put_css_set_locked(struct css_set *cset) { struct cgrp_cset_link *link, *tmp_link; struct cgroup_subsys *ss; @@ -524,11 +509,7 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit) /* @cgrp can't go away while we're holding css_set_rwsem */ if (list_empty(&cgrp->cset_links)) { cgroup_update_populated(cgrp, false); - if (notify_on_release(cgrp)) { - if (taskexit) - set_bit(CGRP_RELEASABLE, &cgrp->flags); - check_for_release(cgrp); - } + check_for_release(cgrp); } kfree(link); @@ -537,7 +518,7 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit) kfree_rcu(cset, rcu_head); } -static void put_css_set(struct css_set *cset, bool taskexit) +static void put_css_set(struct css_set *cset) { /* * Ensure that the refcount doesn't hit zero while any readers @@ -548,7 +529,7 @@ static void put_css_set(struct css_set *cset, bool taskexit) return; down_write(&css_set_rwsem); - put_css_set_locked(cset, taskexit); + put_css_set_locked(cset); up_write(&css_set_rwsem); } @@ -969,14 +950,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, * knows that the cgroup won't be removed, as cgroup_rmdir() * needs that mutex. * - * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't - * (usually) take cgroup_mutex. These are the two most performance - * critical pieces of code here. The exception occurs on cgroup_exit(), - * when a task in a notify_on_release cgroup exits. Then cgroup_mutex - * is taken, and if the cgroup count is zero, a usermode call made - * to the release agent with the name of the cgroup (path relative to - * the root of cgroup file system) as the argument. - * * A cgroup can only be deleted if both its 'count' of using tasks * is zero, and its list of 'children' cgroups is empty. Since all * tasks in the system use _some_ cgroup, and since there is always at @@ -1587,7 +1560,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->self.sibling); INIT_LIST_HEAD(&cgrp->self.children); INIT_LIST_HEAD(&cgrp->cset_links); - INIT_LIST_HEAD(&cgrp->release_list); INIT_LIST_HEAD(&cgrp->pidlists); mutex_init(&cgrp->pidlist_mutex); cgrp->self.cgroup = cgrp; @@ -1597,6 +1569,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->e_csets[ssid]); init_waitqueue_head(&cgrp->offline_waitq); + INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent); } static void init_cgroup_root(struct cgroup_root *root, @@ -2052,8 +2025,7 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, * task. As trading it for new_cset is protected by cgroup_mutex, * we're safe to drop it here; it will be freed under RCU. */ - set_bit(CGRP_RELEASABLE, &old_cgrp->flags); - put_css_set_locked(old_cset, false); + put_css_set_locked(old_cset); } /** @@ -2074,7 +2046,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) cset->mg_src_cgrp = NULL; cset->mg_dst_cset = NULL; list_del_init(&cset->mg_preload_node); - put_css_set_locked(cset, false); + put_css_set_locked(cset); } up_write(&css_set_rwsem); } @@ -2168,8 +2140,8 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; list_del_init(&src_cset->mg_preload_node); - put_css_set(src_cset, false); - put_css_set(dst_cset, false); + put_css_set(src_cset); + put_css_set(dst_cset); continue; } @@ -2178,7 +2150,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, if (list_empty(&dst_cset->mg_preload_node)) list_add(&dst_cset->mg_preload_node, &csets); else - put_css_set(dst_cset, false); + put_css_set(dst_cset); } list_splice_tail(&csets, preloaded_csets); @@ -4173,7 +4145,6 @@ static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css, static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css, struct cftype *cft, u64 val) { - clear_bit(CGRP_RELEASABLE, &css->cgroup->flags); if (val) set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags); else @@ -4351,6 +4322,7 @@ static void css_free_work_fn(struct work_struct *work) /* cgroup free path */ atomic_dec(&cgrp->root->nr_cgrps); cgroup_pidlist_destroy_all(cgrp); + cancel_work_sync(&cgrp->release_agent_work); if (cgroup_parent(cgrp)) { /* @@ -4813,19 +4785,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) for_each_css(css, ssid, cgrp) kill_css(css); - /* CSS_ONLINE is clear, remove from ->release_list for the last time */ - raw_spin_lock(&release_list_lock); - if (!list_empty(&cgrp->release_list)) - list_del_init(&cgrp->release_list); - raw_spin_unlock(&release_list_lock); - /* * Remove @cgrp directory along with the base files. @cgrp has an * extra ref on its kn. */ kernfs_remove(cgrp->kn); - set_bit(CGRP_RELEASABLE, &cgroup_parent(cgrp)->flags); check_for_release(cgroup_parent(cgrp)); /* put the base reference */ @@ -4842,13 +4807,10 @@ static int cgroup_rmdir(struct kernfs_node *kn) cgrp = cgroup_kn_lock_live(kn); if (!cgrp) return 0; - cgroup_get(cgrp); /* for @kn->priv clearing */ ret = cgroup_destroy_locked(cgrp); cgroup_kn_unlock(kn); - - cgroup_put(cgrp); return ret; } @@ -5052,12 +5014,9 @@ core_initcall(cgroup_wq_init); * - Print task's cgroup paths into seq_file, one line for each hierarchy * - Used for /proc/<pid>/cgroup. */ - -/* TODO: Use a proper seq_file iterator */ -int proc_cgroup_show(struct seq_file *m, void *v) +int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk) { - struct pid *pid; - struct task_struct *tsk; char *buf, *path; int retval; struct cgroup_root *root; @@ -5067,14 +5026,6 @@ int proc_cgroup_show(struct seq_file *m, void *v) if (!buf) goto out; - retval = -ESRCH; - pid = m->private; - tsk = get_pid_task(pid, PIDTYPE_PID); - if (!tsk) - goto out_free; - - retval = 0; - mutex_lock(&cgroup_mutex); down_read(&css_set_rwsem); @@ -5104,11 +5055,10 @@ int proc_cgroup_show(struct seq_file *m, void *v) seq_putc(m, '\n'); } + retval = 0; out_unlock: up_read(&css_set_rwsem); mutex_unlock(&cgroup_mutex); - put_task_struct(tsk); -out_free: kfree(buf); out: return retval; @@ -5179,7 +5129,7 @@ void cgroup_post_fork(struct task_struct *child) int i; /* - * This may race against cgroup_enable_task_cg_links(). As that + * This may race against cgroup_enable_task_cg_lists(). As that * function sets use_task_css_set_links before grabbing * tasklist_lock and we just went through tasklist_lock to add * @child, it's guaranteed that either we see the set @@ -5194,7 +5144,7 @@ void cgroup_post_fork(struct task_struct *child) * when implementing operations which need to migrate all tasks of * a cgroup to another. * - * Note that if we lose to cgroup_enable_task_cg_links(), @child + * Note that if we lose to cgroup_enable_task_cg_lists(), @child * will remain in init_css_set. This is safe because all tasks are * in the init_css_set before cg_links is enabled and there's no * operation which transfers all tasks out of init_css_set. @@ -5278,30 +5228,14 @@ void cgroup_exit(struct task_struct *tsk) } if (put_cset) - put_css_set(cset, true); + put_css_set(cset); } static void check_for_release(struct cgroup *cgrp) { - if (cgroup_is_releasable(cgrp) && list_empty(&cgrp->cset_links) && - !css_has_online_children(&cgrp->self)) { - /* - * Control Group is currently removeable. If it's not - * already queued for a userspace notification, queue - * it now - */ - int need_schedule_work = 0; - - raw_spin_lock(&release_list_lock); - if (!cgroup_is_dead(cgrp) && - list_empty(&cgrp->release_list)) { - list_add(&cgrp->release_list, &release_list); - need_schedule_work = 1; - } - raw_spin_unlock(&release_list_lock); - if (need_schedule_work) - schedule_work(&release_agent_work); - } + if (notify_on_release(cgrp) && !cgroup_has_tasks(cgrp) && + !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp)) + schedule_work(&cgrp->release_agent_work); } /* @@ -5329,52 +5263,39 @@ static void check_for_release(struct cgroup *cgrp) */ static void cgroup_release_agent(struct work_struct *work) { - BUG_ON(work != &release_agent_work); + struct cgroup *cgrp = + container_of(work, struct cgroup, release_agent_work); + char *pathbuf = NULL, *agentbuf = NULL, *path; + char *argv[3], *envp[3]; + mutex_lock(&cgroup_mutex); - raw_spin_lock(&release_list_lock); - while (!list_empty(&release_list)) { - char *argv[3], *envp[3]; - int i; - char *pathbuf = NULL, *agentbuf = NULL, *path; - struct cgroup *cgrp = list_entry(release_list.next, - struct cgroup, - release_list); - list_del_init(&cgrp->release_list); - raw_spin_unlock(&release_list_lock); - pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!pathbuf) - goto continue_free; - path = cgroup_path(cgrp, pathbuf, PATH_MAX); - if (!path) - goto continue_free; - agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); - if (!agentbuf) - goto continue_free; - - i = 0; - argv[i++] = agentbuf; - argv[i++] = path; - argv[i] = NULL; - - i = 0; - /* minimal command environment */ - envp[i++] = "HOME=/"; - envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[i] = NULL; - - /* Drop the lock while we invoke the usermode helper, - * since the exec could involve hitting disk and hence - * be a slow process */ - mutex_unlock(&cgroup_mutex); - call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); - mutex_lock(&cgroup_mutex); - continue_free: - kfree(pathbuf); - kfree(agentbuf); - raw_spin_lock(&release_list_lock); - } - raw_spin_unlock(&release_list_lock); + + pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); + agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); + if (!pathbuf || !agentbuf) + goto out; + + path = cgroup_path(cgrp, pathbuf, PATH_MAX); + if (!path) + goto out; + + argv[0] = agentbuf; + argv[1] = path; + argv[2] = NULL; + + /* minimal command environment */ + envp[0] = "HOME=/"; + envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + envp[2] = NULL; + + mutex_unlock(&cgroup_mutex); + call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); + goto out_free; +out: mutex_unlock(&cgroup_mutex); +out_free: + kfree(agentbuf); + kfree(pathbuf); } static int __init cgroup_disable(char *str) @@ -5562,7 +5483,8 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft) { - return test_bit(CGRP_RELEASABLE, &css->cgroup->flags); + return (!cgroup_has_tasks(css->cgroup) && + !css_has_online_children(&css->cgroup->self)); } static struct cftype debug_files[] = { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 52cb04c993b7..1f107c74087b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2730,10 +2730,9 @@ void __cpuset_memory_pressure_bump(void) * and we take cpuset_mutex, keeping cpuset_attach() from changing it * anyway. */ -int proc_cpuset_show(struct seq_file *m, void *unused_v) +int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk) { - struct pid *pid; - struct task_struct *tsk; char *buf, *p; struct cgroup_subsys_state *css; int retval; @@ -2743,24 +2742,16 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v) if (!buf) goto out; - retval = -ESRCH; - pid = m->private; - tsk = get_pid_task(pid, PIDTYPE_PID); - if (!tsk) - goto out_free; - retval = -ENAMETOOLONG; rcu_read_lock(); css = task_css(tsk, cpuset_cgrp_id); p = cgroup_path(css->cgroup, buf, PATH_MAX); rcu_read_unlock(); if (!p) - goto out_put_task; + goto out_free; seq_puts(m, p); seq_putc(m, '\n'); retval = 0; -out_put_task: - put_task_struct(tsk); out_free: kfree(buf); out: diff --git a/kernel/events/core.c b/kernel/events/core.c index 963bf139e2b2..b1c663593f5c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -392,14 +392,9 @@ perf_cgroup_match(struct perf_event *event) event->cgrp->css.cgroup); } -static inline void perf_put_cgroup(struct perf_event *event) -{ - css_put(&event->cgrp->css); -} - static inline void perf_detach_cgroup(struct perf_event *event) { - perf_put_cgroup(event); + css_put(&event->cgrp->css); event->cgrp = NULL; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 44c6bd201d3a..8639f6b28746 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -148,7 +148,7 @@ static int hwpoison_filter_task(struct page *p) ino = cgroup_ino(css->cgroup); css_put(css); - if (!ino || ino != hwpoison_filter_memcg) + if (ino != hwpoison_filter_memcg) return -EINVAL; return 0; |