diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/array.c | 47 | ||||
-rw-r--r-- | fs/proc/base.c | 92 | ||||
-rw-r--r-- | fs/proc/fd.c | 5 | ||||
-rw-r--r-- | fs/proc/generic.c | 163 | ||||
-rw-r--r-- | fs/proc/internal.h | 16 | ||||
-rw-r--r-- | fs/proc/kcore.c | 4 | ||||
-rw-r--r-- | fs/proc/page.c | 3 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 1 | ||||
-rw-r--r-- | fs/proc/root.c | 1 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 458 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 88 |
11 files changed, 493 insertions, 385 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c index cd3653e4f35c..bd117d065b82 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -157,20 +157,29 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct user_namespace *user_ns = seq_user_ns(m); struct group_info *group_info; int g; - struct fdtable *fdt = NULL; + struct task_struct *tracer; const struct cred *cred; - pid_t ppid, tpid; + pid_t ppid, tpid = 0, tgid, ngid; + unsigned int max_fds = 0; rcu_read_lock(); ppid = pid_alive(p) ? task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; - tpid = 0; - if (pid_alive(p)) { - struct task_struct *tracer = ptrace_parent(p); - if (tracer) - tpid = task_pid_nr_ns(tracer, ns); - } + + tracer = ptrace_parent(p); + if (tracer) + tpid = task_pid_nr_ns(tracer, ns); + + tgid = task_tgid_nr_ns(p, ns); + ngid = task_numa_group_id(p); cred = get_task_cred(p); + + task_lock(p); + if (p->files) + max_fds = files_fdtable(p->files)->max_fds; + task_unlock(p); + rcu_read_unlock(); + seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" @@ -179,12 +188,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, "PPid:\t%d\n" "TracerPid:\t%d\n" "Uid:\t%d\t%d\t%d\t%d\n" - "Gid:\t%d\t%d\t%d\t%d\n", + "Gid:\t%d\t%d\t%d\t%d\n" + "FDSize:\t%d\nGroups:\t", get_task_state(p), - task_tgid_nr_ns(p, ns), - task_numa_group_id(p), - pid_nr_ns(pid, ns), - ppid, tpid, + tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid, from_kuid_munged(user_ns, cred->uid), from_kuid_munged(user_ns, cred->euid), from_kuid_munged(user_ns, cred->suid), @@ -192,20 +199,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, from_kgid_munged(user_ns, cred->gid), from_kgid_munged(user_ns, cred->egid), from_kgid_munged(user_ns, cred->sgid), - from_kgid_munged(user_ns, cred->fsgid)); - - task_lock(p); - if (p->files) - fdt = files_fdtable(p->files); - seq_printf(m, - "FDSize:\t%d\n" - "Groups:\t", - fdt ? fdt->max_fds : 0); - rcu_read_unlock(); + from_kgid_munged(user_ns, cred->fsgid), + max_fds); group_info = cred->group_info; - task_unlock(p); - for (g = 0; g < group_info->ngroups; g++) seq_printf(m, "%d ", from_kgid_munged(user_ns, GROUP_AT(group_info, g))); diff --git a/fs/proc/base.c b/fs/proc/base.c index baf852b648ad..590aeda5af12 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -376,37 +376,6 @@ static const struct file_operations proc_lstats_operations = { #endif -#ifdef CONFIG_CGROUPS -static int cgroup_open(struct inode *inode, struct file *file) -{ - struct pid *pid = PROC_I(inode)->pid; - return single_open(file, proc_cgroup_show, pid); -} - -static const struct file_operations proc_cgroup_operations = { - .open = cgroup_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - -#ifdef CONFIG_PROC_PID_CPUSET - -static int cpuset_open(struct inode *inode, struct file *file) -{ - struct pid *pid = PROC_I(inode)->pid; - return single_open(file, proc_cpuset_show, pid); -} - -static const struct file_operations proc_cpuset_operations = { - .open = cpuset_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -632,29 +601,35 @@ static const struct file_operations proc_single_file_operations = { .release = single_release, }; -static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) + +struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) { - struct task_struct *task = get_proc_task(file_inode(file)); - struct mm_struct *mm; + struct task_struct *task = get_proc_task(inode); + struct mm_struct *mm = ERR_PTR(-ESRCH); - if (!task) - return -ESRCH; + if (task) { + mm = mm_access(task, mode); + put_task_struct(task); - mm = mm_access(task, mode); - put_task_struct(task); + if (!IS_ERR_OR_NULL(mm)) { + /* ensure this mm_struct can't be freed */ + atomic_inc(&mm->mm_count); + /* but do not pin its memory */ + mmput(mm); + } + } + + return mm; +} + +static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) +{ + struct mm_struct *mm = proc_mem_open(inode, mode); if (IS_ERR(mm)) return PTR_ERR(mm); - if (mm) { - /* ensure this mm_struct can't be freed */ - atomic_inc(&mm->mm_count); - /* but do not pin its memory */ - mmput(mm); - } - file->private_data = mm; - return 0; } @@ -1590,7 +1565,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) put_task_struct(task); return 1; } - d_drop(dentry); return 0; } @@ -1727,9 +1701,6 @@ out: put_task_struct(task); out_notask: - if (status <= 0) - d_drop(dentry); - return status; } @@ -2573,10 +2544,10 @@ static const struct pid_entry tgid_base_stuff[] = { REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, proc_cpuset_operations), + ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, proc_cgroup_operations), + ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), @@ -2643,11 +2614,13 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) /* no ->d_hash() rejects on procfs */ dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - shrink_dcache_parent(dentry); - d_drop(dentry); + d_invalidate(dentry); dput(dentry); } + if (pid == tgid) + return; + name.name = buf; name.len = snprintf(buf, sizeof(buf), "%d", tgid); leader = d_hash_and_lookup(mnt->mnt_root, &name); @@ -2664,8 +2637,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(dir, &name); if (dentry) { - shrink_dcache_parent(dentry); - d_drop(dentry); + d_invalidate(dentry); dput(dentry); } @@ -2820,7 +2792,7 @@ retry: int proc_pid_readdir(struct file *file, struct dir_context *ctx) { struct tgid_iter iter; - struct pid_namespace *ns = file->f_dentry->d_sb->s_fs_info; + struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info; loff_t pos = ctx->pos; if (pos >= PID_MAX_LIMIT + TGID_OFFSET) @@ -2919,10 +2891,10 @@ static const struct pid_entry tid_base_stuff[] = { REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, proc_cpuset_operations), + ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, proc_cgroup_operations), + ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), @@ -3126,7 +3098,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - ns = file->f_dentry->d_sb->s_fs_info; + ns = inode->i_sb->s_fs_info; tid = (int)file->f_version; file->f_version = 0; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 955bb55fab8c..8e5ad83b629a 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -53,7 +53,8 @@ static int seq_show(struct seq_file *m, void *v) (long long)file->f_pos, f_flags, real_mount(file->f_path.mnt)->mnt_id); if (file->f_op->show_fdinfo) - ret = file->f_op->show_fdinfo(m, file); + file->f_op->show_fdinfo(m, file); + ret = seq_has_overflowed(m); fput(file); } @@ -129,8 +130,6 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) } put_task_struct(task); } - - d_drop(dentry); return 0; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 317b72641ebf..7fea13229f33 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -31,9 +31,73 @@ static DEFINE_SPINLOCK(proc_subdir_lock); static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) { - if (de->namelen != len) - return 0; - return !memcmp(name, de->name, len); + if (len < de->namelen) + return -1; + if (len > de->namelen) + return 1; + + return memcmp(name, de->name, len); +} + +static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) +{ + return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry, + subdir_node); +} + +static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) +{ + return rb_entry_safe(rb_next(&dir->subdir_node), struct proc_dir_entry, + subdir_node); +} + +static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, + const char *name, + unsigned int len) +{ + struct rb_node *node = dir->subdir.rb_node; + + while (node) { + struct proc_dir_entry *de = container_of(node, + struct proc_dir_entry, + subdir_node); + int result = proc_match(len, name, de); + + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return de; + } + return NULL; +} + +static bool pde_subdir_insert(struct proc_dir_entry *dir, + struct proc_dir_entry *de) +{ + struct rb_root *root = &dir->subdir; + struct rb_node **new = &root->rb_node, *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct proc_dir_entry *this = + container_of(*new, struct proc_dir_entry, subdir_node); + int result = proc_match(de->namelen, de->name, this); + + parent = *new; + if (result < 0) + new = &(*new)->rb_left; + else if (result > 0) + new = &(*new)->rb_right; + else + return false; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&de->subdir_node, parent, new); + rb_insert_color(&de->subdir_node, root); + return true; } static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) @@ -92,10 +156,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, break; len = next - cp; - for (de = de->subdir; de ; de = de->next) { - if (proc_match(len, cp, de)) - break; - } + de = pde_subdir_find(de, cp, len); if (!de) { WARN(1, "name '%s'\n", name); return -ENOENT; @@ -183,19 +244,16 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, struct inode *inode; spin_lock(&proc_subdir_lock); - for (de = de->subdir; de ; de = de->next) { - if (de->namelen != dentry->d_name.len) - continue; - if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { - pde_get(de); - spin_unlock(&proc_subdir_lock); - inode = proc_get_inode(dir->i_sb, de); - if (!inode) - return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &simple_dentry_operations); - d_add(dentry, inode); - return NULL; - } + de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len); + if (de) { + pde_get(de); + spin_unlock(&proc_subdir_lock); + inode = proc_get_inode(dir->i_sb, de); + if (!inode) + return ERR_PTR(-ENOMEM); + d_set_d_op(dentry, &simple_dentry_operations); + d_add(dentry, inode); + return NULL; } spin_unlock(&proc_subdir_lock); return ERR_PTR(-ENOENT); @@ -225,7 +283,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, return 0; spin_lock(&proc_subdir_lock); - de = de->subdir; + de = pde_subdir_first(de); i = ctx->pos - 2; for (;;) { if (!de) { @@ -234,7 +292,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, } if (!i) break; - de = de->next; + de = pde_subdir_next(de); i--; } @@ -249,7 +307,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, } spin_lock(&proc_subdir_lock); ctx->pos++; - next = de->next; + next = pde_subdir_next(de); pde_put(de); de = next; } while (de); @@ -286,9 +344,8 @@ static const struct inode_operations proc_dir_inode_operations = { static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { - struct proc_dir_entry *tmp; int ret; - + ret = proc_alloc_inum(&dp->low_ino); if (ret) return ret; @@ -304,21 +361,21 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp dp->proc_iops = &proc_file_inode_operations; } else { WARN_ON(1); + proc_free_inum(dp->low_ino); return -EINVAL; } spin_lock(&proc_subdir_lock); - - for (tmp = dir->subdir; tmp; tmp = tmp->next) - if (strcmp(tmp->name, dp->name) == 0) { - WARN(1, "proc_dir_entry '%s/%s' already registered\n", - dir->name, dp->name); - break; - } - - dp->next = dir->subdir; dp->parent = dir; - dir->subdir = dp; + if (pde_subdir_insert(dir, dp) == false) { + WARN(1, "proc_dir_entry '%s/%s' already registered\n", + dir->name, dp->name); + spin_unlock(&proc_subdir_lock); + if (S_ISDIR(dp->mode)) + dir->nlink--; + proc_free_inum(dp->low_ino); + return -EEXIST; + } spin_unlock(&proc_subdir_lock); return 0; @@ -354,6 +411,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, ent->namelen = qstr.len; ent->mode = mode; ent->nlink = nlink; + ent->subdir = RB_ROOT; atomic_set(&ent->count, 1); spin_lock_init(&ent->pde_unload_lock); INIT_LIST_HEAD(&ent->pde_openers); @@ -485,7 +543,6 @@ void pde_put(struct proc_dir_entry *pde) */ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) { - struct proc_dir_entry **p; struct proc_dir_entry *de = NULL; const char *fn = name; unsigned int len; @@ -497,14 +554,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) } len = strlen(fn); - for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (proc_match(len, fn, *p)) { - de = *p; - *p = de->next; - de->next = NULL; - break; - } - } + de = pde_subdir_find(parent, fn, len); + if (de) + rb_erase(&de->subdir_node, &parent->subdir); spin_unlock(&proc_subdir_lock); if (!de) { WARN(1, "name '%s'\n", name); @@ -516,16 +568,15 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; - WARN(de->subdir, "%s: removing non-empty directory " - "'%s/%s', leaking at least '%s'\n", __func__, - de->parent->name, de->name, de->subdir->name); + WARN(pde_subdir_first(de), + "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n", + __func__, de->parent->name, de->name, pde_subdir_first(de)->name); pde_put(de); } EXPORT_SYMBOL(remove_proc_entry); int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { - struct proc_dir_entry **p; struct proc_dir_entry *root = NULL, *de, *next; const char *fn = name; unsigned int len; @@ -537,24 +588,18 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) } len = strlen(fn); - for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (proc_match(len, fn, *p)) { - root = *p; - *p = root->next; - root->next = NULL; - break; - } - } + root = pde_subdir_find(parent, fn, len); if (!root) { spin_unlock(&proc_subdir_lock); return -ENOENT; } + rb_erase(&root->subdir_node, &parent->subdir); + de = root; while (1) { - next = de->subdir; + next = pde_subdir_first(de); if (next) { - de->subdir = next->next; - next->next = NULL; + rb_erase(&next->subdir_node, &de->subdir); de = next; continue; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 7da13e49128a..7fb1a4869fd0 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -24,10 +24,9 @@ struct mempolicy; * tree) of these proc_dir_entries, so that we can dynamically * add new files to /proc. * - * The "next" pointer creates a linked list of one /proc directory, - * while parent/subdir create the directory structure (every - * /proc file has a parent, but "subdir" is NULL for all - * non-directory entries). + * parent/subdir are used for the directory structure (every /proc file has a + * parent, but "subdir" is empty for all non-directory entries). + * subdir_node is used to build the rb tree "subdir" of the parent. */ struct proc_dir_entry { unsigned int low_ino; @@ -38,7 +37,9 @@ struct proc_dir_entry { loff_t size; const struct inode_operations *proc_iops; const struct file_operations *proc_fops; - struct proc_dir_entry *next, *parent, *subdir; + struct proc_dir_entry *parent; + struct rb_root subdir; + struct rb_node subdir_node; void *data; atomic_t count; /* use count */ atomic_t in_use; /* number of callers into module in progress; */ @@ -268,8 +269,9 @@ extern int proc_remount(struct super_block *, int *, char *); * task_[no]mmu.c */ struct proc_maps_private { - struct pid *pid; + struct inode *inode; struct task_struct *task; + struct mm_struct *mm; #ifdef CONFIG_MMU struct vm_area_struct *tail_vma; #endif @@ -278,6 +280,8 @@ struct proc_maps_private { #endif }; +struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); + extern const struct file_operations proc_pid_maps_operations; extern const struct file_operations proc_tid_maps_operations; extern const struct file_operations proc_pid_numa_maps_operations; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 6df8d0722c97..91a4e6426321 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -610,8 +610,10 @@ static void __init proc_kcore_text_init(void) struct kcore_list kcore_modules; static void __init add_modules_range(void) { - kclist_add(&kcore_modules, (void *)MODULES_VADDR, + if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) { + kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_END - MODULES_VADDR, KCORE_VMALLOC); + } } #else static void __init add_modules_range(void) diff --git a/fs/proc/page.c b/fs/proc/page.c index e647c55275d9..1e3187da1fed 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -133,6 +133,9 @@ u64 stable_page_flags(struct page *page) if (PageBuddy(page)) u |= 1 << KPF_BUDDY; + if (PageBalloon(page)) + u |= 1 << KPF_BALLOON; + u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index a63af3e0a612..1bde894bc624 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -192,6 +192,7 @@ static __net_init int proc_net_ns_init(struct net *net) if (!netd) goto out; + netd->subdir = RB_ROOT; netd->data = net; netd->nlink = 2; netd->namelen = 3; diff --git a/fs/proc/root.c b/fs/proc/root.c index 094e44d4a6be..e74ac9f1a2c0 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -251,6 +251,7 @@ struct proc_dir_entry proc_root = { .proc_iops = &proc_root_inode_operations, .proc_fops = &proc_root_operations, .parent = &proc_root, + .subdir = RB_ROOT, .name = "/proc", }; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c34156888d70..246eae84b13b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -87,32 +87,14 @@ unsigned long task_statm(struct mm_struct *mm, #ifdef CONFIG_NUMA /* - * These functions are for numa_maps but called in generic **maps seq_file - * ->start(), ->stop() ops. - * - * numa_maps scans all vmas under mmap_sem and checks their mempolicy. - * Each mempolicy object is controlled by reference counting. The problem here - * is how to avoid accessing dead mempolicy object. - * - * Because we're holding mmap_sem while reading seq_file, it's safe to access - * each vma's mempolicy, no vma objects will never drop refs to mempolicy. - * - * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy - * is set and replaced under mmap_sem but unrefed and cleared under task_lock(). - * So, without task_lock(), we cannot trust get_vma_policy() because we cannot - * gurantee the task never exits under us. But taking task_lock() around - * get_vma_plicy() causes lock order problem. - * - * To access task->mempolicy without lock, we hold a reference count of an - * object pointed by task->mempolicy and remember it. This will guarantee - * that task->mempolicy points to an alive object or NULL in numa_maps accesses. + * Save get_task_policy() for show_numa_map(). */ static void hold_task_mempolicy(struct proc_maps_private *priv) { struct task_struct *task = priv->task; task_lock(task); - priv->task_mempolicy = task->mempolicy; + priv->task_mempolicy = get_task_policy(task); mpol_get(priv->task_mempolicy); task_unlock(task); } @@ -129,124 +111,154 @@ static void release_task_mempolicy(struct proc_maps_private *priv) } #endif -static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) +static void vma_stop(struct proc_maps_private *priv) { - if (vma && vma != priv->tail_vma) { - struct mm_struct *mm = vma->vm_mm; - release_task_mempolicy(priv); - up_read(&mm->mmap_sem); - mmput(mm); - } + struct mm_struct *mm = priv->mm; + + release_task_mempolicy(priv); + up_read(&mm->mmap_sem); + mmput(mm); +} + +static struct vm_area_struct * +m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma) +{ + if (vma == priv->tail_vma) + return NULL; + return vma->vm_next ?: priv->tail_vma; } -static void *m_start(struct seq_file *m, loff_t *pos) +static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma) +{ + if (m->count < m->size) /* vma is copied successfully */ + m->version = m_next_vma(m->private, vma) ? vma->vm_start : -1UL; +} + +static void *m_start(struct seq_file *m, loff_t *ppos) { struct proc_maps_private *priv = m->private; unsigned long last_addr = m->version; struct mm_struct *mm; - struct vm_area_struct *vma, *tail_vma = NULL; - loff_t l = *pos; - - /* Clear the per syscall fields in priv */ - priv->task = NULL; - priv->tail_vma = NULL; - - /* - * We remember last_addr rather than next_addr to hit with - * vmacache most of the time. We have zero last_addr at - * the beginning and also after lseek. We will have -1 last_addr - * after the end of the vmas. - */ + struct vm_area_struct *vma; + unsigned int pos = *ppos; + /* See m_cache_vma(). Zero at the start or after lseek. */ if (last_addr == -1UL) return NULL; - priv->task = get_pid_task(priv->pid, PIDTYPE_PID); + priv->task = get_proc_task(priv->inode); if (!priv->task) return ERR_PTR(-ESRCH); - mm = mm_access(priv->task, PTRACE_MODE_READ); - if (!mm || IS_ERR(mm)) - return mm; - down_read(&mm->mmap_sem); + mm = priv->mm; + if (!mm || !atomic_inc_not_zero(&mm->mm_users)) + return NULL; - tail_vma = get_gate_vma(priv->task->mm); - priv->tail_vma = tail_vma; + down_read(&mm->mmap_sem); hold_task_mempolicy(priv); - /* Start with last addr hint */ - vma = find_vma(mm, last_addr); - if (last_addr && vma) { - vma = vma->vm_next; - goto out; + priv->tail_vma = get_gate_vma(mm); + + if (last_addr) { + vma = find_vma(mm, last_addr); + if (vma && (vma = m_next_vma(priv, vma))) + return vma; } - /* - * Check the vma index is within the range and do - * sequential scan until m_index. - */ - vma = NULL; - if ((unsigned long)l < mm->map_count) { - vma = mm->mmap; - while (l-- && vma) + m->version = 0; + if (pos < mm->map_count) { + for (vma = mm->mmap; pos; pos--) { + m->version = vma->vm_start; vma = vma->vm_next; - goto out; + } + return vma; } - if (l != mm->map_count) - tail_vma = NULL; /* After gate vma */ - -out: - if (vma) - return vma; + /* we do not bother to update m->version in this case */ + if (pos == mm->map_count && priv->tail_vma) + return priv->tail_vma; - release_task_mempolicy(priv); - /* End of vmas has been reached */ - m->version = (tail_vma != NULL)? 0: -1UL; - up_read(&mm->mmap_sem); - mmput(mm); - return tail_vma; + vma_stop(priv); + return NULL; } static void *m_next(struct seq_file *m, void *v, loff_t *pos) { struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - struct vm_area_struct *tail_vma = priv->tail_vma; + struct vm_area_struct *next; (*pos)++; - if (vma && (vma != tail_vma) && vma->vm_next) - return vma->vm_next; - vma_stop(priv, vma); - return (vma != tail_vma)? tail_vma: NULL; + next = m_next_vma(priv, v); + if (!next) + vma_stop(priv); + return next; } static void m_stop(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - if (!IS_ERR(vma)) - vma_stop(priv, vma); - if (priv->task) + if (!IS_ERR_OR_NULL(v)) + vma_stop(priv); + if (priv->task) { put_task_struct(priv->task); + priv->task = NULL; + } +} + +static int proc_maps_open(struct inode *inode, struct file *file, + const struct seq_operations *ops, int psize) +{ + struct proc_maps_private *priv = __seq_open_private(file, ops, psize); + + if (!priv) + return -ENOMEM; + + priv->inode = inode; + priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); + if (IS_ERR(priv->mm)) { + int err = PTR_ERR(priv->mm); + + seq_release_private(inode, file); + return err; + } + + return 0; +} + +static int proc_map_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct proc_maps_private *priv = seq->private; + + if (priv->mm) + mmdrop(priv->mm); + + return seq_release_private(inode, file); } static int do_maps_open(struct inode *inode, struct file *file, const struct seq_operations *ops) { - struct proc_maps_private *priv; - int ret = -ENOMEM; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv) { - priv->pid = proc_pid(inode); - ret = seq_open(file, ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = priv; - } else { - kfree(priv); - } + return proc_maps_open(inode, file, ops, + sizeof(struct proc_maps_private)); +} + +static pid_t pid_of_stack(struct proc_maps_private *priv, + struct vm_area_struct *vma, bool is_pid) +{ + struct inode *inode = priv->inode; + struct task_struct *task; + pid_t ret = 0; + + rcu_read_lock(); + task = pid_task(proc_pid(inode), PIDTYPE_PID); + if (task) { + task = task_of_stack(task, vma, is_pid); + if (task) + ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); } + rcu_read_unlock(); + return ret; } @@ -256,7 +268,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; struct proc_maps_private *priv = m->private; - struct task_struct *task = priv->task; vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; @@ -321,8 +332,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } - tid = vm_is_stack(task, vma, is_pid); - + tid = pid_of_stack(priv, vma, is_pid); if (tid != 0) { /* * Thread stack in /proc/PID/task/TID/maps or @@ -349,15 +359,8 @@ done: static int show_map(struct seq_file *m, void *v, int is_pid) { - struct vm_area_struct *vma = v; - struct proc_maps_private *priv = m->private; - struct task_struct *task = priv->task; - - show_map_vma(m, vma, is_pid); - - if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task->mm)) - ? vma->vm_start : 0; + show_map_vma(m, v, is_pid); + m_cache_vma(m, v); return 0; } @@ -399,14 +402,14 @@ const struct file_operations proc_pid_maps_operations = { .open = pid_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; const struct file_operations proc_tid_maps_operations = { .open = tid_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; /* @@ -444,59 +447,92 @@ struct mem_size_stats { u64 pss; }; +static void smaps_account(struct mem_size_stats *mss, struct page *page, + unsigned long size, bool young, bool dirty) +{ + int mapcount; + + if (PageAnon(page)) + mss->anonymous += size; + + mss->resident += size; + /* Accumulate the size in pages that have been accessed. */ + if (young || PageReferenced(page)) + mss->referenced += size; + mapcount = page_mapcount(page); + if (mapcount >= 2) { + u64 pss_delta; -static void smaps_pte_entry(pte_t ptent, unsigned long addr, - unsigned long ptent_size, struct mm_walk *walk) + if (dirty || PageDirty(page)) + mss->shared_dirty += size; + else + mss->shared_clean += size; + pss_delta = (u64)size << PSS_SHIFT; + do_div(pss_delta, mapcount); + mss->pss += pss_delta; + } else { + if (dirty || PageDirty(page)) + mss->private_dirty += size; + else + mss->private_clean += size; + mss->pss += (u64)size << PSS_SHIFT; + } +} + +static void smaps_pte_entry(pte_t *pte, unsigned long addr, + struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = mss->vma; pgoff_t pgoff = linear_page_index(vma, addr); struct page *page = NULL; - int mapcount; - if (pte_present(ptent)) { - page = vm_normal_page(vma, addr, ptent); - } else if (is_swap_pte(ptent)) { - swp_entry_t swpent = pte_to_swp_entry(ptent); + if (pte_present(*pte)) { + page = vm_normal_page(vma, addr, *pte); + } else if (is_swap_pte(*pte)) { + swp_entry_t swpent = pte_to_swp_entry(*pte); if (!non_swap_entry(swpent)) - mss->swap += ptent_size; + mss->swap += PAGE_SIZE; else if (is_migration_entry(swpent)) page = migration_entry_to_page(swpent); - } else if (pte_file(ptent)) { - if (pte_to_pgoff(ptent) != pgoff) - mss->nonlinear += ptent_size; + } else if (pte_file(*pte)) { + if (pte_to_pgoff(*pte) != pgoff) + mss->nonlinear += PAGE_SIZE; } if (!page) return; - if (PageAnon(page)) - mss->anonymous += ptent_size; - if (page->index != pgoff) - mss->nonlinear += ptent_size; + mss->nonlinear += PAGE_SIZE; - mss->resident += ptent_size; - /* Accumulate the size in pages that have been accessed. */ - if (pte_young(ptent) || PageReferenced(page)) - mss->referenced += ptent_size; - mapcount = page_mapcount(page); - if (mapcount >= 2) { - if (pte_dirty(ptent) || PageDirty(page)) - mss->shared_dirty += ptent_size; - else - mss->shared_clean += ptent_size; - mss->pss += (ptent_size << PSS_SHIFT) / mapcount; - } else { - if (pte_dirty(ptent) || PageDirty(page)) - mss->private_dirty += ptent_size; - else - mss->private_clean += ptent_size; - mss->pss += (ptent_size << PSS_SHIFT); - } + smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte)); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, + struct mm_walk *walk) +{ + struct mem_size_stats *mss = walk->private; + struct vm_area_struct *vma = mss->vma; + struct page *page; + + /* FOLL_DUMP will return -EFAULT on huge zero page */ + page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); + if (IS_ERR_OR_NULL(page)) + return; + mss->anonymous_thp += HPAGE_PMD_SIZE; + smaps_account(mss, page, HPAGE_PMD_SIZE, + pmd_young(*pmd), pmd_dirty(*pmd)); +} +#else +static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, + struct mm_walk *walk) +{ +} +#endif + static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -506,9 +542,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, spinlock_t *ptl; if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { - smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); + smaps_pmd_entry(pmd, addr, walk); spin_unlock(ptl); - mss->anonymous_thp += HPAGE_PMD_SIZE; return 0; } @@ -521,7 +556,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, */ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) - smaps_pte_entry(*pte, addr, PAGE_SIZE, walk); + smaps_pte_entry(pte, addr, walk); pte_unmap_unlock(pte - 1, ptl); cond_resched(); return 0; @@ -549,6 +584,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_GROWSDOWN)] = "gd", [ilog2(VM_PFNMAP)] = "pf", [ilog2(VM_DENYWRITE)] = "dw", +#ifdef CONFIG_X86_INTEL_MPX + [ilog2(VM_MPX)] = "mp", +#endif [ilog2(VM_LOCKED)] = "lo", [ilog2(VM_IO)] = "io", [ilog2(VM_SEQ_READ)] = "sr", @@ -583,8 +621,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) static int show_smap(struct seq_file *m, void *v, int is_pid) { - struct proc_maps_private *priv = m->private; - struct task_struct *task = priv->task; struct vm_area_struct *vma = v; struct mem_size_stats mss; struct mm_walk smaps_walk = { @@ -637,10 +673,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) mss.nonlinear >> 10); show_smap_vma_flags(m, vma); - - if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task->mm)) - ? vma->vm_start : 0; + m_cache_vma(m, vma); return 0; } @@ -682,14 +715,14 @@ const struct file_operations proc_pid_smaps_operations = { .open = pid_smaps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; const struct file_operations proc_tid_smaps_operations = { .open = tid_smaps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; /* @@ -829,8 +862,21 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .private = &cp, }; down_read(&mm->mmap_sem); - if (type == CLEAR_REFS_SOFT_DIRTY) + if (type == CLEAR_REFS_SOFT_DIRTY) { + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (!(vma->vm_flags & VM_SOFTDIRTY)) + continue; + up_read(&mm->mmap_sem); + down_write(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + vma->vm_flags &= ~VM_SOFTDIRTY; + vma_set_page_prot(vma); + } + downgrade_write(&mm->mmap_sem); + break; + } mmu_notifier_invalidate_range_start(mm, 0, -1); + } for (vma = mm->mmap; vma; vma = vma->vm_next) { cp.vma = vma; if (is_vm_hugetlb_page(vma)) @@ -850,10 +896,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, continue; if (type == CLEAR_REFS_MAPPED && !vma->vm_file) continue; - if (type == CLEAR_REFS_SOFT_DIRTY) { - if (vma->vm_flags & VM_SOFTDIRTY) - vma->vm_flags &= ~VM_SOFTDIRTY; - } walk_page_range(vma->vm_start, vma->vm_end, &clear_refs_walk); } @@ -1029,7 +1071,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, spinlock_t *ptl; pte_t *pte; int err = 0; - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); @@ -1043,6 +1084,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, for (; addr != end; addr += PAGE_SIZE) { unsigned long offset; + pagemap_entry_t pme; offset = (addr & ~PAGEMAP_WALK_MASK) >> PAGE_SHIFT; @@ -1057,32 +1099,51 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (pmd_trans_unstable(pmd)) return 0; - for (; addr != end; addr += PAGE_SIZE) { - int flags2; - - /* check to see if we've left 'vma' behind - * and need a new, higher one */ - if (vma && (addr >= vma->vm_end)) { - vma = find_vma(walk->mm, addr); - if (vma && (vma->vm_flags & VM_SOFTDIRTY)) - flags2 = __PM_SOFT_DIRTY; - else - flags2 = 0; - pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); + + while (1) { + /* End of address space hole, which we mark as non-present. */ + unsigned long hole_end; + + if (vma) + hole_end = min(end, vma->vm_start); + else + hole_end = end; + + for (; addr < hole_end; addr += PAGE_SIZE) { + pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); + + err = add_to_pagemap(addr, &pme, pm); + if (err) + return err; } - /* check that 'vma' actually covers this address, - * and that it isn't a huge page vma */ - if (vma && (vma->vm_start <= addr) && - !is_vm_hugetlb_page(vma)) { + if (!vma || vma->vm_start >= end) + break; + /* + * We can't possibly be in a hugetlb VMA. In general, + * for a mm_walk with a pmd_entry and a hugetlb_entry, + * the pmd_entry can only be called on addresses in a + * hugetlb if the walk starts in a non-hugetlb VMA and + * spans a hugepage VMA. Since pagemap_read walks are + * PMD-sized and PMD-aligned, this will never be true. + */ + BUG_ON(is_vm_hugetlb_page(vma)); + + /* Addresses in the VMA. */ + for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) { + pagemap_entry_t pme; pte = pte_offset_map(pmd, addr); pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); - /* unmap before userspace copy */ pte_unmap(pte); + err = add_to_pagemap(addr, &pme, pm); + if (err) + return err; } - err = add_to_pagemap(addr, &pme, pm); - if (err) - return err; + + if (addr == end) + break; + + vma = find_vma(walk->mm, addr); } cond_resched(); @@ -1415,7 +1476,6 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) struct vm_area_struct *vma = v; struct numa_maps *md = &numa_priv->md; struct file *file = vma->vm_file; - struct task_struct *task = proc_priv->task; struct mm_struct *mm = vma->vm_mm; struct mm_walk walk = {}; struct mempolicy *pol; @@ -1435,9 +1495,13 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) walk.private = md; walk.mm = mm; - pol = get_vma_policy(task, vma, vma->vm_start); - mpol_to_str(buffer, sizeof(buffer), pol); - mpol_cond_put(pol); + pol = __get_vma_policy(vma, vma->vm_start); + if (pol) { + mpol_to_str(buffer, sizeof(buffer), pol); + mpol_cond_put(pol); + } else { + mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy); + } seq_printf(m, "%08lx %s", vma->vm_start, buffer); @@ -1447,7 +1511,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_puts(m, " heap"); } else { - pid_t tid = vm_is_stack(task, vma, is_pid); + pid_t tid = pid_of_stack(proc_priv, vma, is_pid); if (tid != 0) { /* * Thread stack in /proc/PID/task/TID/maps or @@ -1495,9 +1559,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) seq_printf(m, " N%d=%lu", nid, md->node[nid]); out: seq_putc(m, '\n'); - - if (m->count < m->size) - m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0; + m_cache_vma(m, vma); return 0; } @@ -1528,20 +1590,8 @@ static const struct seq_operations proc_tid_numa_maps_op = { static int numa_maps_open(struct inode *inode, struct file *file, const struct seq_operations *ops) { - struct numa_maps_private *priv; - int ret = -ENOMEM; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv) { - priv->proc_maps.pid = proc_pid(inode); - ret = seq_open(file, ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = priv; - } else { - kfree(priv); - } - } - return ret; + return proc_maps_open(inode, file, ops, + sizeof(struct numa_maps_private)); } static int pid_numa_maps_open(struct inode *inode, struct file *file) @@ -1558,13 +1608,13 @@ const struct file_operations proc_pid_numa_maps_operations = { .open = pid_numa_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; const struct file_operations proc_tid_numa_maps_operations = { .open = tid_numa_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = proc_map_release, }; #endif /* CONFIG_NUMA */ diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 678455d2d683..599ec2e20104 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -123,6 +123,25 @@ unsigned long task_statm(struct mm_struct *mm, return size; } +static pid_t pid_of_stack(struct proc_maps_private *priv, + struct vm_area_struct *vma, bool is_pid) +{ + struct inode *inode = priv->inode; + struct task_struct *task; + pid_t ret = 0; + + rcu_read_lock(); + task = pid_task(proc_pid(inode), PIDTYPE_PID); + if (task) { + task = task_of_stack(task, vma, is_pid); + if (task) + ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); + } + rcu_read_unlock(); + + return ret; +} + /* * display a single VMA to a sequenced file */ @@ -163,7 +182,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, seq_pad(m, ' '); seq_path(m, &file->f_path, ""); } else if (mm) { - pid_t tid = vm_is_stack(priv->task, vma, is_pid); + pid_t tid = pid_of_stack(priv, vma, is_pid); if (tid != 0) { seq_pad(m, ' '); @@ -212,22 +231,22 @@ static void *m_start(struct seq_file *m, loff_t *pos) loff_t n = *pos; /* pin the task and mm whilst we play with them */ - priv->task = get_pid_task(priv->pid, PIDTYPE_PID); + priv->task = get_proc_task(priv->inode); if (!priv->task) return ERR_PTR(-ESRCH); - mm = mm_access(priv->task, PTRACE_MODE_READ); - if (!mm || IS_ERR(mm)) { - put_task_struct(priv->task); - priv->task = NULL; - return mm; - } - down_read(&mm->mmap_sem); + mm = priv->mm; + if (!mm || !atomic_inc_not_zero(&mm->mm_users)) + return NULL; + down_read(&mm->mmap_sem); /* start from the Nth VMA */ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) if (n-- == 0) return p; + + up_read(&mm->mmap_sem); + mmput(mm); return NULL; } @@ -235,11 +254,13 @@ static void m_stop(struct seq_file *m, void *_vml) { struct proc_maps_private *priv = m->private; + if (!IS_ERR_OR_NULL(_vml)) { + up_read(&priv->mm->mmap_sem); + mmput(priv->mm); + } if (priv->task) { - struct mm_struct *mm = priv->task->mm; - up_read(&mm->mmap_sem); - mmput(mm); put_task_struct(priv->task); + priv->task = NULL; } } @@ -269,20 +290,33 @@ static int maps_open(struct inode *inode, struct file *file, const struct seq_operations *ops) { struct proc_maps_private *priv; - int ret = -ENOMEM; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv) { - priv->pid = proc_pid(inode); - ret = seq_open(file, ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = priv; - } else { - kfree(priv); - } + + priv = __seq_open_private(file, ops, sizeof(*priv)); + if (!priv) + return -ENOMEM; + + priv->inode = inode; + priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); + if (IS_ERR(priv->mm)) { + int err = PTR_ERR(priv->mm); + + seq_release_private(inode, file); + return err; } - return ret; + + return 0; +} + + +static int map_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct proc_maps_private *priv = seq->private; + + if (priv->mm) + mmdrop(priv->mm); + + return seq_release_private(inode, file); } static int pid_maps_open(struct inode *inode, struct file *file) @@ -299,13 +333,13 @@ const struct file_operations proc_pid_maps_operations = { .open = pid_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = map_release, }; const struct file_operations proc_tid_maps_operations = { .open = tid_maps_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = map_release, }; |