diff options
author | Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> | 2008-02-18 13:51:42 +1100 |
---|---|---|
committer | Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> | 2008-02-18 13:51:42 +1100 |
commit | c58310bf4933986513020fa90b4190c7492995ae (patch) | |
tree | 143f2c7578d02ebef5db8fc57ae69e951ae0e2ee /fs/proc | |
parent | 269cdfaf769f5cd831284cc831790c7c5038040f (diff) | |
parent | 1309d4e68497184d2fd87e892ddf14076c2bda98 (diff) | |
download | linux-c58310bf4933986513020fa90b4190c7492995ae.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/array.c | 163 | ||||
-rw-r--r-- | fs/proc/base.c | 183 | ||||
-rw-r--r-- | fs/proc/generic.c | 74 | ||||
-rw-r--r-- | fs/proc/inode.c | 61 | ||||
-rw-r--r-- | fs/proc/internal.h | 25 | ||||
-rw-r--r-- | fs/proc/kcore.c | 3 | ||||
-rw-r--r-- | fs/proc/nommu.c | 4 | ||||
-rw-r--r-- | fs/proc/proc_misc.c | 172 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 7 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 6 | ||||
-rw-r--r-- | fs/proc/proc_tty.c | 5 | ||||
-rw-r--r-- | fs/proc/root.c | 1 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 692 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 13 | ||||
-rw-r--r-- | fs/proc/vmcore.c | 1 |
15 files changed, 893 insertions, 517 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c index b380313092bd..07d6c4853fe8 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -77,6 +77,7 @@ #include <linux/cpuset.h> #include <linux/rcupdate.h> #include <linux/delayacct.h> +#include <linux/seq_file.h> #include <linux/pid_namespace.h> #include <asm/pgtable.h> @@ -88,18 +89,21 @@ do { memcpy(buffer, string, strlen(string)); \ buffer += strlen(string); } while (0) -static inline char *task_name(struct task_struct *p, char *buf) +static inline void task_name(struct seq_file *m, struct task_struct *p) { int i; + char *buf, *end; char *name; char tcomm[sizeof(p->comm)]; get_task_comm(tcomm, p); - ADDBUF(buf, "Name:\t"); + seq_printf(m, "Name:\t"); + end = m->buf + m->size; + buf = m->buf + m->count; name = tcomm; i = sizeof(tcomm); - do { + while (i && (buf < end)) { unsigned char c = *name; name++; i--; @@ -107,20 +111,21 @@ static inline char *task_name(struct task_struct *p, char *buf) if (!c) break; if (c == '\\') { - buf[1] = c; - buf += 2; + buf++; + if (buf < end) + *buf++ = c; continue; } if (c == '\n') { - buf[0] = '\\'; - buf[1] = 'n'; - buf += 2; + *buf++ = '\\'; + if (buf < end) + *buf++ = 'n'; continue; } buf++; - } while (i); - *buf = '\n'; - return buf+1; + } + m->count = buf - m->buf; + seq_printf(m, "\n"); } /* @@ -151,21 +156,20 @@ static inline const char *get_task_state(struct task_struct *tsk) return *p; } -static inline char *task_state(struct task_struct *p, char *buffer) +static inline void task_state(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *p) { struct group_info *group_info; int g; struct fdtable *fdt = NULL; - struct pid_namespace *ns; pid_t ppid, tpid; - ns = current->nsproxy->pid_ns; rcu_read_lock(); ppid = pid_alive(p) ? task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; tpid = pid_alive(p) && p->ptrace ? 
task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; - buffer += sprintf(buffer, + seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" "Pid:\t%d\n" @@ -175,7 +179,7 @@ static inline char *task_state(struct task_struct *p, char *buffer) "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), task_tgid_nr_ns(p, ns), - task_pid_nr_ns(p, ns), + pid_nr_ns(pid, ns), ppid, tpid, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); @@ -183,7 +187,7 @@ static inline char *task_state(struct task_struct *p, char *buffer) task_lock(p); if (p->files) fdt = files_fdtable(p->files); - buffer += sprintf(buffer, + seq_printf(m, "FDSize:\t%d\n" "Groups:\t", fdt ? fdt->max_fds : 0); @@ -194,20 +198,18 @@ static inline char *task_state(struct task_struct *p, char *buffer) task_unlock(p); for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) - buffer += sprintf(buffer, "%d ", GROUP_AT(group_info, g)); + seq_printf(m, "%d ", GROUP_AT(group_info, g)); put_group_info(group_info); - buffer += sprintf(buffer, "\n"); - return buffer; + seq_printf(m, "\n"); } -static char *render_sigset_t(const char *header, sigset_t *set, char *buffer) +static void render_sigset_t(struct seq_file *m, const char *header, + sigset_t *set) { - int i, len; + int i; - len = strlen(header); - memcpy(buffer, header, len); - buffer += len; + seq_printf(m, "%s", header); i = _NSIG; do { @@ -218,12 +220,10 @@ static char *render_sigset_t(const char *header, sigset_t *set, char *buffer) if (sigismember(set, i+2)) x |= 2; if (sigismember(set, i+3)) x |= 4; if (sigismember(set, i+4)) x |= 8; - *buffer++ = (x < 10 ? 
'0' : 'a' - 10) + x; + seq_printf(m, "%x", x); } while (i >= 4); - *buffer++ = '\n'; - *buffer = 0; - return buffer; + seq_printf(m, "\n"); } static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, @@ -241,7 +241,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, } } -static inline char *task_sig(struct task_struct *p, char *buffer) +static inline void task_sig(struct seq_file *m, struct task_struct *p) { unsigned long flags; sigset_t pending, shpending, blocked, ignored, caught; @@ -268,58 +268,66 @@ static inline char *task_sig(struct task_struct *p, char *buffer) } rcu_read_unlock(); - buffer += sprintf(buffer, "Threads:\t%d\n", num_threads); - buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim); + seq_printf(m, "Threads:\t%d\n", num_threads); + seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); /* render them all */ - buffer = render_sigset_t("SigPnd:\t", &pending, buffer); - buffer = render_sigset_t("ShdPnd:\t", &shpending, buffer); - buffer = render_sigset_t("SigBlk:\t", &blocked, buffer); - buffer = render_sigset_t("SigIgn:\t", &ignored, buffer); - buffer = render_sigset_t("SigCgt:\t", &caught, buffer); + render_sigset_t(m, "SigPnd:\t", &pending); + render_sigset_t(m, "ShdPnd:\t", &shpending); + render_sigset_t(m, "SigBlk:\t", &blocked); + render_sigset_t(m, "SigIgn:\t", &ignored); + render_sigset_t(m, "SigCgt:\t", &caught); +} - return buffer; +static void render_cap_t(struct seq_file *m, const char *header, + kernel_cap_t *a) +{ + unsigned __capi; + + seq_printf(m, "%s", header); + CAP_FOR_EACH_U32(__capi) { + seq_printf(m, "%08x", + a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]); + } + seq_printf(m, "\n"); } -static inline char *task_cap(struct task_struct *p, char *buffer) +static inline void task_cap(struct seq_file *m, struct task_struct *p) { - return buffer + sprintf(buffer, "CapInh:\t%016x\n" - "CapPrm:\t%016x\n" - "CapEff:\t%016x\n", - cap_t(p->cap_inheritable), - cap_t(p->cap_permitted), - 
cap_t(p->cap_effective)); + render_cap_t(m, "CapInh:\t", &p->cap_inheritable); + render_cap_t(m, "CapPrm:\t", &p->cap_permitted); + render_cap_t(m, "CapEff:\t", &p->cap_effective); } -static inline char *task_context_switch_counts(struct task_struct *p, - char *buffer) +static inline void task_context_switch_counts(struct seq_file *m, + struct task_struct *p) { - return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n" - "nonvoluntary_ctxt_switches:\t%lu\n", - p->nvcsw, - p->nivcsw); + seq_printf(m, "voluntary_ctxt_switches:\t%lu\n" + "nonvoluntary_ctxt_switches:\t%lu\n", + p->nvcsw, + p->nivcsw); } -int proc_pid_status(struct task_struct *task, char *buffer) +int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { - char *orig = buffer; struct mm_struct *mm = get_task_mm(task); - buffer = task_name(task, buffer); - buffer = task_state(task, buffer); + task_name(m, task); + task_state(m, ns, pid, task); if (mm) { - buffer = task_mem(mm, buffer); + task_mem(m, mm); mmput(mm); } - buffer = task_sig(task, buffer); - buffer = task_cap(task, buffer); - buffer = cpuset_task_status_allowed(task, buffer); + task_sig(m, task); + task_cap(m, task); + cpuset_task_status_allowed(m, task); #if defined(CONFIG_S390) - buffer = task_show_regs(task, buffer); + task_show_regs(m, task); #endif - buffer = task_context_switch_counts(task, buffer); - return buffer - orig; + task_context_switch_counts(m, task); + return 0; } /* @@ -381,14 +389,14 @@ static cputime_t task_gtime(struct task_struct *p) return p->gtime; } -static int do_task_stat(struct task_struct *task, char *buffer, int whole) +static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task, int whole) { unsigned long vsize, eip, esp, wchan = ~0UL; long priority, nice; int tty_pgrp = -1, tty_nr = 0; sigset_t sigign, sigcatch; char state; - int res; pid_t ppid = 0, pgid = -1, sid = -1; int num_threads = 0; 
struct mm_struct *mm; @@ -400,9 +408,6 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) unsigned long rsslim = 0; char tcomm[sizeof(task->comm)]; unsigned long flags; - struct pid_namespace *ns; - - ns = current->nsproxy->pid_ns; state = *get_task_state(task); vsize = eip = esp = 0; @@ -489,10 +494,10 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) /* convert nsec -> ticks */ start_time = nsec_to_clock_t(start_time); - res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \ + seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", - task_pid_nr_ns(task, ns), + pid_nr_ns(pid, ns), tcomm, state, ppid, @@ -541,20 +546,23 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) cputime_to_clock_t(cgtime)); if (mm) mmput(mm); - return res; + return 0; } -int proc_tid_stat(struct task_struct *task, char *buffer) +int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { - return do_task_stat(task, buffer, 0); + return do_task_stat(m, ns, pid, task, 0); } -int proc_tgid_stat(struct task_struct *task, char *buffer) +int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { - return do_task_stat(task, buffer, 1); + return do_task_stat(m, ns, pid, task, 1); } -int proc_pid_statm(struct task_struct *task, char *buffer) +int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; struct mm_struct *mm = get_task_mm(task); @@ -563,7 +571,8 @@ int proc_pid_statm(struct task_struct *task, char *buffer) size = task_statm(mm, &shared, &text, &data, &resident); mmput(mm); } + seq_printf(m, "%d %d %d %d %d %d %d\n", + size, resident, shared, text, lib, 
data, 0); - return sprintf(buffer, "%d %d %d %d %d %d %d\n", - size, resident, shared, text, lib, data, 0); + return 0; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 33537487f5ab..88f8edf18258 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -88,10 +88,6 @@ * in /proc for a task before it execs a suid executable. */ - -/* Worst case buffer size needed for holding an integer. */ -#define PROC_NUMBUF 13 - struct pid_entry { char *name; int len; @@ -125,6 +121,10 @@ struct pid_entry { NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_info_file_operations, \ { .proc_read = &proc_##OTYPE } ) +#define ONE(NAME, MODE, OTYPE) \ + NOD(NAME, (S_IFREG|(MODE)), \ + NULL, &proc_single_file_operations, \ + { .proc_show = &proc_##OTYPE } ) int maps_protect; EXPORT_SYMBOL(maps_protect); @@ -153,7 +153,7 @@ static int get_nr_threads(struct task_struct *tsk) return count; } -static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static int proc_cwd_link(struct inode *inode, struct path *path) { struct task_struct *task = get_proc_task(inode); struct fs_struct *fs = NULL; @@ -165,8 +165,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs } if (fs) { read_lock(&fs->lock); - *mnt = mntget(fs->pwdmnt); - *dentry = dget(fs->pwd); + *path = fs->pwd; + path_get(&fs->pwd); read_unlock(&fs->lock); result = 0; put_fs_struct(fs); @@ -174,7 +174,7 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs return result; } -static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static int proc_root_link(struct inode *inode, struct path *path) { struct task_struct *task = get_proc_task(inode); struct fs_struct *fs = NULL; @@ -186,8 +186,8 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf } if (fs) { read_lock(&fs->lock); - *mnt = mntget(fs->rootmnt); - *dentry = dget(fs->root); + *path = fs->root; + path_get(&fs->root); 
read_unlock(&fs->lock); result = 0; put_fs_struct(fs); @@ -506,7 +506,7 @@ static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; -extern struct seq_operations mounts_op; +extern const struct seq_operations mounts_op; struct proc_mounts { struct seq_file m; int event; @@ -585,7 +585,7 @@ static const struct file_operations proc_mounts_operations = { .poll = mounts_poll, }; -extern struct seq_operations mountstats_op; +extern const struct seq_operations mountstats_op; static int mountstats_open(struct inode *inode, struct file *file) { int ret = seq_open(file, &mountstats_op); @@ -662,6 +662,45 @@ static const struct file_operations proc_info_file_operations = { .read = proc_info_read, }; +static int proc_single_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct pid_namespace *ns; + struct pid *pid; + struct task_struct *task; + int ret; + + ns = inode->i_sb->s_fs_info; + pid = proc_pid(inode); + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) + return -ESRCH; + + ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); + + put_task_struct(task); + return ret; +} + +static int proc_single_open(struct inode *inode, struct file *filp) +{ + int ret; + ret = single_open(filp, proc_single_show, NULL); + if (!ret) { + struct seq_file *m = filp->private_data; + + m->private = inode; + } + return ret; +} + +static const struct file_operations proc_single_file_operations = { + .open = proc_single_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int mem_open(struct inode* inode, struct file* file) { file->private_data = (void*)((long)current->self_exec_id); @@ -787,7 +826,7 @@ out_no_task: } #endif -static loff_t mem_lseek(struct file * file, loff_t offset, int orig) +loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { case 0: @@ -935,42 +974,6 @@ static const struct file_operations proc_oom_adjust_operations = { .write = 
oom_adjust_write, }; -#ifdef CONFIG_MMU -static ssize_t clear_refs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *task; - char buffer[PROC_NUMBUF], *end; - struct mm_struct *mm; - - memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) - return -EFAULT; - if (!simple_strtol(buffer, &end, 0)) - return -EINVAL; - if (*end == '\n') - end++; - task = get_proc_task(file->f_path.dentry->d_inode); - if (!task) - return -ESRCH; - mm = get_task_mm(task); - if (mm) { - clear_refs_smap(mm); - mmput(mm); - } - put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; -} - -static struct file_operations proc_clear_refs_operations = { - .write = clear_refs_write, -}; -#endif - #ifdef CONFIG_AUDITSYSCALL #define TMPBUFLEN 21 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, @@ -1161,39 +1164,36 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) int error = -EACCES; /* We don't need a base pointer in the /proc filesystem */ - path_release(nd); + path_put(&nd->path); /* Are we allowed to snoop on the tasks file descriptors? 
*/ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); + error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); nd->last_type = LAST_BIND; out: return ERR_PTR(error); } -static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, - char __user *buffer, int buflen) +static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) { - struct inode * inode; char *tmp = (char*)__get_free_page(GFP_TEMPORARY); - char *path; + char *pathname; int len; if (!tmp) return -ENOMEM; - inode = dentry->d_inode; - path = d_path(dentry, mnt, tmp, PAGE_SIZE); - len = PTR_ERR(path); - if (IS_ERR(path)) + pathname = d_path(path, tmp, PAGE_SIZE); + len = PTR_ERR(pathname); + if (IS_ERR(pathname)) goto out; - len = tmp + PAGE_SIZE - 1 - path; + len = tmp + PAGE_SIZE - 1 - pathname; if (len > buflen) len = buflen; - if (copy_to_user(buffer, path, len)) + if (copy_to_user(buffer, pathname, len)) len = -EFAULT; out: free_page((unsigned long)tmp); @@ -1204,20 +1204,18 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b { int error = -EACCES; struct inode *inode = dentry->d_inode; - struct dentry *de; - struct vfsmount *mnt = NULL; + struct path path; /* Are we allowed to snoop on the tasks file descriptors? 
*/ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); + error = PROC_I(inode)->op.proc_get_link(inode, &path); if (error) goto out; - error = do_proc_readlink(de, mnt, buffer, buflen); - dput(de); - mntput(mnt); + error = do_proc_readlink(&path, buffer, buflen); + path_put(&path); out: return error; } @@ -1444,8 +1442,7 @@ out: #define PROC_FDINFO_MAX 64 -static int proc_fd_info(struct inode *inode, struct dentry **dentry, - struct vfsmount **mnt, char *info) +static int proc_fd_info(struct inode *inode, struct path *path, char *info) { struct task_struct *task = get_proc_task(inode); struct files_struct *files = NULL; @@ -1464,10 +1461,10 @@ static int proc_fd_info(struct inode *inode, struct dentry **dentry, spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { - if (mnt) - *mnt = mntget(file->f_path.mnt); - if (dentry) - *dentry = dget(file->f_path.dentry); + if (path) { + *path = file->f_path; + path_get(&file->f_path); + } if (info) snprintf(info, PROC_FDINFO_MAX, "pos:\t%lli\n" @@ -1484,10 +1481,9 @@ static int proc_fd_info(struct inode *inode, struct dentry **dentry, return -ENOENT; } -static int proc_fd_link(struct inode *inode, struct dentry **dentry, - struct vfsmount **mnt) +static int proc_fd_link(struct inode *inode, struct path *path) { - return proc_fd_info(inode, dentry, mnt, NULL); + return proc_fd_info(inode, path, NULL); } static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) @@ -1681,7 +1677,7 @@ static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { char tmp[PROC_FDINFO_MAX]; - int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, NULL, tmp); + int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); if (!err) err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); return err; @@ -2098,15 +2094,23 @@ static const struct file_operations proc_coredump_filter_operations = { static int 
proc_self_readlink(struct dentry *dentry, char __user *buffer, int buflen) { + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", task_tgid_vnr(current)); + if (!tgid) + return -ENOENT; + sprintf(tmp, "%d", tgid); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", task_tgid_vnr(current)); + if (!tgid) + return ERR_PTR(-ENOENT); + sprintf(tmp, "%d", task_tgid_nr_ns(current, ns)); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -2271,14 +2275,14 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), - INF("status", S_IRUGO, pid_status), + ONE("status", S_IRUGO, pid_status), INF("limits", S_IRUSR, pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif INF("cmdline", S_IRUGO, pid_cmdline), - INF("stat", S_IRUGO, tgid_stat), - INF("statm", S_IRUGO, pid_statm), + ONE("stat", S_IRUGO, tgid_stat), + ONE("statm", S_IRUGO, pid_statm), REG("maps", S_IRUGO, maps), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, numa_maps), @@ -2289,9 +2293,10 @@ static const struct pid_entry tgid_base_stuff[] = { LNK("exe", exe), REG("mounts", S_IRUGO, mounts), REG("mountstats", S_IRUSR, mountstats), -#ifdef CONFIG_MMU +#ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, clear_refs), REG("smaps", S_IRUGO, smaps), + REG("pagemap", S_IRUSR, pagemap), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, attr_dir), @@ -2360,7 +2365,8 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - 
shrink_dcache_parent(dentry); + if (!(current->flags & PF_EXITING)) + shrink_dcache_parent(dentry); d_drop(dentry); dput(dentry); } @@ -2600,14 +2606,14 @@ static const struct pid_entry tid_base_stuff[] = { DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), - INF("status", S_IRUGO, pid_status), + ONE("status", S_IRUGO, pid_status), INF("limits", S_IRUSR, pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif INF("cmdline", S_IRUGO, pid_cmdline), - INF("stat", S_IRUGO, tid_stat), - INF("statm", S_IRUGO, pid_statm), + ONE("stat", S_IRUGO, tid_stat), + ONE("statm", S_IRUGO, pid_statm), REG("maps", S_IRUGO, maps), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, numa_maps), @@ -2617,9 +2623,10 @@ static const struct pid_entry tid_base_stuff[] = { LNK("root", root), LNK("exe", exe), REG("mounts", S_IRUGO, mounts), -#ifdef CONFIG_MMU +#ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, clear_refs), REG("smaps", S_IRUGO, smaps), + REG("pagemap", S_IRUSR, pagemap), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, attr_dir), diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 6a2fe5187b62..68971e66cd41 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -25,12 +25,6 @@ #include "internal.h" -static ssize_t proc_file_read(struct file *file, char __user *buf, - size_t nbytes, loff_t *ppos); -static ssize_t proc_file_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos); -static loff_t proc_file_lseek(struct file *, loff_t, int); - DEFINE_SPINLOCK(proc_subdir_lock); static int proc_match(int len, const char *name, struct proc_dir_entry *de) @@ -40,12 +34,6 @@ static int proc_match(int len, const char *name, struct proc_dir_entry *de) return !memcmp(name, de->name, len); } -static const struct file_operations proc_file_operations = { - .llseek = proc_file_lseek, - .read = proc_file_read, - .write = proc_file_write, -}; - /* buffer size is 
one page but our output routines use some slack for overruns */ #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) @@ -233,6 +221,12 @@ proc_file_lseek(struct file *file, loff_t offset, int orig) return retval; } +static const struct file_operations proc_file_operations = { + .llseek = proc_file_lseek, + .read = proc_file_read, + .write = proc_file_write, +}; + static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; @@ -406,12 +400,12 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam spin_unlock(&proc_subdir_lock); error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); - spin_lock(&proc_subdir_lock); - break; + goto out_unlock; } } } spin_unlock(&proc_subdir_lock); +out_unlock: unlock_kernel(); if (inode) { @@ -527,6 +521,7 @@ static const struct inode_operations proc_dir_inode_operations = { static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { unsigned int i; + struct proc_dir_entry *tmp; i = get_inode_number(); if (i == 0) @@ -550,6 +545,15 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp } spin_lock(&proc_subdir_lock); + + for (tmp = dir->subdir; tmp; tmp = tmp->next) + if (strcmp(tmp->name, dp->name) == 0) { + printk(KERN_WARNING "proc_dir_entry '%s' already " + "registered\n", dp->name); + dump_stack(); + break; + } + dp->next = dir->subdir; dp->parent = dir; dir->subdir = dp; @@ -558,7 +562,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp return 0; } -static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, +static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, const char *name, mode_t mode, nlink_t nlink) @@ -601,7 +605,7 @@ struct proc_dir_entry *proc_symlink(const char *name, { struct proc_dir_entry *ent; - ent = proc_create(&parent,name, + ent = __proc_create(&parent, name, (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); if (ent) 
{ @@ -626,7 +630,7 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, { struct proc_dir_entry *ent; - ent = proc_create(&parent, name, S_IFDIR | mode, 2); + ent = __proc_create(&parent, name, S_IFDIR | mode, 2); if (ent) { if (proc_register(parent, ent) < 0) { kfree(ent); @@ -660,7 +664,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, nlink = 1; } - ent = proc_create(&parent,name,mode,nlink); + ent = __proc_create(&parent, name, mode, nlink); if (ent) { if (proc_register(parent, ent) < 0) { kfree(ent); @@ -670,6 +674,38 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, return ent; } +struct proc_dir_entry *proc_create(const char *name, mode_t mode, + struct proc_dir_entry *parent, + const struct file_operations *proc_fops) +{ + struct proc_dir_entry *pde; + nlink_t nlink; + + if (S_ISDIR(mode)) { + if ((mode & S_IALLUGO) == 0) + mode |= S_IRUGO | S_IXUGO; + nlink = 2; + } else { + if ((mode & S_IFMT) == 0) + mode |= S_IFREG; + if ((mode & S_IALLUGO) == 0) + mode |= S_IRUGO; + nlink = 1; + } + + pde = __proc_create(&parent, name, mode, nlink); + if (!pde) + goto out; + pde->proc_fops = proc_fops; + if (proc_register(parent, pde) < 0) + goto out_free; + return pde; +out_free: + kfree(pde); +out: + return NULL; +} + void free_proc_entry(struct proc_dir_entry *de) { unsigned int ino = de->low_ino; @@ -679,7 +715,7 @@ void free_proc_entry(struct proc_dir_entry *de) release_inode_number(ino); - if (S_ISLNK(de->mode) && de->data) + if (S_ISLNK(de->mode)) kfree(de->data); kfree(de); } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 1a551d92e1d8..82b3a1b5a70b 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -73,11 +73,6 @@ static void proc_delete_inode(struct inode *inode) struct vfsmount *proc_mnt; -static void proc_read_inode(struct inode * inode) -{ - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -} - static struct kmem_cache * proc_inode_cachep; static struct inode 
*proc_alloc_inode(struct super_block *sb) @@ -128,7 +123,6 @@ static int proc_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, - .read_inode = proc_read_inode, .drop_inode = generic_delete_inode, .delete_inode = proc_delete_inode, .statfs = simple_statfs, @@ -401,39 +395,41 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, if (de != NULL && !try_module_get(de->owner)) goto out_mod; - inode = iget(sb, ino); + inode = iget_locked(sb, ino); if (!inode) goto out_ino; - - PROC_I(inode)->fd = 0; - PROC_I(inode)->pde = de; - if (de) { - if (de->mode) { - inode->i_mode = de->mode; - inode->i_uid = de->uid; - inode->i_gid = de->gid; - } - if (de->size) - inode->i_size = de->size; - if (de->nlink) - inode->i_nlink = de->nlink; - if (de->proc_iops) - inode->i_op = de->proc_iops; - if (de->proc_fops) { - if (S_ISREG(inode->i_mode)) { + if (inode->i_state & I_NEW) { + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + PROC_I(inode)->fd = 0; + PROC_I(inode)->pde = de; + if (de) { + if (de->mode) { + inode->i_mode = de->mode; + inode->i_uid = de->uid; + inode->i_gid = de->gid; + } + if (de->size) + inode->i_size = de->size; + if (de->nlink) + inode->i_nlink = de->nlink; + if (de->proc_iops) + inode->i_op = de->proc_iops; + if (de->proc_fops) { + if (S_ISREG(inode->i_mode)) { #ifdef CONFIG_COMPAT - if (!de->proc_fops->compat_ioctl) - inode->i_fop = - &proc_reg_file_ops_no_compat; - else + if (!de->proc_fops->compat_ioctl) + inode->i_fop = + &proc_reg_file_ops_no_compat; + else #endif - inode->i_fop = &proc_reg_file_ops; + inode->i_fop = &proc_reg_file_ops; + } else { + inode->i_fop = de->proc_fops; + } } - else - inode->i_fop = de->proc_fops; } + unlock_new_inode(inode); } - return inode; out_ino: @@ -471,4 +467,3 @@ out_no_root: de_put(&proc_root); return -ENOMEM; } -MODULE_LICENSE("GPL"); diff --git 
a/fs/proc/internal.h b/fs/proc/internal.h index 05b3e9006262..1c81c8f1aeed 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -46,21 +46,24 @@ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); extern int maps_protect; -extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f); -extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **); -extern int proc_tid_stat(struct task_struct *, char *); -extern int proc_tgid_stat(struct task_struct *, char *); -extern int proc_pid_status(struct task_struct *, char *); -extern int proc_pid_statm(struct task_struct *, char *); +extern void create_seq_entry(char *name, mode_t mode, + const struct file_operations *f); +extern int proc_exe_link(struct inode *, struct path *); +extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +extern int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); extern const struct file_operations proc_maps_operations; extern const struct file_operations proc_numa_maps_operations; extern const struct file_operations proc_smaps_operations; - -extern const struct file_operations proc_maps_operations; -extern const struct file_operations proc_numa_maps_operations; -extern const struct file_operations proc_smaps_operations; - +extern const struct file_operations proc_clear_refs_operations; +extern const struct file_operations proc_pagemap_operations; void free_proc_entry(struct proc_dir_entry *de); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 1be73082edd3..e78c81fcf547 100644 --- a/fs/proc/kcore.c 
+++ b/fs/proc/kcore.c @@ -12,7 +12,6 @@ #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/user.h> -#include <linux/a.out.h> #include <linux/capability.h> #include <linux/elf.h> #include <linux/elfcore.h> @@ -325,7 +324,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (m == NULL) { if (clear_user(buffer, tsz)) return -EFAULT; - } else if ((start >= VMALLOC_START) && (start < VMALLOC_END)) { + } else if (is_vmalloc_addr((void *)start)) { char * elf_buf; struct vm_struct *m; unsigned long curstart = start; diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 22f789de3909..941e95114b5a 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -67,7 +67,7 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) if (len < 1) len = 1; seq_printf(m, "%*c", len, ' '); - seq_path(m, file->f_path.mnt, file->f_path.dentry, ""); + seq_path(m, &file->f_path, ""); } seq_putc(m, '\n'); @@ -116,7 +116,7 @@ static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos) return rb_next((struct rb_node *) v); } -static struct seq_operations proc_nommu_vma_list_seqop = { +static const struct seq_operations proc_nommu_vma_list_seqop = { .start = nommu_vma_list_start, .next = nommu_vma_list_next, .stop = nommu_vma_list_stop, diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 3462bfde89f6..468805d40e2b 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -29,6 +29,7 @@ #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/pagemap.h> +#include <linux/interrupt.h> #include <linux/swap.h> #include <linux/slab.h> #include <linux/smp.h> @@ -46,6 +47,7 @@ #include <linux/vmalloc.h> #include <linux/crash_dump.h> #include <linux/pid_namespace.h> +#include <linux/bootmem.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -63,7 +65,6 @@ */ extern int get_hardware_list(char *); extern int get_stram_list(char *); -extern int get_filesystem_list(char *); extern int 
get_exec_domain_list(char *); extern int get_dma_list(char *); @@ -83,10 +84,15 @@ static int loadavg_read_proc(char *page, char **start, off_t off, { int a, b, c; int len; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + } while (read_seqretry(&xtime_lock, seq)); - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] + (FIXED_1/200); len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), @@ -216,7 +222,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, #undef K } -extern struct seq_operations fragmentation_op; +extern const struct seq_operations fragmentation_op; static int fragmentation_open(struct inode *inode, struct file *file) { (void)inode; @@ -230,7 +236,7 @@ static const struct file_operations fragmentation_file_operations = { .release = seq_release, }; -extern struct seq_operations pagetypeinfo_op; +extern const struct seq_operations pagetypeinfo_op; static int pagetypeinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &pagetypeinfo_op); @@ -243,7 +249,7 @@ static const struct file_operations pagetypeinfo_file_ops = { .release = seq_release, }; -extern struct seq_operations zoneinfo_op; +extern const struct seq_operations zoneinfo_op; static int zoneinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &zoneinfo_op); @@ -268,7 +274,7 @@ static int version_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } -extern struct seq_operations cpuinfo_op; +extern const struct seq_operations cpuinfo_op; static int cpuinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &cpuinfo_op); @@ -321,7 +327,7 @@ static void devinfo_stop(struct seq_file *f, void *v) /* Nothing to do */ } -static struct seq_operations devinfo_ops = 
{ +static const struct seq_operations devinfo_ops = { .start = devinfo_start, .next = devinfo_next, .stop = devinfo_stop, @@ -340,7 +346,7 @@ static const struct file_operations proc_devinfo_operations = { .release = seq_release, }; -extern struct seq_operations vmstat_op; +extern const struct seq_operations vmstat_op; static int vmstat_open(struct inode *inode, struct file *file) { return seq_open(file, &vmstat_op); @@ -371,7 +377,7 @@ static int stram_read_proc(char *page, char **start, off_t off, #endif #ifdef CONFIG_BLOCK -extern struct seq_operations partitions_op; +extern const struct seq_operations partitions_op; static int partitions_open(struct inode *inode, struct file *file) { return seq_open(file, &partitions_op); @@ -383,7 +389,7 @@ static const struct file_operations proc_partitions_operations = { .release = seq_release, }; -extern struct seq_operations diskstats_op; +extern const struct seq_operations diskstats_op; static int diskstats_open(struct inode *inode, struct file *file) { return seq_open(file, &diskstats_op); @@ -397,7 +403,7 @@ static const struct file_operations proc_diskstats_operations = { #endif #ifdef CONFIG_MODULES -extern struct seq_operations modules_op; +extern const struct seq_operations modules_op; static int modules_open(struct inode *inode, struct file *file) { return seq_open(file, &modules_op); @@ -424,7 +430,7 @@ static const struct file_operations proc_slabinfo_operations = { }; #ifdef CONFIG_DEBUG_SLAB_LEAK -extern struct seq_operations slabstats_op; +extern const struct seq_operations slabstats_op; static int slabstats_open(struct inode *inode, struct file *file) { unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL); @@ -598,8 +604,7 @@ static void int_seq_stop(struct seq_file *f, void *v) } -extern int show_interrupts(struct seq_file *f, void *v); /* In arch code */ -static struct seq_operations int_seq_ops = { +static const struct seq_operations int_seq_ops = { .start = int_seq_start, .next = int_seq_next, .stop = 
int_seq_stop, @@ -675,6 +680,137 @@ static const struct file_operations proc_sysrq_trigger_operations = { }; #endif +#ifdef CONFIG_PROC_PAGE_MONITOR +#define KPMSIZE sizeof(u64) +#define KPMMASK (KPMSIZE - 1) +/* /proc/kpagecount - an array exposing page counts + * + * Each entry is a u64 representing the corresponding + * physical page count. + */ +static ssize_t kpagecount_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 __user *out = (u64 __user *)buf; + struct page *ppage; + unsigned long src = *ppos; + unsigned long pfn; + ssize_t ret = 0; + u64 pcount; + + pfn = src / KPMSIZE; + count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); + if (src & KPMMASK || count & KPMMASK) + return -EIO; + + while (count > 0) { + ppage = NULL; + if (pfn_valid(pfn)) + ppage = pfn_to_page(pfn); + pfn++; + if (!ppage) + pcount = 0; + else + pcount = atomic_read(&ppage->_count); + + if (put_user(pcount, out++)) { + ret = -EFAULT; + break; + } + + count -= KPMSIZE; + } + + *ppos += (char __user *)out - buf; + if (!ret) + ret = (char __user *)out - buf; + return ret; +} + +static struct file_operations proc_kpagecount_operations = { + .llseek = mem_lseek, + .read = kpagecount_read, +}; + +/* /proc/kpageflags - an array exposing page flags + * + * Each entry is a u64 representing the corresponding + * physical page flags. 
+ */ + +/* These macros are used to decouple internal flags from exported ones */ + +#define KPF_LOCKED 0 +#define KPF_ERROR 1 +#define KPF_REFERENCED 2 +#define KPF_UPTODATE 3 +#define KPF_DIRTY 4 +#define KPF_LRU 5 +#define KPF_ACTIVE 6 +#define KPF_SLAB 7 +#define KPF_WRITEBACK 8 +#define KPF_RECLAIM 9 +#define KPF_BUDDY 10 + +#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos) + +static ssize_t kpageflags_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 __user *out = (u64 __user *)buf; + struct page *ppage; + unsigned long src = *ppos; + unsigned long pfn; + ssize_t ret = 0; + u64 kflags, uflags; + + pfn = src / KPMSIZE; + count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); + if (src & KPMMASK || count & KPMMASK) + return -EIO; + + while (count > 0) { + ppage = NULL; + if (pfn_valid(pfn)) + ppage = pfn_to_page(pfn); + pfn++; + if (!ppage) + kflags = 0; + else + kflags = ppage->flags; + + uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) | + kpf_copy_bit(kflags, KPF_ERROR, PG_error) | + kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) | + kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) | + kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) | + kpf_copy_bit(kflags, KPF_LRU, PG_lru) | + kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) | + kpf_copy_bit(kflags, KPF_SLAB, PG_slab) | + kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) | + kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) | + kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy); + + if (put_user(uflags, out++)) { + ret = -EFAULT; + break; + } + + count -= KPMSIZE; + } + + *ppos += (char __user *)out - buf; + if (!ret) + ret = (char __user *)out - buf; + return ret; +} + +static struct file_operations proc_kpageflags_operations = { + .llseek = mem_lseek, + .read = kpageflags_read, +}; +#endif /* CONFIG_PROC_PAGE_MONITOR */ + struct proc_dir_entry *proc_root_kcore; void create_seq_entry(char *name, mode_t mode, const struct file_operations *f) @@ -755,6 
+891,10 @@ void __init proc_misc_init(void) (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; } #endif +#ifdef CONFIG_PROC_PAGE_MONITOR + create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); + create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); +#endif #ifdef CONFIG_PROC_VMCORE proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); if (proc_vmcore) diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 4823c9677fac..14e9b5aaf863 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -67,12 +67,7 @@ EXPORT_SYMBOL_GPL(seq_release_net); struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops) { - struct proc_dir_entry *res; - - res = create_proc_entry(name, mode, net->proc_net); - if (res) - res->proc_fops = fops; - return res; + return proc_create(name, mode, net->proc_net, fops); } EXPORT_SYMBOL_GPL(proc_net_fops_create); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 4e57fcf85982..614c34b6d1c2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -9,7 +9,7 @@ static struct dentry_operations proc_sys_dentry_operations; static const struct file_operations proc_sys_file_operations; -static struct inode_operations proc_sys_inode_operations; +static const struct inode_operations proc_sys_inode_operations; static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) { @@ -407,7 +407,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata * if (!nd || !depth) goto out; - dentry = nd->dentry; + dentry = nd->path.dentry; table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); /* If the entry does not exist deny permission */ @@ -446,7 +446,7 @@ static const struct file_operations proc_sys_file_operations = { .readdir = proc_sys_readdir, }; -static struct inode_operations proc_sys_inode_operations = { +static const struct inode_operations proc_sys_inode_operations = { 
.lookup = proc_sys_lookup, .permission = proc_sys_permission, .setattr = proc_sys_setattr, diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 22846225acfa..49816e00b51a 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -15,9 +15,6 @@ #include <linux/seq_file.h> #include <linux/bitops.h> -static int tty_ldiscs_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data); - /* * The /proc/tty directory inodes... */ @@ -120,7 +117,7 @@ static void t_stop(struct seq_file *m, void *v) mutex_unlock(&tty_mutex); } -static struct seq_operations tty_drivers_op = { +static const struct seq_operations tty_drivers_op = { .start = t_start, .next = t_next, .stop = t_stop, diff --git a/fs/proc/root.c b/fs/proc/root.c index 81f99e691f99..ef0fb57fc9ef 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -232,6 +232,7 @@ void pid_ns_release_proc(struct pid_namespace *ns) EXPORT_SYMBOL(proc_symlink); EXPORT_SYMBOL(proc_mkdir); EXPORT_SYMBOL(create_proc_entry); +EXPORT_SYMBOL(proc_create); EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(proc_root); EXPORT_SYMBOL(proc_root_fs); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8043a3eab52c..49958cffbd8d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -5,14 +5,18 @@ #include <linux/highmem.h> #include <linux/ptrace.h> #include <linux/pagemap.h> +#include <linux/ptrace.h> #include <linux/mempolicy.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/seq_file.h> #include <asm/elf.h> #include <asm/uaccess.h> #include <asm/tlbflush.h> #include "internal.h" -char *task_mem(struct mm_struct *mm, char *buffer) +void task_mem(struct seq_file *m, struct mm_struct *mm) { unsigned long data, text, lib; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; @@ -34,7 +38,7 @@ char *task_mem(struct mm_struct *mm, char *buffer) data = mm->total_vm - mm->shared_vm - mm->stack_vm; text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; lib = (mm->exec_vm 
<< (PAGE_SHIFT-10)) - text; - buffer += sprintf(buffer, + seq_printf(m, "VmPeak:\t%8lu kB\n" "VmSize:\t%8lu kB\n" "VmLck:\t%8lu kB\n" @@ -53,7 +57,6 @@ char *task_mem(struct mm_struct *mm, char *buffer) data << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10); - return buffer; } unsigned long task_vsize(struct mm_struct *mm) @@ -72,7 +75,7 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return mm->total_vm; } -int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +int proc_exe_link(struct inode *inode, struct path *path) { struct vm_area_struct * vma; int result = -ENOENT; @@ -95,8 +98,8 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount * } if (vma) { - *mnt = mntget(vma->vm_file->f_path.mnt); - *dentry = dget(vma->vm_file->f_path.dentry); + *path = vma->vm_file->f_path; + path_get(&vma->vm_file->f_path); result = 0; } @@ -114,24 +117,124 @@ static void pad_len_spaces(struct seq_file *m, int len) seq_printf(m, "%*c", len, ' '); } -struct mem_size_stats +static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) { - unsigned long resident; - unsigned long shared_clean; - unsigned long shared_dirty; - unsigned long private_clean; - unsigned long private_dirty; - unsigned long referenced; -}; + if (vma && vma != priv->tail_vma) { + struct mm_struct *mm = vma->vm_mm; + up_read(&mm->mmap_sem); + mmput(mm); + } +} -struct pmd_walker { - struct vm_area_struct *vma; - void *private; - void (*action)(struct vm_area_struct *, pmd_t *, unsigned long, - unsigned long, void *); -}; +static void *m_start(struct seq_file *m, loff_t *pos) +{ + struct proc_maps_private *priv = m->private; + unsigned long last_addr = m->version; + struct mm_struct *mm; + struct vm_area_struct *vma, *tail_vma = NULL; + loff_t l = *pos; + + /* Clear the per syscall fields in priv */ + priv->task = NULL; + priv->tail_vma = NULL; + + /* + * We 
remember last_addr rather than next_addr to hit with + * mmap_cache most of the time. We have zero last_addr at + * the beginning and also after lseek. We will have -1 last_addr + * after the end of the vmas. + */ + + if (last_addr == -1UL) + return NULL; + + priv->task = get_pid_task(priv->pid, PIDTYPE_PID); + if (!priv->task) + return NULL; + + mm = mm_for_maps(priv->task); + if (!mm) + return NULL; + + tail_vma = get_gate_vma(priv->task); + priv->tail_vma = tail_vma; + + /* Start with last addr hint */ + vma = find_vma(mm, last_addr); + if (last_addr && vma) { + vma = vma->vm_next; + goto out; + } + + /* + * Check the vma index is within the range and do + * sequential scan until m_index. + */ + vma = NULL; + if ((unsigned long)l < mm->map_count) { + vma = mm->mmap; + while (l-- && vma) + vma = vma->vm_next; + goto out; + } + + if (l != mm->map_count) + tail_vma = NULL; /* After gate vma */ + +out: + if (vma) + return vma; + + /* End of vmas has been reached */ + m->version = (tail_vma != NULL)? 0: -1UL; + up_read(&mm->mmap_sem); + mmput(mm); + return tail_vma; +} + +static void *m_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct proc_maps_private *priv = m->private; + struct vm_area_struct *vma = v; + struct vm_area_struct *tail_vma = priv->tail_vma; + + (*pos)++; + if (vma && (vma != tail_vma) && vma->vm_next) + return vma->vm_next; + vma_stop(priv, vma); + return (vma != tail_vma)? 
tail_vma: NULL; +} -static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) +static void m_stop(struct seq_file *m, void *v) +{ + struct proc_maps_private *priv = m->private; + struct vm_area_struct *vma = v; + + vma_stop(priv, vma); + if (priv->task) + put_task_struct(priv->task); +} + +static int do_maps_open(struct inode *inode, struct file *file, + const struct seq_operations *ops) +{ + struct proc_maps_private *priv; + int ret = -ENOMEM; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv) { + priv->pid = proc_pid(inode); + ret = seq_open(file, ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = priv; + } else { + kfree(priv); + } + } + return ret; +} + +static int show_map(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; struct task_struct *task = priv->task; @@ -168,7 +271,7 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats */ if (file) { pad_len_spaces(m, len); - seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n"); + seq_path(m, &file->f_path, "\n"); } else { const char *name = arch_vma_name(vma); if (!name) { @@ -191,41 +294,71 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats } seq_putc(m, '\n'); - if (mss) - seq_printf(m, - "Size: %8lu kB\n" - "Rss: %8lu kB\n" - "Shared_Clean: %8lu kB\n" - "Shared_Dirty: %8lu kB\n" - "Private_Clean: %8lu kB\n" - "Private_Dirty: %8lu kB\n" - "Referenced: %8lu kB\n", - (vma->vm_end - vma->vm_start) >> 10, - mss->resident >> 10, - mss->shared_clean >> 10, - mss->shared_dirty >> 10, - mss->private_clean >> 10, - mss->private_dirty >> 10, - mss->referenced >> 10); - if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task))? 
vma->vm_start: 0; return 0; } -static int show_map(struct seq_file *m, void *v) +static const struct seq_operations proc_pid_maps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_map +}; + +static int maps_open(struct inode *inode, struct file *file) { - return show_map_internal(m, v, NULL); + return do_maps_open(inode, file, &proc_pid_maps_op); } -static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - void *private) +const struct file_operations proc_maps_operations = { + .open = maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +/* + * Proportional Set Size(PSS): my share of RSS. + * + * PSS of a process is the count of pages it has in memory, where each + * page is divided by the number of processes sharing it. So if a + * process has 1000 pages all to itself, and 1000 shared with one other + * process, its PSS will be 1500. + * + * To keep (accumulated) division errors low, we adopt a 64bit + * fixed-point pss counter to minimize division errors. So (pss >> + * PSS_SHIFT) would be the real byte count. + * + * A shift of 12 before division means (assuming 4K page size): + * - 1M 3-user-pages add up to 8KB errors; + * - supports mapcount up to 2^24, or 16M; + * - supports PSS up to 2^52 bytes, or 4PB. 
+ */ +#define PSS_SHIFT 12 + +#ifdef CONFIG_PROC_PAGE_MONITOR +struct mem_size_stats +{ + struct vm_area_struct *vma; + unsigned long resident; + unsigned long shared_clean; + unsigned long shared_dirty; + unsigned long private_clean; + unsigned long private_dirty; + unsigned long referenced; + u64 pss; +}; + +static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + void *private) { struct mem_size_stats *mss = private; + struct vm_area_struct *vma = mss->vma; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; + int mapcount; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { @@ -242,26 +375,88 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, /* Accumulate the size in pages that have been accessed. */ if (pte_young(ptent) || PageReferenced(page)) mss->referenced += PAGE_SIZE; - if (page_mapcount(page) >= 2) { + mapcount = page_mapcount(page); + if (mapcount >= 2) { if (pte_dirty(ptent)) mss->shared_dirty += PAGE_SIZE; else mss->shared_clean += PAGE_SIZE; + mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; } else { if (pte_dirty(ptent)) mss->private_dirty += PAGE_SIZE; else mss->private_clean += PAGE_SIZE; + mss->pss += (PAGE_SIZE << PSS_SHIFT); } } pte_unmap_unlock(pte - 1, ptl); cond_resched(); + return 0; +} + +static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; + +static int show_smap(struct seq_file *m, void *v) +{ + struct vm_area_struct *vma = v; + struct mem_size_stats mss; + int ret; + + memset(&mss, 0, sizeof mss); + mss.vma = vma; + if (vma->vm_mm && !is_vm_hugetlb_page(vma)) + walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, + &smaps_walk, &mss); + + ret = show_map(m, v); + if (ret) + return ret; + + seq_printf(m, + "Size: %8lu kB\n" + "Rss: %8lu kB\n" + "Pss: %8lu kB\n" + "Shared_Clean: %8lu kB\n" + "Shared_Dirty: %8lu kB\n" + "Private_Clean: %8lu kB\n" + "Private_Dirty: %8lu kB\n" + "Referenced: %8lu kB\n", + (vma->vm_end - 
vma->vm_start) >> 10, + mss.resident >> 10, + (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), + mss.shared_clean >> 10, + mss.shared_dirty >> 10, + mss.private_clean >> 10, + mss.private_dirty >> 10, + mss.referenced >> 10); + + return ret; +} + +static const struct seq_operations proc_pid_smaps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_smap +}; + +static int smaps_open(struct inode *inode, struct file *file) +{ + return do_maps_open(inode, file, &proc_pid_smaps_op); } -static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - void *private) +const struct file_operations proc_smaps_operations = { + .open = smaps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, void *private) { + struct vm_area_struct *vma = private; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -282,235 +477,248 @@ static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, } pte_unmap_unlock(pte - 1, ptl); cond_resched(); + return 0; } -static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud, - unsigned long addr, unsigned long end) +static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; + +static ssize_t clear_refs_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { - pmd_t *pmd; - unsigned long next; + struct task_struct *task; + char buffer[PROC_NUMBUF], *end; + struct mm_struct *mm; + struct vm_area_struct *vma; - for (pmd = pmd_offset(pud, addr); addr != end; - pmd++, addr = next) { - next = pmd_addr_end(addr, end); - if (pmd_none_or_clear_bad(pmd)) - continue; - walker->action(walker->vma, pmd, addr, next, walker->private); + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + 
if (!simple_strtol(buffer, &end, 0)) + return -EINVAL; + if (*end == '\n') + end++; + task = get_proc_task(file->f_path.dentry->d_inode); + if (!task) + return -ESRCH; + mm = get_task_mm(task); + if (mm) { + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) + if (!is_vm_hugetlb_page(vma)) + walk_page_range(mm, vma->vm_start, vma->vm_end, + &clear_refs_walk, vma); + flush_tlb_mm(mm); + up_read(&mm->mmap_sem); + mmput(mm); } + put_task_struct(task); + if (end - buffer == 0) + return -EIO; + return end - buffer; } -static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd, - unsigned long addr, unsigned long end) -{ - pud_t *pud; - unsigned long next; +const struct file_operations proc_clear_refs_operations = { + .write = clear_refs_write, +}; - for (pud = pud_offset(pgd, addr); addr != end; - pud++, addr = next) { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; - walk_pmd_range(walker, pud, addr, next); +struct pagemapread { + char __user *out, *end; +}; + +#define PM_ENTRY_BYTES sizeof(u64) +#define PM_RESERVED_BITS 3 +#define PM_RESERVED_OFFSET (64 - PM_RESERVED_BITS) +#define PM_RESERVED_MASK (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET) +#define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK) +#define PM_NOT_PRESENT PM_SPECIAL(1LL) +#define PM_SWAP PM_SPECIAL(2LL) +#define PM_END_OF_BUFFER 1 + +static int add_to_pagemap(unsigned long addr, u64 pfn, + struct pagemapread *pm) +{ + /* + * Make sure there's room in the buffer for an + * entire entry. Otherwise, only copy part of + * the pfn. 
+ */ + if (pm->out + PM_ENTRY_BYTES >= pm->end) { + if (copy_to_user(pm->out, &pfn, pm->end - pm->out)) + return -EFAULT; + pm->out = pm->end; + return PM_END_OF_BUFFER; } + + if (put_user(pfn, pm->out)) + return -EFAULT; + pm->out += PM_ENTRY_BYTES; + return 0; } -/* - * walk_page_range - walk the page tables of a VMA with a callback - * @vma - VMA to walk - * @action - callback invoked for every bottom-level (PTE) page table - * @private - private data passed to the callback function - * - * Recursively walk the page table for the memory area in a VMA, calling - * a callback for every bottom-level (PTE) page table. - */ -static inline void walk_page_range(struct vm_area_struct *vma, - void (*action)(struct vm_area_struct *, - pmd_t *, unsigned long, - unsigned long, void *), - void *private) +static int pagemap_pte_hole(unsigned long start, unsigned long end, + void *private) { - unsigned long addr = vma->vm_start; - unsigned long end = vma->vm_end; - struct pmd_walker walker = { - .vma = vma, - .private = private, - .action = action, - }; - pgd_t *pgd; - unsigned long next; - - for (pgd = pgd_offset(vma->vm_mm, addr); addr != end; - pgd++, addr = next) { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - walk_pud_range(&walker, pgd, addr, next); + struct pagemapread *pm = private; + unsigned long addr; + int err = 0; + for (addr = start; addr < end; addr += PAGE_SIZE) { + err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); + if (err) + break; } + return err; } -static int show_smap(struct seq_file *m, void *v) +u64 swap_pte_to_pagemap_entry(pte_t pte) { - struct vm_area_struct *vma = v; - struct mem_size_stats mss; - - memset(&mss, 0, sizeof mss); - if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma, smaps_pte_range, &mss); - return show_map_internal(m, v, &mss); + swp_entry_t e = pte_to_swp_entry(pte); + return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); } -void clear_refs_smap(struct mm_struct *mm) 
+static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + void *private) { - struct vm_area_struct *vma; + struct pagemapread *pm = private; + pte_t *pte; + int err = 0; + + for (; addr != end; addr += PAGE_SIZE) { + u64 pfn = PM_NOT_PRESENT; + pte = pte_offset_map(pmd, addr); + if (is_swap_pte(*pte)) + pfn = swap_pte_to_pagemap_entry(*pte); + else if (pte_present(*pte)) + pfn = pte_pfn(*pte); + /* unmap so we're not in atomic when we copy to userspace */ + pte_unmap(pte); + err = add_to_pagemap(addr, pfn, pm); + if (err) + return err; + } - down_read(&mm->mmap_sem); - for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma, clear_refs_pte_range, NULL); - flush_tlb_mm(mm); - up_read(&mm->mmap_sem); + cond_resched(); + + return err; } -static void *m_start(struct seq_file *m, loff_t *pos) +static struct mm_walk pagemap_walk = { + .pmd_entry = pagemap_pte_range, + .pte_hole = pagemap_pte_hole +}; + +/* + * /proc/pid/pagemap - an array mapping virtual pages to pfns + * + * For each page in the address space, this file contains one 64-bit + * entry representing the corresponding physical page frame number + * (PFN) if the page is present. If there is a swap entry for the + * physical page, then an encoding of the swap file number and the + * page's offset into the swap file are returned. If no page is + * present at all, PM_NOT_PRESENT is returned. This allows determining + * precisely which pages are mapped (or in swap) and comparing mapped + * pages between processes. + * + * Efficient users of this interface will use /proc/pid/maps to + * determine which areas of memory are actually mapped and llseek to + * skip over unmapped regions. 
+ */ +static ssize_t pagemap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) { - struct proc_maps_private *priv = m->private; - unsigned long last_addr = m->version; + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + struct page **pages, *page; + unsigned long uaddr, uend; struct mm_struct *mm; - struct vm_area_struct *vma, *tail_vma = NULL; - loff_t l = *pos; - - /* Clear the per syscall fields in priv */ - priv->task = NULL; - priv->tail_vma = NULL; + struct pagemapread pm; + int pagecount; + int ret = -ESRCH; - /* - * We remember last_addr rather than next_addr to hit with - * mmap_cache most of the time. We have zero last_addr at - * the beginning and also after lseek. We will have -1 last_addr - * after the end of the vmas. - */ + if (!task) + goto out; - if (last_addr == -1UL) - return NULL; + ret = -EACCES; + if (!ptrace_may_attach(task)) + goto out; - priv->task = get_pid_task(priv->pid, PIDTYPE_PID); - if (!priv->task) - return NULL; + ret = -EINVAL; + /* file position must be aligned */ + if (*ppos % PM_ENTRY_BYTES) + goto out; - mm = mm_for_maps(priv->task); + ret = 0; + mm = get_task_mm(task); if (!mm) - return NULL; - - priv->tail_vma = tail_vma = get_gate_vma(priv->task); - - /* Start with last addr hint */ - if (last_addr && (vma = find_vma(mm, last_addr))) { - vma = vma->vm_next; goto out; - } - /* - * Check the vma index is within the range and do - * sequential scan until m_index. 
- */ - vma = NULL; - if ((unsigned long)l < mm->map_count) { - vma = mm->mmap; - while (l-- && vma) - vma = vma->vm_next; - goto out; - } + ret = -ENOMEM; + uaddr = (unsigned long)buf & PAGE_MASK; + uend = (unsigned long)(buf + count); + pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; + pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto out_task; - if (l != mm->map_count) - tail_vma = NULL; /* After gate vma */ + down_read(&current->mm->mmap_sem); + ret = get_user_pages(current, current->mm, uaddr, pagecount, + 1, 0, pages, NULL); + up_read(&current->mm->mmap_sem); -out: - if (vma) - return vma; + if (ret < 0) + goto out_free; - /* End of vmas has been reached */ - m->version = (tail_vma != NULL)? 0: -1UL; - up_read(&mm->mmap_sem); - mmput(mm); - return tail_vma; -} + pm.out = buf; + pm.end = buf + count; -static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) -{ - if (vma && vma != priv->tail_vma) { - struct mm_struct *mm = vma->vm_mm; - up_read(&mm->mmap_sem); - mmput(mm); + if (!ptrace_may_attach(task)) { + ret = -EIO; + } else { + unsigned long src = *ppos; + unsigned long svpfn = src / PM_ENTRY_BYTES; + unsigned long start_vaddr = svpfn << PAGE_SHIFT; + unsigned long end_vaddr = TASK_SIZE_OF(task); + + /* watch out for wraparound */ + if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) + start_vaddr = end_vaddr; + + /* + * The odds are that this will stop walking way + * before end_vaddr, because the length of the + * user buffer is tracked in "pm", and the walk + * will stop when we hit the end of the buffer. 
+ */ + ret = walk_page_range(mm, start_vaddr, end_vaddr, + &pagemap_walk, &pm); + if (ret == PM_END_OF_BUFFER) + ret = 0; + /* don't need mmap_sem for these, but this looks cleaner */ + *ppos += pm.out - buf; + if (!ret) + ret = pm.out - buf; } -} - -static void *m_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - struct vm_area_struct *tail_vma = priv->tail_vma; - - (*pos)++; - if (vma && (vma != tail_vma) && vma->vm_next) - return vma->vm_next; - vma_stop(priv, vma); - return (vma != tail_vma)? tail_vma: NULL; -} - -static void m_stop(struct seq_file *m, void *v) -{ - struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - - vma_stop(priv, vma); - if (priv->task) - put_task_struct(priv->task); -} - -static struct seq_operations proc_pid_maps_op = { - .start = m_start, - .next = m_next, - .stop = m_stop, - .show = show_map -}; - -static struct seq_operations proc_pid_smaps_op = { - .start = m_start, - .next = m_next, - .stop = m_stop, - .show = show_smap -}; -static int do_maps_open(struct inode *inode, struct file *file, - struct seq_operations *ops) -{ - struct proc_maps_private *priv; - int ret = -ENOMEM; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv) { - priv->pid = proc_pid(inode); - ret = seq_open(file, ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = priv; - } else { - kfree(priv); - } + for (; pagecount; pagecount--) { + page = pages[pagecount-1]; + if (!PageReserved(page)) + SetPageDirty(page); + page_cache_release(page); } + mmput(mm); +out_free: + kfree(pages); +out_task: + put_task_struct(task); +out: return ret; } -static int maps_open(struct inode *inode, struct file *file) -{ - return do_maps_open(inode, file, &proc_pid_maps_op); -} - -const struct file_operations proc_maps_operations = { - .open = maps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, +const struct 
file_operations proc_pagemap_operations = { + .llseek = mem_lseek, /* borrow this */ + .read = pagemap_read, }; +#endif /* CONFIG_PROC_PAGE_MONITOR */ #ifdef CONFIG_NUMA extern int show_numa_map(struct seq_file *m, void *v); @@ -526,7 +734,7 @@ static int show_numa_map_checked(struct seq_file *m, void *v) return show_numa_map(m, v); } -static struct seq_operations proc_pid_numa_maps_op = { +static const struct seq_operations proc_pid_numa_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, @@ -545,15 +753,3 @@ const struct file_operations proc_numa_maps_operations = { .release = seq_release_private, }; #endif - -static int smaps_open(struct inode *inode, struct file *file) -{ - return do_maps_open(inode, file, &proc_pid_smaps_op); -} - -const struct file_operations proc_smaps_operations = { - .open = smaps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 1932c2ca3457..8011528518bd 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -12,7 +12,7 @@ * each process that owns it. Non-shared memory is counted * accurately. 
*/ -char *task_mem(struct mm_struct *mm, char *buffer) +void task_mem(struct seq_file *m, struct mm_struct *mm) { struct vm_list_struct *vml; unsigned long bytes = 0, sbytes = 0, slack = 0; @@ -58,14 +58,13 @@ char *task_mem(struct mm_struct *mm, char *buffer) bytes += kobjsize(current); /* includes kernel stack */ - buffer += sprintf(buffer, + seq_printf(m, "Mem:\t%8lu bytes\n" "Slack:\t%8lu bytes\n" "Shared:\t%8lu bytes\n", bytes, slack, sbytes); up_read(&mm->mmap_sem); - return buffer; } unsigned long task_vsize(struct mm_struct *mm) @@ -104,7 +103,7 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return size; } -int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +int proc_exe_link(struct inode *inode, struct path *path) { struct vm_list_struct *vml; struct vm_area_struct *vma; @@ -127,8 +126,8 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount * } if (vma) { - *mnt = mntget(vma->vm_file->f_path.mnt); - *dentry = dget(vma->vm_file->f_path.dentry); + *path = vma->vm_file->f_path; + path_get(&vma->vm_file->f_path); result = 0; } @@ -199,7 +198,7 @@ static void *m_next(struct seq_file *m, void *_vml, loff_t *pos) return vml ? vml->next : NULL; } -static struct seq_operations proc_pid_maps_ops = { +static const struct seq_operations proc_pid_maps_ops = { .start = m_start, .next = m_next, .stop = m_stop, diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 523e1098ae88..9ac0f5e064e0 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -10,7 +10,6 @@ #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/user.h> -#include <linux/a.out.h> #include <linux/elf.h> #include <linux/elfcore.h> #include <linux/highmem.h> |