From e149ed2b805fefdccf7ccdfc19eca22fdd4514ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 1 Nov 2014 10:57:28 -0400 Subject: take the targets of /proc/*/ns/* symlinks to separate fs New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now. It's not mountable (not even registered, so it's not in /proc/filesystems, etc.). Files on it *are* bindable - we explicitly permit that in do_loopback(). This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well. get_proc_ns() is a macro now (it's simply returning ->i_private; would have been an inline, if not for header ordering headache). proc_ns_inode() is an ex-parrot. The interface used in procfs is ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops). Dentries and inodes are never hashed; a non-counting reference to dentry is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path() if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details of that mechanism. As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt; it does nd_jump_link() on a consistent pair it gets from ns_get_path(). Signed-off-by: Al Viro --- fs/nsfs.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 fs/nsfs.c (limited to 'fs/nsfs.c') diff --git a/fs/nsfs.c b/fs/nsfs.c new file mode 100644 index 000000000000..af1b24fa899d --- /dev/null +++ b/fs/nsfs.c @@ -0,0 +1,161 @@ +#include +#include +#include +#include +#include +#include + +static struct vfsmount *nsfs_mnt; + +static const struct file_operations ns_file_operations = { + .llseek = no_llseek, +}; + +static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) +{ + struct inode *inode = dentry->d_inode; + const struct proc_ns_operations *ns_ops = dentry->d_fsdata; + + return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", + ns_ops->name, inode->i_ino); +} + +static void ns_prune_dentry(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + if (inode) { + struct ns_common *ns = inode->i_private; + atomic_long_set(&ns->stashed, 0); + } +} + +const struct dentry_operations ns_dentry_operations = +{ + .d_prune = ns_prune_dentry, + .d_delete = always_delete_dentry, + .d_dname = ns_dname, +}; + +static void nsfs_evict(struct inode *inode) +{ + struct ns_common *ns = inode->i_private; + clear_inode(inode); + ns->ops->put(ns); +} + +void *ns_get_path(struct path *path, struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct vfsmount *mnt = mntget(nsfs_mnt); + struct qstr qname = { .name = "", }; + struct dentry *dentry; + struct inode *inode; + struct ns_common *ns; + unsigned long d; + +again: + ns = ns_ops->get(task); + if (!ns) { + mntput(mnt); + return ERR_PTR(-ENOENT); + } + rcu_read_lock(); + d = atomic_long_read(&ns->stashed); + if (!d) + goto slow; + dentry = (struct dentry *)d; + if (!lockref_get_not_dead(&dentry->d_lockref)) + goto slow; + rcu_read_unlock(); + ns_ops->put(ns); +got_it: + path->mnt = mnt; + path->dentry = dentry; + return NULL; +slow: + rcu_read_unlock(); + inode = new_inode_pseudo(mnt->mnt_sb); + if (!inode) { + ns_ops->put(ns); + mntput(mnt); + return ERR_PTR(-ENOMEM); + } + inode->i_ino = ns->inum; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_flags |= S_IMMUTABLE; + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_fop = &ns_file_operations; + inode->i_private = ns; + + dentry = d_alloc_pseudo(mnt->mnt_sb, &qname); + if (!dentry) { + iput(inode); + mntput(mnt); + return ERR_PTR(-ENOMEM); + } + d_instantiate(dentry, inode); + dentry->d_fsdata = (void *)ns_ops; + d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); + if (d) { + d_delete(dentry); /* make sure ->d_prune() does nothing */ + dput(dentry); + cpu_relax(); + goto again; + } + goto got_it; +} + +int ns_get_name(char *buf, size_t size, struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct ns_common *ns; + int res = -ENOENT; + ns = ns_ops->get(task); + if (ns) { + res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum); + ns_ops->put(ns); + } + return res; +} + +struct file *proc_ns_fget(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + + if (file->f_op != &ns_file_operations) + goto out_invalid; + + return file; + +out_invalid: + fput(file); + return ERR_PTR(-EINVAL); +} + +static const struct super_operations nsfs_ops = { + .statfs = simple_statfs, + .evict_inode = nsfs_evict, +}; +static struct dentry *nsfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_pseudo(fs_type, "nsfs:", &nsfs_ops, + &ns_dentry_operations, NSFS_MAGIC); +} +static struct file_system_type nsfs = { + .name = "nsfs", + .mount = nsfs_mount, + .kill_sb = kill_anon_super, +}; + +void __init nsfs_init(void) +{ + nsfs_mnt = kern_mount(&nsfs); + if (IS_ERR(nsfs_mnt)) + panic("can't set nsfs up\n"); + nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; +} -- cgit v1.2.3