summaryrefslogtreecommitdiffstats
path: root/fs/namespace.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c179
1 files changed, 144 insertions, 35 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index df137ba19d37..ab467ee58341 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -75,6 +75,22 @@ static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
+struct mnt_idmap {
+ struct user_namespace *owner;
+ refcount_t count;
+};
+
+/*
+ * Carries the initial idmapping of 0:0:4294967295 which is an identity
+ * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is
+ * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
+ */
+struct mnt_idmap nop_mnt_idmap = {
+ .owner = &init_user_ns,
+ .count = REFCOUNT_INIT(1),
+};
+EXPORT_SYMBOL_GPL(nop_mnt_idmap);
+
struct mount_kattr {
unsigned int attr_set;
unsigned int attr_clr;
@@ -82,6 +98,7 @@ struct mount_kattr {
unsigned int lookup_flags;
bool recurse;
struct user_namespace *mnt_userns;
+ struct mnt_idmap *mnt_idmap;
};
/* /sys/fs */
@@ -193,6 +210,104 @@ int mnt_get_count(struct mount *mnt)
#endif
}
+/**
+ * mnt_idmap_owner - retrieve owner of the mount's idmapping
+ * @idmap: mount idmapping
+ *
+ * This helper will go away once the conversion to use struct mnt_idmap
+ * everywhere has finished at which point the helper will be unexported.
+ *
+ * Only code that needs to perform permission checks based on the owner of the
+ * idmapping will get access to it. All other code will solely rely on
+ * idmappings. This will get us type safety so it's impossible to conflate
+ * filesystems idmappings with mount idmappings.
+ *
+ * Return: The owner of the idmapping.
+ */
+struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap)
+{
+ return idmap->owner;
+}
+EXPORT_SYMBOL_GPL(mnt_idmap_owner);
+
+/**
+ * mnt_user_ns - retrieve owner of an idmapped mount
+ * @mnt: the relevant vfsmount
+ *
+ * This helper will go away once the conversion to use struct mnt_idmap
+ * everywhere has finished at which point the helper will be unexported.
+ *
+ * Only code that needs to perform permission checks based on the owner of the
+ * idmapping will get access to it. All other code will solely rely on
+ * idmappings. This will get us type safety so it's impossible to conflate
+ * filesystems idmappings with mount idmappings.
+ *
+ * Return: The owner of the idmapped.
+ */
+struct user_namespace *mnt_user_ns(const struct vfsmount *mnt)
+{
+ struct mnt_idmap *idmap = mnt_idmap(mnt);
+
+ /* Return the actual owner of the filesystem instead of the nop. */
+ if (idmap == &nop_mnt_idmap &&
+ !initial_idmapping(mnt->mnt_sb->s_user_ns))
+ return mnt->mnt_sb->s_user_ns;
+ return mnt_idmap_owner(idmap);
+}
+EXPORT_SYMBOL_GPL(mnt_user_ns);
+
+/**
+ * alloc_mnt_idmap - allocate a new idmapping for the mount
+ * @mnt_userns: owning userns of the idmapping
+ *
+ * Allocate a new struct mnt_idmap which carries the idmapping of the mount.
+ *
+ * Return: On success a new idmap, on error an error pointer is returned.
+ */
+static struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns)
+{
+ struct mnt_idmap *idmap;
+
+ idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT);
+ if (!idmap)
+ return ERR_PTR(-ENOMEM);
+
+ idmap->owner = get_user_ns(mnt_userns);
+ refcount_set(&idmap->count, 1);
+ return idmap;
+}
+
+/**
+ * mnt_idmap_get - get a reference to an idmapping
+ * @idmap: the idmap to bump the reference on
+ *
+ * If @idmap is not the @nop_mnt_idmap bump the reference count.
+ *
+ * Return: @idmap with reference count bumped if @not_mnt_idmap isn't passed.
+ */
+static inline struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap)
+{
+ if (idmap != &nop_mnt_idmap)
+ refcount_inc(&idmap->count);
+
+ return idmap;
+}
+
+/**
+ * mnt_idmap_put - put a reference to an idmapping
+ * @idmap: the idmap to put the reference on
+ *
+ * If this is a non-initial idmapping, put the reference count when a mount is
+ * released and free it if we're the last user.
+ */
+static inline void mnt_idmap_put(struct mnt_idmap *idmap)
+{
+ if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) {
+ put_user_ns(idmap->owner);
+ kfree(idmap);
+ }
+}
+
static struct mount *alloc_vfsmnt(const char *name)
{
struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -232,7 +347,7 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_HLIST_NODE(&mnt->mnt_mp_list);
INIT_LIST_HEAD(&mnt->mnt_umounting);
INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
- mnt->mnt.mnt_userns = &init_user_ns;
+ mnt->mnt.mnt_idmap = &nop_mnt_idmap;
}
return mnt;
@@ -602,11 +717,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
static void free_vfsmnt(struct mount *mnt)
{
- struct user_namespace *mnt_userns;
-
- mnt_userns = mnt_user_ns(&mnt->mnt);
- if (!initial_idmapping(mnt_userns))
- put_user_ns(mnt_userns);
+ mnt_idmap_put(mnt_idmap(&mnt->mnt));
kfree_const(mnt->mnt_devname);
#ifdef CONFIG_SMP
free_percpu(mnt->mnt_pcp);
@@ -1009,7 +1120,6 @@ static struct mount *skip_mnt_tree(struct mount *p)
struct vfsmount *vfs_create_mount(struct fs_context *fc)
{
struct mount *mnt;
- struct user_namespace *fs_userns;
if (!fc->root)
return ERR_PTR(-EINVAL);
@@ -1027,10 +1137,6 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc)
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
mnt->mnt_parent = mnt;
- fs_userns = mnt->mnt.mnt_sb->s_user_ns;
- if (!initial_idmapping(fs_userns))
- mnt->mnt.mnt_userns = get_user_ns(fs_userns);
-
lock_mount_hash();
list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
unlock_mount_hash();
@@ -1120,9 +1226,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
atomic_inc(&sb->s_active);
- mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
- if (!initial_idmapping(mnt->mnt.mnt_userns))
- mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
+ mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt));
+
mnt->mnt.mnt_sb = sb;
mnt->mnt.mnt_root = dget(root);
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
@@ -3515,8 +3620,9 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
q = next_mnt(q, new);
if (!q)
break;
+ // an mntns binding we'd skipped?
while (p->mnt.mnt_root != q->mnt.mnt_root)
- p = next_mnt(p, old);
+ p = next_mnt(skip_mnt_tree(p), old);
}
namespace_unlock();
@@ -3981,14 +4087,14 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
struct vfsmount *m = &mnt->mnt;
struct user_namespace *fs_userns = m->mnt_sb->s_user_ns;
- if (!kattr->mnt_userns)
+ if (!kattr->mnt_idmap)
return 0;
/*
* Creating an idmapped mount with the filesystem wide idmapping
* doesn't make sense so block that. We don't allow mushy semantics.
*/
- if (kattr->mnt_userns == fs_userns)
+ if (mnt_idmap_owner(kattr->mnt_idmap) == fs_userns)
return -EINVAL;
/*
@@ -4028,7 +4134,7 @@ static inline bool mnt_allow_writers(const struct mount_kattr *kattr,
{
return (!(kattr->attr_set & MNT_READONLY) ||
(mnt->mnt.mnt_flags & MNT_READONLY)) &&
- !kattr->mnt_userns;
+ !kattr->mnt_idmap;
}
static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
@@ -4082,27 +4188,18 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
{
- struct user_namespace *mnt_userns, *old_mnt_userns;
-
- if (!kattr->mnt_userns)
+ if (!kattr->mnt_idmap)
return;
/*
- * We're the only ones able to change the mount's idmapping. So
- * mnt->mnt.mnt_userns is stable and we can retrieve it directly.
- */
- old_mnt_userns = mnt->mnt.mnt_userns;
-
- mnt_userns = get_user_ns(kattr->mnt_userns);
- /* Pairs with smp_load_acquire() in mnt_user_ns(). */
- smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
-
- /*
- * If this is an idmapped filesystem drop the reference we've taken
- * in vfs_create_mount() before.
+ * Pairs with smp_load_acquire() in mnt_idmap().
+ *
+ * Since we only allow a mount to change the idmapping once and
+ * verified this in can_idmap_mount() we know that the mount has
+ * @nop_mnt_idmap attached to it. So there's no need to drop any
+ * references.
*/
- if (!initial_idmapping(old_mnt_userns))
- put_user_ns(old_mnt_userns);
+ smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap));
}
static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt)
@@ -4136,6 +4233,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
if (path->dentry != mnt->mnt.mnt_root)
return -EINVAL;
+ if (kattr->mnt_userns) {
+ struct mnt_idmap *mnt_idmap;
+
+ mnt_idmap = alloc_mnt_idmap(kattr->mnt_userns);
+ if (IS_ERR(mnt_idmap))
+ return PTR_ERR(mnt_idmap);
+ kattr->mnt_idmap = mnt_idmap;
+ }
+
if (kattr->propagation) {
/*
* Only take namespace_lock() if we're actually changing
@@ -4323,6 +4429,9 @@ static void finish_mount_kattr(struct mount_kattr *kattr)
{
put_user_ns(kattr->mnt_userns);
kattr->mnt_userns = NULL;
+
+ if (kattr->mnt_idmap)
+ mnt_idmap_put(kattr->mnt_idmap);
}
SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,