summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 13:12:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 13:12:57 -0700
commite7c93cbfe9cb4b0a47633099e78c455b1f79bbac (patch)
tree3fe9238ab3000db445c26a6ae0cc769110512281 /include
parentd479c5a1919b4e569dcd3ae9c84ed74a675d0b94 (diff)
parent2b40c5db73e239531ea54991087f4edc07fbb08e (diff)
downloadlinux-e7c93cbfe9cb4b0a47633099e78c455b1f79bbac.tar.bz2
Merge tag 'threads-v5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux
Pull thread updates from Christian Brauner: "We have been discussing using pidfds to attach to namespaces for quite a while and the patches have in one form or another already existed for about a year. But I wanted to wait to see how the general api would be received and adopted. This contains the changes to make it possible to use pidfds to attach to the namespaces of a process, i.e. they can be passed as the first argument to the setns() syscall. When only a single namespace type is specified the semantics are equivalent to passing an nsfd. That means setns(nsfd, CLONE_NEWNET) equals setns(pidfd, CLONE_NEWNET). However, when a pidfd is passed, multiple namespace flags can be specified in the second setns() argument and setns() will attach the caller to all the specified namespaces all at once or to none of them. Specifying 0 is not valid together with a pidfd. Here are just two obvious examples: setns(pidfd, CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET); setns(pidfd, CLONE_NEWUSER); Allowing to also attach subsets of namespaces supports various use-cases where callers setns to a subset of namespaces to retain privilege, perform an action and then re-attach another subset of namespaces. Apart from significantly reducing the number of syscalls needed to attach to all currently supported namespaces (eight "open+setns" sequences vs just a single "setns()"), this also allows atomic setns to a set of namespaces, i.e. either attaching to all namespaces succeeds or we fail without having changed anything. This is centered around a new internal struct nsset which holds all information necessary for a task to switch to a new set of namespaces atomically. Fwiw, with this change a pidfd becomes the only token needed to interact with a container. I'm expecting this to be picked-up by util-linux for nsenter rather soon. Associated with this change is a shiny new test-suite dedicated to setns() (for pidfds and nsfds alike)" * tag 'threads-v5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux: selftests/pidfd: add pidfd setns tests nsproxy: attach to namespaces via pidfds nsproxy: add struct nsset
Diffstat (limited to 'include')
-rw-r--r--include/linux/mnt_namespace.h2
-rw-r--r--include/linux/nsproxy.h24
-rw-r--r--include/linux/proc_fs.h2
-rw-r--r--include/linux/proc_ns.h4
4 files changed, 30 insertions, 2 deletions
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 35942084cd40..8f882f5881e8 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -6,10 +6,12 @@
struct mnt_namespace;
struct fs_struct;
struct user_namespace;
+struct ns_common;
extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
struct user_namespace *, struct fs_struct *);
extern void put_mnt_ns(struct mnt_namespace *ns);
+extern struct ns_common *from_mnt_ns(struct mnt_namespace *);
extern const struct file_operations proc_mounts_operations;
extern const struct file_operations proc_mountinfo_operations;
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 074f395b9ad2..cdb171efc7cb 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -42,6 +42,30 @@ struct nsproxy {
extern struct nsproxy init_nsproxy;
/*
+ * A structure to encompass all bits needed to install
+ * a partial or complete new set of namespaces.
+ *
+ * If a new user namespace is requested cred will
+ * point to a modifiable set of credentials. If a pointer
+ * to a modifiable set is needed nsset_cred() must be
+ * used and tested.
+ */
+struct nsset {
+ unsigned flags;
+ struct nsproxy *nsproxy;
+ struct fs_struct *fs;
+ const struct cred *cred;
+};
+
+static inline struct cred *nsset_cred(struct nsset *set)
+{
+ if (set->flags & CLONE_NEWUSER)
+ return (struct cred *)set->cred;
+
+ return NULL;
+}
+
+/*
* the namespaces access rules are:
*
* 1. only current task is allowed to change tsk->nsproxy pointer or
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 45c05fd9c99d..0cfc44d7ac74 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -179,4 +179,6 @@ static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
return inode->i_sb->s_fs_info;
}
+bool proc_ns_file(const struct file *file);
+
#endif /* _LINUX_PROC_FS_H */
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 6abe85c34681..75807ecef880 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -8,7 +8,7 @@
#include <linux/ns_common.h>
struct pid_namespace;
-struct nsproxy;
+struct nsset;
struct path;
struct task_struct;
struct inode;
@@ -19,7 +19,7 @@ struct proc_ns_operations {
int type;
struct ns_common *(*get)(struct task_struct *task);
void (*put)(struct ns_common *ns);
- int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
+ int (*install)(struct nsset *nsset, struct ns_common *ns);
struct user_namespace *(*owner)(struct ns_common *ns);
struct ns_common *(*get_parent)(struct ns_common *ns);
} __randomize_layout;