From 2b5f9dad32ed19e8db3b0f10a84aa824a219803b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 20 Sep 2022 17:31:19 -0700 Subject: fs/exec: switch timens when a task gets a new mm Changing a time namespace requires remapping a vvar page, so we don't want to allow doing that if any other tasks can use the same mm. Currently, we install a time namespace when a task is created with a new vm. exec() is another case when a task gets a new mm and so it can switch a time namespace safely, but it isn't handled now. One more issue of the current interface is that clone() with CLONE_VM isn't allowed if the current task has unshared a time namespace (timens_for_children doesn't match the current timens). Both these issues make some inconvenience for users. For example, Alexey and Florian reported that posix_spawn() uses vfork+exec and this pattern doesn't work with time namespaces due to the both described issues. LXC needed to workaround the exec() issue by calling setns. In the commit 133e2d3e81de5 ("fs/exec: allow to unshare a time namespace on vfork+exec"), we tried to fix these issues with minimal impact on UAPI. But it adds extra complexity and some undesirable side effects. Eric suggested fixing the issues properly because here are all the reasons to suppose that there are no users that depend on the old behavior. Cc: Alexey Izbyshev Cc: Christian Brauner Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: "Eric W. Biederman" Cc: Florian Weimer Cc: Kees Cook Suggested-by: "Eric W. Biederman" Origin-author: "Eric W. Biederman" Signed-off-by: Andrei Vagin Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220921003120.209637-1-avagin@google.com --- kernel/nsproxy.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'kernel/nsproxy.c') diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index eec72ca962e2..a487ff24129b 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -157,7 +157,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWTIME)))) { - if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) { + if ((flags & CLONE_VM) || + likely(old_ns->time_ns_for_children == old_ns->time_ns)) { get_nsproxy(old_ns); return 0; } @@ -179,7 +180,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) if (IS_ERR(new_ns)) return PTR_ERR(new_ns); - timens_on_fork(new_ns, tsk); + if ((flags & CLONE_VM) == 0) + timens_on_fork(new_ns, tsk); tsk->nsproxy = new_ns; return 0; @@ -254,6 +256,23 @@ void exit_task_namespaces(struct task_struct *p) switch_task_namespaces(p, NULL); } +int exec_task_namespaces(void) +{ + struct task_struct *tsk = current; + struct nsproxy *new; + + if (tsk->nsproxy->time_ns_for_children == tsk->nsproxy->time_ns) + return 0; + + new = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); + if (IS_ERR(new)) + return PTR_ERR(new); + + timens_on_fork(new, tsk); + switch_task_namespaces(tsk, new); + return 0; +} + static int check_setns_flags(unsigned long flags) { if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | -- cgit v1.2.3