From 9014143bab2f3bc0b9e5db3bc8d00e2a43e50fbd Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 23 Jun 2019 14:27:17 +0300 Subject: fork: don't check parent_tidptr with CLONE_PIDFD Give userspace a cheap and reliable way to tell whether CLONE_PIDFD is supported by the kernel or not. The easiest way is to pass an invalid file descriptor value in parent_tidptr, perform the syscall and verify that parent_tidptr has been changed to a valid file descriptor value. CLONE_PIDFD uses parent_tidptr to return pidfds. CLONE_PARENT_SETTID will use parent_tidptr to return the tid of the parent. The two flags cannot be used together. Old kernels that only support CLONE_PARENT_SETTID will not verify the value pointed to by parent_tidptr. This behavior is unchanged even with the introduction of CLONE_PIDFD. However, if CLONE_PIDFD is specified the kernel will currently check the value pointed to by parent_tidptr before placing the pidfd in the memory pointed to. EINVAL will be returned if the value in parent_tidptr is not 0. If CLONE_PIDFD is supported and fd 0 is closed, then the returned pidfd can and likely will be 0 and parent_tidptr will be unchanged. This means userspace must either check CLONE_PIDFD support beforehand or check that fd 0 is not closed when invoking CLONE_PIDFD. The check for pidfd == 0 was introduced during the v5.2 merge window by commit b3e583825266 ("clone: add CLONE_PIDFD") to ensure that CLONE_PIDFD could be potentially extended by passing in flags through the return argument. However, that extension would look horrible, and with the upcoming introduction of the clone3 syscall in v5.3 there is no need to extend legacy clone syscall this way. (Even if it would need to be extended, CLONE_DETACHED can be reused with CLONE_PIDFD.) So remove the pidfd == 0 check. Userspace that needs to be portable to kernels without CLONE_PIDFD support can then be advised to initialize pidfd to -1 and check the pidfd value returned by CLONE_PIDFD. Fixes: b3e583825266 ("clone: add CLONE_PIDFD") Signed-off-by: Dmitry V. Levin Signed-off-by: Christian Brauner --- kernel/fork.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 75675b9bf6df..39a3adaa4ad1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1822,8 +1822,6 @@ static __latent_entropy struct task_struct *copy_process( } if (clone_flags & CLONE_PIDFD) { - int reserved; - /* * - CLONE_PARENT_SETTID is useless for pidfds and also * parent_tidptr is used to return pidfds. @@ -1834,16 +1832,6 @@ static __latent_entropy struct task_struct *copy_process( if (clone_flags & (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) return ERR_PTR(-EINVAL); - - /* - * Verify that parent_tidptr is sane so we can potentially - * reuse it later. - */ - if (get_user(reserved, parent_tidptr)) - return ERR_PTR(-EFAULT); - - if (reserved != 0) - return ERR_PTR(-EINVAL); } /* -- cgit v1.2.3 From bee19cd8f241ab3cd1bf79e03884e5371f9ef514 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 23 Jun 2019 14:28:00 +0300 Subject: samples: make pidfd-metadata fail gracefully on older kernels Initialize pidfd to an invalid descriptor, to fail gracefully on those kernels that do not implement CLONE_PIDFD and leave pidfd unchanged. Signed-off-by: Dmitry V. Levin Signed-off-by: Christian Brauner --- samples/pidfd/pidfd-metadata.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/samples/pidfd/pidfd-metadata.c b/samples/pidfd/pidfd-metadata.c index 14b454448429..c459155daf9a 100644 --- a/samples/pidfd/pidfd-metadata.c +++ b/samples/pidfd/pidfd-metadata.c @@ -83,7 +83,7 @@ static int pidfd_metadata_fd(pid_t pid, int pidfd) int main(int argc, char *argv[]) { - int pidfd = 0, ret = EXIT_FAILURE; + int pidfd = -1, ret = EXIT_FAILURE; char buf[4096] = { 0 }; pid_t pid; int procfd, statusfd; @@ -91,7 +91,11 @@ int main(int argc, char *argv[]) pid = pidfd_clone(CLONE_PIDFD, &pidfd); if (pid < 0) - exit(ret); + err(ret, "CLONE_PIDFD"); + if (pidfd == -1) { + warnx("CLONE_PIDFD is not supported by the kernel"); + goto out; + } procfd = pidfd_metadata_fd(pid, pidfd); close(pidfd); -- cgit v1.2.3 From 6fd2fe494b17bf2dec37b610d23a43a72b16923a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Jun 2019 22:22:09 -0400 Subject: copy_process(): don't use ksys_close() on cleanups anon_inode_getfd() should be used *ONLY* in situations when we are guaranteed to be past the last failure point (including copying the descriptor number to userland, at that). And ksys_close() should not be used for cleanups at all. anon_inode_getfile() is there for all nontrivial cases like that. Just use that... Fixes: b3e583825266 ("clone: add CLONE_PIDFD") Signed-off-by: Al Viro Reviewed-by: Jann Horn Signed-off-by: Christian Brauner --- kernel/fork.c | 46 ++++++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 39a3adaa4ad1..399aca51ff75 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1712,31 +1712,6 @@ const struct file_operations pidfd_fops = { #endif }; -/** - * pidfd_create() - Create a new pid file descriptor. - * - * @pid: struct pid that the pidfd will reference - * - * This creates a new pid file descriptor with the O_CLOEXEC flag set. - * - * Note, that this function can only be called after the fd table has - * been unshared to avoid leaking the pidfd to the new process. - * - * Return: On success, a cloexec pidfd is returned. - * On error, a negative errno number will be returned. - */ -static int pidfd_create(struct pid *pid) -{ - int fd; - - fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), - O_RDWR | O_CLOEXEC); - if (fd < 0) - put_pid(pid); - - return fd; -} - static void __delayed_free_task(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); @@ -1774,6 +1749,7 @@ static __latent_entropy struct task_struct *copy_process( int pidfd = -1, retval; struct task_struct *p; struct multiprocess_signals delayed; + struct file *pidfile = NULL; /* * Don't allow sharing the root directory with processes in a different @@ -2046,11 +2022,20 @@ static __latent_entropy struct task_struct *copy_process( * if the fd table isn't shared). */ if (clone_flags & CLONE_PIDFD) { - retval = pidfd_create(pid); + retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC); if (retval < 0) goto bad_fork_free_pid; pidfd = retval; + + pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid, + O_RDWR | O_CLOEXEC); + if (IS_ERR(pidfile)) { + put_unused_fd(pidfd); + goto bad_fork_free_pid; + } + get_pid(pid); /* held by pidfile now */ + retval = put_user(pidfd, parent_tidptr); if (retval) goto bad_fork_put_pidfd; @@ -2168,6 +2153,9 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cancel_cgroup; } + /* past the last point of failure */ + if (pidfile) + fd_install(pidfd, pidfile); init_task_pid_links(p); if (likely(p->pid)) { @@ -2234,8 +2222,10 @@ bad_fork_cancel_cgroup: bad_fork_cgroup_threadgroup_change_end: cgroup_threadgroup_change_end(current); bad_fork_put_pidfd: - if (clone_flags & CLONE_PIDFD) - ksys_close(pidfd); + if (clone_flags & CLONE_PIDFD) { + fput(pidfile); + put_unused_fd(pidfd); + } bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); -- cgit v1.2.3 From 30d158b143b6575261ab610ae7b1b4f7fe3830b3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Jun 2019 11:35:14 +0200 Subject: proc: remove useless d_is_dir() check Remove the d_is_dir() check from tgid_pidfd_to_pid(). It is pointless since you should never get &proc_tgid_base_operations for f_op on a non-directory. Suggested-by: Al Viro Signed-off-by: Christian Brauner --- fs/proc/base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 9c8ca6cd3ce4..255f6754c70d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3077,8 +3077,7 @@ static const struct file_operations proc_tgid_base_operations = { struct pid *tgid_pidfd_to_pid(const struct file *file) { - if (!d_is_dir(file->f_path.dentry) || - (file->f_op != &proc_tgid_base_operations)) + if (file->f_op != &proc_tgid_base_operations) return ERR_PTR(-EBADF); return proc_pid(file_inode(file)); -- cgit v1.2.3