From 343f4c49f2438d8920f1f76fa823ee59b91f02e4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 11 Apr 2022 11:40:14 -0500 Subject: kthread: Don't allocate kthread_struct for init and umh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If kthread_is_per_cpu runs concurrently with free_kthread_struct, the kthread_struct that was just freed may be read from. This bug was introduced by commit 40966e316f86 ("kthread: Ensure struct kthread is present for all kthreads"), when kthread_struct started to be allocated for all tasks that have PF_KTHREAD set. This in turn required the kthread_struct to be freed in kernel_execve and violated the assumption that kthread_struct will have the same lifetime as the task. Looking a bit deeper, this only applies to callers of kernel_execve, which are just the init process and the user mode helper processes. These processes really don't want to be kernel threads but are for historical reasons, mostly because copy_thread does not know how to start a process at a kernel mode function unless PF_KTHREAD or PF_IO_WORKER is set. Solve this by not allocating kthread_struct for the init process and the user mode helper processes. This is done by adding a kthread member to struct kernel_clone_args, setting kthread in fork_idle and kernel_thread, adding user_mode_thread that works like kernel_thread except it does not set kthread, and in fork only allocating the kthread_struct if .kthread is set. I have looked at kernel/kthread.c and since commit 40966e316f86 ("kthread: Ensure struct kthread is present for all kthreads") there have been no assumptions added that to_kthread or __to_kthread will not return NULL. There are a few callers of to_kthread or __to_kthread that assume a non-NULL struct kthread pointer will be returned. These functions are kthread_data(), kthread_parkme(), kthread_exit(), kthread(), kthread_park(), kthread_unpark(), kthread_stop().
All of those functions can reasonably be expected to be called when it is known that a task is a kthread, so that assumption seems reasonable. Cc: stable@vger.kernel.org Fixes: 40966e316f86 ("kthread: Ensure struct kthread is present for all kthreads") Reported-by: Максим Кутявин Link: https://lkml.kernel.org/r/20220506141512.516114-1-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- kernel/fork.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 9796897560ab..27c5203750b4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2157,7 +2157,7 @@ static __latent_entropy struct task_struct *copy_process( p->io_context = NULL; audit_set_context(p, NULL); cgroup_fork(p); - if (p->flags & PF_KTHREAD) { + if (args->kthread) { if (!set_kthread_struct(p)) goto bad_fork_cleanup_delayacct; } @@ -2548,7 +2548,8 @@ struct task_struct * __init fork_idle(int cpu) { struct task_struct *task; struct kernel_clone_args args = { - .flags = CLONE_VM, + .flags = CLONE_VM, + .kthread = 1, }; task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args); @@ -2679,6 +2680,23 @@ pid_t kernel_clone(struct kernel_clone_args *args) * Create a kernel thread. */ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + struct kernel_clone_args args = { + .flags = ((lower_32_bits(flags) | CLONE_VM | + CLONE_UNTRACED) & ~CSIGNAL), + .exit_signal = (lower_32_bits(flags) & CSIGNAL), + .stack = (unsigned long)fn, + .stack_size = (unsigned long)arg, + .kthread = 1, + }; + + return kernel_clone(&args); +} + +/* + * Create a user mode thread. + */ +pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags) { struct kernel_clone_args args = { .flags = ((lower_32_bits(flags) | CLONE_VM | -- cgit v1.2.3 From c5febea0956fd3874e8fb59c6f84d68f128d68f8 Mon Sep 17 00:00:00 2001 From: "Eric W.
Biederman" Date: Fri, 8 Apr 2022 18:07:50 -0500 Subject: fork: Pass struct kernel_clone_args into copy_thread With io_uring we have started supporting tasks that are for most purposes user space tasks that exclusively run code in kernel mode. The kernel task that exec's init and tasks that exec user mode helpers are also user mode tasks that just run kernel code until they call kernel execve. Pass kernel_clone_args into copy_thread so these oddball tasks can be supported more cleanly and easily. v2: Fix spelling of kenrel_clone_args on h8300 Link: https://lkml.kernel.org/r/20220506141512.516114-2-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/alpha/kernel/process.c | 8 +++++--- arch/arc/kernel/process.c | 8 +++++--- arch/arm/kernel/process.c | 7 +++++-- arch/arm64/kernel/process.c | 7 +++++-- arch/csky/kernel/process.c | 10 +++++----- arch/h8300/kernel/process.c | 5 +++-- arch/hexagon/kernel/process.c | 7 +++++-- arch/ia64/kernel/process.c | 7 +++++-- arch/m68k/kernel/process.c | 7 +++++-- arch/microblaze/kernel/process.c | 7 +++++-- arch/mips/kernel/process.c | 8 +++++--- arch/nios2/kernel/process.c | 7 +++++-- arch/openrisc/kernel/process.c | 7 +++++-- arch/parisc/kernel/process.c | 7 +++++-- arch/powerpc/kernel/process.c | 8 +++++--- arch/riscv/kernel/process.c | 7 +++++-- arch/s390/kernel/process.c | 7 +++++-- arch/sh/kernel/process_32.c | 7 +++++-- arch/sparc/kernel/process_32.c | 7 +++++-- arch/sparc/kernel/process_64.c | 7 +++++-- arch/um/kernel/process.c | 7 +++++-- arch/x86/kernel/process.c | 7 +++++-- arch/xtensa/kernel/process.c | 8 +++++--- include/linux/sched/task.h | 3 +-- kernel/fork.c | 4 ++-- 25 files changed, 116 insertions(+), 58 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 5f8527081da9..732e39217c7f 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -233,10 +233,12 @@ release_thread(struct task_struct *dead_task) /* * Copy 
architecture-specific thread state */ -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long kthread_arg, struct task_struct *p, - unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long kthread_arg = args->stack_size; + unsigned long tls = args->tls; extern void ret_from_fork(void); extern void ret_from_kernel_thread(void); diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 5f7f5aab361f..caf948ba647c 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -162,10 +162,12 @@ asmlinkage void ret_from_fork(void); * | user_r25 | * ------------------ <===== END of PAGE */ -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long kthread_arg, struct task_struct *p, - unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long kthread_arg = args->stack_size; + unsigned long tls = args->tls; struct pt_regs *c_regs; /* child's pt_regs */ unsigned long *childksp; /* to unwind out of __switch_to() */ struct callee_regs *c_callee; /* child's callee regs */ diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 0617af11377f..8e13b426dd26 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -238,9 +238,12 @@ void release_thread(struct task_struct *dead_task) asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -int copy_thread(unsigned long clone_flags, unsigned long stack_start, - unsigned long stk_sz, struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long stack_start = args->stack; + unsigned long stk_sz = args->stack_size; + unsigned long tls = args->tls; 
struct thread_info *thread = task_thread_info(p); struct pt_regs *childregs = task_pt_regs(p); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7fa97df55e3a..e002f6681c8d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -316,9 +316,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) asmlinkage void ret_from_fork(void) asm("ret_from_fork"); -int copy_thread(unsigned long clone_flags, unsigned long stack_start, - unsigned long stk_sz, struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long stack_start = args->stack; + unsigned long stk_sz = args->stack_size; + unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index 3d0ca22cd0e2..7dba33d37e1a 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -30,12 +30,12 @@ asmlinkage void ret_from_kernel_thread(void); */ void flush_thread(void){} -int copy_thread(unsigned long clone_flags, - unsigned long usp, - unsigned long kthread_arg, - struct task_struct *p, - unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long kthread_arg = args->stack_size; + unsigned long tls = args->tls; struct switch_stack *childstack; struct pt_regs *childregs = task_pt_regs(p); diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 8833fa4f5d51..752cbd9b0bf6 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -105,9 +105,10 @@ void flush_thread(void) { } -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long topstk, struct task_struct *p, unsigned long tls) 
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long usp = args->stack; + unsigned long topstk = args->stack_size; struct pt_regs *childregs; childregs = (struct pt_regs *) (THREAD_SIZE + task_stack_page(p)) - 1; diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index eab03c691f53..f1c1f6f21941 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -50,9 +50,12 @@ void arch_cpu_idle(void) /* * Copy architecture-specific thread state */ -int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, - struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long arg = args->stack_size; + unsigned long tls = args->tls; struct thread_info *ti = task_thread_info(p); struct hexagon_switch_stack *ss; struct pt_regs *childregs; diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index d7a256bd9d6b..10d41ded05a5 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -295,9 +295,12 @@ ia64_load_extra (struct task_struct *task) * so there is nothing to worry about. 
*/ int -copy_thread(unsigned long clone_flags, unsigned long user_stack_base, - unsigned long user_stack_size, struct task_struct *p, unsigned long tls) +copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long user_stack_base = args->stack; + unsigned long user_stack_size = args->stack_size; + unsigned long tls = args->tls; extern char ia64_ret_from_clone; struct switch_stack *child_stack, *stack; unsigned long rbs, child_rbs, rbs_size; diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index a6030dbaa089..8ac575656fc4 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -138,9 +138,12 @@ asmlinkage int m68k_clone3(struct pt_regs *regs) return sys_clone3((struct clone_args __user *)regs->d1, regs->d2); } -int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, - struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long arg = args->stack_size; + unsigned long tls = args->tls; struct fork_frame { struct switch_stack sw; struct pt_regs regs; diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 1b944d319d73..b5f549125c6a 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -52,9 +52,12 @@ void flush_thread(void) { } -int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, - struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long arg = args->stack_size; + unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); struct thread_info *ti = task_thread_info(p); diff --git a/arch/mips/kernel/process.c 
b/arch/mips/kernel/process.c index c2d5f4bfe1f3..a572d097b16b 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -105,10 +105,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) /* * Copy architecture-specific thread state */ -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long kthread_arg, struct task_struct *p, - unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long kthread_arg = args->stack_size; + unsigned long tls = args->tls; struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs, *regs = current_pt_regs(); unsigned long childksp; diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index f8ea522a1588..98c4bfe972e0 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -100,9 +100,12 @@ void flush_thread(void) { } -int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, - struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long arg = args->stack_size; + unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); struct pt_regs *regs; struct switch_stack *stack; diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 3c0c91bcdcba..486e46dd5883 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -152,9 +152,12 @@ extern asmlinkage void ret_from_fork(void); */ int -copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, - struct task_struct *p, unsigned long tls) +copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + 
unsigned long arg = args->stack_size; + unsigned long tls = args->tls; struct pt_regs *userregs; struct pt_regs *kregs; unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 28b6a2a5574c..129c17de45ba 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -206,9 +206,12 @@ arch_initcall(parisc_idle_init); * Copy architecture-specific thread state */ int -copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long kthread_arg, struct task_struct *p, unsigned long tls) +copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long kthread_arg = args->stack_size; + unsigned long tls = args->tls; struct pt_regs *cregs = &(p->thread.regs); void *stack = task_stack_page(p); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 984813a4d5dc..3fd67c861d54 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1716,10 +1716,12 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) /* * Copy architecture-specific thread state */ -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long kthread_arg, struct task_struct *p, - unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long kthread_arg = args->stack_size; + unsigned long tls = args->tls; struct pt_regs *childregs, *kregs; extern void ret_from_fork(void); extern void ret_from_fork_scv(void); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 504b496787aa..334382731725 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -120,9 +120,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 
return 0; } -int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, - struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long arg = args->stack_size; + unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); /* p->thread holds context to be restored by __switch_to() */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 71d86f73b02c..bb5daec39516 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -94,9 +94,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } -int copy_thread(unsigned long clone_flags, unsigned long new_stackp, - unsigned long arg, struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long new_stackp = args->stack; + unsigned long arg = args->stack_size; + unsigned long tls = args->tls; struct fake_frame { struct stack_frame sf; diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index ca01286a0610..6023399b1892 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -92,9 +92,12 @@ void release_thread(struct task_struct *dead_task) asmlinkage void ret_from_fork(void); asmlinkage void ret_from_kernel_thread(void); -int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, - struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long arg = args->stack_size; + unsigned long tls = args->tls; struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs; diff --git a/arch/sparc/kernel/process_32.c 
b/arch/sparc/kernel/process_32.c index 88c0c14aaff0..80e6775e18c0 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -259,9 +259,12 @@ clone_stackframe(struct sparc_stackf __user *dst, extern void ret_from_fork(void); extern void ret_from_kernel_thread(void); -int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, - struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long sp = args->stack; + unsigned long arg = args->stack_size; + unsigned long tls = args->tls; struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs, *regs = current_pt_regs(); char *new_stack; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 9a2ceb080ac9..38c46ca826d9 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -564,9 +564,12 @@ barf: * Parent --> %o0 == childs pid, %o1 == 0 * Child --> %o0 == parents pid, %o1 == 1 */ -int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, - struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long sp = args->stack; + unsigned long arg = args->stack_size; + unsigned long tls = args->tls; struct thread_info *t = task_thread_info(p); struct pt_regs *regs = current_pt_regs(); struct sparc_stackf *parent_sf; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 80504680be08..fd2d2361484d 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -154,9 +154,12 @@ void fork_handler(void) userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs); } -int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long arg, struct task_struct * p, unsigned long tls) +int copy_thread(struct task_struct * p, const
struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long sp = args->stack; + unsigned long arg = args->stack_size; + unsigned long tls = args->tls; void (*handler)(void); int kthread = current->flags & (PF_KTHREAD | PF_IO_WORKER); int ret = 0; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b370767f5b19..0fce52b10dc4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -130,9 +130,12 @@ static int set_new_tls(struct task_struct *p, unsigned long tls) return do_set_thread_area_64(p, ARCH_SET_FS, tls); } -int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, - struct task_struct *p, unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long sp = args->stack; + unsigned long arg = args->stack_size; + unsigned long tls = args->tls; struct inactive_task_frame *frame; struct fork_frame *fork_frame; struct pt_regs *childregs; diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index e8bfbca5f001..15ce25073142 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -201,10 +201,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) * involved. Much simpler to just not copy those live frames across. 
*/ -int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn, - unsigned long thread_fn_arg, struct task_struct *p, - unsigned long tls) +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { + unsigned long clone_flags = args->flags; + unsigned long usp_thread_fn = args->stack; + unsigned long thread_fn_arg = args->stack_size; + unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); #if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 4492266935dd..fcdcba231aac 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -68,8 +68,7 @@ extern void fork_init(void); extern void release_task(struct task_struct * p); -extern int copy_thread(unsigned long, unsigned long, unsigned long, - struct task_struct *, unsigned long); +extern int copy_thread(struct task_struct *, const struct kernel_clone_args *); extern void flush_thread(void); diff --git a/kernel/fork.c b/kernel/fork.c index 27c5203750b4..d39a248a8d8d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1979,7 +1979,7 @@ static __latent_entropy struct task_struct *copy_process( struct task_struct *p; struct multiprocess_signals delayed; struct file *pidfile = NULL; - u64 clone_flags = args->flags; + const u64 clone_flags = args->flags; struct nsproxy *nsp = current->nsproxy; /* @@ -2240,7 +2240,7 @@ static __latent_entropy struct task_struct *copy_process( retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; - retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls); + retval = copy_thread(p, args); if (retval) goto bad_fork_cleanup_io; -- cgit v1.2.3 From 36cb0e1cda645ee645b85a6ce652cb46a16e14e5 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Mon, 11 Apr 2022 16:17:28 -0500 Subject: fork: Explicity test for idle tasks in copy_thread The architectures ia64 and parisc have special handling for the idle thread in copy_thread. Add a flag named idle to kernel_clone_args and use it to explicitly test if an idle process is being created. Fulfill the expectations of the rest of the copy_thread implementations and pass a function pointer in .stack from fork_idle(). This makes what is happening in copy_thread better defined, and is useful to make idle threads less special. Link: https://lkml.kernel.org/r/20220506141512.516114-3-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/ia64/kernel/process.c | 2 +- arch/parisc/kernel/process.c | 2 +- include/linux/sched/task.h | 1 + kernel/fork.c | 9 +++++++++ 4 files changed, 12 insertions(+), 2 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 10d41ded05a5..8f010ae818bc 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -342,7 +342,7 @@ copy_thread(struct task_struct *p, const struct kernel_clone_args *args) ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { - if (unlikely(!user_stack_base)) { + if (unlikely(args->idle)) { /* fork_idle() called us */ return 0; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 129c17de45ba..30a5874ca845 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -224,7 +224,7 @@ copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { /* kernel thread */ memset(cregs, 0, sizeof(struct pt_regs)); - if (!usp) /* idle thread */ + if (args->idle) /* idle thread */ return 0; /* Must exit via ret_from_kernel_thread in order * to call schedule_tail() diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index
fcdcba231aac..3d6b99ce5408 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -33,6 +33,7 @@ struct kernel_clone_args { int cgroup; int io_thread; int kthread; + int idle; struct cgroup *cgrp; struct css_set *cset; }; diff --git a/kernel/fork.c b/kernel/fork.c index d39a248a8d8d..93d77ee921ff 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2544,12 +2544,21 @@ static inline void init_idle_pids(struct task_struct *idle) } } +static int idle_dummy(void *dummy) +{ + /* This function is never called */ + return 0; +} + struct task_struct * __init fork_idle(int cpu) { struct task_struct *task; struct kernel_clone_args args = { .flags = CLONE_VM, + .stack = (unsigned long)&idle_dummy, + .stack_size = (unsigned long)NULL, .kthread = 1, + .idle = 1, }; task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args); -- cgit v1.2.3 From 5bd2e97c868a8a44470950ed01846cab6328e540 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 12 Apr 2022 10:18:48 -0500 Subject: fork: Generalize PF_IO_WORKER handling Add fn and fn_arg members into struct kernel_clone_args and test for them in copy_thread (instead of testing for PF_KTHREAD | PF_IO_WORKER). This allows any task that wants to be a user space task that only runs in kernel mode to use this functionality. The code on x86 is an exception and still retains a PF_KTHREAD test because x86, unlike everything else, handles kthreads slightly differently than user space tasks that start with a function. The functions that create tasks that start with a function have been updated to set ".fn" and ".fn_arg" instead of ".stack" and ".stack_size". These functions are fork_idle(), create_io_thread(), kernel_thread(), and user_mode_thread(). Link: https://lkml.kernel.org/r/20220506141512.516114-4-ebiederm@xmission.com Signed-off-by: "Eric W.
Biederman" --- arch/alpha/kernel/process.c | 7 +++---- arch/arc/kernel/process.c | 7 +++---- arch/arm/kernel/process.c | 7 +++---- arch/arm64/kernel/process.c | 7 +++---- arch/csky/kernel/process.c | 7 +++---- arch/h8300/kernel/process.c | 7 +++---- arch/hexagon/kernel/process.c | 7 +++---- arch/ia64/kernel/process.c | 6 +++--- arch/m68k/kernel/process.c | 7 +++---- arch/microblaze/kernel/process.c | 7 +++---- arch/mips/kernel/process.c | 7 +++---- arch/nios2/kernel/process.c | 7 +++---- arch/openrisc/kernel/process.c | 7 +++---- arch/parisc/kernel/process.c | 11 +++++------ arch/powerpc/kernel/process.c | 9 ++++----- arch/riscv/kernel/process.c | 7 +++---- arch/s390/kernel/process.c | 7 +++---- arch/sh/kernel/process_32.c | 7 +++---- arch/sparc/kernel/process_32.c | 7 +++---- arch/sparc/kernel/process_64.c | 7 +++---- arch/um/kernel/process.c | 10 ++++------ arch/x86/include/asm/fpu/sched.h | 2 +- arch/x86/include/asm/switch_to.h | 8 ++++---- arch/x86/kernel/fpu/core.c | 4 ++-- arch/x86/kernel/process.c | 13 ++++++------- arch/xtensa/kernel/process.c | 11 +++++------ include/linux/sched/task.h | 2 ++ kernel/fork.c | 16 ++++++++-------- 28 files changed, 95 insertions(+), 116 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 732e39217c7f..6cbba7370b4e 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -237,7 +237,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long kthread_arg = args->stack_size; unsigned long tls = args->tls; extern void ret_from_fork(void); extern void ret_from_kernel_thread(void); @@ -251,13 +250,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childti->pcb.ksp = (unsigned long) childstack; childti->pcb.flags = 1; /* set FEN, clear everything else */ - if (unlikely(p->flags & (PF_KTHREAD | 
PF_IO_WORKER))) { + if (unlikely(args->fn)) { /* kernel thread */ memset(childstack, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs)); childstack->r26 = (unsigned long) ret_from_kernel_thread; - childstack->r9 = usp; /* function */ - childstack->r10 = kthread_arg; + childstack->r9 = (unsigned long) args->fn; + childstack->r10 = (unsigned long) args->fn_arg; childregs->hae = alpha_mv.hae_cache; childti->pcb.usp = 0; return 0; diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index caf948ba647c..3369f0700702 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -166,7 +166,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long kthread_arg = args->stack_size; unsigned long tls = args->tls; struct pt_regs *c_regs; /* child's pt_regs */ unsigned long *childksp; /* to unwind out of __switch_to() */ @@ -193,11 +192,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childksp[0] = 0; /* fp */ childksp[1] = (unsigned long)ret_from_fork; /* blink */ - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { memset(c_regs, 0, sizeof(struct pt_regs)); - c_callee->r13 = kthread_arg; - c_callee->r14 = usp; /* function */ + c_callee->r13 = (unsigned long)args->fn_arg; + c_callee->r14 = (unsigned long)args->fn; return 0; } diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 8e13b426dd26..3d9cace63884 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -242,7 +242,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long stack_start = args->stack; - unsigned long stk_sz = args->stack_size; unsigned long tls = args->tls; struct thread_info *thread = task_thread_info(p); struct pt_regs *childregs = task_pt_regs(p); @@ -259,15 +258,15 @@ int 
copy_thread(struct task_struct *p, const struct kernel_clone_args *args) thread->cpu_domain = get_domain(); #endif - if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) { + if (likely(!args->fn)) { *childregs = *current_pt_regs(); childregs->ARM_r0 = 0; if (stack_start) childregs->ARM_sp = stack_start; } else { memset(childregs, 0, sizeof(struct pt_regs)); - thread->cpu_context.r4 = stk_sz; - thread->cpu_context.r5 = stack_start; + thread->cpu_context.r4 = (unsigned long)args->fn_arg; + thread->cpu_context.r5 = (unsigned long)args->fn; childregs->ARM_cpsr = SVC_MODE; } thread->cpu_context.pc = (unsigned long)ret_from_fork; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index e002f6681c8d..d0ef05c661b0 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -320,7 +320,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long stack_start = args->stack; - unsigned long stk_sz = args->stack_size; unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); @@ -337,7 +336,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) ptrauth_thread_init_kernel(p); - if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) { + if (likely(!args->fn)) { *childregs = *current_pt_regs(); childregs->regs[0] = 0; @@ -371,8 +370,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT; - p->thread.cpu_context.x19 = stack_start; - p->thread.cpu_context.x20 = stk_sz; + p->thread.cpu_context.x19 = (unsigned long)args->fn; + p->thread.cpu_context.x20 = (unsigned long)args->fn_arg; } p->thread.cpu_context.pc = (unsigned long)ret_from_fork; p->thread.cpu_context.sp = (unsigned long)childregs; diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index 7dba33d37e1a..9af49aea1c3b 100644 --- 
a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -34,7 +34,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long kthread_arg = args->stack_size; unsigned long tls = args->tls; struct switch_stack *childstack; struct pt_regs *childregs = task_pt_regs(p); @@ -49,11 +48,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* setup thread.sp for switch_to !!! */ p->thread.sp = (unsigned long)childstack; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { memset(childregs, 0, sizeof(struct pt_regs)); childstack->r15 = (unsigned long) ret_from_kernel_thread; - childstack->r10 = kthread_arg; - childstack->r9 = usp; + childstack->r10 = (unsigned long) args->fn_arg; + childstack->r9 = (unsigned long) args->fn; childregs->sr = mfcr("psr"); } else { *childregs = *(current_pt_regs()); diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 752cbd9b0bf6..9028262c96a9 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -108,16 +108,15 @@ void flush_thread(void) int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long usp = args->stack; - unsigned long topstk = args->stack_size; struct pt_regs *childregs; childregs = (struct pt_regs *) (THREAD_SIZE + task_stack_page(p)) - 1; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { memset(childregs, 0, sizeof(struct pt_regs)); childregs->retpc = (unsigned long) ret_from_kernel_thread; - childregs->er4 = topstk; /* arg */ - childregs->er5 = usp; /* fn */ + childregs->er4 = (unsigned long) args->fn_arg; + childregs->er5 = (unsigned long) args->fn; } else { *childregs = *current_pt_regs(); childregs->er0 = 0; diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index f1c1f6f21941..f0552f98a7ba 100644 --- 
a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -54,7 +54,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; struct thread_info *ti = task_thread_info(p); struct hexagon_switch_stack *ss; @@ -76,11 +75,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) sizeof(*ss)); ss->lr = (unsigned long)ret_from_fork; p->thread.switch_sp = ss; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { memset(childregs, 0, sizeof(struct pt_regs)); /* r24 <- fn, r25 <- arg */ - ss->r24 = usp; - ss->r25 = arg; + ss->r24 = (unsigned long)args->fn; + ss->r25 = (unsigned long)args->fn_arg; pt_set_kmode(childregs); return 0; } diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 8f010ae818bc..167b1765bea1 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -341,14 +341,14 @@ copy_thread(struct task_struct *p, const struct kernel_clone_args *args) ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */ - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { if (unlikely(args->idle)) { /* fork_idle() called us */ return 0; } memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack)); - child_stack->r4 = user_stack_base; /* payload */ - child_stack->r5 = user_stack_size; /* argument */ + child_stack->r4 = (unsigned long) args->fn; + child_stack->r5 = (unsigned long) args->fn_arg; /* * Preserve PSR bits, except for bits 32-34 and 37-45, * which we can't read. 
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 8ac575656fc4..221feb0269f1 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -142,7 +142,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; struct fork_frame { struct switch_stack sw; @@ -160,12 +159,12 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) */ p->thread.fc = USER_DATA; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { /* kernel thread */ memset(frame, 0, sizeof(struct fork_frame)); frame->regs.sr = PS_S; - frame->sw.a3 = usp; /* function */ - frame->sw.d7 = arg; + frame->sw.a3 = (unsigned long)args->fn; + frame->sw.d7 = (unsigned long)args->fn_arg; frame->sw.retpc = (unsigned long)ret_from_kernel_thread; p->thread.usp = 0; return 0; diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index b5f549125c6a..3c6241bcaea8 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -56,19 +56,18 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); struct thread_info *ti = task_thread_info(p); - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { /* if we're creating a new kernel thread then just zeroing all * the registers. 
That's OK for a brand new thread.*/ memset(childregs, 0, sizeof(struct pt_regs)); memset(&ti->cpu_context, 0, sizeof(struct cpu_context)); ti->cpu_context.r1 = (unsigned long)childregs; - ti->cpu_context.r20 = (unsigned long)usp; /* fn */ - ti->cpu_context.r19 = (unsigned long)arg; + ti->cpu_context.r20 = (unsigned long)args->fn; + ti->cpu_context.r19 = (unsigned long)args->fn_arg; childregs->pt_mode = 1; local_save_flags(childregs->msr); ti->cpu_context.msr = childregs->msr & ~MSR_IE; diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index a572d097b16b..35b912bce429 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -109,7 +109,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long kthread_arg = args->stack_size; unsigned long tls = args->tls; struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs, *regs = current_pt_regs(); @@ -122,12 +121,12 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Put the stack after the struct pt_regs. 
*/ childksp = (unsigned long) childregs; p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { /* kernel thread */ unsigned long status = p->thread.cp0_status; memset(childregs, 0, sizeof(struct pt_regs)); - p->thread.reg16 = usp; /* fn */ - p->thread.reg17 = kthread_arg; + p->thread.reg16 = (unsigned long)args->fn; + p->thread.reg17 = (unsigned long)args->fn_arg; p->thread.reg29 = childksp; p->thread.reg31 = (unsigned long) ret_from_kernel_thread; #if defined(CONFIG_CPU_R3000) diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 98c4bfe972e0..29593b98567d 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -104,7 +104,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); struct pt_regs *regs; @@ -112,12 +111,12 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) struct switch_stack *childstack = ((struct switch_stack *)childregs) - 1; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { memset(childstack, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs)); - childstack->r16 = usp; /* fn */ - childstack->r17 = arg; + childstack->r16 = (unsigned long) args->fn; + childstack->r17 = (unsigned long) args->fn_arg; childstack->ra = (unsigned long) ret_from_kernel_thread; childregs->estatus = STATUS_PIE; childregs->sp = (unsigned long) childstack; diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 486e46dd5883..d9697cc9bc4d 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -156,7 +156,6 @@ copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { 
unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; struct pt_regs *userregs; struct pt_regs *kregs; @@ -175,10 +174,10 @@ copy_thread(struct task_struct *p, const struct kernel_clone_args *args) sp -= sizeof(struct pt_regs); kregs = (struct pt_regs *)sp; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { memset(kregs, 0, sizeof(struct pt_regs)); - kregs->gpr[20] = usp; /* fn, kernel thread */ - kregs->gpr[22] = arg; + kregs->gpr[20] = (unsigned long)args->fn; + kregs->gpr[22] = (unsigned long)args->fn_arg; } else { *userregs = *current_pt_regs(); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 30a5874ca845..a6a2a558fc5b 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -210,7 +210,6 @@ copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long kthread_arg = args->stack_size; unsigned long tls = args->tls; struct pt_regs *cregs = &(p->thread.regs); void *stack = task_stack_page(p); @@ -221,7 +220,7 @@ copy_thread(struct task_struct *p, const struct kernel_clone_args *args) extern void * const ret_from_kernel_thread; extern void * const child_return; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { /* kernel thread */ memset(cregs, 0, sizeof(struct pt_regs)); if (args->idle) /* idle thread */ @@ -236,12 +235,12 @@ copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * ret_from_kernel_thread. 
*/ #ifdef CONFIG_64BIT - cregs->gr[27] = ((unsigned long *)usp)[3]; - cregs->gr[26] = ((unsigned long *)usp)[2]; + cregs->gr[27] = ((unsigned long *)args->fn)[3]; + cregs->gr[26] = ((unsigned long *)args->fn)[2]; #else - cregs->gr[26] = usp; + cregs->gr[26] = (unsigned long) args->fn; #endif - cregs->gr[25] = kthread_arg; + cregs->gr[25] = (unsigned long) args->fn_arg; } else { /* user thread */ /* usp must be word aligned. This also prevents users from diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3fd67c861d54..4f367bb68906 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1720,7 +1720,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long kthread_arg = args->stack_size; unsigned long tls = args->tls; struct pt_regs *childregs, *kregs; extern void ret_from_fork(void); @@ -1738,18 +1737,18 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Copy registers */ sp -= sizeof(struct pt_regs); childregs = (struct pt_regs *) sp; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { /* kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); childregs->gpr[1] = sp + sizeof(struct pt_regs); /* function */ - if (usp) - childregs->gpr[14] = ppc_function_entry((void *)usp); + if (args->fn) + childregs->gpr[14] = ppc_function_entry((void *)args->fn); #ifdef CONFIG_PPC64 clear_tsk_thread_flag(p, TIF_32BIT); childregs->softe = IRQS_ENABLED; #endif - childregs->gpr[15] = kthread_arg; + childregs->gpr[15] = (unsigned long)args->fn_arg; p->thread.regs = NULL; /* no user register state */ ti->flags |= _TIF_RESTOREALL; f = ret_from_kernel_thread; diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 334382731725..24efabdbc551 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -124,12 
+124,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); /* p->thread holds context to be restored by __switch_to() */ - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); childregs->gp = gp_in_global; @@ -137,8 +136,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->status = SR_PP | SR_PIE; p->thread.ra = (unsigned long)ret_from_kernel_thread; - p->thread.s[0] = usp; /* fn */ - p->thread.s[1] = arg; + p->thread.s[0] = (unsigned long)args->fn; + p->thread.s[1] = (unsigned long)args->fn_arg; } else { *childregs = *(current_pt_regs()); if (usp) /* User fork */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index bb5daec39516..89949b9f3cf8 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -98,7 +98,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long new_stackp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; struct fake_frame { @@ -133,15 +132,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame->sf.gprs[9] = (unsigned long)frame; /* Store access registers to kernel stack of new process. 
*/ - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { /* kernel thread */ memset(&frame->childregs, 0, sizeof(struct pt_regs)); frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; frame->childregs.psw.addr = (unsigned long)__ret_from_fork; - frame->childregs.gprs[9] = new_stackp; /* function */ - frame->childregs.gprs[10] = arg; + frame->childregs.gprs[9] = (unsigned long)args->fn; + frame->childregs.gprs[10] = (unsigned long)args->fn_arg; frame->childregs.orig_gpr2 = -1; frame->childregs.last_break = 1; return 0; diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 6023399b1892..a808843375e7 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -96,7 +96,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs; @@ -117,11 +116,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs = task_pt_regs(p); p->thread.sp = (unsigned long) childregs; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { memset(childregs, 0, sizeof(struct pt_regs)); p->thread.pc = (unsigned long) ret_from_kernel_thread; - childregs->regs[4] = arg; - childregs->regs[5] = usp; + childregs->regs[4] = (unsigned long) args->fn_arg; + childregs->regs[5] = (unsigned long) args->fn; childregs->sr = SR_MD; #if defined(CONFIG_SH_FPU) childregs->sr |= SR_FD; diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 80e6775e18c0..33b0215a4182 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -263,7 +263,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long 
clone_flags = args->flags; unsigned long sp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs, *regs = current_pt_regs(); @@ -299,13 +298,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) ti->ksp = (unsigned long) new_stack; p->thread.kregs = childregs; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { extern int nwindows; unsigned long psr; memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ); ti->kpc = (((unsigned long) ret_from_kernel_thread) - 0x8); - childregs->u_regs[UREG_G1] = sp; /* function */ - childregs->u_regs[UREG_G2] = arg; + childregs->u_regs[UREG_G1] = (unsigned long) args->fn; + childregs->u_regs[UREG_G2] = (unsigned long) args->fn_arg; psr = childregs->psr = get_psr(); ti->kpsr = psr | PSR_PIL; ti->kwim = 1 << (((psr & PSR_CWP) + 1) % nwindows); diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 38c46ca826d9..6335b698a4b4 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -568,7 +568,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long sp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; struct thread_info *t = task_thread_info(p); struct pt_regs *regs = current_pt_regs(); @@ -587,12 +586,12 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) sizeof(struct sparc_stackf)); t->fpsaved[0] = 0; - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (unlikely(args->fn)) { memset(child_trap_frame, 0, child_stack_sz); __thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] = (current_pt_regs()->tstate + 1) & TSTATE_CWP; - t->kregs->u_regs[UREG_G1] = sp; /* function */ - t->kregs->u_regs[UREG_G2] = arg; + t->kregs->u_regs[UREG_G1] = (unsigned long) args->fn; + 
t->kregs->u_regs[UREG_G2] = (unsigned long) args->fn_arg; return 0; } diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index fd2d2361484d..181cc9aafb25 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -158,15 +158,13 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long sp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; void (*handler)(void); - int kthread = current->flags & (PF_KTHREAD | PF_IO_WORKER); int ret = 0; p->thread = (struct thread_struct) INIT_THREAD; - if (!kthread) { + if (!args->fn) { memcpy(&p->thread.regs.regs, current_pt_regs(), sizeof(p->thread.regs.regs)); PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0); @@ -178,14 +176,14 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args) arch_copy_thread(&current->thread.arch, &p->thread.arch); } else { get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); - p->thread.request.u.thread.proc = (int (*)(void *))sp; - p->thread.request.u.thread.arg = (void *)arg; + p->thread.request.u.thread.proc = args->fn; + p->thread.request.u.thread.arg = args->fn_arg; handler = new_thread_handler; } new_thread(task_stack_page(p), &p->thread.switch_buf, handler); - if (!kthread) { + if (!args->fn) { clear_flushed_tls(p); /* diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h index 99a8820e8cc4..b2486b2cbc6e 100644 --- a/arch/x86/include/asm/fpu/sched.h +++ b/arch/x86/include/asm/fpu/sched.h @@ -11,7 +11,7 @@ extern void save_fpregs_to_fpstate(struct fpu *fpu); extern void fpu__drop(struct fpu *fpu); -extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags); +extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal); extern void fpu_flush_thread(void); /* diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index
b5f0d2ff47e4..c08eb0fdd11f 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -78,13 +78,13 @@ static inline void update_task_stack(struct task_struct *task) } static inline void kthread_frame_init(struct inactive_task_frame *frame, - unsigned long fun, unsigned long arg) + int (*fun)(void *), void *arg) { - frame->bx = fun; + frame->bx = (unsigned long)fun; #ifdef CONFIG_X86_32 - frame->di = arg; + frame->di = (unsigned long)arg; #else - frame->r12 = arg; + frame->r12 = (unsigned long)arg; #endif } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c049561f373a..fbade5a3975b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -556,7 +556,7 @@ static inline void fpu_inherit_perms(struct fpu *dst_fpu) } /* Clone current's FPU state on fork */ -int fpu_clone(struct task_struct *dst, unsigned long clone_flags) +int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal) { struct fpu *src_fpu = &current->thread.fpu; struct fpu *dst_fpu = &dst->thread.fpu; @@ -579,7 +579,7 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags) * No FPU state inheritance for kernel threads and IO * worker threads.
*/ - if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) { + if (minimal) { /* Clear out the minimal state */ memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size()); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0fce52b10dc4..d20eaad52a85 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -134,7 +134,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long sp = args->stack; - unsigned long arg = args->stack_size; unsigned long tls = args->tls; struct inactive_task_frame *frame; struct fork_frame *fork_frame; @@ -172,13 +171,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame->flags = X86_EFLAGS_FIXED; #endif - fpu_clone(p, clone_flags); + fpu_clone(p, clone_flags, args->fn); /* Kernel thread ? */ if (unlikely(p->flags & PF_KTHREAD)) { p->thread.pkru = pkru_get_init_value(); memset(childregs, 0, sizeof(struct pt_regs)); - kthread_frame_init(frame, sp, arg); + kthread_frame_init(frame, args->fn, args->fn_arg); return 0; } @@ -198,10 +197,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) task_user_gs(p) = get_user_gs(current_pt_regs()); #endif - if (unlikely(p->flags & PF_IO_WORKER)) { + if (unlikely(args->fn)) { /* - * An IO thread is a user space thread, but it doesn't - * return to ret_after_fork(). + * A user space thread, but it doesn't return to + * ret_after_fork(). * * In order to indicate that to tools like gdb, * we reset the stack and instruction pointers. 
@@ -211,7 +210,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) */ childregs->sp = 0; childregs->ip = 0; - kthread_frame_init(frame, sp, arg); + kthread_frame_init(frame, args->fn, args->fn_arg); return 0; } diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 15ce25073142..c3751cc88e5d 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -205,7 +205,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; unsigned long usp_thread_fn = args->stack; - unsigned long thread_fn_arg = args->stack_size; unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); @@ -226,7 +225,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) #error Unsupported Xtensa ABI #endif - if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + if (!args->fn) { struct pt_regs *regs = current_pt_regs(); unsigned long usp = usp_thread_fn ? usp_thread_fn : regs->areg[1]; @@ -278,15 +277,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * Window underflow will load registers from the * spill slots on the stack on return from _switch_to. */ - SPILL_SLOT(childregs, 2) = usp_thread_fn; - SPILL_SLOT(childregs, 3) = thread_fn_arg; + SPILL_SLOT(childregs, 2) = (unsigned long)args->fn; + SPILL_SLOT(childregs, 3) = (unsigned long)args->fn_arg; #elif defined(__XTENSA_CALL0_ABI__) /* * a12 = thread_fn, a13 = thread_fn arg. * _switch_to epilogue will load registers from the stack. 
*/ - ((unsigned long *)p->thread.sp)[0] = usp_thread_fn; - ((unsigned long *)p->thread.sp)[1] = thread_fn_arg; + ((unsigned long *)p->thread.sp)[0] = (unsigned long)args->fn; + ((unsigned long *)p->thread.sp)[1] = (unsigned long)args->fn_arg; #else #error Unsupported Xtensa ABI #endif diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 3d6b99ce5408..505aaf9fe477 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -34,6 +34,8 @@ struct kernel_clone_args { int io_thread; int kthread; int idle; + int (*fn)(void *); + void *fn_arg; struct cgroup *cgrp; struct css_set *cset; }; diff --git a/kernel/fork.c b/kernel/fork.c index 93d77ee921ff..8e17c3fbce42 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2555,8 +2555,8 @@ struct task_struct * __init fork_idle(int cpu) struct task_struct *task; struct kernel_clone_args args = { .flags = CLONE_VM, - .stack = (unsigned long)&idle_dummy, - .stack_size = (unsigned long)NULL, + .fn = &idle_dummy, + .fn_arg = NULL, .kthread = 1, .idle = 1, }; @@ -2589,8 +2589,8 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) .flags = ((lower_32_bits(flags) | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), .exit_signal = (lower_32_bits(flags) & CSIGNAL), - .stack = (unsigned long)fn, - .stack_size = (unsigned long)arg, + .fn = fn, + .fn_arg = arg, .io_thread = 1, }; @@ -2694,8 +2694,8 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) .flags = ((lower_32_bits(flags) | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), .exit_signal = (lower_32_bits(flags) & CSIGNAL), - .stack = (unsigned long)fn, - .stack_size = (unsigned long)arg, + .fn = fn, + .fn_arg = arg, .kthread = 1, }; @@ -2711,8 +2711,8 @@ pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags) .flags = ((lower_32_bits(flags) | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), .exit_signal = (lower_32_bits(flags) & CSIGNAL), - .stack = (unsigned long)fn, - .stack_size = (unsigned long)arg, + .fn = 
fn, + .fn_arg = arg, }; return kernel_clone(&args); -- cgit v1.2.3 From 753550eb0ce1fea4b5cbd989f2e06ef80b2feb28 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 11 Apr 2022 14:13:56 -0500 Subject: fork: Explicitly set PF_KTHREAD Instead of implicitly inheriting PF_KTHREAD from the parent process examine arguments in kernel_clone_args to see if PF_KTHREAD should be set. This makes knowledge of which new threads are kernel threads explicit. This also makes it so that init and the user mode helper processes no longer have PF_KTHREAD set. Link: https://lkml.kernel.org/r/20220506141512.516114-6-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- kernel/fork.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 8e17c3fbce42..35645f57bd2f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2068,6 +2068,9 @@ static __latent_entropy struct task_struct *copy_process( p = dup_task_struct(current, node); if (!p) goto fork_out; + p->flags &= ~PF_KTHREAD; + if (args->kthread) + p->flags |= PF_KTHREAD; if (args->io_thread) { /* * Mark us an IO worker, and block any signal that isn't -- cgit v1.2.3