From 8fb12156b8db61af3d49f3e5e104568494581d1f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 16 May 2017 20:42:32 +0200
Subject: init: Pin init task to the boot CPU, initially

Some of the boot code in kernel_init_freeable() which runs before SMP
bringup assumes (rightfully) that it runs on the boot CPU and therefore
can use smp_processor_id() in preemptible context.

That works so far because the smp_processor_id() check only becomes
effective after SMP bringup. That's just wrong: starting with SMP bringup
and the ability to move threads around, smp_processor_id() in preemptible
context is broken.

Aside from that, it does not make sense to allow init to run on all CPUs
before sched_init_smp() has been run.

Pin init to the boot CPU so the existing code can continue to use
smp_processor_id() without triggering those checks once they are enabled
earlier in the boot process.

Tested-by: Mark Rutland
Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Cc: Greg Kroah-Hartman
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Steven Rostedt
Link: http://lkml.kernel.org/r/20170516184734.943149935@linutronix.de
Signed-off-by: Ingo Molnar
---
 init/main.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'init')

diff --git a/init/main.c b/init/main.c
index f866510472d7..badae3bf08f1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -389,6 +389,7 @@ static __initdata DECLARE_COMPLETION(kthreadd_done);
 
 static noinline void __ref rest_init(void)
 {
+        struct task_struct *tsk;
         int pid;
 
         rcu_scheduler_starting();
@@ -397,7 +398,17 @@ static noinline void __ref rest_init(void)
          * the init task will end up wanting to create kthreads, which, if
          * we schedule it before we create kthreadd, will OOPS.
          */
-        kernel_thread(kernel_init, NULL, CLONE_FS);
+        pid = kernel_thread(kernel_init, NULL, CLONE_FS);
+        /*
+         * Pin init on the boot CPU. Task migration is not properly working
+         * until sched_init_smp() has been run. It will set the allowed
+         * CPUs for init to the non isolated CPUs.
+         */
+        rcu_read_lock();
+        tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+        set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id()));
+        rcu_read_unlock();
+
         numa_default_policy();
         pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
         rcu_read_lock();
@@ -1015,10 +1026,6 @@ static noinline void __init kernel_init_freeable(void)
          * init can allocate pages on any node
          */
         set_mems_allowed(node_states[N_MEMORY]);
-        /*
-         * init can run on any cpu.
-         */
-        set_cpus_allowed_ptr(current, cpu_all_mask);
 
         cad_pid = task_pid(current);
 
--
cgit v1.2.3


From 1c3c5eab171590f86edd8d31389d61dd1efe3037 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 16 May 2017 20:42:48 +0200
Subject: sched/core: Enable might_sleep() and smp_processor_id() checks early

might_sleep() and smp_processor_id() checks are enabled after the boot
process is done. That hides bugs in the SMP bringup and driver
initialization code.

Enable them right when the scheduler starts working, i.e. when the init
task and kthreadd have been created and right before the idle task
enables preemption.

Tested-by: Mark Rutland
Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Mark Rutland
Cc: Greg Kroah-Hartman
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Steven Rostedt
Link: http://lkml.kernel.org/r/20170516184736.272225698@linutronix.de
Signed-off-by: Ingo Molnar
---
 init/main.c            | 10 ++++++++++
 kernel/sched/core.c    |  4 +++-
 lib/smp_processor_id.c |  2 +-
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'init')

diff --git a/init/main.c b/init/main.c
index badae3bf08f1..df58a416dd1d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -414,6 +414,16 @@ static noinline void __ref rest_init(void)
         rcu_read_lock();
         kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
         rcu_read_unlock();
+
+        /*
+         * Enable might_sleep() and smp_processor_id() checks.
+         * They cannot be enabled earlier because with CONFIG_PREEMPT=y
+         * kernel_thread() would trigger might_sleep() splats. With
+         * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
+         * already, but it's stuck on the kthreadd_done completion.
+         */
+        system_state = SYSTEM_SCHEDULING;
+
         complete(&kthreadd_done);
 
         /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 877241e9f2b0..c3e50cada84d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6238,8 +6238,10 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 
         if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
              !is_idle_task(current)) ||
-            system_state != SYSTEM_RUNNING || oops_in_progress)
+            system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
+            oops_in_progress)
                 return;
+
         if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                 return;
         prev_jiffy = jiffies;
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 690d75b132fa..2fb007be0212 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -28,7 +28,7 @@ notrace static unsigned int check_preemption_disabled(const char *what1,
         /*
          * It is valid to assume CPU-locality during early bootup:
          */
-        if (system_state != SYSTEM_RUNNING)
+        if (system_state < SYSTEM_SCHEDULING)
                 goto out;
 
         /*
--
cgit v1.2.3


From e1d4eeec5aaa28d25f249c0195b0e1d9b9feb7bd Mon Sep 17 00:00:00 2001
From: Nicolas Pitre
Date: Wed, 14 Jun 2017 13:19:23 -0400
Subject: sched/cpuset: Only offer CONFIG_CPUSETS if SMP is enabled

Make CONFIG_CPUSETS=y depend on SMP as this feature makes no sense on UP.
This allows for configuring out cpuset_cpumask_can_shrink() and
task_can_attach() entirely, which shrinks the kernel a bit.

Signed-off-by: Nicolas Pitre
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/20170614171926.8345-2-nicolas.pitre@linaro.org
Signed-off-by: Ingo Molnar
---
 init/Kconfig        | 1 +
 kernel/sched/core.c | 7 +++----
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'init')

diff --git a/init/Kconfig b/init/Kconfig
index 1d3475fc9496..c359038ebeed 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1156,6 +1156,7 @@ config CGROUP_HUGETLB
 
 config CPUSETS
         bool "Cpuset controller"
+        depends on SMP
         help
           This option will let you create and manage CPUSETs which
           allow dynamically partitioning a system into sets of CPUs and
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 62166da1c359..7faf4b322b63 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5451,6 +5451,8 @@ void init_idle(struct task_struct *idle, int cpu)
 #endif
 }
 
+#ifdef CONFIG_SMP
+
 int cpuset_cpumask_can_shrink(const struct cpumask *cur,
                               const struct cpumask *trial)
 {
@@ -5494,7 +5496,6 @@ int task_can_attach(struct task_struct *p,
                 goto out;
         }
 
-#ifdef CONFIG_SMP
         if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
                                               cs_cpus_allowed)) {
                 unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
@@ -5524,13 +5525,11 @@ int task_can_attach(struct task_struct *p,
                 rcu_read_unlock_sched();
         }
 
-#endif
+
 out:
         return ret;
 }
 
-#ifdef CONFIG_SMP
-
 bool sched_smp_initialized __read_mostly;
 
 #ifdef CONFIG_NUMA_BALANCING
--
cgit v1.2.3
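
The two state tests introduced above, "system_state == SYSTEM_BOOTING ||
system_state > SYSTEM_RUNNING" in ___might_sleep() and "system_state <
SYSTEM_SCHEDULING" in check_preemption_disabled(), rely on the numeric
ordering of enum system_states: the value only increases as boot
progresses, so a single comparison distinguishes "too early for the check"
from "up and running" or "shutting down". The following is a minimal
userspace sketch of that gating logic, not kernel code; the state names
mirror the enum as it looked in this series, and the helper names
cpu_locality_check_enabled() and might_sleep_check_enabled() are invented
here purely for illustration.

#include <stdio.h>

/*
 * Mirrors the ordering of the kernel's enum system_states at the time of
 * this series: values only grow as boot progresses.
 */
enum system_states {
        SYSTEM_BOOTING,
        SYSTEM_SCHEDULING,
        SYSTEM_RUNNING,
        SYSTEM_HALT,
        SYSTEM_POWER_OFF,
        SYSTEM_RESTART,
};

static enum system_states system_state = SYSTEM_BOOTING;

/*
 * Simplified stand-in for the check_preemption_disabled() gate:
 * CPU-locality may be assumed until the scheduler can migrate tasks,
 * i.e. the warning is only armed from SYSTEM_SCHEDULING onwards.
 */
static int cpu_locality_check_enabled(void)
{
        return system_state >= SYSTEM_SCHEDULING;
}

/*
 * Simplified stand-in for the ___might_sleep() gate: warn only while the
 * scheduler works and the system is not halting or rebooting.
 */
static int might_sleep_check_enabled(void)
{
        return system_state != SYSTEM_BOOTING && system_state <= SYSTEM_RUNNING;
}

int main(void)
{
        for (enum system_states s = SYSTEM_BOOTING; s <= SYSTEM_RESTART; s++) {
                system_state = s;
                printf("state %d: smp_processor_id() check %s, might_sleep() check %s\n",
                       (int)s,
                       cpu_locality_check_enabled() ? "on" : "off",
                       might_sleep_check_enabled() ? "on" : "off");
        }
        return 0;
}

Because the state only ever moves forward during a normal boot, the
patches can replace the old "system_state != SYSTEM_RUNNING" test with
plain integer comparisons instead of introducing a separate flag.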