From 5b6547ed97f4f5dfc23f8e3970af6d11d7b7ed7e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 16 Mar 2022 22:03:41 +0100
Subject: sched/core: Fix forceidle balancing

Steve reported that ChromeOS encounters the forceidle balancer being
run from rt_mutex_setprio()'s balance_callback() invocation and
explodes.

Now, the forceidle balancer gets queued every time the idle task gets
selected, set_next_task(), which is strictly too often.

rt_mutex_setprio() also uses set_next_task() in the 'change' pattern:

	queued = task_on_rq_queued(p); /* p->on_rq == TASK_ON_RQ_QUEUED */
	running = task_current(rq, p); /* rq->curr == p */

	if (queued)
		dequeue_task(...);
	if (running)
		put_prev_task(...);

	/* change task properties */

	if (queued)
		enqueue_task(...);
	if (running)
		set_next_task(...);

However, rt_mutex_setprio() will explicitly not run this pattern on
the idle task (since priority boosting the idle task is quite insane).
Most other 'change' pattern users are pidhash based and would also not
apply to the idle task.

Also, the change pattern doesn't contain a __balance_callback()
invocation and hence we could have an out-of-band balance-callback,
which *should* trigger the WARN in rq_pin_lock() (which guards against
this exact anti-pattern).

So while none of that explains how this happens, it does indicate that
having it in set_next_task() might not be the most robust option.

Instead, explicitly queue the forceidle balancer from pick_next_task()
when it does indeed result in forceidle selection. Having it here
ensures it can only be triggered under the __schedule() rq->lock
instance, and hence must be run from that context.

This also happens to clean up the code a little, so win-win.

Fixes: d2dfa17bc7de ("sched: Trivial forced-newidle balancer")
Reported-by: Steven Rostedt
Signed-off-by: Peter Zijlstra (Intel)
Tested-by: T.J. Alumbaugh
Link: https://lkml.kernel.org/r/20220330160535.GN8939@worktop.programming.kicks-ass.net
---
 kernel/sched/core.c  | 14 ++++++++++----
 kernel/sched/idle.c  |  1 -
 kernel/sched/sched.h |  6 ------
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d575b4914925..017ee7807930 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5752,6 +5752,8 @@ static inline struct task_struct *pick_task(struct rq *rq)
 
 extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
 
+static void queue_core_balance(struct rq *rq);
+
 static struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
@@ -5801,7 +5803,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 		}
 
 		rq->core_pick = NULL;
-		return next;
+		goto out;
 	}
 
 	put_prev_task_balance(rq, prev, rf);
@@ -5851,7 +5853,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 			 */
 			WARN_ON_ONCE(fi_before);
 			task_vruntime_update(rq, next, false);
-			goto done;
+			goto out_set_next;
 		}
 	}
 
@@ -5970,8 +5972,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 			resched_curr(rq_i);
 	}
 
-done:
+out_set_next:
 	set_next_task(rq, next);
+out:
+	if (rq->core->core_forceidle_count && next == rq->idle)
+		queue_core_balance(rq);
+
 	return next;
 }
 
@@ -6066,7 +6072,7 @@ static void sched_core_balance(struct rq *rq)
 
 static DEFINE_PER_CPU(struct callback_head, core_balance_head);
 
-void queue_core_balance(struct rq *rq)
+static void queue_core_balance(struct rq *rq)
 {
 	if (!sched_core_enabled(rq))
 		return;

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 8f8b5020e76a..ecb0d7052877 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -434,7 +434,6 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir
 {
 	update_idle_core(rq);
 	schedstat_inc(rq->sched_goidle);
-	queue_core_balance(rq);
 }
 
 #ifdef CONFIG_SMP

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 58263f90c559..8dccb34eb190 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1232,8 +1232,6 @@ static inline bool sched_group_cookie_match(struct rq *rq,
 	return false;
 }
 
-extern void queue_core_balance(struct rq *rq);
-
 static inline bool sched_core_enqueued(struct task_struct *p)
 {
 	return !RB_EMPTY_NODE(&p->core_node);
@@ -1267,10 +1265,6 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
 	return &rq->__lock;
 }
 
-static inline void queue_core_balance(struct rq *rq)
-{
-}
-
 static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
 {
 	return true;
--
cgit v1.2.3
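The fix leans on the balance-callback contract: callbacks hang off
rq->balance_callback and are flushed, still under the rq->lock, by the
context that queued them. The following is a simplified sketch of the
queuing helper, paraphrased from kernel/sched/sched.h of this era (the
real version also special-cases the hotplug balance_push callback), to
show why a callback queued from anywhere else is "out-of-band":

	/*
	 * Simplified sketch, not the verbatim kernel helper: queue func
	 * to run when the current rq->lock section ends. Requeueing an
	 * already-queued head is a no-op.
	 */
	static inline void
	queue_balance_callback(struct rq *rq, struct callback_head *head,
			       void (*func)(struct rq *rq))
	{
		lockdep_assert_rq_held(rq);	/* caller must hold rq->lock */

		if (head->next)			/* already queued */
			return;

		head->func = (void (*)(struct callback_head *))func;
		head->next = rq->balance_callback;
		rq->balance_callback = head;
	}

Queuing from pick_next_task() therefore pairs the queue and the flush
under the same __schedule() rq->lock instance, which is exactly the
guarantee the commit message asks for.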
From 386ef214c3c6ab111d05e1790e79475363abaa05 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Thu, 17 Mar 2022 15:51:32 +0100
Subject: sched: Teach the forced-newidle balancer about CPU affinity limitation.

try_steal_cookie() looks at task_struct::cpus_mask to decide if the
task could be moved to `this' CPU. It ignores that the task might be
in a migration-disabled section while not on the CPU. In this case
the task must not be moved, otherwise per-CPU assumptions are broken.

Use is_cpu_allowed(), as suggested by Peter Zijlstra, to decide if a
task can be moved.
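The subtlety is that try_steal_cookie() tested p->cpus_mask, the
user-visible affinity mask, which does not reflect migrate_disable().
Below is a sketch of the distinction; the helper name is ours for
illustration, and the real is_cpu_allowed() in kernel/sched/core.c
carries additional kthread and CPU-hotplug checks:

	/*
	 * Sketch only: p->cpus_ptr normally points at p->cpus_mask, but
	 * is narrowed to the task's current CPU for the duration of a
	 * migrate_disable() section; testing cpus_mask misses that.
	 */
	static bool is_cpu_allowed_sketch(struct task_struct *p, int cpu)
	{
		/* Honours migrate_disable(); the old cpus_mask test did not. */
		if (!cpumask_test_cpu(cpu, p->cpus_ptr))
			return false;

		/* A migration-disabled task must finish where it is. */
		if (is_migration_disabled(p))
			return cpu_online(cpu);

		return cpu_active(cpu);
	}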
Fixes: d2dfa17bc7de6 ("sched: Trivial forced-newidle balancer")
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/YjNK9El+3fzGmswf@linutronix.de
---
 kernel/sched/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 017ee7807930..51efaabac3e4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6006,7 +6006,7 @@ static bool try_steal_cookie(int this, int that)
 		if (p == src->core_pick || p == src->curr)
 			goto next;
 
-		if (!cpumask_test_cpu(this, &p->cpus_mask))
+		if (!is_cpu_allowed(p, this))
 			goto next;
 
 		if (p->core_occupation > dst->idle->core_occupation)
--
cgit v1.2.3

From 0a70045ed8516dfcff4b5728557e1ef3fd017c53 Mon Sep 17 00:00:00 2001
From: Sven Schnelle
Date: Wed, 30 Mar 2022 10:43:28 +0200
Subject: entry: Fix compile error in dynamic_irqentry_exit_cond_resched()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kernel/entry/common.c: In function ‘dynamic_irqentry_exit_cond_resched’:
kernel/entry/common.c:409:14: error: implicit declaration of function ‘static_key_unlikely’; did you mean ‘static_key_enable’? [-Werror=implicit-function-declaration]
  409 |         if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
      |              ^~~~~~~~~~~~~~~~~~~
      |              static_key_enable

static_key_unlikely() should be static_branch_unlikely().

Fixes: 99cf983cc8bca ("sched/preempt: Add PREEMPT_DYNAMIC using static keys")
Signed-off-by: Sven Schnelle
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Mark Rutland
Link: https://lore.kernel.org/r/20220330084328.1805665-1-svens@linux.ibm.com
---
 kernel/entry/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index e57a224d6b79..93c3b86e781c 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -392,7 +392,7 @@ DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
 DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
 void dynamic_irqentry_exit_cond_resched(void)
 {
-	if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
+	if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
 		return;
 	raw_irqentry_exit_cond_resched();
 }
--
cgit v1.2.3
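The error is purely a naming slip: the jump-label API tests keys with
static_branch_likely()/static_branch_unlikely() and toggles them with
static_branch_enable()/static_branch_disable(); there is no
static_key_unlikely(), hence the implicit-declaration error. A minimal
illustration of the pairing, with a hypothetical key name (the real one
is sk_dynamic_irqentry_exit_cond_resched):

	#include <linux/jump_label.h>

	DEFINE_STATIC_KEY_TRUE(example_key);	/* key starts enabled */

	static void example_hot_path(void)
	{
		/* Compiles to a patchable jump site; near-free when disabled. */
		if (!static_branch_unlikely(&example_key))
			return;
		/* ... feature work ... */
	}

	static void example_feature_off(void)
	{
		static_branch_disable(&example_key);	/* patches all test sites */
	}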