From e26fd28db82899be71b4b949527373d0a6be1e65 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Thu, 12 Jan 2023 12:27:07 +0000 Subject: sched/uclamp: Fix a uninitialized variable warnings Addresses the following warnings: > config: riscv-randconfig-m031-20221111 > compiler: riscv64-linux-gcc (GCC) 12.1.0 > > smatch warnings: > kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_min'. > kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_max'. Fixes: 244226035a1f ("sched/uclamp: Fix fits_capacity() check in feec()") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Qais Yousef (Google) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20230112122708.330667-2-qyousef@layalina.io --- kernel/sched/fair.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c36aa54ae071..be43731b147d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7229,10 +7229,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) eenv_task_busy_time(&eenv, p, prev_cpu); for (; pd; pd = pd->next) { + unsigned long util_min = p_util_min, util_max = p_util_max; unsigned long cpu_cap, cpu_thermal_cap, util; unsigned long cur_delta, max_spare_cap = 0; unsigned long rq_util_min, rq_util_max; - unsigned long util_min, util_max; unsigned long prev_spare_cap = 0; int max_spare_cap_cpu = -1; unsigned long base_energy; @@ -7251,6 +7251,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) eenv.pd_cap = 0; for_each_cpu(cpu, cpus) { + struct rq *rq = cpu_rq(cpu); + eenv.pd_cap += cpu_thermal_cap; if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) @@ -7269,24 +7271,19 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) * much capacity we can get out of the CPU; this is * aligned with sched_cpu_util(). */ - if (uclamp_is_used()) { - if (uclamp_rq_is_idle(cpu_rq(cpu))) { - util_min = p_util_min; - util_max = p_util_max; - } else { - /* - * Open code uclamp_rq_util_with() except for - * the clamp() part. Ie: apply max aggregation - * only. util_fits_cpu() logic requires to - * operate on non clamped util but must use the - * max-aggregated uclamp_{min, max}. - */ - rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); - rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); - - util_min = max(rq_util_min, p_util_min); - util_max = max(rq_util_max, p_util_max); - } + if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) { + /* + * Open code uclamp_rq_util_with() except for + * the clamp() part. Ie: apply max aggregation + * only. util_fits_cpu() logic requires to + * operate on non clamped util but must use the + * max-aggregated uclamp_{min, max}. + */ + rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN); + rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX); + + util_min = max(rq_util_min, p_util_min); + util_max = max(rq_util_max, p_util_max); } if (!util_fits_cpu(util, util_min, util_max, cpu)) continue; -- cgit v1.2.3 From da07d2f9c153e457e845d4dcfdd13568d71d18a4 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Thu, 12 Jan 2023 12:27:08 +0000 Subject: sched/fair: Fixes for capacity inversion detection Traversing the Perf Domains requires rcu_read_lock() to be held and is conditional on sched_energy_enabled(). Ensure right protections applied. Also skip capacity inversion detection for our own pd; which was an error. Fixes: 44c7b80bffc3 ("sched/fair: Detect capacity inversion") Reported-by: Dietmar Eggemann Signed-off-by: Qais Yousef (Google) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20230112122708.330667-3-qyousef@layalina.io --- kernel/sched/fair.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index be43731b147d..0f8736991427 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8868,16 +8868,23 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) * * Thermal pressure will impact all cpus in this perf domain * equally. */ - if (static_branch_unlikely(&sched_asym_cpucapacity)) { + if (sched_energy_enabled()) { unsigned long inv_cap = capacity_orig - thermal_load_avg(rq); - struct perf_domain *pd = rcu_dereference(rq->rd->pd); + struct perf_domain *pd; + + rcu_read_lock(); + pd = rcu_dereference(rq->rd->pd); rq->cpu_capacity_inverted = 0; for (; pd; pd = pd->next) { struct cpumask *pd_span = perf_domain_span(pd); unsigned long pd_cap_orig, pd_cap; + /* We can't be inverted against our own pd */ + if (cpumask_test_cpu(cpu_of(rq), pd_span)) + continue; + cpu = cpumask_any(pd_span); pd_cap_orig = arch_scale_cpu_capacity(cpu); @@ -8902,6 +8909,8 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) break; } } + + rcu_read_unlock(); } trace_sched_cpu_capacity_tp(rq); -- cgit v1.2.3 From 5657c116783545fb49cd7004994c187128552b12 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 15 Jan 2023 14:31:22 -0500 Subject: sched/core: Fix NULL pointer access fault in sched_setaffinity() with non-SMP configs The kernel commit 9a5418bc48ba ("sched/core: Use kfree_rcu() in do_set_cpus_allowed()") introduces a bug for kernels built with non-SMP configs. Calling sched_setaffinity() on such a uniprocessor kernel will cause cpumask_copy() to be called with a NULL pointer leading to general protection fault. This is not really a problem in real use cases as there aren't that many uniprocessor kernel configs in use and calling sched_setaffinity() on such a uniprocessor system doesn't make sense. Fix this problem by making sure cpumask_copy() will not be called in such a case. Fixes: 9a5418bc48ba ("sched/core: Use kfree_rcu() in do_set_cpus_allowed()") Reported-by: kernel test robot Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230115193122.563036-1-longman@redhat.com --- kernel/sched/core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bb1ee6d7bdde..e838feb6adc5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8290,12 +8290,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) if (retval) goto out_put_task; + /* + * With non-SMP configs, user_cpus_ptr/user_mask isn't used and + * alloc_user_cpus_ptr() returns NULL. + */ user_mask = alloc_user_cpus_ptr(NUMA_NO_NODE); - if (IS_ENABLED(CONFIG_SMP) && !user_mask) { + if (user_mask) { + cpumask_copy(user_mask, in_mask); + } else if (IS_ENABLED(CONFIG_SMP)) { retval = -ENOMEM; goto out_put_task; } - cpumask_copy(user_mask, in_mask); + ac = (struct affinity_context){ .new_mask = in_mask, .user_mask = user_mask, -- cgit v1.2.3 From 5f5cc9ed992cbab6361f198966f0edba5fc52688 Mon Sep 17 00:00:00 2001 From: Yair Podemsky Date: Tue, 10 Jan 2023 18:02:06 +0200 Subject: x86/aperfmperf: Erase stale arch_freq_scale values when disabling frequency invariance readings Once disable_freq_invariance_work is called the scale_freq_tick function will not compute or update the arch_freq_scale values. However the scheduler will still read these values and use them. The result is that the scheduler might perform unfair decisions based on stale values. This patch adds the step of setting the arch_freq_scale values for all cpus to the default (max) value SCHED_CAPACITY_SCALE, Once all cpus have the same arch_freq_scale value the scaling is meaningless. Signed-off-by: Yair Podemsky Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230110160206.75912-1-ypodemsk@redhat.com --- arch/x86/kernel/cpu/aperfmperf.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 1f60a2b27936..fdbb5f07448f 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -330,7 +330,16 @@ static void __init bp_init_freq_invariance(void) static void disable_freq_invariance_workfn(struct work_struct *work) { + int cpu; + static_branch_disable(&arch_scale_freq_key); + + /* + * Set arch_freq_scale to a default value on all cpus + * This negates the effect of scaling + */ + for_each_possible_cpu(cpu) + per_cpu(arch_freq_scale, cpu) = SCHED_CAPACITY_SCALE; } static DECLARE_WORK(disable_freq_invariance_work, -- cgit v1.2.3