From 1b5d43cfb69759d8ef8d30469cea31d0c037aed5 Mon Sep 17 00:00:00 2001 From: Jules Maselbas Date: Thu, 29 Mar 2018 15:43:01 +0100 Subject: sched/cpufreq/schedutil: Fix error path mutex unlock This patch prevents the 'global_tunables_lock' mutex from being unlocked before being locked. This mutex is not locked if the sugov_kthread_create() function fails. Signed-off-by: Jules Maselbas Acked-by: Peter Zijlstra Cc: Chris Redpath Cc: Dietmar Eggermann Cc: Linus Torvalds Cc: Mike Galbraith Cc: Patrick Bellasi Cc: Stephen Kyle Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: nd@arm.com Link: http://lkml.kernel.org/r/20180329144301.38419-1-jules.maselbas@arm.com Signed-off-by: Ingo Molnar --- kernel/sched/cpufreq_schedutil.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 7936f548e071..617c6741c525 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -625,10 +625,9 @@ fail: stop_kthread: sugov_kthread_stop(sg_policy); - -free_sg_policy: mutex_unlock(&global_tunables_lock); +free_sg_policy: sugov_policy_free(sg_policy); disable_fast_switch: -- cgit v1.2.3 From d29a20645d5e929aa7e8616f28e5d8e1c49263ec Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 2 Apr 2018 09:49:54 -0700 Subject: sched/rt: Fix rq->clock_update_flags < RQCF_ACT_SKIP warning While running rt-tests' pi_stress program I got the following splat: rq->clock_update_flags < RQCF_ACT_SKIP WARNING: CPU: 27 PID: 0 at kernel/sched/sched.h:960 assert_clock_updated.isra.38.part.39+0x13/0x20 [...] enqueue_top_rt_rq+0xf4/0x150 ? cpufreq_dbs_governor_start+0x170/0x170 sched_rt_rq_enqueue+0x65/0x80 sched_rt_period_timer+0x156/0x360 ? sched_rt_rq_enqueue+0x80/0x80 __hrtimer_run_queues+0xfa/0x260 hrtimer_interrupt+0xcb/0x220 smp_apic_timer_interrupt+0x62/0x120 apic_timer_interrupt+0xf/0x20 [...] do_idle+0x183/0x1e0 cpu_startup_entry+0x5f/0x70 start_secondary+0x192/0x1d0 secondary_startup_64+0xa5/0xb0 We can get rid of it be the "traditional" means of adding an update_rq_clock() call after acquiring the rq->lock in do_sched_rt_period_timer(). The case for the RT task throttling (which this workload also hits) can be ignored in that the skip_update call is actually bogus and quite the contrary (the request bits are removed/reverted). By setting RQCF_UPDATED we really don't care if the skip is happening or not and will therefore make the assert_clock_updated() check happy. Signed-off-by: Davidlohr Bueso Reviewed-by: Matt Fleming Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: linux-kernel@vger.kernel.org Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/20180402164954.16255-1-dave@stgolabs.net Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 86b77987435e..ad13e6242481 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -839,6 +839,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) continue; raw_spin_lock(&rq->lock); + update_rq_clock(rq); + if (rt_rq->rt_time) { u64 runtime; -- cgit v1.2.3 From adcc8da8859bee9548bb6d323b1e8de8a7252acd Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 4 Apr 2018 09:15:39 -0700 Subject: sched/core: Simplify helpers for rq clock update skip requests By renaming the functions we can get rid of the skip parameter and have better code redability. It makes zero sense to have things such as: rq_clock_skip_update(rq, false) When the skip request is in fact not going to happen. Ever. Rename things such that we end up with: rq_clock_skip_update(rq) rq_clock_cancel_skipupdate(rq) Signed-off-by: Davidlohr Bueso Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: matt@codeblueprint.co.uk Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/20180404161539.nhadkff2aats74jh@linux-n805 Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/rt.c | 2 +- kernel/sched/sched.h | 17 ++++++++++++----- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 28b68995a417..550a07f648b6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -874,7 +874,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) * this case, we can save a useless back to back clock update. */ if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) - rq_clock_skip_update(rq, true); + rq_clock_skip_update(rq); } #ifdef CONFIG_SMP diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index d1c7bf7c7e5b..e7b3008b85bb 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1560,7 +1560,7 @@ static void yield_task_dl(struct rq *rq) * so we don't do microscopic update in schedule() * and double the fastpath cost. */ - rq_clock_skip_update(rq, true); + rq_clock_skip_update(rq); } #ifdef CONFIG_SMP diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0951d1c58d2f..54dc31e7ab9b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7089,7 +7089,7 @@ static void yield_task_fair(struct rq *rq) * so we don't do microscopic update in schedule() * and double the fastpath cost. */ - rq_clock_skip_update(rq, true); + rq_clock_skip_update(rq); } set_skip_buddy(se); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index ad13e6242481..7aef6b4e885a 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -861,7 +861,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) * 'runtime'. */ if (rt_rq->rt_nr_running && rq->curr == rq->idle) - rq_clock_skip_update(rq, false); + rq_clock_cancel_skipupdate(rq); } if (rt_rq->rt_time || rt_rq->rt_nr_running) idle = 0; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c3deaee7a7a2..15750c222ca2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -976,13 +976,20 @@ static inline u64 rq_clock_task(struct rq *rq) return rq->clock_task; } -static inline void rq_clock_skip_update(struct rq *rq, bool skip) +static inline void rq_clock_skip_update(struct rq *rq) { lockdep_assert_held(&rq->lock); - if (skip) - rq->clock_update_flags |= RQCF_REQ_SKIP; - else - rq->clock_update_flags &= ~RQCF_REQ_SKIP; + rq->clock_update_flags |= RQCF_REQ_SKIP; +} + +/* + * See rt task throttoling, which is the only time a skip + * request is cancelled. + */ +static inline void rq_clock_cancel_skipupdate(struct rq *rq) +{ + lockdep_assert_held(&rq->lock); + rq->clock_update_flags &= ~RQCF_REQ_SKIP; } struct rq_flags { -- cgit v1.2.3 From 317d359df95dd0cb7653d09b7fc513770590cf85 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Apr 2018 10:05:21 +0200 Subject: sched/core: Force proper alignment of 'struct util_est' For some as yet not understood reason, Tony gets unaligned access traps on IA64 because of: struct util_est ue = READ_ONCE(p->se.avg.util_est); and: WRITE_ONCE(p->se.avg.util_est, ue); introduced by commit: d519329f72a6 ("sched/fair: Update util_est only on util_avg updates") Normally those two fields should end up on an 8-byte aligned location, but UP and RANDSTRUCT can mess that up so enforce the alignment explicitly. Also make the alignment on sched_avg unconditional, as it is really about data locality, not false-sharing. With or without this patch the layout for sched_avg on a ia64-defconfig build looks like: $ pahole -EC sched_avg ia64-defconfig/kernel/sched/core.o die__process_function: tag not supported (INVALID)! struct sched_avg { /* typedef u64 */ long long unsigned int last_update_time; /* 0 8 */ /* typedef u64 */ long long unsigned int load_sum; /* 8 8 */ /* typedef u64 */ long long unsigned int runnable_load_sum; /* 16 8 */ /* typedef u32 */ unsigned int util_sum; /* 24 4 */ /* typedef u32 */ unsigned int period_contrib; /* 28 4 */ long unsigned int load_avg; /* 32 8 */ long unsigned int runnable_load_avg; /* 40 8 */ long unsigned int util_avg; /* 48 8 */ struct util_est { unsigned int enqueued; /* 56 4 */ unsigned int ewma; /* 60 4 */ } util_est; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ /* size: 64, cachelines: 1, members: 9 */ }; Reported-and-Tested-by: Tony Luck Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Mel Gorman Cc: Norbert Manthey Cc: Patrick Bellasi Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Cc: Vincent Guittot Fixes: d519329f72a6 ("sched/fair: Update util_est only on util_avg updates") Link: http://lkml.kernel.org/r/20180405080521.GG4129@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index f228c6033832..b3d697f3b573 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -300,7 +300,7 @@ struct util_est { unsigned int enqueued; unsigned int ewma; #define UTIL_EST_WEIGHT_SHIFT 2 -}; +} __attribute__((__aligned__(sizeof(u64)))); /* * The load_avg/util_avg accumulates an infinite geometric series @@ -364,7 +364,7 @@ struct sched_avg { unsigned long runnable_load_avg; unsigned long util_avg; struct util_est util_est; -}; +} ____cacheline_aligned; struct sched_statistics { #ifdef CONFIG_SCHEDSTATS @@ -435,7 +435,7 @@ struct sched_entity { * Put into separate cache line so it does not * collide with read-mostly values above. */ - struct sched_avg avg ____cacheline_aligned_in_smp; + struct sched_avg avg; #endif }; -- cgit v1.2.3