author    Mike Galbraith <umgwanakikbuti@gmail.com>  2014-06-24 07:49:40 +0200
committer Ingo Molnar <mingo@kernel.org>  2014-07-05 11:17:30 +0200
commit    4036ac1567834222fc763ab18e3e17df93b4eaaf (patch)
tree      e5063b46415b169fbfa3895d5faa4008f003f402 /kernel/sched
parent    c06f04c70489b9deea3212af8375e2f0c2f0b184 (diff)
sched: Fix clock_gettime(CLOCK_[PROCESS/THREAD]_CPUTIME_ID) monotonicity
If a task has been dequeued, it has been accounted. Do not project
cycles that may or may not ever be accounted to a dequeued task, as
that may make clock_gettime() both inaccurate and non-monotonic.

Protect update_rq_clock() from slight TSC skew while at it.

Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: kosaki.motohiro@jp.fujitsu.com
Cc: pjt@google.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1403588980.29711.11.camel@marge.simpson.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
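The bug described above is visible from userspace as CLOCK_THREAD_CPUTIME_ID appearing to run backwards between two reads. The test program below is not part of the patch; it is a minimal sketch of how one might check monotonicity of successive clock_gettime() reads on the current thread.

#include <stdio.h>
#include <time.h>

/* Convert a struct timespec to nanoseconds for easy comparison. */
static long long ts_to_ns(const struct timespec *ts)
{
	return (long long)ts->tv_sec * 1000000000LL + ts->tv_nsec;
}

int main(void)
{
	struct timespec prev, cur;
	long long i;

	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &prev))
		return 1;

	for (i = 0; i < 10000000; i++) {
		if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &cur))
			return 1;
		/* The per-thread CPU clock must never appear to go backwards. */
		if (ts_to_ns(&cur) < ts_to_ns(&prev)) {
			fprintf(stderr, "non-monotonic: %lld < %lld\n",
				ts_to_ns(&cur), ts_to_ns(&prev));
			return 1;
		}
		prev = cur;
	}
	puts("monotonic");
	return 0;
}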
Diffstat (limited to 'kernel/sched')
 kernel/sched/core.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 866d840b99ca..e50234ba0b27 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -139,6 +139,8 @@ void update_rq_clock(struct rq *rq)
 		return;
 
 	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
+	if (delta < 0)
+		return;
 	rq->clock += delta;
 	update_rq_clock_task(rq, delta);
 }
@@ -2431,7 +2433,12 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
 {
 	u64 ns = 0;
 
-	if (task_current(rq, p)) {
+	/*
+	 * Must be ->curr _and_ ->on_rq. If dequeued, we would
+	 * project cycles that may never be accounted to this
+	 * thread, breaking clock_gettime().
+	 */
+	if (task_current(rq, p) && p->on_rq) {
 		update_rq_clock(rq);
 		ns = rq_clock_task(rq) - p->se.exec_start;
 		if ((s64)ns < 0)
@@ -2474,8 +2481,10 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 	 * If we race with it leaving cpu, we'll take a lock. So we're correct.
 	 * If we race with it entering cpu, unaccounted time is 0. This is
 	 * indistinguishable from the read occurring a few cycles earlier.
+	 * If we see ->on_cpu without ->on_rq, the task is leaving, and has
+	 * been accounted, so we're correct here as well.
 	 */
-	if (!p->on_cpu)
+	if (!p->on_cpu || !p->on_rq)
 		return p->se.sum_exec_runtime;
 #endif
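The delta < 0 guard added to update_rq_clock() simply drops a sample that would move rq->clock backwards. The snippet below is not kernel code; struct fake_rq and fake_update_clock are made-up names in a self-contained sketch of the same clamping idea, showing the accumulated clock staying monotonic even when one sample reads slightly behind.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Illustrative only: accumulate a clock from raw samples, ignoring any
 * sample that would produce a negative delta, so the accumulated value
 * never decreases even if one reading is slightly skewed.
 */
struct fake_rq {
	uint64_t clock;		/* accumulated time, arbitrary units */
};

static void fake_update_clock(struct fake_rq *rq, uint64_t sample)
{
	int64_t delta = (int64_t)(sample - rq->clock);

	if (delta < 0)		/* skewed (earlier) sample: drop it */
		return;
	rq->clock += (uint64_t)delta;
}

int main(void)
{
	struct fake_rq rq = { .clock = 0 };
	/* The third sample is behind the second; the clamp leaves the
	 * clock at 250 instead of letting it move backwards. */
	uint64_t samples[] = { 100, 250, 240, 300 };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		fake_update_clock(&rq, samples[i]);
		printf("sample=%llu clock=%llu\n",
		       (unsigned long long)samples[i],
		       (unsigned long long)rq.clock);
	}
	return 0;
}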