From dee08a72deefac251267ed2717717596aa8b6818 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 17:02:22 +0100 Subject: cputime: Fix jiffies based cputime assumption on steal accounting The steal guest time accounting code assumes that cputime_t is based on jiffies. So when CONFIG_NO_HZ_FULL=y, which implies that cputime_t is based on nsecs, steal_account_process_tick() passes the delta in jiffies to account_steal_time() which then accounts it as if it's a value in nsecs. As a result, accounting 1 second of steal time (with HZ=100 that would be 100 jiffies) is spuriously accounted as 100 nsecs. As such /proc/stat may report 0 values of steal time even when two guests have run concurrently for a few seconds on the same host and same CPU. In order to fix this, let's convert the nsecs based steal delta to cputime instead of jiffies by using the right conversion API. Given that the steal time is stored in cputime_t and this type can have a coarser granularity than nsecs, we only account the rounded converted value and leave the remaining nsecs for the next deltas. 
Reported-by: Huiqingding Reported-by: Marcelo Tosatti Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker --- kernel/sched/cputime.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 99947919e30b..c91b09770ebd 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -258,16 +258,22 @@ static __always_inline bool steal_account_process_tick(void) { #ifdef CONFIG_PARAVIRT if (static_key_false(&paravirt_steal_enabled)) { - u64 steal, st = 0; + u64 steal; + cputime_t steal_ct; steal = paravirt_steal_clock(smp_processor_id()); steal -= this_rq()->prev_steal_time; - st = steal_ticks(steal); - this_rq()->prev_steal_time += st * TICK_NSEC; + /* + * cputime_t may be less precise than nsecs (eg: if it's + * based on jiffies). Lets cast the result to cputime + * granularity and account the rest on the next rounds. + */ + steal_ct = nsecs_to_cputime(steal); + this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct); - account_steal_time(st); - return st; + account_steal_time(steal_ct); + return steal_ct; } #endif return false; -- cgit v1.2.3 From 300a9d887ea221f344962506f724e02101bacc08 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 17:05:57 +0100 Subject: sched: Remove needless round trip nsecs <-> tick conversion of steal time When update_rq_clock_task() accounts the pending steal time for a task, it converts the steal delta from nsecs to tick then from tick to nsecs. There is no apparent good reason for doing that though because both the task clock and the prev steal delta are u64 and store values in nsecs. So lets remove the needless conversion. 
Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker --- kernel/sched/core.c | 6 ------ kernel/sched/sched.h | 10 ---------- 2 files changed, 16 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b46131ef6aab..b14a188af898 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -823,19 +823,13 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING if (static_key_false((&paravirt_steal_rq_enabled))) { - u64 st; - steal = paravirt_steal_clock(cpu_of(rq)); steal -= rq->prev_steal_time_rq; if (unlikely(steal > delta)) steal = delta; - st = steal_ticks(steal); - steal = st * TICK_NSEC; - rq->prev_steal_time_rq += steal; - delta -= steal; } #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c2119fd20f8b..5ec991010122 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1214,16 +1214,6 @@ extern void update_idle_cpu_load(struct rq *this_rq); extern void init_task_runnable_average(struct task_struct *p); -#ifdef CONFIG_PARAVIRT -static inline u64 steal_ticks(u64 steal) -{ - if (unlikely(steal > NSEC_PER_SEC)) - return div_u64(steal, TICK_NSEC); - - return __iter_div_u64_rem(steal, TICK_NSEC, &steal); -} -#endif - static inline void inc_nr_running(struct rq *rq) { rq->nr_running++; -- cgit v1.2.3