From 5b206d48e58204e84d249c4eb18651a1ff7a1274 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jul 2013 19:05:14 +0200 Subject: vtime: Update a few comments Update a stale comment from the old vtime era and document some locking that might be non obvious. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- kernel/sched/cputime.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/sched/cputime.c') diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a7959e05a9d5..223a35efa0a6 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -712,6 +712,13 @@ void vtime_user_enter(struct task_struct *tsk) void vtime_guest_enter(struct task_struct *tsk) { + /* + * The flags must be updated under the lock with + * the vtime_snap flush and update. + * That enforces a right ordering and update sequence + * synchronization against the reader (task_gtime()) + * that can thus safely catch up with a tickless delta. + */ write_seqlock(&tsk->vtime_seqlock); __vtime_account_system(tsk); current->flags |= PF_VCPU; -- cgit v1.2.3 From 48d6a816a8bf36e2a197c322697323003bdc1cfe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Jul 2013 02:44:35 +0200 Subject: context_tracking: Optimize guest APIs off case with static key Optimize guest entry/exit APIs with static keys. This minimize the overhead for those who enable CONFIG_NO_HZ_FULL without always using it. Having no range passed to nohz_full= should result in the probes overhead to be minimized. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/context_tracking.h | 19 +++++++++++++++++-- kernel/context_tracking.c | 23 ++--------------------- kernel/sched/cputime.c | 2 ++ 3 files changed, 21 insertions(+), 23 deletions(-) (limited to 'kernel/sched/cputime.c') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 38ab60b3f3a6..8854eadb2142 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -95,8 +95,23 @@ static inline void context_tracking_init(void) { } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void guest_enter(void); -extern void guest_exit(void); +static inline void guest_enter(void) +{ + if (static_key_false(&context_tracking_enabled) && + vtime_accounting_enabled()) + vtime_guest_enter(current); + else + current->flags |= PF_VCPU; +} + +static inline void guest_exit(void) +{ + if (static_key_false(&context_tracking_enabled) && + vtime_accounting_enabled()) + vtime_guest_exit(current); + else + current->flags &= ~PF_VCPU; +} #else static inline void guest_enter(void) { diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 6e89e094c80e..b6a186c4b886 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -21,8 +21,10 @@ #include struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE; +EXPORT_SYMBOL_GPL(context_tracking_enabled); DEFINE_PER_CPU(struct context_tracking, context_tracking); +EXPORT_SYMBOL_GPL(context_tracking); void context_tracking_cpu_set(int cpu) { @@ -163,27 +165,6 @@ void context_tracking_user_exit(void) local_irq_restore(flags); } -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -void guest_enter(void) -{ - if (vtime_accounting_enabled()) - vtime_guest_enter(current); - else - current->flags |= PF_VCPU; -} -EXPORT_SYMBOL_GPL(guest_enter); - -void guest_exit(void) -{ - if (vtime_accounting_enabled()) - vtime_guest_exit(current); - else - current->flags &= ~PF_VCPU; -} -EXPORT_SYMBOL_GPL(guest_exit); -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ - - /** * context_tracking_task_switch - context switch the syscall callbacks * @prev: the task that is being switched out diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 223a35efa0a6..bb6b29a3067c 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -724,6 +724,7 @@ void vtime_guest_enter(struct task_struct *tsk) current->flags |= PF_VCPU; write_sequnlock(&tsk->vtime_seqlock); } +EXPORT_SYMBOL_GPL(vtime_guest_enter); void vtime_guest_exit(struct task_struct *tsk) { @@ -732,6 +733,7 @@ void vtime_guest_exit(struct task_struct *tsk) current->flags &= ~PF_VCPU; write_sequnlock(&tsk->vtime_seqlock); } +EXPORT_SYMBOL_GPL(vtime_guest_exit); void vtime_account_idle(struct task_struct *tsk) { -- cgit v1.2.3 From 7621d1f8bcb418e7a7ac583e89e38ec01b7ed182 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 13 Jul 2013 17:07:35 +0200 Subject: vtime: Remove a few unneeded generic vtime state checks Some generic vtime APIs check if the vtime accounting is enabled on the local CPU before doing their work. Some of these are not needed because all their callers already take care of that. Let's remove the checks on these. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- kernel/sched/cputime.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'kernel/sched/cputime.c') diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index bb6b29a3067c..5f273b477764 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -664,9 +664,6 @@ static void __vtime_account_system(struct task_struct *tsk) void vtime_account_system(struct task_struct *tsk) { - if (!vtime_accounting_enabled()) - return; - write_seqlock(&tsk->vtime_seqlock); __vtime_account_system(tsk); write_sequnlock(&tsk->vtime_seqlock); @@ -686,12 +683,7 @@ void vtime_account_irq_exit(struct task_struct *tsk) void vtime_account_user(struct task_struct *tsk) { - cputime_t delta_cpu; - - if (!vtime_accounting_enabled()) - return; - - delta_cpu = get_vtime_delta(tsk); + cputime_t delta_cpu = get_vtime_delta(tsk); write_seqlock(&tsk->vtime_seqlock); tsk->vtime_snap_whence = VTIME_SYS; @@ -701,9 +693,6 @@ void vtime_account_user(struct task_struct *tsk) void vtime_user_enter(struct task_struct *tsk) { - if (!vtime_accounting_enabled()) - return; - write_seqlock(&tsk->vtime_seqlock); tsk->vtime_snap_whence = VTIME_USER; __vtime_account_system(tsk); -- cgit v1.2.3 From 54461562c90e0ac104764c5a9de637fd9151a1c1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 13 Jul 2013 17:10:18 +0200 Subject: vtime: Fix racy cputime delta update get_vtime_delta() must be called under the task vtime_seqlock with the code that does the cputime accounting flush. Otherwise the cputime reader can be fooled and run into a race where it sees the snapshot update but misses the cputime flush. As a result it can report a cputime that is way too short. Fix vtime_account_user() that wasn't complying to that rule. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- kernel/sched/cputime.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/sched/cputime.c') diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5f273b477764..b62d5c027c7e 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -683,9 +683,10 @@ void vtime_account_irq_exit(struct task_struct *tsk) void vtime_account_user(struct task_struct *tsk) { - cputime_t delta_cpu = get_vtime_delta(tsk); + cputime_t delta_cpu; write_seqlock(&tsk->vtime_seqlock); + delta_cpu = get_vtime_delta(tsk); tsk->vtime_snap_whence = VTIME_SYS; account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); write_sequnlock(&tsk->vtime_seqlock); -- cgit v1.2.3 From b04934061330a4a449cfce703c97d887c3e11cd7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jul 2013 03:10:15 +0200 Subject: vtime: Optimize full dynticks accounting off case with static keys If no CPU is in the full dynticks range, we can avoid the full dynticks cputime accounting through generic vtime along with its overhead and use the traditional tick based accounting instead. Let's do this and nope the off case with static keys. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- include/linux/context_tracking.h | 6 ++-- include/linux/vtime.h | 70 ++++++++++++++++++++++++++++++++++------ kernel/sched/cputime.c | 22 +++---------- 3 files changed, 67 insertions(+), 31 deletions(-) (limited to 'kernel/sched/cputime.c') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 82ec4870e064..158158704c30 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -74,8 +74,7 @@ static inline void context_tracking_init(void) { } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN static inline void guest_enter(void) { - if (static_key_false(&context_tracking_enabled) && - vtime_accounting_enabled()) + if (vtime_accounting_enabled()) vtime_guest_enter(current); else current->flags |= PF_VCPU; @@ -83,8 +82,7 @@ static inline void guest_enter(void) static inline void guest_exit(void) { - if (static_key_false(&context_tracking_enabled) && - vtime_accounting_enabled()) + if (vtime_accounting_enabled()) vtime_guest_exit(current); else current->flags &= ~PF_VCPU; diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 2ad073915e8c..f5b72b364bda 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -1,22 +1,68 @@ #ifndef _LINUX_KERNEL_VTIME_H #define _LINUX_KERNEL_VTIME_H +#include #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include #endif + struct task_struct; +/* + * vtime_accounting_enabled() definitions/declarations + */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +static inline bool vtime_accounting_enabled(void) { return true; } +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +static inline bool vtime_accounting_enabled(void) +{ + if (static_key_false(&context_tracking_enabled)) { + if (context_tracking_active()) + return true; + } + + return false; +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ + +#ifndef CONFIG_VIRT_CPU_ACCOUNTING +static inline bool vtime_accounting_enabled(void) { return false; } +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ + + +/* + * Common vtime APIs + */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING + +#ifdef __ARCH_HAS_VTIME_TASK_SWITCH extern void vtime_task_switch(struct task_struct *prev); +#else +extern void vtime_common_task_switch(struct task_struct *prev); +static inline void vtime_task_switch(struct task_struct *prev) +{ + if (vtime_accounting_enabled()) + vtime_common_task_switch(prev); +} +#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */ + extern void vtime_account_system(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); -extern void vtime_account_irq_enter(struct task_struct *tsk); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -static inline bool vtime_accounting_enabled(void) { return true; } -#endif +#ifdef __ARCH_HAS_VTIME_ACCOUNT +extern void vtime_account_irq_enter(struct task_struct *tsk); +#else +extern void vtime_common_account_irq_enter(struct task_struct *tsk); +static inline void vtime_account_irq_enter(struct task_struct *tsk) +{ + if (vtime_accounting_enabled()) + vtime_common_account_irq_enter(tsk); +} +#endif /* __ARCH_HAS_VTIME_ACCOUNT */ #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ @@ -24,14 +70,20 @@ static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_account_irq_enter(struct task_struct *tsk) { } -static inline bool vtime_accounting_enabled(void) { return false; } -#endif +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void arch_vtime_task_switch(struct task_struct *tsk); -extern void vtime_account_irq_exit(struct task_struct *tsk); -extern bool vtime_accounting_enabled(void); +extern void vtime_gen_account_irq_exit(struct task_struct *tsk); + +static inline void vtime_account_irq_exit(struct task_struct *tsk) +{ + if (vtime_accounting_enabled()) + vtime_gen_account_irq_exit(tsk); +} + extern void vtime_user_enter(struct task_struct *tsk); + static inline void vtime_user_exit(struct task_struct *tsk) { vtime_account_user(tsk); @@ -39,7 +91,7 @@ static inline void vtime_user_exit(struct task_struct *tsk) extern void vtime_guest_enter(struct task_struct *tsk); extern void vtime_guest_exit(struct task_struct *tsk); extern void vtime_init_idle(struct task_struct *tsk, int cpu); -#else +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */ static inline void vtime_account_irq_exit(struct task_struct *tsk) { /* On hard|softirq exit we always account to hard|softirq cputime */ diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index b62d5c027c7e..0831b06aab97 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -378,11 +378,8 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_ #ifdef CONFIG_VIRT_CPU_ACCOUNTING #ifndef __ARCH_HAS_VTIME_TASK_SWITCH -void vtime_task_switch(struct task_struct *prev) +void vtime_common_task_switch(struct task_struct *prev) { - if (!vtime_accounting_enabled()) - return; - if (is_idle_task(prev)) vtime_account_idle(prev); else @@ -404,11 +401,8 @@ void vtime_task_switch(struct task_struct *prev) * vtime_account(). */ #ifndef __ARCH_HAS_VTIME_ACCOUNT -void vtime_account_irq_enter(struct task_struct *tsk) +void vtime_common_account_irq_enter(struct task_struct *tsk) { - if (!vtime_accounting_enabled()) - return; - if (!in_interrupt()) { /* * If we interrupted user, context_tracking_in_user() @@ -428,7 +422,7 @@ void vtime_account_irq_enter(struct task_struct *tsk) } vtime_account_system(tsk); } -EXPORT_SYMBOL_GPL(vtime_account_irq_enter); +EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ @@ -669,11 +663,8 @@ void vtime_account_system(struct task_struct *tsk) write_sequnlock(&tsk->vtime_seqlock); } -void vtime_account_irq_exit(struct task_struct *tsk) +void vtime_gen_account_irq_exit(struct task_struct *tsk) { - if (!vtime_accounting_enabled()) - return; - write_seqlock(&tsk->vtime_seqlock); if (context_tracking_in_user()) tsk->vtime_snap_whence = VTIME_USER; @@ -732,11 +723,6 @@ void vtime_account_idle(struct task_struct *tsk) account_idle_time(delta_cpu); } -bool vtime_accounting_enabled(void) -{ - return context_tracking_active(); -} - void arch_vtime_task_switch(struct task_struct *prev) { write_seqlock(&prev->vtime_seqlock); -- cgit v1.2.3 From b854fafa4e06c50a92e00b39d75ee62083d986d6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 13 Jul 2013 17:24:20 +0200 Subject: vtime: Always scale generic vtime accounting results The cputime accounting in full dynticks can be a subtle mixup of CPUs using tick based accounting and others using generic vtime. As long as the tick can have a share on producing these stats, we want to scale the result against CFS precise accounting as the tick can miss some task hiding between the periodic interrupt. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- kernel/sched/cputime.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel/sched/cputime.c') diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 0831b06aab97..e9e742ed7280 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -553,12 +553,6 @@ static void cputime_adjust(struct task_cputime *curr, { cputime_t rtime, stime, utime, total; - if (vtime_accounting_enabled()) { - *ut = curr->utime; - *st = curr->stime; - return; - } - stime = curr->stime; total = stime + curr->utime; -- cgit v1.2.3 From af2350bd12096dfd04e1090b90bfecea1f75f84e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 15 Jul 2013 16:35:55 +0200 Subject: vtime: Always debug check snapshot source _before_ updating it The vtime delta update performed by get_vtime_delta() always check that the source of the snapshot is valid. Meanhile the snapshot updaters that rely on get_vtime_delta() also set the new snapshot origin. But some of them do this right before the call to get_vtime_delta(), making its debug check useless. This is easily fixable by moving the snapshot origin update after the call to get_vtime_delta(). The order doesn't matter there. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Mike Galbraith Cc: Kevin Hilman --- kernel/sched/cputime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched/cputime.c') diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index e9e742ed7280..c1d7493825ae 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -660,9 +660,9 @@ void vtime_account_system(struct task_struct *tsk) void vtime_gen_account_irq_exit(struct task_struct *tsk) { write_seqlock(&tsk->vtime_seqlock); + __vtime_account_system(tsk); if (context_tracking_in_user()) tsk->vtime_snap_whence = VTIME_USER; - __vtime_account_system(tsk); write_sequnlock(&tsk->vtime_seqlock); } @@ -680,8 +680,8 @@ void vtime_account_user(struct task_struct *tsk) void vtime_user_enter(struct task_struct *tsk) { write_seqlock(&tsk->vtime_seqlock); - tsk->vtime_snap_whence = VTIME_USER; __vtime_account_system(tsk); + tsk->vtime_snap_whence = VTIME_USER; write_sequnlock(&tsk->vtime_seqlock); } -- cgit v1.2.3