From e8fcaa5c54e3b0371230e5d43a6f650c667da9c5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 7 Aug 2013 22:28:01 +0200 Subject: nohz: Convert a few places to use local per cpu accesses A few functions use remote per CPU access APIs when they deal with local values. Just do the right conversion to improve performance, code readability and debug checks. While at it, let's extend some of these function names with a *_this_cpu() suffix in order to display their purpose more clearly. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Steven Rostedt --- include/linux/tick.h | 6 +++--- kernel/softirq.c | 4 +--- kernel/time/tick-broadcast.c | 6 +++--- kernel/time/tick-internal.h | 4 ++-- kernel/time/tick-sched.c | 39 ++++++++++++++++----------------------- 5 files changed, 25 insertions(+), 34 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 5128d33bbb39..a004f66a6cf0 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -104,7 +104,7 @@ extern struct cpumask *tick_get_broadcast_oneshot_mask(void); extern void tick_clock_notify(void); extern int tick_check_oneshot_change(int allow_nohz); extern struct tick_sched *tick_get_tick_sched(int cpu); -extern void tick_check_idle(int cpu); +extern void tick_check_idle(void); extern int tick_oneshot_mode_active(void); # ifndef arch_needs_cpu # define arch_needs_cpu(cpu) (0) @@ -112,7 +112,7 @@ extern int tick_oneshot_mode_active(void); # else static inline void tick_clock_notify(void) { } static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } -static inline void tick_check_idle(int cpu) { } +static inline void tick_check_idle(void) { } static inline int tick_oneshot_mode_active(void) { return 0; } # endif @@ -121,7 +121,7 @@ static inline void tick_init(void) { } static inline void tick_cancel_sched_timer(int cpu) { } static inline void tick_clock_notify(void) { } static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } -static inline void tick_check_idle(int cpu) { } +static inline void tick_check_idle(void) { } static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ diff --git a/kernel/softirq.c b/kernel/softirq.c index 11025ccc06dd..11348de09400 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -311,8 +311,6 @@ asmlinkage void do_softirq(void) */ void irq_enter(void) { - int cpu = smp_processor_id(); - rcu_irq_enter(); if (is_idle_task(current) && !in_interrupt()) { /* @@ -320,7 +318,7 @@ void irq_enter(void) * here, as softirq will be serviced on return from interrupt. */ local_bh_disable(); - tick_check_idle(cpu); + tick_check_idle(); _local_bh_enable(); } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 9532690daaa9..43780ab5e279 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -538,10 +538,10 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc) * Called from irq_enter() when idle was interrupted to reenable the * per cpu device.
*/ -void tick_check_oneshot_broadcast(int cpu) +void tick_check_oneshot_broadcast_this_cpu(void) { - if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) { - struct tick_device *td = &per_cpu(tick_cpu_device, cpu); + if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) { + struct tick_device *td = &__get_cpu_var(tick_cpu_device); /* * We might be in the middle of switching over from diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 18e71f7fbc2a..e2bced59b6dd 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -51,7 +51,7 @@ extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_active(void); -extern void tick_check_oneshot_broadcast(int cpu); +extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); # else /* BROADCAST */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) @@ -62,7 +62,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } -static inline void tick_check_oneshot_broadcast(int cpu) { } +static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return true; } # endif /* !BROADCAST */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3612fc77f834..2afd43fca93b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -391,11 +391,9 @@ __setup("nohz=", setup_tick_nohz); */ static void tick_nohz_update_jiffies(ktime_t now) { - int cpu = smp_processor_id(); - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long flags; - ts->idle_waketime = now; + __this_cpu_write(tick_cpu_sched.idle_waketime, now); local_irq_save(flags); tick_do_update_jiffies64(now); @@ -426,17 +424,15 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda } -static void tick_nohz_stop_idle(int cpu, ktime_t now) +static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now) { - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - - update_ts_time_stats(cpu, ts, now, NULL); + update_ts_time_stats(smp_processor_id(), ts, now, NULL); ts->idle_active = 0; sched_clock_idle_wakeup_event(0); } -static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) +static ktime_t tick_nohz_start_idle(struct tick_sched *ts) { ktime_t now = ktime_get(); @@ -752,7 +748,7 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts) ktime_t now, expires; int cpu = smp_processor_id(); - now = tick_nohz_start_idle(cpu, ts); + now = tick_nohz_start_idle(ts); if (can_stop_idle_tick(cpu, ts)) { int was_stopped = ts->tick_stopped; @@ -914,8 +910,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts) */ void tick_nohz_idle_exit(void) { - int cpu = smp_processor_id(); - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); ktime_t now; local_irq_disable(); @@ -928,7 +923,7 @@ void tick_nohz_idle_exit(void) now = ktime_get(); if (ts->idle_active) - tick_nohz_stop_idle(cpu, now); + tick_nohz_stop_idle(ts, now); if (ts->tick_stopped) { tick_nohz_restart_sched_tick(ts, now); 
@@ -1012,12 +1007,10 @@ static void tick_nohz_switch_to_nohz(void) * timer and do not touch the other magic bits which need to be done * when idle is left. */ -static void tick_nohz_kick_tick(int cpu, ktime_t now) +static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now) { #if 0 /* Switch back to 2.6.27 behaviour */ - - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t delta; /* @@ -1032,36 +1025,36 @@ static void tick_nohz_kick_tick(int cpu, ktime_t now) #endif } -static inline void tick_check_nohz(int cpu) +static inline void tick_check_nohz_this_cpu(void) { - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); ktime_t now; if (!ts->idle_active && !ts->tick_stopped) return; now = ktime_get(); if (ts->idle_active) - tick_nohz_stop_idle(cpu, now); + tick_nohz_stop_idle(ts, now); if (ts->tick_stopped) { tick_nohz_update_jiffies(now); - tick_nohz_kick_tick(cpu, now); + tick_nohz_kick_tick(ts, now); } } #else static inline void tick_nohz_switch_to_nohz(void) { } -static inline void tick_check_nohz(int cpu) { } +static inline void tick_check_nohz_this_cpu(void) { } #endif /* CONFIG_NO_HZ_COMMON */ /* * Called from irq_enter to notify about the possible interruption of idle() */ -void tick_check_idle(int cpu) +void tick_check_idle(void) { - tick_check_oneshot_broadcast(cpu); - tick_check_nohz(cpu); + tick_check_oneshot_broadcast_this_cpu(); + tick_check_nohz_this_cpu(); } /* -- cgit v1.2.3 From 99c8b1ea0972be82ce1842d830e0173e70907065 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 24 Oct 2013 10:07:47 -0400 Subject: trivial: fix spelling in CONTEXT_TRACKING_FORCE help text Signed-off-by: Paul Gortmaker Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Steven Rostedt --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 79383d3aa5dc..12d61f82e5f7 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -532,7 +532,7 @@ config CONTEXT_TRACKING_FORCE dynticks subsystem by forcing the context tracking on all CPUs in the system. - Say Y only if you're working on the developpement of an + Say Y only if you're working on the development of an architecture backend for the context tracking. Say N otherwise, this option brings an overhead that you -- cgit v1.2.3 From 58135f574f1b791c926622387780ed3d090116d6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 6 Nov 2013 14:45:57 +0100 Subject: context_tracking: Wrap static key check into more intuitive function name Use a function with a meaningful name to check the global context tracking state. static_key_false() is a bit confusing for reviewers. 
Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Steven Rostedt --- include/linux/context_tracking.h | 10 +++++----- include/linux/context_tracking_state.h | 4 ++++ include/linux/tick.h | 2 +- include/linux/vtime.h | 2 +- kernel/context_tracking.c | 8 ++++---- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 158158704c30..37b81bd51ec0 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -17,13 +17,13 @@ extern void __context_tracking_task_switch(struct task_struct *prev, static inline void user_enter(void) { - if (static_key_false(&context_tracking_enabled)) + if (context_tracking_is_enabled()) context_tracking_user_enter(); } static inline void user_exit(void) { - if (static_key_false(&context_tracking_enabled)) + if (context_tracking_is_enabled()) context_tracking_user_exit(); } @@ -31,7 +31,7 @@ static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; - if (!static_key_false(&context_tracking_enabled)) + if (!context_tracking_is_enabled()) return 0; prev_ctx = this_cpu_read(context_tracking.state); @@ -42,7 +42,7 @@ static inline enum ctx_state exception_enter(void) static inline void exception_exit(enum ctx_state prev_ctx) { - if (static_key_false(&context_tracking_enabled)) { + if (context_tracking_is_enabled()) { if (prev_ctx == IN_USER) context_tracking_user_enter(); } @@ -51,7 +51,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) static inline void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next) { - if (static_key_false(&context_tracking_enabled)) + if (context_tracking_is_enabled()) __context_tracking_task_switch(prev, next); } #else diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 0f1979d0674f..0db535b79be7 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -22,6 +22,10 @@ struct context_tracking { extern struct static_key context_tracking_enabled; DECLARE_PER_CPU(struct context_tracking, context_tracking); +static inline bool context_tracking_is_enabled(void) +{ + return static_key_false(&context_tracking_enabled); +} static inline bool context_tracking_in_user(void) { return __this_cpu_read(context_tracking.state) == IN_USER; diff --git a/include/linux/tick.h b/include/linux/tick.h index a004f66a6cf0..0175d8663b6c 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -165,7 +165,7 @@ extern cpumask_var_t tick_nohz_full_mask; static inline bool tick_nohz_full_enabled(void) { - if (!static_key_false(&context_tracking_enabled)) + if (!context_tracking_is_enabled()) return false; return tick_nohz_full_running; diff --git a/include/linux/vtime.h b/include/linux/vtime.h index f5b72b364bda..807c732cbf29 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -19,7 +19,7 @@ static inline bool vtime_accounting_enabled(void) { return true; } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN static inline bool vtime_accounting_enabled(void) { - if (static_key_false(&context_tracking_enabled)) { + if (context_tracking_is_enabled()) { if (context_tracking_active()) return true; } diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index e5f3917aa05b..6cb20d2e7ee0 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -53,10 +53,10 @@ void context_tracking_user_enter(void) /* * Repeat the user_enter() 
check here because some archs may be calling * this from asm and if no CPU needs context tracking, they shouldn't - * go further. Repeat the check here until they support the static key - * check. + * go further. Repeat the check here until they support the inline static + * key check. */ - if (!static_key_false(&context_tracking_enabled)) + if (!context_tracking_is_enabled()) return; /* @@ -160,7 +160,7 @@ void context_tracking_user_exit(void) { unsigned long flags; - if (!static_key_false(&context_tracking_enabled)) + if (!context_tracking_is_enabled()) return; if (in_interrupt()) -- cgit v1.2.3 From d0df09ebfc126b23c1005f98ddecc9907f9c5d25 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 6 Nov 2013 15:11:57 +0100 Subject: context_tracking: Rename context_tracking_active() to context_tracking_cpu_is_enabled() We currently have a confusingly named pair of APIs: the existing context_tracking_active() and context_tracking_is_enabled(). Let's keep the latter, context_tracking_is_enabled(), for the global context tracking state check and use context_tracking_cpu_is_enabled() for the local state check. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Steven Rostedt --- include/linux/context_tracking_state.h | 9 +++++---- include/linux/vtime.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 0db535b79be7..97a81225d037 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -26,14 +26,15 @@ static inline bool context_tracking_is_enabled(void) { return static_key_false(&context_tracking_enabled); } -static inline bool context_tracking_in_user(void) + +static inline bool context_tracking_cpu_is_enabled(void) { - return __this_cpu_read(context_tracking.state) == IN_USER; + return __this_cpu_read(context_tracking.active); } -static inline bool context_tracking_active(void) +static inline bool context_tracking_in_user(void) { - return __this_cpu_read(context_tracking.active); + return __this_cpu_read(context_tracking.state) == IN_USER; } #else static inline bool context_tracking_in_user(void) { return false; } diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 807c732cbf29..c5165fd256f9 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -20,7 +20,7 @@ static inline bool vtime_accounting_enabled(void) { return true; } static inline bool vtime_accounting_enabled(void) { if (context_tracking_is_enabled()) { - if (context_tracking_active()) + if (context_tracking_cpu_is_enabled()) return true; } -- cgit v1.2.3 From d4283c654130c3d01b6842d3821dbdc3c15ceb46 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 6 Nov 2013 15:42:04 +0100 Subject: posix-timers: Spare workqueue if there is no full dynticks CPU to kick After a posix cpu timer is set, a workqueue is scheduled in order to kick the full dynticks CPUs and let them restart their tick if necessary in case the task they are running is affected by the new timer. This kick is implemented by way of IPIs, which require interrupts to be enabled, hence the need for a workqueue to raise them because the posix cpu timer set path has interrupts disabled. Now if there is no full dynticks CPU on the system, the workqueue is still scheduled but it simply won't send any IPI and will return immediately. So let's spare that workqueue when it is not needed.
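For illustration, the constraint described above boils down to the following pattern (a minimal sketch with hypothetical names, not the actual kernel code):

	/* IPIs (e.g. via smp_call_function()) must not be sent with IRQs
	 * disabled, so the timer-set path bounces the kick to process
	 * context through a workqueue. */
	#include <linux/smp.h>
	#include <linux/workqueue.h>

	static void kick_ipi_func(void *info) { }	/* hypothetical IPI callback */

	static void kick_work_fn(struct work_struct *work)
	{
		smp_call_function(kick_ipi_func, NULL, 1);	/* IRQs enabled here */
	}
	static DECLARE_WORK(kick_work, kick_work_fn);

	static void kick_nohz(void)	/* may be called with IRQs disabled */
	{
		schedule_work(&kick_work);
	}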
Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Steven Rostedt --- kernel/posix-cpu-timers.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index c7f31aa272f7..35509c5a3ffb 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -608,7 +608,8 @@ static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn); */ static void posix_cpu_timer_kick_nohz(void) { - schedule_work(&nohz_kick_work); + if (context_tracking_is_enabled()) + schedule_work(&nohz_kick_work); } bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk) -- cgit v1.2.3 From c925077c33fc9a546e7cf6c3be2adf4a2afe2608 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 6 Nov 2013 17:18:30 +0100 Subject: posix-timers: Fix full dynticks CPUs kick on timer rescheduling A posix CPU timer can be rearmed while it is firing or after it is notified with a signal. This can happen for example with timers that were set with a non-zero interval in timer_settime(). This rearming can happen in two places: 1) On timer firing time, which happens on the target's tick. If the timer can't trigger a signal because it is ignored, it reschedules itself to honour the timer interval. 2) On signal handling from the timer's notification target. This one can be a different task than the timer's target itself. Once the signal is notified, the notification target rearms the timer, again to honour the timer interval. When a timer is rearmed, we need to notify the full dynticks CPUs such that they restart their tick in case they are running tasks that may have a share in elapsing this timer. Now the first case above handles full dynticks CPUs with a call to posix_cpu_timer_kick_nohz() from the posix cpu timer firing code. But the second case ignores the fact that some CPUs may run non-idle tasks with their tick off. As a result, when a timer is rescheduled after its signal notification, the full dynticks CPUs may completely ignore it and not tick on the timer as expected. This patch fixes this bug by handling both cases in one place. All we need is to move the kick to the rearming common code in posix_cpu_timer_schedule().
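For reference, here is a minimal userspace sketch of the case described above, a POSIX CPU timer armed with a non-zero interval (error handling elided; names are illustrative):

	#include <signal.h>
	#include <time.h>

	static void arm_interval_cpu_timer(void)
	{
		struct sigevent sev = {
			.sigev_notify = SIGEV_SIGNAL,
			.sigev_signo  = SIGRTMIN,
		};
		/* Non-zero it_interval: the timer is rearmed after each firing */
		struct itimerspec its = {
			.it_value    = { .tv_sec = 1 },
			.it_interval = { .tv_sec = 1 },
		};
		timer_t tid;

		timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &tid);
		timer_settime(tid, 0, &its, NULL);
	}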
Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Olivier Langlois --- kernel/posix-cpu-timers.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 35509c5a3ffb..79747b7d9420 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1091,7 +1091,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) put_task_struct(p); timer->it.cpu.task = p = NULL; timer->it.cpu.expires = 0; - goto out_unlock; + read_unlock(&tasklist_lock); + goto out; } else if (unlikely(p->exit_state) && thread_group_empty(p)) { /* * We've noticed that the thread is dead, but @@ -1100,7 +1101,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) */ cpu_timer_sample_group(timer->it_clock, p, &now); clear_dead_task(timer, now); - goto out_unlock; + read_unlock(&tasklist_lock); + goto out; } spin_lock(&p->sighand->siglock); cpu_timer_sample_group(timer->it_clock, p, &now); @@ -1114,10 +1116,11 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) BUG_ON(!irqs_disabled()); arm_timer(timer); spin_unlock(&p->sighand->siglock); - -out_unlock: read_unlock(&tasklist_lock); + /* Kick full dynticks CPUs in case they need to tick on the new timer */ + posix_cpu_timer_kick_nohz(); + out: timer->it_overrun_last = timer->it_overrun; timer->it_overrun = -1; @@ -1257,13 +1260,6 @@ void run_posix_cpu_timers(struct task_struct *tsk) cpu_timer_fire(timer); spin_unlock(&timer->it_lock); } - - /* - * In case some timers were rescheduled after the queue got emptied, - * wake up full dynticks CPUs. - */ - if (tsk->signal->cputimer.running) - posix_cpu_timer_kick_nohz(); } /* -- cgit v1.2.3 From 724a371396e8162cc883a40cd8e525dfc7e5f3ff Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 10 Oct 2013 16:55:57 +0200 Subject: posix-timers: Remove dead thread posix cpu timers caching When a task is exiting or has exited, its posix cpu timers don't tick anymore and won't elapse further. It's too late for them to expire. So any further call to timer_gettime() on these timers will return the same remaining expiry time. The current code optimizes this by caching the remaining delta and storing it where we used to save the absolute expiration time. This way, the future calls to timer_gettime() won't need to compute the difference between the absolute expiration time and the current time anymore. Now this optimization doesn't seem to bring much value. Computing the timer remaining delta is not very costly. Fetching the timer value OTOH can be costly in two ways: * A CPUCLOCK_SCHED read requires locking the target's rq. But some optimizations are on the way to make task_sched_runtime() not hold the rq lock of a non-running target. * A CPUCLOCK_VIRT/CPUCLOCK_PROF read simply consists of fetching current->utime/current->stime except when the system uses full dynticks cputime accounting. The latter requires a per task lock in order to correctly compute user and system time. But once the target is dead, this lock shouldn't be contended anyway. All in all, this caching doesn't seem to be justified. Given that it complicates the code significantly for little gain, let's remove it on single thread timers.
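For reference, the remaining-delta computation that the removed cache was avoiding is just a clamped subtraction (a sketch, not the exact kernel code):

	/* remaining = expires - now, clamped at zero once the timer is past due */
	static inline unsigned long long
	timer_remaining(unsigned long long expires, unsigned long long now)
	{
		return expires > now ? expires - now : 0;
	}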
Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Kosaki Motohiro Cc: Andrew Morton --- kernel/posix-cpu-timers.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 79747b7d9420..3b7df8653913 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -788,7 +788,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) { unsigned long long now; struct task_struct *p = timer->it.cpu.task; - int clear_dead; /* * Easy part: convert the reload time. */ @@ -802,6 +801,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) } if (unlikely(p == NULL)) { + WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock)); /* * This task already died and the timer will never fire. * In this case, expires is actually the dead value. */ @@ -817,7 +817,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) */ if (CPUCLOCK_PERTHREAD(timer->it_clock)) { cpu_clock_sample(timer->it_clock, p, &now); - clear_dead = p->exit_state; } else { read_lock(&tasklist_lock); if (unlikely(p->sighand == NULL)) { @@ -833,22 +832,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) goto dead; } else { cpu_timer_sample_group(timer->it_clock, p, &now); - clear_dead = (unlikely(p->exit_state) && - thread_group_empty(p)); + if (unlikely(p->exit_state) && thread_group_empty(p)) { + read_unlock(&tasklist_lock); + /* + * We've noticed that the thread is dead, but + * not yet reaped. Take this opportunity to + * drop our task ref. + */ + clear_dead_task(timer, now); + goto dead; + } } read_unlock(&tasklist_lock); } - if (unlikely(clear_dead)) { - /* - * We've noticed that the thread is dead, but - * not yet reaped. Take this opportunity to - * drop our task ref. - */ - clear_dead_task(timer, now); - goto dead; - } - if (now < timer->it.cpu.expires) { sample_to_timespec(timer->it_clock, timer->it.cpu.expires - now, @@ -1063,11 +1060,13 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) struct task_struct *p = timer->it.cpu.task; unsigned long long now; - if (unlikely(p == NULL)) + if (unlikely(p == NULL)) { + WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock)); /* * The task was cleaned up already, no future firings. */ goto out; + } /* * Fetch the current sample and update the timer's expiry time. */ @@ -1075,10 +1074,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) if (CPUCLOCK_PERTHREAD(timer->it_clock)) { cpu_clock_sample(timer->it_clock, p, &now); bump_cpu_timer(timer, now); - if (unlikely(p->exit_state)) { - clear_dead_task(timer, now); + if (unlikely(p->exit_state)) goto out; - } + read_lock(&tasklist_lock); /* arm_timer needs it. */ spin_lock(&p->sighand->siglock); } else { -- cgit v1.2.3 From d430b9173a9a50a83e10d1c70baead3e625b522f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 10 Oct 2013 16:55:57 +0200 Subject: posix-timers: Remove dead process posix cpu timers caching Now that we removed dead thread posix cpu timers caching, let's remove the dead process wide version. This caching is similar to the per thread version but it should be even more rare: * If the process is dead, we are not reading its timers' status from a thread belonging to its group since they are all dead. So this caching only concerns remote process timer reads.
Now posix cpu timers using itimers or timer_settime() can't do remote process timers anyway, so it's not even clear if there is actually a user for this caching. * Unlike per thread timers caching, this only applies to zombie targets. Buried targets' process wide timers return 0 values. But then again, timer_gettime() can't read remote process timers, so if the process is dead, there can't be any reader left anyway. Then again, this caching seems to complicate the code for corner cases that are probably not worth it. So let's get rid of it. Also remove the sample snapshot on the dying process timer that is now useless, as suggested by Kosaki. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Kosaki Motohiro Cc: Andrew Morton --- kernel/posix-cpu-timers.c | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 3b7df8653913..c5d1ef530268 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -453,23 +453,6 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk) tsk->se.sum_exec_runtime + sig->sum_sched_runtime); } -static void clear_dead_task(struct k_itimer *itimer, unsigned long long now) -{ - struct cpu_timer_list *timer = &itimer->it.cpu; - - /* - * That's all for this thread or process. - * We leave our residual in expires to be reported. - */ - put_task_struct(timer->task); - timer->task = NULL; - if (timer->expires < now) { - timer->expires = 0; - } else { - timer->expires -= now; - } -} - static inline int expires_gt(cputime_t expires, cputime_t new_exp) { return expires == 0 || expires > new_exp; @@ -832,16 +815,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) goto dead; } else { cpu_timer_sample_group(timer->it_clock, p, &now); - if (unlikely(p->exit_state) && thread_group_empty(p)) { - read_unlock(&tasklist_lock); - /* - * We've noticed that the thread is dead, but - * not yet reaped. Take this opportunity to - * drop our task ref. - */ - clear_dead_task(timer, now); - goto dead; - } } read_unlock(&tasklist_lock); } @@ -1092,14 +1065,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) read_unlock(&tasklist_lock); goto out; } else if (unlikely(p->exit_state) && thread_group_empty(p)) { - /* - * We've noticed that the thread is dead, but - * not yet reaped. Take this opportunity to - * drop our task ref. - */ - cpu_timer_sample_group(timer->it_clock, p, &now); - clear_dead_task(timer, now); read_unlock(&tasklist_lock); + /* Optimizations: if the process is dying, no need to rearm */ goto out; } spin_lock(&p->sighand->siglock); -- cgit v1.2.3 From e26d70d271ee1a68a925796b411cb0239394c7a1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 11 Oct 2013 00:27:19 +0200 Subject: posix-timers: Cleanup reaped target handling When a timer's target is seen to be buried, for example on calls to timer_gettime(), the posix cpu timers code behaves a bit like a garbage collector and releases the reference to the task early. Then again, this optimization complicates the code for not much value: it's up to the user to release the timer and its associated resources by calling timer_delete() after the target task is buried. Remove this to simplify the code.
Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Kosaki Motohiro Cc: Andrew Morton --- kernel/posix-cpu-timers.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index c5d1ef530268..dc4355b967db 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -639,8 +639,6 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, */ if (unlikely(p->sighand == NULL)) { read_unlock(&tasklist_lock); - put_task_struct(p); - timer->it.cpu.task = NULL; return -ESRCH; } @@ -808,8 +806,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) * We can't even collect a sample any more. * Call the timer disarmed, nothing else to do. */ - put_task_struct(p); - timer->it.cpu.task = NULL; timer->it.cpu.expires = 0; read_unlock(&tasklist_lock); goto dead; @@ -1059,8 +1055,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) * The process has been reaped. * We can't even collect a sample any more. */ - put_task_struct(p); - timer->it.cpu.task = p = NULL; timer->it.cpu.expires = 0; read_unlock(&tasklist_lock); goto out; -- cgit v1.2.3 From a3222f88fa4f2ebec4632aef527dd2c9a41b997d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 11 Oct 2013 00:37:39 +0200 Subject: posix-timers: Remove dead task special case Now that we've removed all the optimizations that could result in NULL timer targets, we can remove all the associated special case handling. Also add some warnings on NULL targets to spot any possible leftovers. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Kosaki Motohiro Cc: Andrew Morton --- kernel/posix-cpu-timers.c | 70 +++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 45 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index dc4355b967db..ab9911b54faf 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -374,27 +374,27 @@ static int posix_cpu_timer_del(struct k_itimer *timer) struct task_struct *p = timer->it.cpu.task; int ret = 0; - if (likely(p != NULL)) { - read_lock(&tasklist_lock); - if (unlikely(p->sighand == NULL)) { - /* - * We raced with the reaping of the task. - * The deletion should have cleared us off the list. - */ - BUG_ON(!list_empty(&timer->it.cpu.entry)); - } else { - spin_lock(&p->sighand->siglock); - if (timer->it.cpu.firing) - ret = TIMER_RETRY; - else - list_del(&timer->it.cpu.entry); - spin_unlock(&p->sighand->siglock); - } - read_unlock(&tasklist_lock); + WARN_ON_ONCE(p == NULL); - if (!ret) - put_task_struct(p); + read_lock(&tasklist_lock); + if (unlikely(p->sighand == NULL)) { + /* + * We raced with the reaping of the task. + * The deletion should have cleared us off the list. + */ + BUG_ON(!list_empty(&timer->it.cpu.entry)); + } else { + spin_lock(&p->sighand->siglock); + if (timer->it.cpu.firing) + ret = TIMER_RETRY; + else + list_del(&timer->it.cpu.entry); + spin_unlock(&p->sighand->siglock); } + read_unlock(&tasklist_lock); + + if (!ret) + put_task_struct(p); return ret; } @@ -622,12 +622,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, unsigned long long old_expires, new_expires, old_incr, val; int ret; - if (unlikely(p == NULL)) { - /* - * Timer refers to a dead task's clock.
- */ - return -ESRCH; - } + WARN_ON_ONCE(p == NULL); new_expires = timespec_to_sample(timer->it_clock, &new->it_value); @@ -770,6 +765,8 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) unsigned long long now; struct task_struct *p = timer->it.cpu.task; + WARN_ON_ONCE(p == NULL); + /* * Easy part: convert the reload time. */ @@ -781,18 +778,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) return; } - if (unlikely(p == NULL)) { - WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock)); - /* - * This task already died and the timer will never fire. - * In this case, expires is actually the dead value. - */ - dead: - sample_to_timespec(timer->it_clock, timer->it.cpu.expires, - &itp->it_value); - return; - } - /* * Sample the clock to take the difference with the expiry time. */ @@ -807,8 +792,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) * Call the timer disarmed, nothing else to do. */ timer->it.cpu.expires = 0; + sample_to_timespec(timer->it_clock, timer->it.cpu.expires, + &itp->it_value); read_unlock(&tasklist_lock); - goto dead; } else { cpu_timer_sample_group(timer->it_clock, p, &now); } @@ -1029,13 +1015,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) struct task_struct *p = timer->it.cpu.task; unsigned long long now; - if (unlikely(p == NULL)) { - WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock)); - /* - * The task was cleaned up already, no future firings. - */ - goto out; - } + WARN_ON_ONCE(p == NULL); /* * Fetch the current sample and update the timer's expiry time. -- cgit v1.2.3 From af82eb3c3068877a6b1989796a06b846b1e9e1c3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 11 Oct 2013 16:11:43 +0200 Subject: posix-timers: Remove useless clock sample on timers cleanup a0b2062b0904ef07944c4a6e4d0f88ee44f1e9f2 ("posix_timers: fix racy timer delta caching on task exit") forgot to remove the arguments used for timer caching. Fix this leftover. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Kosaki Motohiro Cc: Andrew Morton --- kernel/posix-cpu-timers.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index ab9911b54faf..e6389f915bcb 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -399,8 +399,7 @@ static int posix_cpu_timer_del(struct k_itimer *timer) return ret; } -static void cleanup_timers_list(struct list_head *head, - unsigned long long curr) +static void cleanup_timers_list(struct list_head *head) { struct cpu_timer_list *timer, *next; @@ -414,16 +413,11 @@ static void cleanup_timers_list(struct list_head *head, * time for later timer_gettime calls to return. * This must be called with the siglock held. 
*/ -static void cleanup_timers(struct list_head *head, - cputime_t utime, cputime_t stime, - unsigned long long sum_exec_runtime) +static void cleanup_timers(struct list_head *head) { - - cputime_t ptime = utime + stime; - - cleanup_timers_list(head, cputime_to_expires(ptime)); - cleanup_timers_list(++head, cputime_to_expires(utime)); - cleanup_timers_list(++head, sum_exec_runtime); + cleanup_timers_list(head); + cleanup_timers_list(++head); + cleanup_timers_list(++head); } /* @@ -433,24 +427,14 @@ static void cleanup_timers(struct list_head *head, */ void posix_cpu_timers_exit(struct task_struct *tsk) { - cputime_t utime, stime; - add_device_randomness((const void*) &tsk->se.sum_exec_runtime, sizeof(unsigned long long)); - task_cputime(tsk, &utime, &stime); - cleanup_timers(tsk->cpu_timers, - utime, stime, tsk->se.sum_exec_runtime); + cleanup_timers(tsk->cpu_timers); } void posix_cpu_timers_exit_group(struct task_struct *tsk) { - struct signal_struct *const sig = tsk->signal; - cputime_t utime, stime; - - task_cputime(tsk, &utime, &stime); - cleanup_timers(tsk->signal->cpu_timers, - utime + sig->utime, stime + sig->stime, - tsk->se.sum_exec_runtime + sig->sum_sched_runtime); + cleanup_timers(tsk->signal->cpu_timers); } static inline int expires_gt(cputime_t expires, cputime_t new_exp) -- cgit v1.2.3 From 33ab0fec33527e8b5ab124cff6aefd4746508e04 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 11 Oct 2013 17:41:11 +0200 Subject: posix-timers: Consolidate posix_cpu_clock_get() Consolidate the clock sampling common code used for both local and remote targets. Note that this introduces a tiny user ABI change: if a PID is passed to clock_gettime() along with the clockid, we used to forbid a process wide clock sample when that PID doesn't belong to a group leader. Now after this patch we allow process wide clock samples if that PID belongs to the current task, even if the current task is not the group leader. Meanwhile, local process wide clock samples were already allowed with PID == 0 (the current task) even if the current task is not the group leader. So in the end this should be no big deal as this actually harmonizes the behaviour when the remote sample is actually a local one.
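For reference, the remote process wide sample discussed above can be triggered from userspace roughly like this (illustrative sketch, error handling elided):

	#include <stdio.h>
	#include <sys/types.h>
	#include <time.h>

	/* Sample the process wide CPU clock of another PID */
	static void sample_process_clock(pid_t pid)
	{
		clockid_t clk;
		struct timespec ts;

		if (clock_getcpuclockid(pid, &clk) == 0 &&
		    clock_gettime(clk, &ts) == 0)
			printf("pid %d: %lld.%09ld s of CPU time\n",
			       (int)pid, (long long)ts.tv_sec, ts.tv_nsec);
	}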
Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Kosaki Motohiro Cc: Andrew Morton --- kernel/posix-cpu-timers.c | 64 ++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index e6389f915bcb..03c5d6c3e614 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -260,30 +260,43 @@ static int cpu_clock_sample_group(const clockid_t which_clock, return 0; } +static int posix_cpu_clock_get_task(struct task_struct *tsk, + const clockid_t which_clock, + struct timespec *tp) +{ + int err = -EINVAL; + unsigned long long rtn; + + if (CPUCLOCK_PERTHREAD(which_clock)) { + if (same_thread_group(tsk, current)) + err = cpu_clock_sample(which_clock, tsk, &rtn); + } else { + read_lock(&tasklist_lock); + + if (tsk->sighand && (tsk == current || thread_group_leader(tsk))) + err = cpu_clock_sample_group(which_clock, tsk, &rtn); + + read_unlock(&tasklist_lock); + } + + if (!err) + sample_to_timespec(which_clock, rtn, tp); + + return err; +} + static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) { const pid_t pid = CPUCLOCK_PID(which_clock); - int error = -EINVAL; - unsigned long long rtn; + int err = -EINVAL; if (pid == 0) { /* * Special case constant value for our own clocks. * We don't have to do any lookup to find ourselves. */ - if (CPUCLOCK_PERTHREAD(which_clock)) { - /* - * Sampling just ourselves we can do with no locking. - */ - error = cpu_clock_sample(which_clock, - current, &rtn); - } else { - read_lock(&tasklist_lock); - error = cpu_clock_sample_group(which_clock, - current, &rtn); - read_unlock(&tasklist_lock); - } + err = posix_cpu_clock_get_task(current, which_clock, tp); } else { /* * Find the given PID, and validate that the caller @@ -292,29 +305,12 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) struct task_struct *p; rcu_read_lock(); p = find_task_by_vpid(pid); - if (p) { - if (CPUCLOCK_PERTHREAD(which_clock)) { - if (same_thread_group(p, current)) { - error = cpu_clock_sample(which_clock, - p, &rtn); - } - } else { - read_lock(&tasklist_lock); - if (thread_group_leader(p) && p->sighand) { - error = - cpu_clock_sample_group(which_clock, - p, &rtn); - } - read_unlock(&tasklist_lock); - } - } + if (p) + err = posix_cpu_clock_get_task(p, which_clock, tp); rcu_read_unlock(); } - if (error) - return error; - sample_to_timespec(which_clock, rtn, tp); - return 0; + return err; } -- cgit v1.2.3 From 50875788a1d4a3f662a27ed13cd05282d835939a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 11 Oct 2013 17:41:11 +0200 Subject: posix-timers: Use sighand lock instead of tasklist_lock for task clock sample There is no need for the tasklist_lock just to take a process wide clock sample. All we need is to get a coherent sample that doesn't race with exit() and exec(): * exit() may be concurrently reaping a task and flushing its time * sighand is unstable under exit() and exec(), and the latter also results in a group leader that can change. To protect against these, locking the target's sighand is enough.
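The locking pattern this relies on, shown in isolation (a sketch, not the exact patched code):

	struct sighand_struct *sighand;
	unsigned long flags;

	sighand = lock_task_sighand(tsk, &flags);
	if (!sighand)
		return -ESRCH;	/* target was reaped, nothing to sample */
	/* ... take the process wide clock sample safely here ... */
	unlock_task_sighand(tsk, &flags);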
Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Kosaki Motohiro Cc: Andrew Morton --- kernel/posix-cpu-timers.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 03c5d6c3e614..71a07699a36b 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -271,12 +271,22 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk, if (same_thread_group(tsk, current)) err = cpu_clock_sample(which_clock, tsk, &rtn); } else { - read_lock(&tasklist_lock); + unsigned long flags; + struct sighand_struct *sighand; - if (tsk->sighand && (tsk == current || thread_group_leader(tsk))) + /* + * while_each_thread() is not yet entirely RCU safe, + * keep locking the group while sampling process + * clock for now. + */ + sighand = lock_task_sighand(tsk, &flags); + if (!sighand) + return err; + + if (tsk == current || thread_group_leader(tsk)) err = cpu_clock_sample_group(which_clock, tsk, &rtn); - read_unlock(&tasklist_lock); + unlock_task_sighand(tsk, &flags); } if (!err) -- cgit v1.2.3 From 3d7a1427e4ce545e949e9bccb75d0ca8d941d93c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 11 Oct 2013 17:41:11 +0200 Subject: posix-timers: Use sighand lock instead of tasklist_lock on timer deletion Timer deletion doesn't need the tasklist lock. We need to protect against: * concurrent access to the lists p->cputime_expires and p->sighand->cputime_expires * task reaping that may also delete the timer list entry * timer firing. We already hold the timer lock which protects us against concurrent timer firing. The rest only needs the target's sighand to be locked. So hold it and drop the use of tasklist_lock there. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Kosaki Motohiro Cc: Andrew Morton --- kernel/posix-cpu-timers.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 71a07699a36b..9641958ddb3e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -377,27 +377,32 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer) */ static int posix_cpu_timer_del(struct k_itimer *timer) { - struct task_struct *p = timer->it.cpu.task; int ret = 0; + unsigned long flags; + struct sighand_struct *sighand; + struct task_struct *p = timer->it.cpu.task; WARN_ON_ONCE(p == NULL); - read_lock(&tasklist_lock); - if (unlikely(p->sighand == NULL)) { + /* + * Protect against sighand release/switch in exit/exec and process/ + * thread timer list entry concurrent read/writes. + */ + sighand = lock_task_sighand(p, &flags); + if (unlikely(sighand == NULL)) { /* * We raced with the reaping of the task. * The deletion should have cleared us off the list.
*/ BUG_ON(!list_empty(&timer->it.cpu.entry)); } else { if (timer->it.cpu.firing) ret = TIMER_RETRY; else list_del(&timer->it.cpu.entry); + + unlock_task_sighand(p, &flags); } - read_unlock(&tasklist_lock); if (!ret) put_task_struct(p); -- cgit v1.2.3 From e73d84e33f15c099ed1df60437700093cb14e46e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 11 Oct 2013 18:56:49 +0200 Subject: posix-timers: Remove remaining uses of tasklist_lock The remaining uses of tasklist_lock were mostly about synchronizing against sighand modifications, getting coherent and safe group samples, and handling the thread/process wide timers lists. All of this is already safely synchronizable with the target's sighand lock. Let's use it in these places instead. Also update the comments about locking. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Kosaki Motohiro Cc: Andrew Morton --- kernel/posix-cpu-timers.c | 76 +++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 32 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 9641958ddb3e..d9dc5edc318c 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -233,7 +233,8 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) /* * Sample a process (thread group) clock for the given group_leader task. - * Must be called with tasklist_lock held for reading. + * Must be called with task sighand lock held for safe while_each_thread() + * traversal. */ static int cpu_clock_sample_group(const clockid_t which_clock, struct task_struct *p, @@ -455,8 +456,7 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp) /* * Insert the timer on the appropriate list before any timers that - * expire later. This must be called with the tasklist_lock held - * for reading, interrupts disabled and p->sighand->siglock taken. + * expire later. This must be called with the sighand lock held. */ static void arm_timer(struct k_itimer *timer) { @@ -547,7 +547,8 @@ static void cpu_timer_fire(struct k_itimer *timer) /* * Sample a process (thread group) timer for the given group_leader task. - * Must be called with tasklist_lock held for reading. + * Must be called with task sighand lock held for safe while_each_thread() + * traversal. */ static int cpu_timer_sample_group(const clockid_t which_clock, struct task_struct *p, @@ -610,9 +611,11 @@ static inline void posix_cpu_timer_kick_nohz(void) { } * If we return TIMER_RETRY, it's necessary to release the timer's lock * and try again. (This happens when the timer is in the middle of firing.) */ -static int posix_cpu_timer_set(struct k_itimer *timer, int flags, +static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, struct itimerspec *new, struct itimerspec *old) { + unsigned long flags; + struct sighand_struct *sighand; struct task_struct *p = timer->it.cpu.task; unsigned long long old_expires, new_expires, old_incr, val; int ret; @@ -621,14 +624,16 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, new_expires = timespec_to_sample(timer->it_clock, &new->it_value); - read_lock(&tasklist_lock); /* - * We need the tasklist_lock to protect against reaping that - * clears p->sighand.
If p has just been reaped, we can no + * Protect against sighand release/switch in exit/exec and p->cpu_timers + * and p->signal->cpu_timers read/write in arm_timer() + */ + sighand = lock_task_sighand(p, &flags); + /* + * If p has just been reaped, we can no * longer get any information about it at all. */ - if (unlikely(p->sighand == NULL)) { - read_unlock(&tasklist_lock); + if (unlikely(sighand == NULL)) { return -ESRCH; } @@ -639,7 +644,6 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, ret = 0; old_incr = timer->it.cpu.incr; - spin_lock(&p->sighand->siglock); old_expires = timer->it.cpu.expires; if (unlikely(timer->it.cpu.firing)) { timer->it.cpu.firing = -1; @@ -696,12 +700,11 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, * disable this firing since we are already reporting * it as an overrun (thanks to bump_cpu_timer above). */ - spin_unlock(&p->sighand->siglock); - read_unlock(&tasklist_lock); + unlock_task_sighand(p, &flags); goto out; } - if (new_expires != 0 && !(flags & TIMER_ABSTIME)) { + if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) { new_expires += val; } @@ -715,9 +718,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, arm_timer(timer); } - spin_unlock(&p->sighand->siglock); - read_unlock(&tasklist_lock); - + unlock_task_sighand(p, &flags); /* * Install the new reload setting, and * set up the signal and overrun bookkeeping. @@ -779,8 +780,16 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) if (CPUCLOCK_PERTHREAD(timer->it_clock)) { cpu_clock_sample(timer->it_clock, p, &now); } else { - read_lock(&tasklist_lock); - if (unlikely(p->sighand == NULL)) { + struct sighand_struct *sighand; + unsigned long flags; + + /* + * Protect against sighand release/switch in exit/exec and + * also make timer sampling safe if it ends up calling + * thread_group_cputime(). + */ + sighand = lock_task_sighand(p, &flags); + if (unlikely(sighand == NULL)) { /* * The process has been reaped. * We can't even collect a sample any more. @@ -789,11 +798,10 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) timer->it.cpu.expires = 0; sample_to_timespec(timer->it_clock, timer->it.cpu.expires, &itp->it_value); - read_unlock(&tasklist_lock); } else { cpu_timer_sample_group(timer->it_clock, p, &now); + unlock_task_sighand(p, &flags); } - read_unlock(&tasklist_lock); } if (now < timer->it.cpu.expires) { @@ -1007,6 +1015,8 @@ static void check_process_timers(struct task_struct *tsk, */ void posix_cpu_timer_schedule(struct k_itimer *timer) { + struct sighand_struct *sighand; + unsigned long flags; struct task_struct *p = timer->it.cpu.task; unsigned long long now; @@ -1021,27 +1031,31 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) if (unlikely(p->exit_state)) goto out; - read_lock(&tasklist_lock); /* arm_timer needs it. */ - spin_lock(&p->sighand->siglock); + /* Protect timer list r/w in arm_timer() */ + sighand = lock_task_sighand(p, &flags); + if (!sighand) + goto out; } else { - read_lock(&tasklist_lock); - if (unlikely(p->sighand == NULL)) { + /* + * Protect arm_timer() and timer sampling in case of call to + * thread_group_cputime(). + */ + sighand = lock_task_sighand(p, &flags); + if (unlikely(sighand == NULL)) { /* * The process has been reaped. * We can't even collect a sample any more. 
*/ timer->it.cpu.expires = 0; - read_unlock(&tasklist_lock); goto out; } else if (unlikely(p->exit_state) && thread_group_empty(p)) { - read_unlock(&tasklist_lock); + unlock_task_sighand(p, &flags); /* Optimizations: if the process is dying, no need to rearm */ goto out; } - spin_lock(&p->sighand->siglock); cpu_timer_sample_group(timer->it_clock, p, &now); bump_cpu_timer(timer, now); - /* Leave the tasklist_lock locked for the call below. */ + /* Leave the sighand locked for the call below. */ } /* @@ -1049,12 +1063,10 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) */ BUG_ON(!irqs_disabled()); arm_timer(timer); - spin_unlock(&p->sighand->siglock); - read_unlock(&tasklist_lock); + unlock_task_sighand(p, &flags); /* Kick full dynticks CPUs in case they need to tick on the new timer */ posix_cpu_timer_kick_nohz(); out: timer->it_overrun_last = timer->it_overrun; timer->it_overrun = -1; -- cgit v1.2.3 From 531f64fd6f46a3f2a3edb1b97ecc827c775932c5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 11 Oct 2013 17:58:08 +0200 Subject: posix-timers: Convert abuses of BUG_ON to WARN_ON The posix cpu timers code makes heavy use of BUG_ON() but none of these concern fatal issues that require stopping the machine. So let's just warn the user when some internal state slips out of our hands. Signed-off-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Kosaki Motohiro Cc: Andrew Morton --- kernel/posix-cpu-timers.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index d9dc5edc318c..3b8946416a5f 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -395,7 +395,7 @@ static int posix_cpu_timer_del(struct k_itimer *timer) * We raced with the reaping of the task. * The deletion should have cleared us off the list. */ - BUG_ON(!list_empty(&timer->it.cpu.entry)); + WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry)); } else { if (timer->it.cpu.firing) ret = TIMER_RETRY; @@ -640,7 +640,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, /* * Disarm any old timer after extracting its expiry time. */ - BUG_ON(!irqs_disabled()); + WARN_ON_ONCE(!irqs_disabled()); ret = 0; old_incr = timer->it.cpu.incr; @@ -1061,7 +1061,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) /* * Now re-arm for the new expiry time. */ - BUG_ON(!irqs_disabled()); + WARN_ON_ONCE(!irqs_disabled()); arm_timer(timer); unlock_task_sighand(p, &flags); @@ -1150,7 +1150,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) struct k_itimer *timer, *next; unsigned long flags; - BUG_ON(!irqs_disabled()); + WARN_ON_ONCE(!irqs_disabled()); /* * The fast path checks that there are no expired thread or thread @@ -1217,7 +1217,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, { unsigned long long now; - BUG_ON(clock_idx == CPUCLOCK_SCHED); + WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED); cpu_timer_sample_group(clock_idx, tsk, &now); if (oldval) { -- cgit v1.2.3 From 247f325aaddb8b6117959f70c26ba735360c4160 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 25 Oct 2013 12:16:10 +0100 Subject: clockevent: sun4i: Fill the irq field in the clockevent structure The clock event structure's irq field was previously not filled in with the interrupt we're using.
This resulted in the timer not being used at all in a configuration with SMP enabled on a system with several CPUs and the cpumask set to cpu_possible_mask. Signed-off-by: Maxime Ripard Signed-off-by: Daniel Lezcano Reviewed-by: Stephen Boyd --- drivers/clocksource/sun4i_timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index 2fb4695a28d8..a7f492e0c7fc 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -188,6 +188,7 @@ static void __init sun4i_timer_init(struct device_node *node) writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG); sun4i_clockevent.cpumask = cpumask_of(0); + sun4i_clockevent.irq = irq; clockevents_config_and_register(&sun4i_clockevent, rate, TIMER_SYNC_TICKS, 0xffffffff); -- cgit v1.2.3 From 2c28f32ca4c98b41ad95f62fa27f59f3117931d4 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 25 Oct 2013 12:16:11 +0100 Subject: clocksource: sun4i: Change CPU mask to cpu_possible_mask The interrupt for the timer is a shared processor interrupt, so any CPU found in the system can handle it. Switch our cpumask to cpu_possible_mask instead of cpumask_of(0). Signed-off-by: Maxime Ripard Signed-off-by: Daniel Lezcano Reviewed-by: Stephen Boyd --- drivers/clocksource/sun4i_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index a7f492e0c7fc..6a76b4ec3470 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -187,7 +187,7 @@ static void __init sun4i_timer_init(struct device_node *node) val = readl(timer_base + TIMER_IRQ_EN_REG); writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG); - sun4i_clockevent.cpumask = cpumask_of(0); + sun4i_clockevent.cpumask = cpu_possible_mask; sun4i_clockevent.irq = irq; clockevents_config_and_register(&sun4i_clockevent, rate, -- cgit v1.2.3 From 5df9affb50a09e0cb571c4fa3e2d577db85c7475 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 7 Nov 2013 12:01:48 +0100 Subject: clocksource: sun4i: Increase a bit the clock event and sources rating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to keep this driver as the default provider of the clock events and source, yet some other driver might fit in the "desired" category of ratings. Hence, we need to increase the rating a bit so that we have more flexibility in the ratings we choose.
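For context, the rating bands documented for clocksources (clockevent ratings follow the same convention) put these values in perspective:

	/*
	 * 1-99:    unfit for real use, only available during boot and testing
	 * 100-199: base level usability, functional for real use but undesired
	 * 200-299: good, a correct and usable clocksource
	 * 300-399: desired, a reasonably fast and accurate clocksource
	 * 400-499: perfect, the ideal clocksource, a must-use where available
	 *
	 * Moving from 300 to 350/340 keeps this driver in the "desired" band
	 * while leaving room for other "desired" providers below it.
	 */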
Signed-off-by: Maxime Ripard Tested-by: Emilio López Signed-off-by: Daniel Lezcano --- drivers/clocksource/sun4i_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index 6a76b4ec3470..d7a1a1ad8f05 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -114,7 +114,7 @@ static int sun4i_clkevt_next_event(unsigned long evt, static struct clock_event_device sun4i_clockevent = { .name = "sun4i_tick", - .rating = 300, + .rating = 350, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = sun4i_clkevt_mode, .set_next_event = sun4i_clkevt_next_event, @@ -172,7 +172,7 @@ static void __init sun4i_timer_init(struct device_node *node) setup_sched_clock(sun4i_timer_sched_read, 32, rate); clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name, - rate, 300, 32, clocksource_mmio_readl_down); + rate, 350, 32, clocksource_mmio_readl_down); ticks_per_jiffy = DIV_ROUND_UP(rate, HZ); -- cgit v1.2.3 From 67905540e8b8eaf51e621cfd2ef15641d6d5b9a7 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 7 Nov 2013 12:01:48 +0100 Subject: clocksource: Add Allwinner SoCs HS timers driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of the Allwinner SoCs (at this time, all but the A10) also have High Speed timers that do not use the 24MHz oscillator as a source but rather the much faster AHB clock. The IP is slightly different between the A10s/A13 and the one used in the A20/A31, since the latter have 4 timers available, while the former have only 2 of them. [dlezcano] : Fixed conflict with b788beda "Order Kconfig options alphabetically" Signed-off-by: Maxime Ripard Tested-by: Emilio López Signed-off-by: Daniel Lezcano --- .../bindings/timer/allwinner,sun5i-a13-hstimer.txt | 22 +++ arch/arm/mach-sunxi/Kconfig | 1 + drivers/clocksource/Kconfig | 4 + drivers/clocksource/Makefile | 1 + drivers/clocksource/timer-sun5i.c | 192 +++++++++++++++++++++ 5 files changed, 220 insertions(+) create mode 100644 Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt create mode 100644 drivers/clocksource/timer-sun5i.c diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt new file mode 100644 index 000000000000..7c26154b8bbb --- /dev/null +++ b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt @@ -0,0 +1,22 @@ +Allwinner SoCs High Speed Timer Controller + +Required properties: + +- compatible : should be "allwinner,sun5i-a13-hstimer" or + "allwinner,sun7i-a20-hstimer" +- reg : Specifies base physical address and size of the registers.
+- interrupts : The interrupts of these timers (2 for the sun5i IP, 4 for the sun7i + one) +- clocks: phandle to the source clock (usually the AHB clock) + +Example: + +timer@01c60000 { + compatible = "allwinner,sun7i-a20-hstimer"; + reg = <0x01c60000 0x1000>; + interrupts = <0 51 1>, + <0 52 1>, + <0 53 1>, + <0 54 1>; + clocks = <&ahb1_gates 19>; +}; diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig index c9e72c89066a..bce0d4277f71 100644 --- a/arch/arm/mach-sunxi/Kconfig +++ b/arch/arm/mach-sunxi/Kconfig @@ -12,3 +12,4 @@ config ARCH_SUNXI select PINCTRL_SUNXI select SPARSE_IRQ select SUN4I_TIMER + select SUN5I_HSTIMER diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index bdb953e15d2a..884eeff8e32d 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -37,6 +37,10 @@ config SUN4I_TIMER select CLKSRC_MMIO bool +config SUN5I_HSTIMER + select CLKSRC_MMIO + bool + config VT8500_TIMER bool diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 33621efb9148..358358d87b6d 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o obj-$(CONFIG_ARCH_MXS) += mxs_timer.o obj-$(CONFIG_ARCH_PRIMA2) += timer-prima2.o obj-$(CONFIG_SUN4I_TIMER) += sun4i_timer.o +obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o obj-$(CONFIG_ARCH_TEGRA) += tegra20_timer.o obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o obj-$(CONFIG_ARCH_NSPIRE) += zevio-timer.o diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c new file mode 100644 index 000000000000..bddc52233d2a --- /dev/null +++ b/drivers/clocksource/timer-sun5i.c @@ -0,0 +1,192 @@ +/* + * Allwinner SoCs hstimer driver. + * + * Copyright (C) 2013 Maxime Ripard + * + * Maxime Ripard + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/clk.h> +#include <linux/clockchips.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqreturn.h> +#include <linux/sched_clock.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> + +#define TIMER_IRQ_EN_REG 0x00 +#define TIMER_IRQ_EN(val) BIT(val) +#define TIMER_IRQ_ST_REG 0x04 +#define TIMER_CTL_REG(val) (0x20 * (val) + 0x10) +#define TIMER_CTL_ENABLE BIT(0) +#define TIMER_CTL_RELOAD BIT(1) +#define TIMER_CTL_CLK_PRES(val) (((val) & 0x7) << 4) +#define TIMER_CTL_ONESHOT BIT(7) +#define TIMER_INTVAL_LO_REG(val) (0x20 * (val) + 0x14) +#define TIMER_INTVAL_HI_REG(val) (0x20 * (val) + 0x18) +#define TIMER_CNTVAL_LO_REG(val) (0x20 * (val) + 0x1c) +#define TIMER_CNTVAL_HI_REG(val) (0x20 * (val) + 0x20) + +#define TIMER_SYNC_TICKS 3 + +static void __iomem *timer_base; +static u32 ticks_per_jiffy; + +/* + * When we disable a timer, we need to wait for at least 2 cycles of + * the timer source clock. For that we use the clocksource timer, + * which is already set up, runs at the same frequency as the other + * timers, and will never be disabled.
+ */ +static void sun5i_clkevt_sync(void) +{ + u32 old = readl(timer_base + TIMER_CNTVAL_LO_REG(1)); + + while ((old - readl(timer_base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS) + cpu_relax(); +} + +static void sun5i_clkevt_time_stop(u8 timer) +{ + u32 val = readl(timer_base + TIMER_CTL_REG(timer)); + writel(val & ~TIMER_CTL_ENABLE, timer_base + TIMER_CTL_REG(timer)); + + sun5i_clkevt_sync(); +} + +static void sun5i_clkevt_time_setup(u8 timer, u32 delay) +{ + writel(delay, timer_base + TIMER_INTVAL_LO_REG(timer)); +} + +static void sun5i_clkevt_time_start(u8 timer, bool periodic) +{ + u32 val = readl(timer_base + TIMER_CTL_REG(timer)); + + if (periodic) + val &= ~TIMER_CTL_ONESHOT; + else + val |= TIMER_CTL_ONESHOT; + + writel(val | TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, + timer_base + TIMER_CTL_REG(timer)); +} + +static void sun5i_clkevt_mode(enum clock_event_mode mode, + struct clock_event_device *clk) +{ + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + sun5i_clkevt_time_stop(0); + sun5i_clkevt_time_setup(0, ticks_per_jiffy); + sun5i_clkevt_time_start(0, true); + break; + case CLOCK_EVT_MODE_ONESHOT: + sun5i_clkevt_time_stop(0); + sun5i_clkevt_time_start(0, false); + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + default: + sun5i_clkevt_time_stop(0); + break; + } +} + +static int sun5i_clkevt_next_event(unsigned long evt, + struct clock_event_device *unused) +{ + sun5i_clkevt_time_stop(0); + sun5i_clkevt_time_setup(0, evt - TIMER_SYNC_TICKS); + sun5i_clkevt_time_start(0, false); + + return 0; +} + +static struct clock_event_device sun5i_clockevent = { + .name = "sun5i_tick", + .rating = 340, + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + .set_mode = sun5i_clkevt_mode, + .set_next_event = sun5i_clkevt_next_event, +}; + + +static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evt = (struct clock_event_device *)dev_id; + + writel(0x1, timer_base + TIMER_IRQ_ST_REG); + evt->event_handler(evt); + + return IRQ_HANDLED; +} + +static struct irqaction sun5i_timer_irq = { + .name = "sun5i_timer0", + .flags = IRQF_TIMER | IRQF_IRQPOLL, + .handler = sun5i_timer_interrupt, + .dev_id = &sun5i_clockevent, +}; + +static u32 sun5i_timer_sched_read(void) +{ + return ~readl(timer_base + TIMER_CNTVAL_LO_REG(1)); +} + +static void __init sun5i_timer_init(struct device_node *node) +{ + unsigned long rate; + struct clk *clk; + int ret, irq; + u32 val; + + timer_base = of_iomap(node, 0); + if (!timer_base) + panic("Can't map registers"); + + irq = irq_of_parse_and_map(node, 0); + if (irq <= 0) + panic("Can't parse IRQ"); + + clk = of_clk_get(node, 0); + if (IS_ERR(clk)) + panic("Can't get timer clock"); + clk_prepare_enable(clk); + rate = clk_get_rate(clk); + + writel(~0, timer_base + TIMER_INTVAL_LO_REG(1)); + writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, + timer_base + TIMER_CTL_REG(1)); + + setup_sched_clock(sun5i_timer_sched_read, 32, rate); + clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name, + rate, 340, 32, clocksource_mmio_readl_down); + + ticks_per_jiffy = DIV_ROUND_UP(rate, HZ); + + ret = setup_irq(irq, &sun5i_timer_irq); + if (ret) + pr_warn("failed to setup irq %d\n", irq); + + /* Enable timer0 interrupt */ + val = readl(timer_base + TIMER_IRQ_EN_REG); + writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG); + + sun5i_clockevent.cpumask = cpu_possible_mask; + sun5i_clockevent.irq = irq; + + clockevents_config_and_register(&sun5i_clockevent, rate, + TIMER_SYNC_TICKS, 0xffffffff); +} 
+CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer", + sun5i_timer_init); +CLOCKSOURCE_OF_DECLARE(sun7i_a20, "allwinner,sun7i-a20-hstimer", + sun5i_timer_init); -- cgit v1.2.3 From f2b5002889cd2ca25d1dfe522755ade701f49044 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 7 Nov 2013 12:01:48 +0100 Subject: ARM: sun5i: a10s: Add support for the High Speed Timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Allwinner A10s has support for two high speed timers. Now that we have a driver to support them, we can enable them in the device tree. Signed-off-by: Maxime Ripard Tested-by: Emilio López Signed-off-by: Daniel Lezcano --- arch/arm/boot/dts/sun5i-a10s.dtsi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 52476742a104..e674c94c7206 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -332,5 +332,12 @@ clock-frequency = <100000>; status = "disabled"; }; + + timer@01c60000 { + compatible = "allwinner,sun5i-a13-hstimer"; + reg = <0x01c60000 0x1000>; + interrupts = <82>, <83>; + clocks = <&ahb_gates 28>; + }; }; }; -- cgit v1.2.3 From 4411902a13e6b64873dc21abafeb57db335efcf1 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 7 Nov 2013 12:01:48 +0100 Subject: ARM: sun5i: a13: Add support for the High Speed Timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Allwinner A13 has support for two high speed timers. Now that we have a driver to support them, we can enable them in the device tree. Signed-off-by: Maxime Ripard Tested-by: Emilio López Signed-off-by: Daniel Lezcano --- arch/arm/boot/dts/sun5i-a13.dtsi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index ce8ef2a45be0..1ccd75d37f49 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -273,5 +273,12 @@ clock-frequency = <100000>; status = "disabled"; }; + + timer@01c60000 { + compatible = "allwinner,sun5i-a13-hstimer"; + reg = <0x01c60000 0x1000>; + interrupts = <82>, <83>; + clocks = <&ahb_gates 28>; + }; }; }; -- cgit v1.2.3 From 31f8ad387e4306ec1fb2a01c5cd0d648b5e9bff5 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 7 Nov 2013 12:01:48 +0100 Subject: ARM: sun7i: a20: Add support for the High Speed Timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Allwinner A20 has support for four high speed timers. Apart from the number of timers (4 vs 2), it's basically the same logic as the high speed timers found in the sun5i chips. Now that we have a driver to support them, we can enable them in the device tree.
[dlezcano] : Fixed conflict with 428abbb8 "Enable the I2C controllers" Signed-off-by: Maxime Ripard Tested-by: Emilio López Signed-off-by: Daniel Lezcano --- arch/arm/boot/dts/sun7i-a20.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index e46cfedde74c..ee6cec7b0c90 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -395,6 +395,16 @@ status = "disabled"; }; + hstimer@01c60000 { + compatible = "allwinner,sun7i-a20-hstimer"; + reg = <0x01c60000 0x1000>; + interrupts = <0 81 1>, + <0 82 1>, + <0 83 1>, + <0 84 1>; + clocks = <&ahb_gates 28>; + }; + gic: interrupt-controller@01c81000 { compatible = "arm,cortex-a7-gic", "arm,cortex-a15-gic"; reg = <0x01c81000 0x1000>, -- cgit v1.2.3 From af066fce5fa6b615588732cad6909c450a9eb616 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 20 Nov 2013 00:47:32 +0100 Subject: clocksource: arm_global_timer: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. While we're here, increase the number of bits that sched_clock can handle to 64 to make full use of the counter. Cc: Stuart Menefy Cc: Srinivas Kandagatla Acked-by: Srinivas Kandagatla Acked-by: Stuart Menefy Signed-off-by: Stephen Boyd Signed-off-by: Daniel Lezcano --- drivers/clocksource/arm_global_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index c639b1a9e996..0fc31d029e52 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -202,7 +202,7 @@ static struct clocksource gt_clocksource = { }; #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK -static u32 notrace gt_sched_clock_read(void) +static u64 notrace gt_sched_clock_read(void) { return gt_counter_read(); } @@ -217,7 +217,7 @@ static void __init gt_clocksource_init(void) writel(GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL); #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK - setup_sched_clock(gt_sched_clock_read, 32, gt_clk_rate); + sched_clock_register(gt_sched_clock_read, 64, gt_clk_rate); #endif clocksource_register_hz(&gt_clocksource, gt_clk_rate); } -- cgit v1.2.3 From dfded00902d7437963870accbcf4b39114e85f59 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 20 Nov 2013 00:47:32 +0100 Subject: clocksource: cadence_ttc_timer: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface.
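The conversion pattern is the same in every driver of this series; a minimal sketch, where timer_base, COUNTER_REG and the my_* names are hypothetical placeholders rather than any driver's real identifiers:

#include <linux/io.h>
#include <linux/sched_clock.h>

static void __iomem *timer_base;        /* mapped in the driver's init path */
#define COUNTER_REG 0x14                /* hypothetical down-counter offset */

/* the read callback now returns u64; the core extends 32 valid bits to 64 */
static u64 notrace my_sched_clock_read(void)
{
        return ~readl(timer_base + COUNTER_REG);  /* invert a down-counter */
}

static void __init my_timer_init(unsigned long rate)
{
        sched_clock_register(my_sched_clock_read, 32, rate);
}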
Cc: Soren Brinkmann Cc: Michal Simek Tested-by: Soren Brinkmann Signed-off-by: Stephen Boyd Signed-off-by: Daniel Lezcano --- drivers/clocksource/cadence_ttc_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c index b2bb3a4bc205..b865b4e3d67b 100644 --- a/drivers/clocksource/cadence_ttc_timer.c +++ b/drivers/clocksource/cadence_ttc_timer.c @@ -158,7 +158,7 @@ static cycle_t __ttc_clocksource_read(struct clocksource *cs) TTC_COUNT_VAL_OFFSET); } -static u32 notrace ttc_sched_clock_read(void) +static u64 notrace ttc_sched_clock_read(void) { return __raw_readl(ttc_sched_clock_val_reg); } @@ -306,7 +306,7 @@ static void __init ttc_setup_clocksource(struct clk *clk, void __iomem *base) } ttc_sched_clock_val_reg = base + TTC_COUNT_VAL_OFFSET; - setup_sched_clock(ttc_sched_clock_read, 16, + sched_clock_register(ttc_sched_clock_read, 16, clk_get_rate(ttccs->ttc.clk) / PRESCALE); } -- cgit v1.2.3 From 662e7230ee16951e6858c01e72db87c5dc46150e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 20 Nov 2013 00:47:32 +0100 Subject: clocksource: sun4i: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. While we're here, mark the sched_clock function as notrace to prevent ftrace recursion crashes. Cc: Maxime Ripard Acked-by: Maxime Ripard Signed-off-by: Stephen Boyd Signed-off-by: Daniel Lezcano --- drivers/clocksource/sun4i_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index d7a1a1ad8f05..191187470aa6 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -138,7 +138,7 @@ static struct irqaction sun4i_timer_irq = { .dev_id = &sun4i_clockevent, }; -static u32 sun4i_timer_sched_read(void) +static u64 notrace sun4i_timer_sched_read(void) { return ~readl(timer_base + TIMER_CNTVAL_REG(1)); } @@ -170,7 +170,7 @@ static void __init sun4i_timer_init(struct device_node *node) TIMER_CTL_CLK_SRC(TIMER_CTL_CLK_SRC_OSC24M), timer_base + TIMER_CTL_REG(1)); - setup_sched_clock(sun4i_timer_sched_read, 32, rate); + sched_clock_register(sun4i_timer_sched_read, 32, rate); clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name, rate, 350, 32, clocksource_mmio_readl_down); -- cgit v1.2.3 From 2e8bac532f8bcd3834853e0a22b130b9fd59270d Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 20 Nov 2013 00:47:32 +0100 Subject: clocksource: orion: Switch to sched_clock_register() The 32 bit sched_clock interface now supports 64 bits. Upgrade to the 64 bit function to allow us to remove the 32 bit registration interface. Cc: Sebastian Hesselbarth Tested-by: Sebastian Hesselbarth Signed-off-by: Stephen Boyd Signed-off-by: Daniel Lezcano --- drivers/clocksource/time-orion.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/time-orion.c b/drivers/clocksource/time-orion.c index 9c7f018a67ca..20066222f3f2 100644 --- a/drivers/clocksource/time-orion.c +++ b/drivers/clocksource/time-orion.c @@ -53,7 +53,7 @@ EXPORT_SYMBOL(orion_timer_ctrl_clrset); /* * Free-running clocksource handling. 
*/ -static u32 notrace orion_read_sched_clock(void) +static u64 notrace orion_read_sched_clock(void) { return ~readl(timer_base + TIMER0_VAL); } @@ -135,7 +135,7 @@ static void __init orion_timer_init(struct device_node *np) clocksource_mmio_init(timer_base + TIMER0_VAL, "orion_clocksource", clk_get_rate(clk), 300, 32, clocksource_mmio_readl_down); - setup_sched_clock(orion_read_sched_clock, 32, clk_get_rate(clk)); + sched_clock_register(orion_read_sched_clock, 32, clk_get_rate(clk)); /* setup timer1 as clockevent timer */ if (setup_irq(irq, &orion_clkevt_irq)) -- cgit v1.2.3 From fdca679d87bb4ac0fdc882dbf3deb47a1f58b813 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 20 Nov 2013 09:58:56 +0100 Subject: clocksource: clksrc-of: Warn if no clock sources are found Many platforms rely on clocksource_of_init() being implicitly called for registering clock sources and will get no warning at all if no working clock source is available. Let's print a critical error message if no clock source is found. Acked-by: Lee Jones Signed-off-by: Linus Walleij Signed-off-by: Daniel Lezcano --- drivers/clocksource/clksrc-of.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clocksource/clksrc-of.c b/drivers/clocksource/clksrc-of.c index 35639cf4e5a2..a30b42c3ac3b 100644 --- a/drivers/clocksource/clksrc-of.c +++ b/drivers/clocksource/clksrc-of.c @@ -28,6 +28,7 @@ void __init clocksource_of_init(void) struct device_node *np; const struct of_device_id *match; clocksource_of_init_fn init_func; + unsigned clocksources = 0; for_each_matching_node_and_match(np, __clksrc_of_table, &match) { if (!of_device_is_available(np)) @@ -36,5 +37,8 @@ init_func = match->data; init_func(np); of_node_put(np); + clocksources++; } + if (!clocksources) + pr_crit("%s: no matching clocksources found\n", __func__); } -- cgit v1.2.3 From 08cb8e460956489fcfbfab5a7d33e62acd190b9a Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 2 Dec 2013 11:39:56 +0100 Subject: clocksource: armada-370-xp: Enable timer divider only when needed The current code always sets the timer divider bits. However, when the 25 MHz timer is enabled, this is not needed and has no effect. As this causes some confusion, rework the code so the divider is set only when needed, i.e. when the 25 MHz timer is not in use. Acked-by: Gregory CLEMENT Signed-off-by: Ezequiel Garcia Signed-off-by: Daniel Lezcano --- drivers/clocksource/time-armada-370-xp.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index d8e47e502785..0450f6b69ade 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -76,6 +76,7 @@ static void __iomem *timer_base, *local_base; static unsigned int timer_clk; static bool timer25Mhz = true; +static u32 enable_mask; /* * Number of timer ticks per jiffy. @@ -121,8 +122,7 @@ armada_370_xp_clkevt_next_event(unsigned long delta, /* * Enable the timer. */ - local_timer_ctrl_clrset(TIMER0_RELOAD_EN, - TIMER0_EN | TIMER0_DIV(TIMER_DIVIDER_SHIFT)); + local_timer_ctrl_clrset(TIMER0_RELOAD_EN, enable_mask); return 0; } @@ -141,9 +141,7 @@ armada_370_xp_clkevt_mode(enum clock_event_mode mode, /* * Enable timer. */ - local_timer_ctrl_clrset(0, TIMER0_RELOAD_EN | - TIMER0_EN | - TIMER0_DIV(TIMER_DIVIDER_SHIFT)); + local_timer_ctrl_clrset(0, TIMER0_RELOAD_EN | enable_mask); } else { /* * Disable timer.
@@ -240,10 +238,13 @@ static void __init armada_370_xp_timer_common_init(struct device_node *np) WARN_ON(!timer_base); local_base = of_iomap(np, 1); - if (timer25Mhz) + if (timer25Mhz) { set = TIMER0_25MHZ; - else + enable_mask = TIMER0_EN; + } else { clr = TIMER0_25MHZ; + enable_mask = TIMER0_EN | TIMER0_DIV(TIMER_DIVIDER_SHIFT); + } timer_ctrl_clrset(clr, set); local_timer_ctrl_clrset(clr, set); @@ -267,8 +268,7 @@ static void __init armada_370_xp_timer_common_init(struct device_node *np) writel(0xffffffff, timer_base + TIMER0_VAL_OFF); writel(0xffffffff, timer_base + TIMER0_RELOAD_OFF); - timer_ctrl_clrset(0, TIMER0_EN | TIMER0_RELOAD_EN | - TIMER0_DIV(TIMER_DIVIDER_SHIFT)); + timer_ctrl_clrset(0, TIMER0_RELOAD_EN | enable_mask); clocksource_mmio_init(timer_base + TIMER0_VAL_OFF, "armada_370_xp_clocksource", -- cgit v1.2.3 From 5707f18c28f4df0f993d965aff1c168b69637d63 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Tue, 3 Dec 2013 15:50:09 +0900 Subject: clocksource: sh_tmu: Remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure. Thus, there is no need to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Acked-by: Simon Horman Signed-off-by: Daniel Lezcano --- drivers/clocksource/sh_tmu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index 78b8dae49628..54ab47553701 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -509,7 +509,6 @@ static int sh_tmu_probe(struct platform_device *pdev) ret = sh_tmu_setup(p, pdev); if (ret) { kfree(p); - platform_set_drvdata(pdev, NULL); pm_runtime_idle(&pdev->dev); return ret; } -- cgit v1.2.3 From 87d4bb9fced08054afb83af2d85f5cf0ba0e21e4 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Tue, 3 Dec 2013 15:51:06 +0900 Subject: clocksource: sh_mtu2: Remove unnecessary platform_set_drvdata() The driver core clears the driver data to NULL after device_release or on probe failure. Thus, there is no need to manually clear the device driver data to NULL. Signed-off-by: Jingoo Han Acked-by: Simon Horman Signed-off-by: Daniel Lezcano --- drivers/clocksource/sh_mtu2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c index 4aac9ee0d0c0..f02648e27f7e 100644 --- a/drivers/clocksource/sh_mtu2.c +++ b/drivers/clocksource/sh_mtu2.c @@ -346,7 +346,6 @@ static int sh_mtu2_probe(struct platform_device *pdev) ret = sh_mtu2_setup(p, pdev); if (ret) { kfree(p); - platform_set_drvdata(pdev, NULL); pm_runtime_idle(&pdev->dev); return ret; } -- cgit v1.2.3 From 38c30a8421ce8b06492121deee422ba7ecfaeef2 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Mon, 9 Dec 2013 10:12:10 +0100 Subject: clocksource: misc drivers: Remove deprecated IRQF_DISABLED This patch removes the use of the IRQF_DISABLED flag. It's a NOOP since 2.6.35 and it will be removed one day.
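Since 2.6.35 genirq runs every handler with interrupts disabled, so the flag changes nothing. A sketch of the resulting irqaction, with placeholder names (my_timer_interrupt, "my_timer") that match no particular driver:

#include <linux/interrupt.h>

static irqreturn_t my_timer_interrupt(int irq, void *dev_id)
{
        /* ack the hardware and call the clockevent handler here */
        return IRQ_HANDLED;
}

static struct irqaction my_timer_irq = {
        .name           = "my_timer",
        .flags          = IRQF_TIMER | IRQF_IRQPOLL,    /* IRQF_DISABLED dropped */
        .handler        = my_timer_interrupt,
};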
[dlezcano] : slightly changed the changelog Signed-off-by: Michael Opdenacker Signed-off-by: Daniel Lezcano --- drivers/clocksource/cadence_ttc_timer.c | 3 +-- drivers/clocksource/cs5535-clockevt.c | 2 +- drivers/clocksource/dw_apb_timer.c | 3 +-- drivers/clocksource/nomadik-mtu.c | 2 +- drivers/clocksource/samsung_pwm_timer.c | 2 +- drivers/clocksource/sh_cmt.c | 3 +-- drivers/clocksource/sh_mtu2.c | 3 +-- drivers/clocksource/sh_tmu.c | 3 +-- 8 files changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c index b865b4e3d67b..8c7382bf260c 100644 --- a/drivers/clocksource/cadence_ttc_timer.c +++ b/drivers/clocksource/cadence_ttc_timer.c @@ -388,8 +388,7 @@ static void __init ttc_setup_clockevent(struct clk *clk, __raw_writel(0x1, ttcce->ttc.base_addr + TTC_IER_OFFSET); err = request_irq(irq, ttc_clock_event_interrupt, - IRQF_DISABLED | IRQF_TIMER, - ttcce->ce.name, ttcce); + IRQF_TIMER, ttcce->ce.name, ttcce); if (WARN_ON(err)) { kfree(ttcce); return; diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c index ea210482dd20..db2105290898 100644 --- a/drivers/clocksource/cs5535-clockevt.c +++ b/drivers/clocksource/cs5535-clockevt.c @@ -131,7 +131,7 @@ static irqreturn_t mfgpt_tick(int irq, void *dev_id) static struct irqaction mfgptirq = { .handler = mfgpt_tick, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER | IRQF_SHARED, + .flags = IRQF_NOBALANCING | IRQF_TIMER | IRQF_SHARED, .name = DRV_NAME, }; diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c index e54ca1062d8e..f3656a6b0382 100644 --- a/drivers/clocksource/dw_apb_timer.c +++ b/drivers/clocksource/dw_apb_timer.c @@ -243,8 +243,7 @@ dw_apb_clockevent_init(int cpu, const char *name, unsigned rating, dw_ced->irqaction.dev_id = &dw_ced->ced; dw_ced->irqaction.irq = irq; dw_ced->irqaction.flags = IRQF_TIMER | IRQF_IRQPOLL | - IRQF_NOBALANCING | - IRQF_DISABLED; + IRQF_NOBALANCING; dw_ced->eoi = apbt_eoi; err = setup_irq(irq, &dw_ced->irqaction); diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c index ed7b73b508e0..152a3f3875ee 100644 --- a/drivers/clocksource/nomadik-mtu.c +++ b/drivers/clocksource/nomadik-mtu.c @@ -187,7 +187,7 @@ static irqreturn_t nmdk_timer_interrupt(int irq, void *dev_id) static struct irqaction nmdk_timer_irq = { .name = "Nomadik Timer Tick", - .flags = IRQF_DISABLED | IRQF_TIMER, + .flags = IRQF_TIMER, .handler = nmdk_timer_interrupt, .dev_id = &nmdk_clkevt, }; diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c index 85082e8d3052..5645cfc90c41 100644 --- a/drivers/clocksource/samsung_pwm_timer.c +++ b/drivers/clocksource/samsung_pwm_timer.c @@ -264,7 +264,7 @@ static irqreturn_t samsung_clock_event_isr(int irq, void *dev_id) static struct irqaction samsung_clock_event_irq = { .name = "samsung_time_irq", - .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_TIMER | IRQF_IRQPOLL, .handler = samsung_clock_event_isr, .dev_id = &time_event_device, }; diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 0965e9848b3d..c6186339dd52 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -726,8 +726,7 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) p->irqaction.name = dev_name(&p->pdev->dev); p->irqaction.handler = sh_cmt_interrupt; p->irqaction.dev_id = p; - p->irqaction.flags = IRQF_DISABLED | 
IRQF_TIMER | \ - IRQF_IRQPOLL | IRQF_NOBALANCING; + p->irqaction.flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING; /* get hold of clock */ p->clk = clk_get(&p->pdev->dev, "cmt_fck"); diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c index f02648e27f7e..b6a56b1c8947 100644 --- a/drivers/clocksource/sh_mtu2.c +++ b/drivers/clocksource/sh_mtu2.c @@ -302,8 +302,7 @@ static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev) p->irqaction.handler = sh_mtu2_interrupt; p->irqaction.dev_id = p; p->irqaction.irq = irq; - p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | \ - IRQF_IRQPOLL | IRQF_NOBALANCING; + p->irqaction.flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING; /* get hold of clock */ p->clk = clk_get(&p->pdev->dev, "mtu2_fck"); diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index 54ab47553701..fc752f7b2719 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -462,8 +462,7 @@ static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev) p->irqaction.handler = sh_tmu_interrupt; p->irqaction.dev_id = p; p->irqaction.irq = irq; - p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | \ - IRQF_IRQPOLL | IRQF_NOBALANCING; + p->irqaction.flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING; /* get hold of clock */ p->clk = clk_get(&p->pdev->dev, "tmu_fck"); -- cgit v1.2.3 From 39304fad8f31b2114492e9a09fe0bd1ac7eb1834 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Mon, 9 Dec 2013 10:35:45 +0100 Subject: clocksource: tegra: Remove deprecated IRQF_DISABLED This patch removes the use of the IRQF_DISABLED flag. It's a NOOP since 2.6.35 and it will be removed one day. [dlezcano] : slightly changed the changelog Signed-off-by: Michael Opdenacker Signed-off-by: Daniel Lezcano --- drivers/clocksource/tegra20_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index 642849256d82..d1869f02051c 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -149,7 +149,7 @@ static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id) static struct irqaction tegra_timer_irq = { .name = "timer0", - .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_TRIGGER_HIGH, + .flags = IRQF_TIMER | IRQF_TRIGGER_HIGH, .handler = tegra_timer_interrupt, .dev_id = &tegra_clockevent, }; -- cgit v1.2.3 From 39039eb31c6e5252e25ec6336d92ddef938ccafa Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Mon, 9 Dec 2013 10:38:50 +0100 Subject: clocksource: vt8500: Remove deprecated IRQF_DISABLED This patch removes the use of the IRQF_DISABLED flag. It's a NOOP since 2.6.35 and it will be removed one day. 
[dlezcano] : slightly changed the changelog Signed-off-by: Michael Opdenacker Signed-off-by: Daniel Lezcano --- drivers/clocksource/vt8500_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c index ad3c0e83a779..1098ed3b9b89 100644 --- a/drivers/clocksource/vt8500_timer.c +++ b/drivers/clocksource/vt8500_timer.c @@ -124,7 +124,7 @@ static irqreturn_t vt8500_timer_interrupt(int irq, void *dev_id) static struct irqaction irq = { .name = "vt8500_timer", - .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_TIMER | IRQF_IRQPOLL, .handler = vt8500_timer_interrupt, .dev_id = &clockevent, }; -- cgit v1.2.3 From 6d19944bd2609a1d7d48149605aa9f62ca8bf640 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 25 Nov 2013 09:54:45 +0800 Subject: clocksource: bcm_kona_timer: Remove unused bcm_timer_ids bcm_timer_ids is no longer used after converting to CLOCKSOURCE_OF_DECLARE. Signed-off-by: Axel Lin Signed-off-by: Daniel Lezcano --- drivers/clocksource/bcm_kona_timer.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c index 0d7d8c3ed6b2..5176e761166b 100644 --- a/drivers/clocksource/bcm_kona_timer.c +++ b/drivers/clocksource/bcm_kona_timer.c @@ -98,12 +98,6 @@ kona_timer_get_counter(void *timer_base, uint32_t *msw, uint32_t *lsw) return; } -static const struct of_device_id bcm_timer_ids[] __initconst = { - {.compatible = "brcm,kona-timer"}, - {.compatible = "bcm,kona-timer"}, /* deprecated name */ - {}, -}; - static void __init kona_timers_init(struct device_node *node) { u32 freq; -- cgit v1.2.3 From 57dee992df244ccce6a6a3a88a43160e285da5d8 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 14 Dec 2013 15:07:32 +0900 Subject: clocksource: sh_cmt: Add clk_prepare/unprepare support Prepare the clock at probe time, as there is no other appropriate place in the driver where we're allowed to sleep. 
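The clk API splits its sleeping and atomic phases: clk_prepare()/clk_unprepare() may sleep, while clk_enable()/clk_disable() may be called from atomic context. A sketch of that split with placeholder my_* function names, assuming the clk was obtained earlier with clk_get():

#include <linux/clk.h>

static int my_probe(struct clk *clk)
{
        /* probe context may sleep, so prepare the clock once here */
        return clk_prepare(clk);
}

static void my_start(struct clk *clk)
{
        /* atomic context: enable only, the clock is already prepared */
        clk_enable(clk);
}

static void my_stop(struct clk *clk)
{
        clk_disable(clk);
}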
Cc: Daniel Lezcano Cc: linux-kernel@vger.kernel.org Signed-off-by: Laurent Pinchart Signed-off-by: Simon Horman Signed-off-by: Daniel Lezcano --- drivers/clocksource/sh_cmt.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 0965e9848b3d..940341a185d7 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -634,12 +634,18 @@ static int sh_cmt_clock_event_next(unsigned long delta, static void sh_cmt_clock_event_suspend(struct clock_event_device *ced) { - pm_genpd_syscore_poweroff(&ced_to_sh_cmt(ced)->pdev->dev); + struct sh_cmt_priv *p = ced_to_sh_cmt(ced); + + pm_genpd_syscore_poweroff(&p->pdev->dev); + clk_unprepare(p->clk); } static void sh_cmt_clock_event_resume(struct clock_event_device *ced) { - pm_genpd_syscore_poweron(&ced_to_sh_cmt(ced)->pdev->dev); + struct sh_cmt_priv *p = ced_to_sh_cmt(ced); + + clk_prepare(p->clk); + pm_genpd_syscore_poweron(&p->pdev->dev); } static void sh_cmt_register_clockevent(struct sh_cmt_priv *p, @@ -737,6 +743,10 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) goto err2; } + ret = clk_prepare(p->clk); + if (ret < 0) + goto err3; + if (res2 && (resource_size(res2) == 4)) { /* assume both CMSTR and CMCSR to be 32-bit */ p->read_control = sh_cmt_read32; @@ -773,19 +783,21 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) cfg->clocksource_rating); if (ret) { dev_err(&p->pdev->dev, "registration failed\n"); - goto err3; + goto err4; } p->cs_enabled = false; ret = setup_irq(irq, &p->irqaction); if (ret) { dev_err(&p->pdev->dev, "failed to request irq %d\n", irq); - goto err3; + goto err4; } platform_set_drvdata(pdev, p); return 0; +err4: + clk_unprepare(p->clk); err3: clk_put(p->clk); err2: -- cgit v1.2.3 From f55c07607a38f84b5c7e6066ee1cfe433fa5643c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Dec 2013 18:50:25 -0800 Subject: timekeeping: Fix lost updates to tai adjustment Since 48cdc135d4840 (Implement a shadow timekeeper), we have to call timekeeping_update() after any adjustment to the timekeeping structure in order to make sure that any adjustments to the structure persist. Unfortunately, the updates to the tai offset via adjtimex do not trigger this update, causing adjustments to the tai offset to be made and then overwritten by the previous value at the next update_wall_time() call. This patch resolves the issue by calling timekeeping_update() right after setting the tai offset.
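Condensed, the invariant is: any write to the timekeeper under timekeeper_seq must be followed by timekeeping_update() with TK_MIRROR so the shadow copy stays coherent. A simplified sketch of the pattern (locking context abbreviated; see the diff below):

raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&timekeeper_seq);

__timekeeping_set_tai_offset(tk, tai_offset);
/*
 * Mirror into the shadow timekeeper, otherwise the next
 * update_wall_time() overwrites the adjustment.
 */
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);

write_seqcount_end(&timekeeper_seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);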
Cc: Sasha Levin Cc: Thomas Gleixner Cc: Prarit Bhargava Cc: Richard Cochran Cc: Ingo Molnar Cc: stable #3.10+ Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3abf53418b67..7488f0b97dee 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -610,6 +610,7 @@ void timekeeping_set_tai_offset(s32 tai_offset) raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&timekeeper_seq); __timekeeping_set_tai_offset(tk, tai_offset); + timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); clock_was_set(); @@ -1698,7 +1699,7 @@ int do_adjtimex(struct timex *txc) if (tai != orig_tai) { __timekeeping_set_tai_offset(tk, tai); - update_pvclock_gtod(tk, true); + timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); clock_was_set_delayed(); } write_seqcount_end(&timekeeper_seq); -- cgit v1.2.3 From 5258d3f25c76f6ab86e9333abf97a55a877d3870 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Dec 2013 20:07:49 -0800 Subject: timekeeping: Fix potential lost pv notification of time change In 780427f0e11 (Indicate that clock was set in the pvclock gtod notifier), logic was added to pass a CLOCK_WAS_SET notification to the pvclock notifier chain. While that patch added an action flag returned from accumulate_nsecs_to_secs(), it only uses the returned value in one location, and not in the logarithmic accumulation. This means that if a leap second triggered during the logarithmic accumulation (which is most likely where it would happen), the notification that the clock was set would not make it to the pv notifiers. This patch extends logarithmic_accumulation() to pass down that action flag so proper notification will occur. This patch also changes the variable action -> clock_set per Ingo's suggestion. Cc: Sasha Levin Cc: Thomas Gleixner Cc: Ingo Molnar Cc: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: Prarit Bhargava Cc: Richard Cochran Cc: Cc: stable #3.11+ Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7488f0b97dee..051855fe68bc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1256,7 +1256,7 @@ out_adjust: static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) { u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; - unsigned int action = 0; + unsigned int clock_set = 0; while (tk->xtime_nsec >= nsecps) { int leap; @@ -1279,10 +1279,10 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); clock_was_set_delayed(); - action = TK_CLOCK_WAS_SET; + clock_set = TK_CLOCK_WAS_SET; } } - return action; + return clock_set; } /** @@ -1295,7 +1295,8 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) * Returns the unconsumed cycles.
*/ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, - u32 shift) + u32 shift, + unsigned int *clock_set) { cycle_t interval = tk->cycle_interval << shift; u64 raw_nsecs; @@ -1309,7 +1310,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, tk->cycle_last += interval; tk->xtime_nsec += tk->xtime_interval << shift; - accumulate_nsecs_to_secs(tk); + *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ raw_nsecs = (u64)tk->raw_interval << shift; @@ -1367,7 +1368,7 @@ static void update_wall_time(void) struct timekeeper *tk = &shadow_timekeeper; cycle_t offset; int shift = 0, maxshift; - unsigned int action; + unsigned int clock_set = 0; unsigned long flags; raw_spin_lock_irqsave(&timekeeper_lock, flags); @@ -1402,7 +1403,8 @@ static void update_wall_time(void) maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; shift = min(shift, maxshift); while (offset >= tk->cycle_interval) { - offset = logarithmic_accumulation(tk, offset, shift); + offset = logarithmic_accumulation(tk, offset, shift, + &clock_set); if (offset < tk->cycle_interval<cycle_last with the new value */ @@ -1436,7 +1438,7 @@ static void update_wall_time(void) * updating. */ memcpy(real_tk, tk, sizeof(*tk)); - timekeeping_update(real_tk, action); + timekeeping_update(real_tk, clock_set); write_seqcount_end(&timekeeper_seq); out: raw_spin_unlock_irqrestore(&timekeeper_lock, flags); -- cgit v1.2.3 From 6fdda9a9c5db367130cf32df5d6618d08b89f46a Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 10 Dec 2013 17:18:18 -0800 Subject: timekeeping: Avoid possible deadlock from clock_was_set_delayed As part of normal operations, the hrtimer subsystem frequently calls into the timekeeping code, creating a locking order of hrtimer locks -> timekeeping locks. clock_was_set_delayed() was supposed to allow us to avoid deadlocks between the timekeeping and hrtimer subsystems, so that we could notify the hrtimer subsystem the time had changed while holding the timekeeping locks. This was done by scheduling delayed work that would run later once we were out of the timekeeping code. But unfortunately the lock chains are complex enough that in scheduling delayed work, we end up eventually trying to grab an hrtimer lock. Sasha Levin noticed this in testing when the new seqlock lockdep enablement triggered the following (somewhat abbreviated) message: [ 251.100221] ====================================================== [ 251.100221] [ INFO: possible circular locking dependency detected ] [ 251.100221] 3.13.0-rc2-next-20131206-sasha-00005-g8be2375-dirty #4053 Not tainted [ 251.101967] ------------------------------------------------------- [ 251.101967] kworker/10:1/4506 is trying to acquire lock: [ 251.101967] (timekeeper_seq){----..}, at: [] retrigger_next_event+0x56/0x70 [ 251.101967] [ 251.101967] but task is already holding lock: [ 251.101967] (hrtimer_bases.lock#11){-.-...}, at: [] retrigger_next_event+0x3c/0x70 [ 251.101967] [ 251.101967] which lock already depends on the new lock.
[ 251.101967] [ 251.101967] [ 251.101967] the existing dependency chain (in reverse order) is: [ 251.101967] -> #5 (hrtimer_bases.lock#11){-.-...}: [snipped] -> #4 (&rt_b->rt_runtime_lock){-.-...}: [snipped] -> #3 (&rq->lock){-.-.-.}: [snipped] -> #2 (&p->pi_lock){-.-.-.}: [snipped] -> #1 (&(&pool->lock)->rlock){-.-...}: [ 251.101967] [] validate_chain+0x6c3/0x7b0 [ 251.101967] [] __lock_acquire+0x4ad/0x580 [ 251.101967] [] lock_acquire+0x182/0x1d0 [ 251.101967] [] _raw_spin_lock+0x40/0x80 [ 251.101967] [] __queue_work+0x1a9/0x3f0 [ 251.101967] [] queue_work_on+0x98/0x120 [ 251.101967] [] clock_was_set_delayed+0x21/0x30 [ 251.101967] [] do_adjtimex+0x111/0x160 [ 251.101967] [] compat_sys_adjtimex+0x41/0x70 [ 251.101967] [] ia32_sysret+0x0/0x5 [ 251.101967] -> #0 (timekeeper_seq){----..}: [snipped] [ 251.101967] other info that might help us debug this: [ 251.101967] [ 251.101967] Chain exists of: timekeeper_seq --> &rt_b->rt_runtime_lock --> hrtimer_bases.lock#11 [ 251.101967] Possible unsafe locking scenario: [ 251.101967] [ 251.101967] CPU0 CPU1 [ 251.101967] ---- ---- [ 251.101967] lock(hrtimer_bases.lock#11); [ 251.101967] lock(&rt_b->rt_runtime_lock); [ 251.101967] lock(hrtimer_bases.lock#11); [ 251.101967] lock(timekeeper_seq); [ 251.101967] [ 251.101967] *** DEADLOCK *** [ 251.101967] [ 251.101967] 3 locks held by kworker/10:1/4506: [ 251.101967] #0: (events){.+.+.+}, at: [] process_one_work+0x200/0x530 [ 251.101967] #1: (hrtimer_work){+.+...}, at: [] process_one_work+0x200/0x530 [ 251.101967] #2: (hrtimer_bases.lock#11){-.-...}, at: [] retrigger_next_event+0x3c/0x70 [ 251.101967] [ 251.101967] stack backtrace: [ 251.101967] CPU: 10 PID: 4506 Comm: kworker/10:1 Not tainted 3.13.0-rc2-next-20131206-sasha-00005-g8be2375-dirty #4053 [ 251.101967] Workqueue: events clock_was_set_work So the best solution is to avoid calling clock_was_set_delayed() while holding the timekeeping lock, and instead use a flag variable to decide if we should call clock_was_set() once we've released the locks. This works for the case here, where the do_adjtimex() was the deadlock trigger point. Unfortunately, in update_wall_time() we still hold the jiffies lock, which would deadlock with the ipi triggered by clock_was_set(), preventing us from calling it even after we drop the timekeeping lock. So instead call clock_was_set_delayed() at that point. Cc: Thomas Gleixner Cc: Prarit Bhargava Cc: Richard Cochran Cc: Ingo Molnar Cc: Sasha Levin Cc: stable #3.10+ Reported-by: Sasha Levin Tested-by: Sasha Levin Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 051855fe68bc..d62682b6df4a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1278,7 +1278,6 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); - clock_was_set_delayed(); clock_set = TK_CLOCK_WAS_SET; } } @@ -1442,6 +1441,19 @@ static void update_wall_time(void) write_seqcount_end(&timekeeper_seq); out: raw_spin_unlock_irqrestore(&timekeeper_lock, flags); + if (clock_was_set) { + /* + * XXX - I'd rather we just call clock_was_set(), but + * since we're currently holding the jiffies lock, calling + * clock_was_set would trigger an ipi which would then grab + * the jiffies lock and we'd deadlock.
:( + * The right solution should probably be dropping + * the jiffies lock before calling update_wall_time + * but that requires some rework of the tick sched + * code. */ + clock_was_set_delayed(); + } } /** @@ -1702,11 +1714,13 @@ int do_adjtimex(struct timex *txc) if (tai != orig_tai) { __timekeeping_set_tai_offset(tk, tai); timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); - clock_was_set_delayed(); } write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); + if (tai != orig_tai) + clock_was_set(); + ntp_notify_cmos_timer(); return ret; -- cgit v1.2.3 From 47a1b796306356f358e515149d86baf0cc6bf007 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 12 Dec 2013 13:10:55 -0800 Subject: tick/timekeeping: Call update_wall_time outside the jiffies lock Since the xtime lock was split into the timekeeping lock and the jiffies lock, we no longer need to call update_wall_time() while holding the jiffies lock. Thus, this patch splits update_wall_time() out from do_timer(). This allows us to get away from calling clock_was_set_delayed() in update_wall_time() and instead use the standard clock_was_set() call that previously would deadlock, as it causes the jiffies lock to be acquired. Cc: Sasha Levin Cc: Thomas Gleixner Cc: Prarit Bhargava Cc: Richard Cochran Cc: Ingo Molnar Signed-off-by: John Stultz --- kernel/time/tick-common.c | 1 + kernel/time/tick-internal.h | 1 + kernel/time/tick-sched.c | 1 + kernel/time/timekeeping.c | 19 ++++--------------- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 64522ecdfe0e..91c5f27e82a3 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -70,6 +70,7 @@ static void tick_periodic(int cpu) do_timer(1); write_sequnlock(&jiffies_lock); + update_wall_time(); } update_process_times(user_mode(get_irq_regs())); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index e2bced59b6dd..8329669b51ec 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -155,3 +155,4 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) #endif extern void do_timer(unsigned long ticks); +extern void update_wall_time(void); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2afd43fca93b..c58b03d89951 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -86,6 +86,7 @@ static void tick_do_update_jiffies64(ktime_t now) tick_next_period = ktime_add(last_jiffies_update, tick_period); } write_sequnlock(&jiffies_lock); + update_wall_time(); } /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d62682b6df4a..44b7e6bb081b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1360,7 +1360,7 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk) * update_wall_time - Uses the current clocksource to increment the wall time * */ -static void update_wall_time(void) +void update_wall_time(void) { struct clocksource *clock; struct timekeeper *real_tk = &timekeeper; @@ -1441,19 +1441,8 @@ static void update_wall_time(void) write_seqcount_end(&timekeeper_seq); out: raw_spin_unlock_irqrestore(&timekeeper_lock, flags); - if (clock_was_set) { - /* - * XXX - I'd rather we just call clock_was_set(), but - * since we're currently holding the jiffies lock, calling - * clock_was_set would trigger an ipi which would then grab - * the jiffies lock and we'd deadlock.
:( - * The right solution should probably be dropping - * the jiffies lock before calling update_wall_time - * but that requires some rework of the tick sched - * code. */ - clock_was_set_delayed(); - } + if (clock_set) + clock_was_set(); } /** @@ -1598,7 +1587,6 @@ struct timespec get_monotonic_coarse(void) void do_timer(unsigned long ticks) { jiffies_64 += ticks; - update_wall_time(); calc_global_load(ticks); } @@ -1756,4 +1744,5 @@ void xtime_update(unsigned long ticks) write_seqlock(&jiffies_lock); do_timer(ticks); write_sequnlock(&jiffies_lock); + update_wall_time(); } -- cgit v1.2.3 From 04005f6011e3b504cd4d791d9769f7cb9a3b2eae Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 10 Dec 2013 17:13:35 -0800 Subject: timekeeping: Fix CLOCK_TAI timer/nanosleep delays A think-o in the calculation of the monotonic -> tai time offset results in CLOCK_TAI timers and nanosleeps expiring late (the latency is ~2x the tai offset). Fix this by adding the tai offset to the realtime offset instead of subtracting it. Cc: Sasha Levin Cc: Thomas Gleixner Cc: Prarit Bhargava Cc: Richard Cochran Cc: Ingo Molnar Cc: stable #3.10+ Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 44b7e6bb081b..3f6a827680fa 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -77,7 +77,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) tk->wall_to_monotonic = wtm; set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec); tk->offs_real = timespec_to_ktime(tmp); - tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tk->tai_offset, 0)); + tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)); } static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t) @@ -595,7 +595,7 @@ s32 timekeeping_get_tai_offset(void) static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) { tk->tai_offset = tai_offset; - tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tai_offset, 0)); + tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0)); } /** -- cgit v1.2.3 From 330a1617b0a6268d427aa5922c94d082b1d3e96d Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Dec 2013 19:10:36 -0800 Subject: timekeeping: Fix missing timekeeping_update in suspend path Since 48cdc135d4840 (Implement a shadow timekeeper), we have to call timekeeping_update() after any adjustment to the timekeeping structure in order to make sure that any adjustments to the structure persist. In the timekeeping suspend path, we update the timekeeper structure, so we should be sure to update the shadow-timekeeper before releasing the timekeeping locks. Currently this isn't done. In most cases, the next time-related code to run would be timekeeping_resume, which does update the shadow-timekeeper, but in an abundance of caution, this patch adds the call to timekeeping_update() in the suspend path.
Cc: Sasha Levin Cc: Thomas Gleixner Cc: Prarit Bhargava Cc: Richard Cochran Cc: Ingo Molnar Cc: stable #3.10+ Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3f6a827680fa..2793c4382529 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1024,6 +1024,8 @@ static int timekeeping_suspend(void) timekeeping_suspend_time = timespec_add(timekeeping_suspend_time, delta_delta); } + + timekeeping_update(tk, TK_MIRROR); write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); -- cgit v1.2.3 From d26e4fe0dbe95778b9dbe80b6aa884d71fb6f459 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 28 Nov 2013 16:28:55 +0800 Subject: timekeeper: fix comment typo for tk_setup_internals() Fix trivial comment typo for tk_setup_internals(). Signed-off-by: Yijing Wang Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2793c4382529..3ff30640fc9d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -90,8 +90,9 @@ static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t) } /** - * timekeeper_setup_internals - Set up internals to use clocksource clock. + * tk_setup_internals - Set up internals to use clocksource clock. * + * @tk: The target timekeeper to setup. * @clock: Pointer to clocksource. * * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment -- cgit v1.2.3 From d5a1c7e3fc38d9c7d629e1e47f32f863acbdec3d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 20 Jul 2013 19:00:23 +0200 Subject: rtc-cmos: Add an alarm disable quirk 41c7f7424259f ("rtc: Disable the alarm in the hardware (v2)") added the functionality to disable the RTC wake alarm when shutting down the box. However, there are at least two b0rked BIOSes we know about: https://bugzilla.novell.com/show_bug.cgi?id=812592 https://bugzilla.novell.com/show_bug.cgi?id=805740 where, when wakeup alarm is enabled in the BIOS, the machine reboots automatically right after shutdown, regardless of what wakeup time is programmed. Bisecting the issue led to this patch, so disable its functionality with a DMI quirk only for those boxes. Cc: Brecht Machiels Cc: Thomas Gleixner Cc: John Stultz Cc: Rabin Vincent Cc: stable Signed-off-by: Borislav Petkov [jstultz: Changed variable name for clarity, added extra dmi entry] Tested-by: Brecht Machiels Tested-by: Borislav Petkov Signed-off-by: John Stultz --- drivers/rtc/rtc-cmos.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index f14876256a4a..a2325bc5e497 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -34,11 +34,11 @@ #include #include #include -#include #include #include #include #include +#include /* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */ #include @@ -377,6 +377,51 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) return 0; } +/* + * Do not disable RTC alarm on shutdown - workaround for b0rked BIOSes. + */ +static bool alarm_disable_quirk; + +static int __init set_alarm_disable_quirk(const struct dmi_system_id *id) +{ + alarm_disable_quirk = true; + pr_info("rtc-cmos: BIOS has alarm-disable quirk. 
"); + pr_info("RTC alarms disabled\n"); + return 0; +} + +static const struct dmi_system_id rtc_quirks[] __initconst = { + /* https://bugzilla.novell.com/show_bug.cgi?id=805740 */ + { + .callback = set_alarm_disable_quirk, + .ident = "IBM Truman", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "4852570"), + }, + }, + /* https://bugzilla.novell.com/show_bug.cgi?id=812592 */ + { + .callback = set_alarm_disable_quirk, + .ident = "Gigabyte GA-990XA-UD3", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "Gigabyte Technology Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "GA-990XA-UD3"), + }, + }, + /* http://permalink.gmane.org/gmane.linux.kernel/1604474 */ + { + .callback = set_alarm_disable_quirk, + .ident = "Toshiba Satellite L300", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "Satellite L300"), + }, + }, + {} +}; + static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled) { struct cmos_rtc *cmos = dev_get_drvdata(dev); @@ -385,6 +430,9 @@ static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled) if (!is_valid_irq(cmos->irq)) return -EINVAL; + if (alarm_disable_quirk) + return 0; + spin_lock_irqsave(&rtc_lock, flags); if (enabled) @@ -1157,6 +1205,8 @@ static int __init cmos_init(void) platform_driver_registered = true; } + dmi_check_system(rtc_quirks); + if (retval == 0) return 0; -- cgit v1.2.3 From 38aef31ce7773624c8f09ff58c4c27b3b955faaf Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 23 Dec 2013 12:53:22 -0800 Subject: timekeeping: Remove comment that's mostly out of date Prior to 92bb1fcf57a0c2e45f7e67fbf0a8ed475a749236 (Only do nanosecond rounding on GENERIC_TIME_VSYSCALL_OLD systems), the comment here was accuate, but now we can mostly avoid the extra rounding which causes the unlikey to be actually likely here. So remove the out of date comment. Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3ff30640fc9d..abfa4e86ac54 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1134,16 +1134,6 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * we can adjust by 1. */ error >>= 2; - /* - * XXX - In update_wall_time, we round up to the next - * nanosecond, and store the amount rounded up into - * the error. This causes the likely below to be unlikely. - * - * The proper fix is to avoid rounding up by using - * the high precision tk->xtime_nsec instead of - * xtime.tv_nsec everywhere. Fixing this will take some - * time. - */ if (likely(error <= interval)) adj = 1; else -- cgit v1.2.3 From 00e2bcd6d35f59fce7fa0e76e24d08f74c6a8506 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 16 Jan 2014 17:38:06 -0800 Subject: clocksource: Timer-sun5i: Switch to sched_clock_register() The 32-bit sched_clock() interface supports 64 bits since 3.13-rc1. Upgrade to the 64-bit function to allow us to remove the 32-bit registration interface. 
Signed-off-by: Stephen Boyd Acked-by: Daniel Lezcano Cc: Maxime Ripard Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1389922686-6249-1-git-send-email-sboyd@codeaurora.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-sun5i.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index bddc52233d2a..deebcd6469fc 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -136,7 +136,7 @@ static struct irqaction sun5i_timer_irq = { .dev_id = &sun5i_clockevent, }; -static u32 sun5i_timer_sched_read(void) +static u64 sun5i_timer_sched_read(void) { return ~readl(timer_base + TIMER_CNTVAL_LO_REG(1)); } @@ -166,7 +166,7 @@ static void __init sun5i_timer_init(struct device_node *node) writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, timer_base + TIMER_CTL_REG(1)); - setup_sched_clock(sun5i_timer_sched_read, 32, rate); + sched_clock_register(sun5i_timer_sched_read, 32, rate); clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name, rate, 340, 32, clocksource_mmio_readl_down); -- cgit v1.2.3