From a5dec9f82ab2ae486119f0b0820ea16db3e522c3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Mon, 26 Jul 2021 14:55:08 +0200
Subject: posix-cpu-timers: Assert task sighand is locked while starting cputime counter

Starting the process wide cputime counter needs to be done in the same
sighand locking sequence as actually arming the related timer, otherwise
this races against concurrent timers setting/expiring in the same
threadgroup.

Detecting that the cputime counter is started without holding the sighand
lock is a first step toward debugging such situations.

Suggested-by: Peter Zijlstra (Intel)
Signed-off-by: Frederic Weisbecker
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210726125513.271824-2-frederic@kernel.org
---
 include/linux/sched/signal.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b9126fe06c3f..0310a5add9ab 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -714,6 +714,12 @@ static inline void unlock_task_sighand(struct task_struct *task,
 	spin_unlock_irqrestore(&task->sighand->siglock, *flags);
 }
 
+#ifdef CONFIG_LOCKDEP
+extern void lockdep_assert_task_sighand_held(struct task_struct *task);
+#else
+static inline void lockdep_assert_task_sighand_held(struct task_struct *task) { }
+#endif
+
 static inline unsigned long task_rlimit(const struct task_struct *task,
 		unsigned int limit)
 {
-- cgit v1.2.3

From 175cc3ab28e3509ddee8de4f164b563d99daa570 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Mon, 26 Jul 2021 14:55:09 +0200
Subject: posix-cpu-timers: Force next_expiration recalc after timer deletion

A timer deletion only dequeues the timer but it doesn't shut down the
related costly process wide cputimer counter and the tick dependency.

The following code snippet keeps this overhead around for one week after
the timer deletion:

	void trigger_process_counter(void)
	{
		timer_t id;
		struct itimerspec val = { };

		val.it_value.tv_sec = 604800;
		timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
		timer_settime(id, 0, &val, NULL);
		timer_delete(id);
	}

Make sure the next target's tick recalculates the nearest expiration and
clears the process wide counter and tick dependency if necessary.
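
For reference, the snippet above is only a fragment; a self-contained
reproducer could look like the sketch below. The main() wrapper, headers,
error checks and the final pause() are illustrative additions that are not
part of the original snippet, assuming the POSIX timer API linked with -lrt:

	#include <stdlib.h>
	#include <time.h>
	#include <unistd.h>

	int main(void)
	{
		timer_t id;
		struct itimerspec val = { };

		val.it_value.tv_sec = 604800;	/* one week, relative */

		/* Process wide clock: arming starts the per-process cputime counter */
		if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
			exit(1);
		if (timer_settime(id, 0, &val, NULL))
			exit(1);
		if (timer_delete(id))
			exit(1);

		/* Without the fix, the counter and tick dependency keep running here */
		pause();
		return 0;
	}
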
Signed-off-by: Frederic Weisbecker
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210726125513.271824-3-frederic@kernel.org
---
 include/linux/posix-timers.h   |  4 +++-
 kernel/time/posix-cpu-timers.c | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 896c16d2c5fb..4cf1fbe8d1bc 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -82,12 +82,14 @@ static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
 	return timerqueue_add(head, &ctmr->node);
 }
 
-static inline void cpu_timer_dequeue(struct cpu_timer *ctmr)
+static inline bool cpu_timer_dequeue(struct cpu_timer *ctmr)
 {
 	if (ctmr->head) {
 		timerqueue_del(ctmr->head, &ctmr->node);
 		ctmr->head = NULL;
+		return true;
 	}
+	return false;
 }
 
 static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr)
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 4693d3c71e7e..61c78b62fe6a 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -407,6 +407,37 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
 	return 0;
 }
 
+/*
+ * Dequeue the timer and reset the base if it was its earliest expiration.
+ * It makes sure the next tick recalculates the base next expiration so we
+ * don't keep the costly process wide cputime counter around for a random
+ * amount of time, along with the tick dependency.
+ *
+ * If another timer gets queued between this and the next tick, its
+ * expiration will update the base next event if necessary on the next
+ * tick.
+ */
+static void disarm_timer(struct k_itimer *timer, struct task_struct *p)
+{
+	struct cpu_timer *ctmr = &timer->it.cpu;
+	struct posix_cputimer_base *base;
+	int clkidx;
+
+	if (!cpu_timer_dequeue(ctmr))
+		return;
+
+	clkidx = CPUCLOCK_WHICH(timer->it_clock);
+
+	if (CPUCLOCK_PERTHREAD(timer->it_clock))
+		base = p->posix_cputimers.bases + clkidx;
+	else
+		base = p->signal->posix_cputimers.bases + clkidx;
+
+	if (cpu_timer_getexpires(ctmr) == base->nextevt)
+		base->nextevt = 0;
+}
+
+
 /*
  * Clean up a CPU-clock timer that is about to be destroyed.
  * This is called from timer deletion with the timer already locked.
@@ -441,7 +472,7 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
 		if (timer->it.cpu.firing)
 			ret = TIMER_RETRY;
 		else
-			cpu_timer_dequeue(ctmr);
+			disarm_timer(timer, p);
 
 		unlock_task_sighand(p, &flags);
 	}
-- cgit v1.2.3

From ee375328f579f94251eb66d5dc91aba056019a31 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Mon, 26 Jul 2021 14:55:13 +0200
Subject: posix-cpu-timers: Recalc next expiration when timer_settime() ends up not queueing

There are several scenarios that can result in posix_cpu_timer_set() not
queueing the timer but still leaving the threadgroup cputime counter
running or keeping the tick dependency around for a random amount of time.

1) If timer_settime() is called with a 0 expiration on a timer that is
already disabled, the process wide cputime counter will be started and
won't ever get a chance to be stopped by stop_process_timers() since no
timer is actually armed to be processed.

The following snippet is enough to trigger the issue.

	void trigger_process_counter(void)
	{
		timer_t id;
		struct itimerspec val = { };

		timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
		timer_settime(id, TIMER_ABSTIME, &val, NULL);
		timer_delete(id);
	}

2) If timer_settime() is called with a 0 expiration on a timer that is
already armed, the timer is dequeued but not really disarmed. So the
process wide cputime counter and the tick dependency may still remain
around for a while.

The following code snippet keeps this overhead around for one week after
the timer deletion:

	void trigger_process_counter(void)
	{
		timer_t id;
		struct itimerspec val = { };

		val.it_value.tv_sec = 604800;
		timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
		timer_settime(id, 0, &val, NULL);
		timer_delete(id);
	}

3) If the timer was initially deactivated, this call to timer_settime()
with an early expiration may have started the process wide cputime counter
even though the timer hasn't been queued and armed because it has fired
early and inline within posix_cpu_timer_set() itself. As a result the
process wide cputime counter may never stop until a new timer is ever
armed in the future.

The following code snippet can reproduce this:

	void trigger_process_counter(void)
	{
		timer_t id;
		struct itimerspec val = { };

		signal(SIGALRM, SIG_IGN);
		timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
		val.it_value.tv_nsec = 1;
		timer_settime(id, TIMER_ABSTIME, &val, NULL);
	}

4) If the timer was initially armed with a former expiration value before
this call to timer_settime() and the current call sets an early deadline
that has already expired, the timer fires inline within
posix_cpu_timer_set(). In this case it must have been dequeued before
firing inline with its new expiration value, yet it hasn't been disarmed.
So the process wide cputime counter and the tick dependency may still be
around for a while even after the timer fired.

The following code snippet can reproduce this:

	void trigger_process_counter(void)
	{
		timer_t id;
		struct itimerspec val = { };

		signal(SIGALRM, SIG_IGN);
		timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
		val.it_value.tv_sec = 100;
		timer_settime(id, TIMER_ABSTIME, &val, NULL);
		val.it_value.tv_sec = 0;
		val.it_value.tv_nsec = 1;
		timer_settime(id, TIMER_ABSTIME, &val, NULL);
	}

Fix all these issues by triggering the related base next expiration
recalculation on the next tick. This also implies re-evaluating the need
to keep around the process wide cputime counter and the tick dependency,
in a similar fashion to disarm_timer().

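For completeness, scenario 3 can be exercised as a standalone program.
This is only a sketch: the main() wrapper, headers, -lrt linkage and the
trailing pause() are illustrative additions on top of the snippet quoted
above:

	#include <signal.h>
	#include <time.h>
	#include <unistd.h>

	int main(void)
	{
		timer_t id;
		struct itimerspec val = { };

		/* The default notification signal is ignored, only the
		   arming side effect matters here */
		signal(SIGALRM, SIG_IGN);

		timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);

		/* 1 ns absolute expiration: already in the past, so the
		   timer fires inline within timer_settime() */
		val.it_value.tv_nsec = 1;
		timer_settime(id, TIMER_ABSTIME, &val, NULL);

		/* Without the fix, the process wide cputime counter and the
		   tick dependency stay around while we sleep here */
		pause();
		return 0;
	}
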
Suggested-by: Peter Zijlstra (Intel)
Signed-off-by: Frederic Weisbecker
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210726125513.271824-7-frederic@kernel.org
---
 include/linux/posix-timers.h   |  7 ++++++-
 kernel/time/posix-cpu-timers.c | 41 +++++++++++++++++++++++++++++++++++------
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 4cf1fbe8d1bc..00fef0064355 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -82,9 +82,14 @@ static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
 	return timerqueue_add(head, &ctmr->node);
 }
 
+static inline bool cpu_timer_queued(struct cpu_timer *ctmr)
+{
+	return !!ctmr->head;
+}
+
 static inline bool cpu_timer_dequeue(struct cpu_timer *ctmr)
 {
-	if (ctmr->head) {
+	if (cpu_timer_queued(ctmr)) {
 		timerqueue_del(ctmr->head, &ctmr->node);
 		ctmr->head = NULL;
 		return true;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 0d918117a3e0..ee736861b18f 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -418,6 +418,20 @@ static struct posix_cputimer_base *timer_base(struct k_itimer *timer,
 	return tsk->signal->posix_cputimers.bases + clkidx;
 }
 
+/*
+ * Force recalculating the base earliest expiration on the next tick.
+ * This will also re-evaluate the need to keep around the process wide
+ * cputime counter and tick dependency and eventually shut these down
+ * if necessary.
+ */
+static void trigger_base_recalc_expires(struct k_itimer *timer,
+					struct task_struct *tsk)
+{
+	struct posix_cputimer_base *base = timer_base(timer, tsk);
+
+	base->nextevt = 0;
+}
+
 /*
  * Dequeue the timer and reset the base if it was its earliest expiration.
  * It makes sure the next tick recalculates the base next expiration so we
@@ -438,7 +452,7 @@ static void disarm_timer(struct k_itimer *timer, struct task_struct *p)
 
 	base = timer_base(timer, p);
 	if (cpu_timer_getexpires(ctmr) == base->nextevt)
-		base->nextevt = 0;
+		trigger_base_recalc_expires(timer, p);
 }
 
 
@@ -734,13 +748,28 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 	timer->it_overrun_last = 0;
 	timer->it_overrun = -1;
 
-	if (new_expires != 0 && !(val < new_expires)) {
+	if (val >= new_expires) {
+		if (new_expires != 0) {
+			/*
+			 * The designated time already passed, so we notify
+			 * immediately, even if the thread never runs to
+			 * accumulate more time on this clock.
+			 */
+			cpu_timer_fire(timer);
+		}
+
 		/*
-		 * The designated time already passed, so we notify
-		 * immediately, even if the thread never runs to
-		 * accumulate more time on this clock.
+		 * Make sure we don't keep around the process wide cputime
+		 * counter or the tick dependency if they are not necessary.
 		 */
-		cpu_timer_fire(timer);
+		sighand = lock_task_sighand(p, &flags);
+		if (!sighand)
+			goto out;
+
+		if (!cpu_timer_queued(ctmr))
+			trigger_base_recalc_expires(timer, p);
+
+		unlock_task_sighand(p, &flags);
 	}
 out:
 	rcu_read_unlock();
-- cgit v1.2.3

From 8c3b5e6ec0fee18bc2ce38d1dfe913413205f908 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 13 Jul 2021 15:39:48 +0200
Subject: hrtimer: Ensure timerfd notification for HIGHRES=n

If high resolution timers are disabled, the timerfd notification about a
clock-was-set event does not happen for all cases which use
clock_was_set_delayed(), because that's a NOP for HIGHRES=n, which is
wrong.

Make clock_was_set_delayed() unconditionally available to fix that.

Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20210713135158.196661266@linutronix.de
---
 include/linux/hrtimer.h     |  5 -----
 kernel/time/hrtimer.c       | 32 ++++++++++++++++----------------
 kernel/time/tick-internal.h |  3 +++
 3 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index bb5e7b0a4274..77295af72426 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -318,16 +318,12 @@ struct clock_event_device;
 
 extern void hrtimer_interrupt(struct clock_event_device *dev);
 
-extern void clock_was_set_delayed(void);
-
 extern unsigned int hrtimer_resolution;
 
 #else
 
 #define hrtimer_resolution	(unsigned int)LOW_RES_NSEC
 
-static inline void clock_was_set_delayed(void) { }
-
 #endif
 
 static inline ktime_t
@@ -351,7 +347,6 @@ hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
 			    timer->base->get_time());
 }
 
-extern void clock_was_set(void);
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
 #else
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5f7c46598d9f..7ebf642b53f9 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -777,22 +777,6 @@ static void hrtimer_switch_to_hres(void)
 	retrigger_next_event(NULL);
 }
 
-static void clock_was_set_work(struct work_struct *work)
-{
-	clock_was_set();
-}
-
-static DECLARE_WORK(hrtimer_work, clock_was_set_work);
-
-/*
- * Called from timekeeping and resume code to reprogram the hrtimer
- * interrupt device on all cpus.
- */
-void clock_was_set_delayed(void)
-{
-	schedule_work(&hrtimer_work);
-}
-
 #else
 
 static inline int hrtimer_is_hres_enabled(void) { return 0; }
@@ -877,6 +861,22 @@ void clock_was_set(void)
 	timerfd_clock_was_set();
 }
 
+static void clock_was_set_work(struct work_struct *work)
+{
+	clock_was_set();
+}
+
+static DECLARE_WORK(hrtimer_work, clock_was_set_work);
+
+/*
+ * Called from timekeeping and resume code to reprogram the hrtimer
+ * interrupt device on all cpus and to notify timerfd.
+ */
+void clock_was_set_delayed(void)
+{
+	schedule_work(&hrtimer_work);
+}
+
 /*
  * During resume we might have to reprogram the high resolution timer
  * interrupt on all online CPUs. However, all other CPUs will be
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 6a742a29e545..cd610faa2523 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -165,3 +165,6 @@ DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
 
 extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
 void timer_clear_idle(void);
+
+void clock_was_set(void);
+void clock_was_set_delayed(void);
-- cgit v1.2.3

From 66f7b0c8aadd2785fc29f2c71477ebc16f4e38cc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 13 Jul 2021 15:39:50 +0200
Subject: timerfd: Provide timerfd_resume()

Resuming timekeeping is a clock-was-set event and uses the clock-was-set
notification mechanism. This is in the way of making the clock-was-set
update for hrtimers selective so unnecessary IPIs are avoided when a CPU
base does not have timers queued which are affected by the clock setting.

Provide a separate timerfd_resume() interface so the resume logic and the
clock-was-set mechanism can be disentangled in the core code.

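The user-visible behaviour that depends on these notifications is timerfd's
cancel-on-set mechanism: a read() on an absolute CLOCK_REALTIME timerfd armed
with TFD_TIMER_CANCEL_ON_SET fails with ECANCELED once the clock is set, an
event which a resume is treated as equivalent to. A rough user-space
illustration, not part of the patch and with error handling trimmed:

	#include <errno.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/timerfd.h>
	#include <time.h>
	#include <unistd.h>

	int main(void)
	{
		struct itimerspec its = { };
		uint64_t ticks;
		int fd = timerfd_create(CLOCK_REALTIME, 0);

		/* Absolute timer one hour out, cancelled when the clock is set */
		its.it_value.tv_sec = time(NULL) + 3600;
		timerfd_settime(fd, TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET,
				&its, NULL);

		/* Blocks until expiry, or fails once a clock-was-set event
		   (settimeofday(), resume, ...) reaches timerfd */
		if (read(fd, &ticks, sizeof(ticks)) < 0 && errno == ECANCELED)
			printf("clock was set\n");

		return 0;
	}
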
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20210713135158.395287410@linutronix.de
---
 fs/timerfd.c            | 16 ++++++++++++++++
 include/linux/hrtimer.h |  2 ++
 2 files changed, 18 insertions(+)

diff --git a/fs/timerfd.c b/fs/timerfd.c
index c5509d2448e3..e9c96a0c79f1 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -115,6 +115,22 @@ void timerfd_clock_was_set(void)
 	rcu_read_unlock();
 }
 
+static void timerfd_resume_work(struct work_struct *work)
+{
+	timerfd_clock_was_set();
+}
+
+static DECLARE_WORK(timerfd_work, timerfd_resume_work);
+
+/*
+ * Invoked from timekeeping_resume(). Defer the actual update to work so
+ * timerfd_clock_was_set() runs in task context.
+ */
+void timerfd_resume(void)
+{
+	schedule_work(&timerfd_work);
+}
+
 static void __timerfd_remove_cancel(struct timerfd_ctx *ctx)
 {
 	if (ctx->might_cancel) {
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 77295af72426..253c6e25f331 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -349,8 +349,10 @@ hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
 
 #ifdef CONFIG_TIMERFD
 extern void timerfd_clock_was_set(void);
+extern void timerfd_resume(void);
 #else
 static inline void timerfd_clock_was_set(void) { }
+static inline void timerfd_resume(void) { }
 #endif
 extern void hrtimers_resume(void);
 
-- cgit v1.2.3

From a761a67f591a8c7476c30bb20ed0f09fdfb1a704 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 13 Jul 2021 15:39:51 +0200
Subject: timekeeping: Distangle resume and clock-was-set events

Resuming timekeeping is a clock-was-set event and uses the clock-was-set
notification mechanism. This is in the way of making the clock-was-set
update for hrtimers selective so unnecessary IPIs are avoided when a CPU
base does not have timers queued which are affected by the clock setting.

Disentangle it by invoking hrtimers_resume() on each unfreezing CPU and
invoke the new timerfd_resume() function from timekeeping_resume() which
is the only place where this is needed.

Rename hrtimers_resume() to hrtimers_resume_local() to reflect the change.

With this the clock_was_set*() functions are no longer required to IPI all
CPUs unconditionally and can get some smarts to avoid them.

Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20210713135158.488853478@linutronix.de
---
 include/linux/hrtimer.h     |  1 -
 kernel/time/hrtimer.c       | 15 ++++++---------
 kernel/time/tick-common.c   |  7 +++++++
 kernel/time/tick-internal.h |  2 ++
 kernel/time/timekeeping.c   |  4 +++-
 5 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 253c6e25f331..0ee140176f10 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -354,7 +354,6 @@ extern void timerfd_resume(void);
 static inline void timerfd_clock_was_set(void) { }
 static inline void timerfd_resume(void) { }
 #endif
-extern void hrtimers_resume(void);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 214fd65a9597..68e56f0ecb09 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -900,8 +900,8 @@ static void clock_was_set_work(struct work_struct *work)
 static DECLARE_WORK(hrtimer_work, clock_was_set_work);
 
 /*
- * Called from timekeeping and resume code to reprogram the hrtimer
- * interrupt device on all cpus and to notify timerfd.
+ * Called from timekeeping code to reprogram the hrtimer interrupt device
+ * on all cpus and to notify timerfd.
  */
 void clock_was_set_delayed(void)
 {
@@ -909,18 +909,15 @@ void clock_was_set_delayed(void)
 }
 
 /*
- * During resume we might have to reprogram the high resolution timer
- * interrupt on all online CPUs. However, all other CPUs will be
- * stopped with IRQs interrupts disabled so the clock_was_set() call
- * must be deferred.
+ * Called during resume either directly from via timekeeping_resume()
+ * or in the case of s2idle from tick_unfreeze() to ensure that the
+ * hrtimers are up to date.
  */
-void hrtimers_resume(void)
+void hrtimers_resume_local(void)
 {
 	lockdep_assert_irqs_disabled();
 	/* Retrigger on the local CPU */
 	retrigger_next_event(NULL);
-	/* And schedule a retrigger for all others */
-	clock_was_set_delayed();
 }
 
 /*
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index d663249652ef..46789356f856 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -470,6 +470,13 @@ void tick_resume_local(void)
 		else
 			tick_resume_oneshot();
 	}
+
+	/*
+	 * Ensure that hrtimers are up to date and the clockevents device
+	 * is reprogrammed correctly when high resolution timers are
+	 * enabled.
+	 */
+	hrtimers_resume_local();
 }
 
 /**
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index cd610faa2523..22de98cc6dd8 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -168,3 +168,5 @@ void timer_clear_idle(void);
 
 void clock_was_set(void);
 void clock_was_set_delayed(void);
+
+void hrtimers_resume_local(void);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 8a364aa9881a..c8a9b9e54c9d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1810,8 +1810,10 @@ void timekeeping_resume(void)
 
 	touch_softlockup_watchdog();
 
+	/* Resume the clockevent device(s) and hrtimers */
 	tick_resume();
-	hrtimers_resume();
+	/* Notify timerfd as resume is equivalent to clock_was_set() */
+	timerfd_resume();
 }
 
 int timekeeping_suspend(void)
-- cgit v1.2.3

From f196ae282070d798c9144771db65577910d58566 Mon Sep 17 00:00:00 2001
From: "周琰杰 (Zhou Yanjie)"
Date: Fri, 16 Jul 2021 01:36:45 +0800
Subject: dt-bindings: timer: Add ABIs for new Ingenic SoCs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. Add OST_CLK_EVENT_TIMER for new XBurst®1 SoCs.
2. Add OST_CLK_EVENT_TIMER0 to OST_CLK_EVENT_TIMER15 for new XBurst®2 SoCs.

Signed-off-by: 周琰杰 (Zhou Yanjie)
Acked-by: Rob Herring
Signed-off-by: Daniel Lezcano
Link: https://lore.kernel.org/r/1626370605-120775-1-git-send-email-zhouyanjie@wanyeetech.com
---
 include/dt-bindings/clock/ingenic,sysost.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/include/dt-bindings/clock/ingenic,sysost.h b/include/dt-bindings/clock/ingenic,sysost.h
index 063791b01ab3..d7aa42c08ded 100644
--- a/include/dt-bindings/clock/ingenic,sysost.h
+++ b/include/dt-bindings/clock/ingenic,sysost.h
@@ -13,4 +13,23 @@
 #define OST_CLK_PERCPU_TIMER2	3
 #define OST_CLK_PERCPU_TIMER3	4
 
+#define OST_CLK_EVENT_TIMER	1
+
+#define OST_CLK_EVENT_TIMER0	0
+#define OST_CLK_EVENT_TIMER1	1
+#define OST_CLK_EVENT_TIMER2	2
+#define OST_CLK_EVENT_TIMER3	3
+#define OST_CLK_EVENT_TIMER4	4
+#define OST_CLK_EVENT_TIMER5	5
+#define OST_CLK_EVENT_TIMER6	6
+#define OST_CLK_EVENT_TIMER7	7
+#define OST_CLK_EVENT_TIMER8	8
+#define OST_CLK_EVENT_TIMER9	9
+#define OST_CLK_EVENT_TIMER10	10
+#define OST_CLK_EVENT_TIMER11	11
+#define OST_CLK_EVENT_TIMER12	12
+#define OST_CLK_EVENT_TIMER13	13
+#define OST_CLK_EVENT_TIMER14	14
+#define OST_CLK_EVENT_TIMER15	15
+
 #endif /* __DT_BINDINGS_CLOCK_INGENIC_OST_H__ */
-- cgit v1.2.3