From 99ee5315dac6211e972fa3f23bcc9a0343ff58c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Apr 2011 14:16:42 +0200 Subject: timerfd: Allow timers to be cancelled when clock was set Some applications must be aware of clock realtime being set backward. A simple example is a clock applet which arms a timer for the next minute display. If clock realtime is set backward then the applet displays a stale time for the amount of time which the clock was set backwards. Due to that applications poll the time because we don't have an interface. Extend the timerfd interface by adding a flag which puts the timer onto a different internal realtime clock. All timers on this clock are expired whenever the clock was set. The timerfd core records the monotonic offset when the timer is created. When the timer is armed, then the current offset is compared to the previous recorded offset. When it has changed, then timerfd_settime returns -ECANCELED. When a timer is read the offset is compared and if it changed -ECANCELED returned to user space. Periodic timers are not rearmed in the cancelation case. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Chris Friesen Tested-by: Kay Sievers Cc: "Kirill A. Shutemov" Cc: Peter Zijlstra Cc: Davide Libenzi Reviewed-by: Alexander Shishkin Link: http://lkml.kernel.org/r/%3Calpine.LFD.2.02.1104271359580.3323%40ionos%3E Signed-off-by: Thomas Gleixner --- fs/timerfd.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/timerfd.c b/fs/timerfd.c index 8c4fc1425b3e..7e14c9e7c4ee 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -26,10 +26,12 @@ struct timerfd_ctx { struct hrtimer tmr; ktime_t tintv; + ktime_t moffs; wait_queue_head_t wqh; u64 ticks; int expired; int clockid; + bool might_cancel; }; /* @@ -59,24 +61,52 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; } -static void timerfd_setup(struct timerfd_ctx *ctx, int flags, - const struct itimerspec *ktmr) +static bool timerfd_canceled(struct timerfd_ctx *ctx) +{ + ktime_t moffs; + + if (!ctx->might_cancel) + return false; + + moffs = ktime_get_monotonic_offset(); + + if (moffs.tv64 == ctx->moffs.tv64) + return false; + + ctx->moffs = moffs; + return true; +} + +static int timerfd_setup(struct timerfd_ctx *ctx, int flags, + const struct itimerspec *ktmr) { enum hrtimer_mode htmode; ktime_t texp; + int clockid = ctx->clockid; htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; + ctx->might_cancel = false; + if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME && + (flags & TFD_TIMER_CANCELON_SET)) { + clockid = CLOCK_REALTIME_COS; + ctx->might_cancel = true; + } + texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; ctx->tintv = timespec_to_ktime(ktmr->it_interval); - hrtimer_init(&ctx->tmr, ctx->clockid, htmode); + hrtimer_init(&ctx->tmr, clockid, htmode); hrtimer_set_expires(&ctx->tmr, texp); ctx->tmr.function = timerfd_tmrproc; - if (texp.tv64 != 0) + if (texp.tv64 != 0) { hrtimer_start(&ctx->tmr, texp, htmode); + if (timerfd_canceled(ctx)) + return -ECANCELED; + } + return 0; } static int timerfd_release(struct inode *inode, struct file *file) @@ -118,8 +148,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); + if (ctx->ticks) { ticks = ctx->ticks; + + /* + * If clock has changed, we do not care about the + * ticks and we do not rearm the timer. Userspace must + * reevaluate anyway. + */ + if (timerfd_canceled(ctx)) { + ticks = 0; + ctx->expired = 0; + res = -ECANCELED; + } + if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that @@ -183,6 +226,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) init_waitqueue_head(&ctx->wqh); ctx->clockid = clockid; hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); + ctx->moffs = ktime_get_monotonic_offset(); ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); @@ -199,6 +243,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, struct file *file; struct timerfd_ctx *ctx; struct itimerspec ktmr, kotmr; + int ret; if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) return -EFAULT; @@ -240,14 +285,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, /* * Re-program the timer to the new value ... */ - timerfd_setup(ctx, flags, &ktmr); + ret = timerfd_setup(ctx, flags, &ktmr); spin_unlock_irq(&ctx->wqh.lock); fput(file); if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) return -EFAULT; - return 0; + return ret; } SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) -- cgit v1.2.3 From 9ec2690758a5467f24beb301cca5098078073bba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 16:18:50 +0200 Subject: timerfd: Manage cancelable timers in timerfd Peter is concerned about the extra scan of CLOCK_REALTIME_COS in the timer interrupt. Yes, I did not think about it, because the solution was so elegant. I didn't like the extra list in timerfd when it was proposed some time ago, but with a rcu based list the list walk it's less horrible than the original global lock, which was held over the list iteration. Requested-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- fs/timerfd.c | 105 ++++++++++++++++++++++++++++++++++-------------- include/linux/hrtimer.h | 6 ++- include/linux/time.h | 6 --- include/linux/timerfd.h | 4 +- kernel/hrtimer.c | 94 +++++++++++++++---------------------------- 5 files changed, 113 insertions(+), 102 deletions(-) (limited to 'fs') diff --git a/fs/timerfd.c b/fs/timerfd.c index 7e14c9e7c4ee..f67acbdda5e8 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -22,6 +22,7 @@ #include #include #include +#include struct timerfd_ctx { struct hrtimer tmr; @@ -31,9 +32,14 @@ struct timerfd_ctx { u64 ticks; int expired; int clockid; + struct rcu_head rcu; + struct list_head clist; bool might_cancel; }; +static LIST_HEAD(cancel_list); +static DEFINE_SPINLOCK(cancel_lock); + /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, @@ -53,28 +59,69 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) return HRTIMER_NORESTART; } -static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +/* + * Called when the clock was set to cancel the timers in the cancel + * list. + */ +void timerfd_clock_was_set(void) { - ktime_t remaining; + ktime_t moffs = ktime_get_monotonic_offset(); + struct timerfd_ctx *ctx; + unsigned long flags; - remaining = hrtimer_expires_remaining(&ctx->tmr); - return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &cancel_list, clist) { + if (!ctx->might_cancel) + continue; + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->moffs.tv64 != moffs.tv64) { + ctx->moffs.tv64 = KTIME_MAX; + wake_up_locked(&ctx->wqh); + } + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + } + rcu_read_unlock(); } -static bool timerfd_canceled(struct timerfd_ctx *ctx) +static void timerfd_remove_cancel(struct timerfd_ctx *ctx) { - ktime_t moffs; + if (ctx->might_cancel) { + ctx->might_cancel = false; + spin_lock(&cancel_lock); + list_del_rcu(&ctx->clist); + spin_unlock(&cancel_lock); + } +} - if (!ctx->might_cancel) +static bool timerfd_canceled(struct timerfd_ctx *ctx) +{ + if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) return false; + ctx->moffs = ktime_get_monotonic_offset(); + return true; +} - moffs = ktime_get_monotonic_offset(); +static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) +{ + if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && + (flags & TFD_TIMER_CANCEL_ON_SET)) { + if (!ctx->might_cancel) { + ctx->might_cancel = true; + spin_lock(&cancel_lock); + list_add_rcu(&ctx->clist, &cancel_list); + spin_unlock(&cancel_lock); + } + } else if (ctx->might_cancel) { + timerfd_remove_cancel(ctx); + } +} - if (moffs.tv64 == ctx->moffs.tv64) - return false; +static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +{ + ktime_t remaining; - ctx->moffs = moffs; - return true; + remaining = hrtimer_expires_remaining(&ctx->tmr); + return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; } static int timerfd_setup(struct timerfd_ctx *ctx, int flags, @@ -87,13 +134,6 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; - ctx->might_cancel = false; - if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME && - (flags & TFD_TIMER_CANCELON_SET)) { - clockid = CLOCK_REALTIME_COS; - ctx->might_cancel = true; - } - texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; @@ -113,8 +153,9 @@ static int timerfd_release(struct inode *inode, struct file *file) { struct timerfd_ctx *ctx = file->private_data; + timerfd_remove_cancel(ctx); hrtimer_cancel(&ctx->tmr); - kfree(ctx); + kfree_rcu(ctx, rcu); return 0; } @@ -149,20 +190,20 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); + /* + * If clock has changed, we do not care about the + * ticks and we do not rearm the timer. Userspace must + * reevaluate anyway. + */ + if (timerfd_canceled(ctx)) { + ctx->ticks = 0; + ctx->expired = 0; + res = -ECANCELED; + } + if (ctx->ticks) { ticks = ctx->ticks; - /* - * If clock has changed, we do not care about the - * ticks and we do not rearm the timer. Userspace must - * reevaluate anyway. - */ - if (timerfd_canceled(ctx)) { - ticks = 0; - ctx->expired = 0; - res = -ECANCELED; - } - if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that @@ -258,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, return PTR_ERR(file); ctx = file->private_data; + timerfd_setup_cancel(ctx, flags); + /* * We need to stop the existing timer before reprogramming * it to the new values. diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index eda4ccde0730..925c8c01db7b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -155,7 +155,6 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_BOOTTIME, - HRTIMER_BASE_REALTIME_COS, HRTIMER_MAX_CLOCK_BASES, }; @@ -306,6 +305,11 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) #endif extern void clock_was_set(void); +#ifdef CONFIG_TIMERFD +extern void timerfd_clock_was_set(void); +#else +static inline void timerfd_clock_was_set(void) { } +#endif extern void hrtimers_resume(void); extern ktime_t ktime_get(void); diff --git a/include/linux/time.h b/include/linux/time.h index a9242773eb24..b3061782dec3 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -302,12 +302,6 @@ struct itimerval { * The IDs of various hardware clocks: */ #define CLOCK_SGI_CYCLE 10 - -#ifdef __KERNEL__ -/* This clock is not exposed to user space */ -#define CLOCK_REALTIME_COS 15 -#endif - #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h index e9571fc8f1a0..d3b57fa12225 100644 --- a/include/linux/timerfd.h +++ b/include/linux/timerfd.h @@ -19,7 +19,7 @@ * shared O_* flags. */ #define TFD_TIMER_ABSTIME (1 << 0) -#define TFD_TIMER_CANCELON_SET (1 << 1) +#define TFD_TIMER_CANCEL_ON_SET (1 << 1) #define TFD_CLOEXEC O_CLOEXEC #define TFD_NONBLOCK O_NONBLOCK @@ -27,6 +27,6 @@ /* Flags for timerfd_create. */ #define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS /* Flags for timerfd_settime. */ -#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET) +#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET) #endif /* _LINUX_TIMERFD_H */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index eabcbd781433..26dd32f9f6b2 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -78,11 +78,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, - { - .index = CLOCK_REALTIME_COS, - .get_time = &ktime_get_real, - .resolution = KTIME_LOW_RES, - }, } }; @@ -90,7 +85,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, - [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS, }; static inline int hrtimer_clockid_to_base(clockid_t clock_id) @@ -116,7 +110,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; - base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim; } /* @@ -486,8 +479,6 @@ static inline void debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } -static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base); - /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -663,7 +654,33 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } -static void retrigger_next_event(void *arg); +/* + * Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() + */ +static void retrigger_next_event(void *arg) +{ + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + struct timespec realtime_offset, xtim, wtm, sleep; + + if (!hrtimer_hres_active()) + return; + + /* Optimized out for !HIGH_RES */ + get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); + + /* Adjust CLOCK_REALTIME offset */ + raw_spin_lock(&base->lock); + base->clock_base[HRTIMER_BASE_REALTIME].offset = + timespec_to_ktime(realtime_offset); + base->clock_base[HRTIMER_BASE_BOOTTIME].offset = + timespec_to_ktime(sleep); + + hrtimer_force_reprogram(base, 0); + raw_spin_unlock(&base->lock); +} /* * Switch to high resolution mode @@ -711,45 +728,10 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } +static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ -/* - * Retrigger next event is called after clock was set - * - * Called with interrupts disabled via on_each_cpu() - */ -static void retrigger_next_event(void *arg) -{ - struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); - struct timespec realtime_offset, xtim, wtm, sleep; - - if (!hrtimer_hres_active()) { - raw_spin_lock(&base->lock); - hrtimer_expire_cancelable(base); - raw_spin_unlock(&base->lock); - return; - } - - /* Optimized out for !HIGH_RES */ - get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - /* Adjust CLOCK_REALTIME offset */ - raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - base->clock_base[HRTIMER_BASE_REALTIME_COS].offset = - timespec_to_ktime(realtime_offset); - - hrtimer_expire_cancelable(base); - - hrtimer_force_reprogram(base, 0); - raw_spin_unlock(&base->lock); -} - /* * Clock realtime was set * @@ -763,8 +745,11 @@ static void retrigger_next_event(void *arg) */ void clock_was_set(void) { +#ifdef CONFIG_HIGHRES_TIMERS /* Retrigger the CPU local events everywhere */ on_each_cpu(retrigger_next_event, NULL, 1); +#endif + timerfd_clock_was_set(); } /* @@ -777,6 +762,7 @@ void hrtimers_resume(void) KERN_INFO "hrtimers_resume() called with IRQs enabled!"); retrigger_next_event(NULL); + timerfd_clock_was_set(); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) @@ -1240,22 +1226,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } -static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base) -{ - struct timerqueue_node *node; - struct hrtimer_clock_base *base; - ktime_t now = ktime_get_real(); - - base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS]; - - while ((node = timerqueue_getnext(&base->active))) { - struct hrtimer *timer; - - timer = container_of(node, struct hrtimer, node); - __run_hrtimer(timer, &now); - } -} - #ifdef CONFIG_HIGH_RES_TIMERS /* -- cgit v1.2.3