summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Thomas Gleixner <tglx@linutronix.de> 2011-05-20 16:18:50 +0200
committer: Thomas Gleixner <tglx@linutronix.de> 2011-05-23 13:59:53 +0200
commit: 9ec2690758a5467f24beb301cca5098078073bba (patch)
tree: e5bc78f690d12635a56460ea6f54b49318221dc8
parent: 250f972d85effad5b6e10da4bbd877e6a4b503b6 (diff)
download: linux-9ec2690758a5467f24beb301cca5098078073bba.tar.bz2
timerfd: Manage cancelable timers in timerfd
Peter is concerned about the extra scan of CLOCK_REALTIME_COS in the timer interrupt. Yes, I did not think about it, because the solution was so elegant. I didn't like the extra list in timerfd when it was proposed some time ago, but with an RCU-based list, the list walk is less horrible than the original global lock, which was held over the list iteration. Requested-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Peter Zijlstra <peterz@infradead.org>
-rw-r--r-- fs/timerfd.c 105
-rw-r--r-- include/linux/hrtimer.h 6
-rw-r--r-- include/linux/time.h 6
-rw-r--r-- include/linux/timerfd.h 4
-rw-r--r-- kernel/hrtimer.c 94
5 files changed, 113 insertions, 102 deletions
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 7e14c9e7c4ee..f67acbdda5e8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,6 +22,7 @@
#include <linux/anon_inodes.h>
#include <linux/timerfd.h>
#include <linux/syscalls.h>
+#include <linux/rcupdate.h>
struct timerfd_ctx {
struct hrtimer tmr;
@@ -31,9 +32,14 @@ struct timerfd_ctx {
u64 ticks;
int expired;
int clockid;
+ struct rcu_head rcu;
+ struct list_head clist;
bool might_cancel;
};
+static LIST_HEAD(cancel_list);
+static DEFINE_SPINLOCK(cancel_lock);
+
/*
* This gets called when the timer event triggers. We set the "expired"
* flag, but we do not re-arm the timer (in case it's necessary,
@@ -53,28 +59,69 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
return HRTIMER_NORESTART;
}
-static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
+/*
+ * Called when the clock was set to cancel the timers in the cancel
+ * list.
+ */
+void timerfd_clock_was_set(void)
{
- ktime_t remaining;
+ ktime_t moffs = ktime_get_monotonic_offset();
+ struct timerfd_ctx *ctx;
+ unsigned long flags;
- remaining = hrtimer_expires_remaining(&ctx->tmr);
- return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &cancel_list, clist) {
+ if (!ctx->might_cancel)
+ continue;
+ spin_lock_irqsave(&ctx->wqh.lock, flags);
+ if (ctx->moffs.tv64 != moffs.tv64) {
+ ctx->moffs.tv64 = KTIME_MAX;
+ wake_up_locked(&ctx->wqh);
+ }
+ spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+ }
+ rcu_read_unlock();
}
-static bool timerfd_canceled(struct timerfd_ctx *ctx)
+static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
{
- ktime_t moffs;
+ if (ctx->might_cancel) {
+ ctx->might_cancel = false;
+ spin_lock(&cancel_lock);
+ list_del_rcu(&ctx->clist);
+ spin_unlock(&cancel_lock);
+ }
+}
- if (!ctx->might_cancel)
+static bool timerfd_canceled(struct timerfd_ctx *ctx)
+{
+ if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
return false;
+ ctx->moffs = ktime_get_monotonic_offset();
+ return true;
+}
- moffs = ktime_get_monotonic_offset();
+static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
+{
+ if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) &&
+ (flags & TFD_TIMER_CANCEL_ON_SET)) {
+ if (!ctx->might_cancel) {
+ ctx->might_cancel = true;
+ spin_lock(&cancel_lock);
+ list_add_rcu(&ctx->clist, &cancel_list);
+ spin_unlock(&cancel_lock);
+ }
+ } else if (ctx->might_cancel) {
+ timerfd_remove_cancel(ctx);
+ }
+}
- if (moffs.tv64 == ctx->moffs.tv64)
- return false;
+static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
+{
+ ktime_t remaining;
- ctx->moffs = moffs;
- return true;
+ remaining = hrtimer_expires_remaining(&ctx->tmr);
+ return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
}
static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
@@ -87,13 +134,6 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
htmode = (flags & TFD_TIMER_ABSTIME) ?
HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
- ctx->might_cancel = false;
- if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME &&
- (flags & TFD_TIMER_CANCELON_SET)) {
- clockid = CLOCK_REALTIME_COS;
- ctx->might_cancel = true;
- }
-
texp = timespec_to_ktime(ktmr->it_value);
ctx->expired = 0;
ctx->ticks = 0;
@@ -113,8 +153,9 @@ static int timerfd_release(struct inode *inode, struct file *file)
{
struct timerfd_ctx *ctx = file->private_data;
+ timerfd_remove_cancel(ctx);
hrtimer_cancel(&ctx->tmr);
- kfree(ctx);
+ kfree_rcu(ctx, rcu);
return 0;
}
@@ -149,20 +190,20 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
else
res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
+ /*
+ * If clock has changed, we do not care about the
+ * ticks and we do not rearm the timer. Userspace must
+ * reevaluate anyway.
+ */
+ if (timerfd_canceled(ctx)) {
+ ctx->ticks = 0;
+ ctx->expired = 0;
+ res = -ECANCELED;
+ }
+
if (ctx->ticks) {
ticks = ctx->ticks;
- /*
- * If clock has changed, we do not care about the
- * ticks and we do not rearm the timer. Userspace must
- * reevaluate anyway.
- */
- if (timerfd_canceled(ctx)) {
- ticks = 0;
- ctx->expired = 0;
- res = -ECANCELED;
- }
-
if (ctx->expired && ctx->tintv.tv64) {
/*
* If tintv.tv64 != 0, this is a periodic timer that
@@ -258,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
return PTR_ERR(file);
ctx = file->private_data;
+ timerfd_setup_cancel(ctx, flags);
+
/*
* We need to stop the existing timer before reprogramming
* it to the new values.
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index eda4ccde0730..925c8c01db7b 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -155,7 +155,6 @@ enum hrtimer_base_type {
HRTIMER_BASE_REALTIME,
HRTIMER_BASE_MONOTONIC,
HRTIMER_BASE_BOOTTIME,
- HRTIMER_BASE_REALTIME_COS,
HRTIMER_MAX_CLOCK_BASES,
};
@@ -306,6 +305,11 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
#endif
extern void clock_was_set(void);
+#ifdef CONFIG_TIMERFD
+extern void timerfd_clock_was_set(void);
+#else
+static inline void timerfd_clock_was_set(void) { }
+#endif
extern void hrtimers_resume(void);
extern ktime_t ktime_get(void);
diff --git a/include/linux/time.h b/include/linux/time.h
index a9242773eb24..b3061782dec3 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -302,12 +302,6 @@ struct itimerval {
* The IDs of various hardware clocks:
*/
#define CLOCK_SGI_CYCLE 10
-
-#ifdef __KERNEL__
-/* This clock is not exposed to user space */
-#define CLOCK_REALTIME_COS 15
-#endif
-
#define MAX_CLOCKS 16
#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC)
#define CLOCKS_MONO CLOCK_MONOTONIC
diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h
index e9571fc8f1a0..d3b57fa12225 100644
--- a/include/linux/timerfd.h
+++ b/include/linux/timerfd.h
@@ -19,7 +19,7 @@
* shared O_* flags.
*/
#define TFD_TIMER_ABSTIME (1 << 0)
-#define TFD_TIMER_CANCELON_SET (1 << 1)
+#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
#define TFD_CLOEXEC O_CLOEXEC
#define TFD_NONBLOCK O_NONBLOCK
@@ -27,6 +27,6 @@
/* Flags for timerfd_create. */
#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS
/* Flags for timerfd_settime. */
-#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET)
+#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)
#endif /* _LINUX_TIMERFD_H */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index eabcbd781433..26dd32f9f6b2 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -78,11 +78,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
.get_time = &ktime_get_boottime,
.resolution = KTIME_LOW_RES,
},
- {
- .index = CLOCK_REALTIME_COS,
- .get_time = &ktime_get_real,
- .resolution = KTIME_LOW_RES,
- },
}
};
@@ -90,7 +85,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME,
[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC,
[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME,
- [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS,
};
static inline int hrtimer_clockid_to_base(clockid_t clock_id)
@@ -116,7 +110,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
- base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim;
}
/*
@@ -486,8 +479,6 @@ static inline void debug_deactivate(struct hrtimer *timer)
trace_hrtimer_cancel(timer);
}
-static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base);
-
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -663,7 +654,33 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
return 0;
}
-static void retrigger_next_event(void *arg);
+/*
+ * Retrigger next event is called after clock was set
+ *
+ * Called with interrupts disabled via on_each_cpu()
+ */
+static void retrigger_next_event(void *arg)
+{
+ struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
+ struct timespec realtime_offset, xtim, wtm, sleep;
+
+ if (!hrtimer_hres_active())
+ return;
+
+ /* Optimized out for !HIGH_RES */
+ get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
+ set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
+
+ /* Adjust CLOCK_REALTIME offset */
+ raw_spin_lock(&base->lock);
+ base->clock_base[HRTIMER_BASE_REALTIME].offset =
+ timespec_to_ktime(realtime_offset);
+ base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
+ timespec_to_ktime(sleep);
+
+ hrtimer_force_reprogram(base, 0);
+ raw_spin_unlock(&base->lock);
+}
/*
* Switch to high resolution mode
@@ -711,46 +728,11 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
return 0;
}
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
+static inline void retrigger_next_event(void *arg) { }
#endif /* CONFIG_HIGH_RES_TIMERS */
/*
- * Retrigger next event is called after clock was set
- *
- * Called with interrupts disabled via on_each_cpu()
- */
-static void retrigger_next_event(void *arg)
-{
- struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
- struct timespec realtime_offset, xtim, wtm, sleep;
-
- if (!hrtimer_hres_active()) {
- raw_spin_lock(&base->lock);
- hrtimer_expire_cancelable(base);
- raw_spin_unlock(&base->lock);
- return;
- }
-
- /* Optimized out for !HIGH_RES */
- get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
- set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
-
- /* Adjust CLOCK_REALTIME offset */
- raw_spin_lock(&base->lock);
- base->clock_base[HRTIMER_BASE_REALTIME].offset =
- timespec_to_ktime(realtime_offset);
- base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
- timespec_to_ktime(sleep);
- base->clock_base[HRTIMER_BASE_REALTIME_COS].offset =
- timespec_to_ktime(realtime_offset);
-
- hrtimer_expire_cancelable(base);
-
- hrtimer_force_reprogram(base, 0);
- raw_spin_unlock(&base->lock);
-}
-
-/*
* Clock realtime was set
*
* Change the offset of the realtime clock vs. the monotonic
@@ -763,8 +745,11 @@ static void retrigger_next_event(void *arg)
*/
void clock_was_set(void)
{
+#ifdef CONFIG_HIGH_RES_TIMERS
/* Retrigger the CPU local events everywhere */
on_each_cpu(retrigger_next_event, NULL, 1);
+#endif
+ timerfd_clock_was_set();
}
/*
@@ -777,6 +762,7 @@ void hrtimers_resume(void)
KERN_INFO "hrtimers_resume() called with IRQs enabled!");
retrigger_next_event(NULL);
+ timerfd_clock_was_set();
}
static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
@@ -1240,22 +1226,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
timer->state &= ~HRTIMER_STATE_CALLBACK;
}
-static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base)
-{
- struct timerqueue_node *node;
- struct hrtimer_clock_base *base;
- ktime_t now = ktime_get_real();
-
- base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS];
-
- while ((node = timerqueue_getnext(&base->active))) {
- struct hrtimer *timer;
-
- timer = container_of(node, struct hrtimer, node);
- __run_hrtimer(timer, &now);
- }
-}
-
#ifdef CONFIG_HIGH_RES_TIMERS
/*