From b00c1a99e7758f794923c61e5cd55268d61c9469 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 29 Sep 2008 15:44:46 +0200 Subject: hrtimer: mark migration state Impact: during migration active hrtimers can be seen as inactive The migration code removes the hrtimers from the queues of the dead CPU and sets the state temporary to INACTIVE. The enqueue code sets it to ACTIVE/PENDING again. Prevent that the wrong state can be seen by using a separate migration state bit. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6d93dce61cbb..bdd88df1b4e5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -67,9 +67,10 @@ enum hrtimer_cb_mode { * 0x02 callback function running * 0x04 callback pending (high resolution mode) * - * Special case: + * Special cases: * 0x03 callback function running and enqueued * (was requeued on another CPU) + * 0x09 timer was migrated on CPU hotunplug * The "callback function running and enqueued" status is only possible on * SMP. It happens for example when a posix timer expired and the callback * queued a signal. Between dropping the lock which protects the posix timer @@ -87,6 +88,7 @@ enum hrtimer_cb_mode { #define HRTIMER_STATE_ENQUEUED 0x01 #define HRTIMER_STATE_CALLBACK 0x02 #define HRTIMER_STATE_PENDING 0x04 +#define HRTIMER_STATE_MIGRATE 0x08 /** * struct hrtimer - the basic hrtimer structure -- cgit v1.2.3 From ccc7dadf736639da86f3e0c86832c11a66fc8221 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 29 Sep 2008 15:47:42 +0200 Subject: hrtimer: prevent migration of per CPU hrtimers Impact: per CPU hrtimers can be migrated from a dead CPU The hrtimer code has no knowledge about per CPU timers, but we need to prevent the migration of such timers and warn when such a timer is active at migration time. Explicitely mark the timers as per CPU and use a more understandable mode descriptor for the interrupts safe unlocked callback mode, which is used by hrtimer_sleeper and the scheduler code. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 14 +++++++++++--- kernel/hrtimer.c | 37 +++++++++++++++++++++++++------------ kernel/sched.c | 4 ++-- kernel/time/tick-sched.c | 2 +- kernel/trace/trace_sysprof.c | 2 +- 5 files changed, 40 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bdd88df1b4e5..2f245fe63bda 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -47,14 +47,22 @@ enum hrtimer_restart { * HRTIMER_CB_IRQSAFE: Callback may run in hardirq context * HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and * does not restart the timer - * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: Callback must run in hardirq context - * Special mode for tick emultation + * HRTIMER_CB_IRQSAFE_PERCPU: Callback must run in hardirq context + * Special mode for tick emulation and + * scheduler timer. Such timers are per + * cpu and not allowed to be migrated on + * cpu unplug. + * HRTIMER_CB_IRQSAFE_UNLOCKED: Callback should run in hardirq context + * with timer->base lock unlocked + * used for timers which call wakeup to + * avoid lock order problems with rq->lock */ enum hrtimer_cb_mode { HRTIMER_CB_SOFTIRQ, HRTIMER_CB_IRQSAFE, HRTIMER_CB_IRQSAFE_NO_RESTART, - HRTIMER_CB_IRQSAFE_NO_SOFTIRQ, + HRTIMER_CB_IRQSAFE_PERCPU, + HRTIMER_CB_IRQSAFE_UNLOCKED, }; /* diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ace723dd1e52..cdec83e722fa 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -672,13 +672,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, */ BUG_ON(timer->function(timer) != HRTIMER_NORESTART); return 1; - case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: + case HRTIMER_CB_IRQSAFE_PERCPU: + case HRTIMER_CB_IRQSAFE_UNLOCKED: /* * This is solely for the sched tick emulation with * dynamic tick support to ensure that we do not * restart the tick right on the edge and end up with * the tick timer in the softirq ! The calling site - * takes care of this. + * takes care of this. Also used for hrtimer sleeper ! */ debug_hrtimer_deactivate(timer); return 1; @@ -1245,7 +1246,8 @@ static void __run_hrtimer(struct hrtimer *timer) timer_stats_account_hrtimer(timer); fn = timer->function; - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) { + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || + timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) { /* * Used for scheduler timers, avoid lock inversion with * rq->lock and tasklist_lock. @@ -1452,7 +1454,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) sl->timer.function = hrtimer_wakeup; sl->task = task; #ifdef CONFIG_HIGH_RES_TIMERS - sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; #endif } @@ -1592,7 +1594,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu) #ifdef CONFIG_HOTPLUG_CPU static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, - struct hrtimer_clock_base *new_base) + struct hrtimer_clock_base *new_base, int dcpu) { struct hrtimer *timer; struct rb_node *node; @@ -1603,6 +1605,18 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, BUG_ON(hrtimer_callback_running(timer)); debug_hrtimer_deactivate(timer); + /* + * Should not happen. Per CPU timers should be + * canceled _before_ the migration code is called + */ + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) { + __remove_hrtimer(timer, old_base, + HRTIMER_STATE_INACTIVE, 0); + WARN(1, "hrtimer (%p %p)active but cpu %d dead\n", + timer, timer->function, dcpu); + continue; + } + /* * Mark it as STATE_MIGRATE not INACTIVE otherwise the * timer could be seen as !active and just vanish away @@ -1619,12 +1633,11 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, /* * Happens with high res enabled when the timer was * already expired and the callback mode is - * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ - * (hrtimer_sleeper). The enqueue code does not move - * them to the soft irq pending list for - * performance/latency reasons, but in the migration - * state, we need to do that otherwise we end up with - * a stale timer. + * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The + * enqueue code does not move them to the soft irq + * pending list for performance/latency reasons, but + * in the migration state, we need to do that + * otherwise we end up with a stale timer. */ if (timer->state == HRTIMER_STATE_MIGRATE) { timer->state = HRTIMER_STATE_PENDING; @@ -1682,7 +1695,7 @@ static void migrate_hrtimers(int cpu) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { if (migrate_hrtimer_list(&old_base->clock_base[i], - &new_base->clock_base[i])) + &new_base->clock_base[i], cpu)) raise = 1; } diff --git a/kernel/sched.c b/kernel/sched.c index 13dd2db9fb2d..ad1962dc0aa2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -201,7 +201,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rt_b->rt_period_timer.function = sched_rt_period_timer; - rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; } static void start_rt_bandwidth(struct rt_bandwidth *rt_b) @@ -1119,7 +1119,7 @@ static void init_rq_hrtick(struct rq *rq) hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rq->hrtick_timer.function = hrtick; - rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; } #else static inline void hrtick_clear(struct rq *rq) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 39019b3f7621..cb02324bdb88 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -625,7 +625,7 @@ void tick_setup_sched_timer(void) */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ts->sched_timer.function = tick_sched_timer; - ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; /* Get the next period (per cpu) */ ts->sched_timer.expires = tick_init_jiffy_update(); diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index bb948e52ce20..db58fb66a135 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -202,7 +202,7 @@ static void start_stack_timer(int cpu) hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = stack_trace_timer_fn; - hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); } -- cgit v1.2.3