From e5d4d1756b07d9490a0269a9e68c1e05ee1feb9b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 21 Mar 2020 12:25:58 +0100 Subject: timekeeping: Split jiffies seqlock seqlock consists of a sequence counter and a spinlock_t which is used to serialize the writers. spinlock_t is substituted by a "sleeping" spinlock on PREEMPT_RT enabled kernels which breaks the usage in the timekeeping code as the writers are executed in hard interrupt and therefore non-preemptible context even on PREEMPT_RT. The spinlock in seqlock cannot be unconditionally replaced by a raw_spinlock_t as many seqlock users have nesting spinlock sections or other code which is not suitable to run in truly atomic context on RT. Instead of providing a raw_seqlock API for a single use case, open code the seqlock for the jiffies use case and implement it with a raw_spinlock_t and a sequence counter. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200321113242.120587764@linutronix.de --- kernel/time/jiffies.c | 7 ++++--- kernel/time/tick-common.c | 10 ++++++---- kernel/time/tick-sched.c | 19 ++++++++++++------- kernel/time/timekeeping.c | 6 ++++-- kernel/time/timekeeping.h | 3 ++- 5 files changed, 28 insertions(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index d23b434c2ca7..eddcf4970444 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -58,7 +58,8 @@ static struct clocksource clocksource_jiffies = { .max_cycles = 10, }; -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock); +__cacheline_aligned_in_smp seqcount_t jiffies_seq; #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) @@ -67,9 +68,9 @@ u64 get_jiffies_64(void) u64 ret; do { - seq = read_seqbegin(&jiffies_lock); + seq = read_seqcount_begin(&jiffies_seq); ret = jiffies_64; - } while (read_seqretry(&jiffies_lock, seq)); + } while (read_seqcount_retry(&jiffies_seq, seq)); return ret; } EXPORT_SYMBOL(get_jiffies_64); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 7e5d3524e924..6c9c342dd0e5 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -84,13 +84,15 @@ int tick_is_oneshot_available(void) static void tick_periodic(int cpu) { if (tick_do_timer_cpu == cpu) { - write_seqlock(&jiffies_lock); + raw_spin_lock(&jiffies_lock); + write_seqcount_begin(&jiffies_seq); /* Keep track of the next tick event */ tick_next_period = ktime_add(tick_next_period, tick_period); do_timer(1); - write_sequnlock(&jiffies_lock); + write_seqcount_end(&jiffies_seq); + raw_spin_unlock(&jiffies_lock); update_wall_time(); } @@ -162,9 +164,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) ktime_t next; do { - seq = read_seqbegin(&jiffies_lock); + seq = read_seqcount_begin(&jiffies_seq); next = tick_next_period; - } while (read_seqretry(&jiffies_lock, seq)); + } while (read_seqcount_retry(&jiffies_seq, seq)); clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a792d21cac64..4be756b88a48 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -65,7 +65,8 @@ static void tick_do_update_jiffies64(ktime_t now) return; /* Reevaluate with jiffies_lock held */ - write_seqlock(&jiffies_lock); + raw_spin_lock(&jiffies_lock); + write_seqcount_begin(&jiffies_seq); delta = ktime_sub(now, last_jiffies_update); if (delta >= tick_period) { @@ -91,10 +92,12 @@ static void tick_do_update_jiffies64(ktime_t now) /* Keep the tick_next_period variable up to date */ tick_next_period = ktime_add(last_jiffies_update, tick_period); } else { - write_sequnlock(&jiffies_lock); + write_seqcount_end(&jiffies_seq); + raw_spin_unlock(&jiffies_lock); return; } - write_sequnlock(&jiffies_lock); + write_seqcount_end(&jiffies_seq); + raw_spin_unlock(&jiffies_lock); update_wall_time(); } @@ -105,12 +108,14 @@ static ktime_t tick_init_jiffy_update(void) { ktime_t period; - write_seqlock(&jiffies_lock); + raw_spin_lock(&jiffies_lock); + write_seqcount_begin(&jiffies_seq); /* Did we start the jiffies update yet ? */ if (last_jiffies_update == 0) last_jiffies_update = tick_next_period; period = last_jiffies_update; - write_sequnlock(&jiffies_lock); + write_seqcount_end(&jiffies_seq); + raw_spin_unlock(&jiffies_lock); return period; } @@ -676,10 +681,10 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) /* Read jiffies and the time when jiffies were updated last */ do { - seq = read_seqbegin(&jiffies_lock); + seq = read_seqcount_begin(&jiffies_seq); basemono = last_jiffies_update; basejiff = jiffies; - } while (read_seqretry(&jiffies_lock, seq)); + } while (read_seqcount_retry(&jiffies_seq, seq)); ts->last_jiffies = basejiff; ts->timer_expires_base = basemono; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ca69290bee2a..856280d2cbd4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2397,8 +2397,10 @@ EXPORT_SYMBOL(hardpps); */ void xtime_update(unsigned long ticks) { - write_seqlock(&jiffies_lock); + raw_spin_lock(&jiffies_lock); + write_seqcount_begin(&jiffies_seq); do_timer(ticks); - write_sequnlock(&jiffies_lock); + write_seqcount_end(&jiffies_seq); + raw_spin_unlock(&jiffies_lock); update_wall_time(); } diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 141ab3ab0354..099737f6f10c 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -25,7 +25,8 @@ static inline void sched_clock_resume(void) { } extern void do_timer(unsigned long ticks); extern void update_wall_time(void); -extern seqlock_t jiffies_lock; +extern raw_spinlock_t jiffies_lock; +extern seqcount_t jiffies_seq; #define CS_NAME_LEN 32 -- cgit v1.2.3 From 40db173965c05a1d803451240ed41707d5bd978d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 21 Mar 2020 12:26:02 +0100 Subject: lockdep: Add hrtimer context tracing bits Set current->irq_config = 1 for hrtimers which are not marked to expire in hard interrupt context during hrtimer_init(). These timers will expire in softirq context on PREEMPT_RT. Setting this allows lockdep to differentiate these timers. If a timer is marked to expire in hard interrupt context then the timer callback is not supposed to acquire a regular spinlock instead of a raw_spinlock in the expiry callback. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200321113242.534508206@linutronix.de --- include/linux/irqflags.h | 15 +++++++++++++++ include/linux/sched.h | 1 + kernel/locking/lockdep.c | 2 +- kernel/time/hrtimer.c | 6 +++++- 4 files changed, 22 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index fdaf28601cbe..9c17f9c827aa 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -56,6 +56,19 @@ do { \ do { \ current->softirq_context--; \ } while (0) + +# define lockdep_hrtimer_enter(__hrtimer) \ + do { \ + if (!__hrtimer->is_hard) \ + current->irq_config = 1; \ + } while (0) + +# define lockdep_hrtimer_exit(__hrtimer) \ + do { \ + if (!__hrtimer->is_hard) \ + current->irq_config = 0; \ + } while (0) + #else # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) @@ -68,6 +81,8 @@ do { \ # define trace_hardirq_exit() do { } while (0) # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) +# define lockdep_hrtimer_enter(__hrtimer) do { } while (0) +# define lockdep_hrtimer_exit(__hrtimer) do { } while (0) #endif #if defined(CONFIG_IRQSOFF_TRACER) || \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 4d3b9ecce074..933914cdf219 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -983,6 +983,7 @@ struct task_struct { unsigned int softirq_enable_event; int softirqs_enabled; int softirq_context; + int irq_config; #endif #ifdef CONFIG_LOCKDEP diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 6b9f9f321e6d..0ebf9807d971 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4025,7 +4025,7 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next) /* * Check if force_irqthreads will run us threaded. */ - if (curr->hardirq_threaded) + if (curr->hardirq_threaded || curr->irq_config) curr_inner = LD_WAIT_CONFIG; else curr_inner = LD_WAIT_SPIN; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 3a609e7344f3..8cce72501aea 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1404,7 +1404,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; base += hrtimer_clockid_to_base(clock_id); timer->is_soft = softtimer; - timer->is_hard = !softtimer; + timer->is_hard = !!(mode & HRTIMER_MODE_HARD); timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); } @@ -1514,7 +1514,11 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, */ raw_spin_unlock_irqrestore(&cpu_base->lock, flags); trace_hrtimer_expire_entry(timer, now); + lockdep_hrtimer_enter(timer); + restart = fn(timer); + + lockdep_hrtimer_exit(timer); trace_hrtimer_expire_exit(timer); raw_spin_lock_irq(&cpu_base->lock); -- cgit v1.2.3 From 49915ac35ca7b07c54295a72d905be5064afb89e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 21 Mar 2020 12:26:03 +0100 Subject: lockdep: Annotate irq_work Mark irq_work items with IRQ_WORK_HARD_IRQ which should be invoked in hardirq context even on PREEMPT_RT. IRQ_WORK without this flag will be invoked in softirq context on PREEMPT_RT. Set ->irq_config to 1 for the IRQ_WORK items which are invoked in softirq context so lockdep knows that these can safely acquire a spinlock_t. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200321113242.643576700@linutronix.de --- include/linux/irq_work.h | 2 ++ include/linux/irqflags.h | 13 +++++++++++++ kernel/irq_work.c | 2 ++ kernel/rcu/tree.c | 1 + kernel/time/tick-sched.c | 1 + 5 files changed, 19 insertions(+) (limited to 'kernel/time') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 02da997ad12c..3b752e80c017 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -18,6 +18,8 @@ /* Doesn't want IPI, wait for tick: */ #define IRQ_WORK_LAZY BIT(2) +/* Run hard IRQ context, even on RT */ +#define IRQ_WORK_HARD_IRQ BIT(3) #define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 9c17f9c827aa..f23f540e0ebb 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -69,6 +69,17 @@ do { \ current->irq_config = 0; \ } while (0) +# define lockdep_irq_work_enter(__work) \ + do { \ + if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\ + current->irq_config = 1; \ + } while (0) +# define lockdep_irq_work_exit(__work) \ + do { \ + if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\ + current->irq_config = 0; \ + } while (0) + #else # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) @@ -83,6 +94,8 @@ do { \ # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) do { } while (0) # define lockdep_hrtimer_exit(__hrtimer) do { } while (0) +# define lockdep_irq_work_enter(__work) do { } while (0) +# define lockdep_irq_work_exit(__work) do { } while (0) #endif #if defined(CONFIG_IRQSOFF_TRACER) || \ diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 828cc30774bc..48b5d1b6af4d 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -153,7 +153,9 @@ static void irq_work_run_list(struct llist_head *list) */ flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags); + lockdep_irq_work_enter(work); work->func(work); + lockdep_irq_work_exit(work); /* * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d91c9156fab2..5066d1dd3077 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1113,6 +1113,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && (rnp->ffmask & rdp->grpmask)) { init_irq_work(&rdp->rcu_iw, rcu_iw_handler); + atomic_set(&rdp->rcu_iw.flags, IRQ_WORK_HARD_IRQ); rdp->rcu_iw_pending = true; rdp->rcu_iw_gp_seq = rnp->gp_seq; irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4be756b88a48..3e2dc9b8858c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -245,6 +245,7 @@ static void nohz_full_kick_func(struct irq_work *work) static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { .func = nohz_full_kick_func, + .flags = ATOMIC_INIT(IRQ_WORK_HARD_IRQ), }; /* -- cgit v1.2.3 From d53f2b62fcb63f6547c10d8c62bca19e957b0eef Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 21 Mar 2020 12:26:04 +0100 Subject: lockdep: Add posixtimer context tracing bits Splitting run_posix_cpu_timers() into two parts is work in progress which is stuck on other entry code related problems. The heavy lifting which involves locking of sighand lock will be moved into task context so the necessary execution time is burdened on the task and not on interrupt context. Until this work completes lockdep with the spinlock nesting rules enabled would emit warnings for this known context. Prevent it by setting "->irq_config = 1" for the invocation of run_posix_cpu_timers() so lockdep does not complain when sighand lock is acquried. This will be removed once the split is completed. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200321113242.751182723@linutronix.de --- include/linux/irqflags.h | 12 ++++++++++++ kernel/time/posix-cpu-timers.c | 6 +++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index f23f540e0ebb..a16adbb58f66 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -69,6 +69,16 @@ do { \ current->irq_config = 0; \ } while (0) +# define lockdep_posixtimer_enter() \ + do { \ + current->irq_config = 1; \ + } while (0) + +# define lockdep_posixtimer_exit() \ + do { \ + current->irq_config = 0; \ + } while (0) + # define lockdep_irq_work_enter(__work) \ do { \ if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\ @@ -94,6 +104,8 @@ do { \ # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) do { } while (0) # define lockdep_hrtimer_exit(__hrtimer) do { } while (0) +# define lockdep_posixtimer_enter() do { } while (0) +# define lockdep_posixtimer_exit() do { } while (0) # define lockdep_irq_work_enter(__work) do { } while (0) # define lockdep_irq_work_exit(__work) do { } while (0) #endif diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 8ff6da77a01f..2c48a7233b19 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1126,8 +1126,11 @@ void run_posix_cpu_timers(void) if (!fastpath_timer_check(tsk)) return; - if (!lock_task_sighand(tsk, &flags)) + lockdep_posixtimer_enter(); + if (!lock_task_sighand(tsk, &flags)) { + lockdep_posixtimer_exit(); return; + } /* * Here we take off tsk->signal->cpu_timers[N] and * tsk->cpu_timers[N] all the timers that are firing, and @@ -1169,6 +1172,7 @@ void run_posix_cpu_timers(void) cpu_timer_fire(timer); spin_unlock(&timer->it_lock); } + lockdep_posixtimer_exit(); } /* -- cgit v1.2.3