From 4bc8d55574dd316e43975651b9259c5c18d741fc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Nov 2017 15:13:56 -0800 Subject: rcu: Add debugging info to assertion The WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp()) in rcu_gp_cleanup() triggers (inexplicably, of course) every so often. This commit therefore extracts more information. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7fd12039e512..17b67ecf7dff 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -260,8 +260,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) * ->exp_tasks pointers, respectively, to reference the newly * blocked tasks. */ - if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) + if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) { rnp->gp_tasks = &t->rcu_node_entry; + WARN_ON_ONCE(rnp->completedqs == rnp->gpnum); + } if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) rnp->exp_tasks = &t->rcu_node_entry; WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) != @@ -535,6 +537,8 @@ void rcu_read_unlock_special(struct task_struct *t) WARN_ON_ONCE(rnp != t->rcu_blocked_node); WARN_ON_ONCE(!rcu_is_leaf_node(rnp)); empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); + WARN_ON_ONCE(rnp->completedqs == rnp->gpnum && + (!empty_norm || rnp->qsmask)); empty_exp = sync_rcu_preempt_exp_done(rnp); smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ np = rcu_next_node_entry(t, rnp); @@ -697,7 +701,8 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) struct task_struct *t; RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n"); - WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); + if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp))) + dump_blkd_tasks(rnp, 10); if (rcu_preempt_has_tasks(rnp)) { rnp->gp_tasks = rnp->blkd_tasks.next; t = container_of(rnp->gp_tasks, struct task_struct, @@ -841,6 +846,27 @@ void exit_rcu(void) __rcu_read_unlock(); } +/* + * Dump the blocked-tasks state, but limit the list dump to the + * specified number of elements. + */ +static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck) +{ + int i; + struct list_head *lhp; + + lockdep_assert_held(&rnp->lock); + pr_info("%s: grp: %d-%d level: %d ->qamask %#lx ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p &->blkd_tasks: %p offset: %u\n", __func__, rnp->grplo, rnp->grphi, rnp->level, rnp->qsmask, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks, &rnp->blkd_tasks, (unsigned int)offsetof(typeof(*rnp), blkd_tasks)); + pr_cont("\t->blkd_tasks"); + i = 0; + list_for_each(lhp, &rnp->blkd_tasks) { + pr_cont(" %p", lhp); + if (++i >= 10) + break; + } + pr_cont("\n"); +} + #else /* #ifdef CONFIG_PREEMPT_RCU */ static struct rcu_state *const rcu_state_p = &rcu_sched_state; @@ -949,6 +975,14 @@ void exit_rcu(void) { } +/* + * Dump the guaranteed-empty blocked-tasks state. Trust but verify. 
+ */ +static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck) +{ + WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks)); +} + #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_RCU_BOOST -- cgit v1.2.3 From ce11fae8d43fe9a36823fbbfe7c44de775b7e346 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 9 Mar 2018 09:32:18 +0800 Subject: rcu: Use the proper lockdep annotation in dump_blkd_tasks() Sparse reported this: | kernel/rcu/tree_plugin.h:814:9: warning: incorrect type in argument 1 (different modifiers) | kernel/rcu/tree_plugin.h:814:9: expected struct lockdep_map const *lock | kernel/rcu/tree_plugin.h:814:9: got struct lockdep_map [noderef] * This is caused by using vanilla lockdep annotations on rcu_node::lock, and that requires accessing ->lock of rcu_node directly. However, we need to keep rcu_node::lock __private to avoid breaking its extra ordering guarantee. And we have a dedicated lockdep annotation for rcu_node::lock, so use it. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 17b67ecf7dff..e387ea712758 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -855,7 +855,7 @@ static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck) int i; struct list_head *lhp; - lockdep_assert_held(&rnp->lock); + raw_lockdep_assert_held_rcu_node(rnp); pr_info("%s: grp: %d-%d level: %d ->qamask %#lx ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p &->blkd_tasks: %p offset: %u\n", __func__, rnp->grplo, rnp->grphi, rnp->level, rnp->qsmask, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks, &rnp->blkd_tasks, (unsigned int)offsetof(typeof(*rnp), blkd_tasks)); pr_cont("\t->blkd_tasks"); i = 0; -- cgit v1.2.3 From 03c8cb765a747c02fd8d3fade1efe9d529ad54bd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 27 Apr 2018 19:56:16 -0700 Subject: rcu: Move rcu_try_advance_all_cbs() to ->gp_seq This commit makes rcu_try_advance_all_cbs() use ->gp_seq, with the exception of tracing, which will be converted later. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e387ea712758..d893899b72f4 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1467,7 +1467,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) * completed since we last checked and there are * callbacks not yet ready to invoke. */ - if ((rdp->completed != rnp->completed || + if ((rcu_seq_completed_gp(rdp->gp_seq, + rcu_seq_current(&rnp->gp_seq)) || unlikely(READ_ONCE(rdp->gpwrap))) && rcu_segcblist_pend_cbs(&rdp->cblist)) note_gp_changes(rsp, rdp); -- cgit v1.2.3 From e0da2374c3881cb9a512e2718f9ca655a48de9db Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 27 Apr 2018 20:51:36 -0700 Subject: rcu: Move rcu_nocb_gp_get() to ->gp_seq This commit makes rcu_nocb_gp_get() use ->gp_seq. It uses rcu_seq_ctr() in order to shift away the state bits, so that the low-order bits of the result may safely be used to index ->nocb_gp_wq[]. Signed-off-by: Paul E.
McKenney --- kernel/rcu/tree_plugin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index d893899b72f4..5b10904669c5 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1852,7 +1852,7 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) { - return &rnp->nocb_gp_wq[rnp->completed & 0x1]; + return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1]; } static void rcu_init_one_nocb(struct rcu_node *rnp) -- cgit v1.2.3 From 8aa670cdacc1820cb0597e4b4b413ef91ede2dd9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 28 Apr 2018 14:15:40 -0700 Subject: rcu: Convert ->rcu_iw_gpnum to ->gp_seq This commit switches the interrupt-disabled detection mechanism to ->gp_seq. This mechanism is used as part of RCU CPU stall warnings, and detects cases where the stall is due to a CPU having interrupts disabled. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 12 ++++++------ kernel/rcu/tree.h | 2 +- kernel/rcu/tree_plugin.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 56445f4c09a8..2ddbd1cfb31a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1099,8 +1099,8 @@ static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp) if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4, rnp->gp_seq)) WRITE_ONCE(rdp->gpwrap, true); - if (ULONG_CMP_LT(rdp->rcu_iw_gpnum + ULONG_MAX / 4, rnp->gpnum)) - rdp->rcu_iw_gpnum = rnp->gpnum + ULONG_MAX / 4; + if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq)) + rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4; } /* @@ -1134,7 +1134,7 @@ static void rcu_iw_handler(struct irq_work *iwp) rnp = rdp->mynode; raw_spin_lock_rcu_node(rnp); if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) { - rdp->rcu_iw_gpnum = rnp->gpnum; + rdp->rcu_iw_gp_seq = rnp->gp_seq; rdp->rcu_iw_pending = false; } raw_spin_unlock_rcu_node(rnp); @@ -1231,11 +1231,11 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) { resched_cpu(rdp->cpu); if (IS_ENABLED(CONFIG_IRQ_WORK) && - !rdp->rcu_iw_pending && rdp->rcu_iw_gpnum != rnp->gpnum && + !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && (rnp->ffmask & rdp->grpmask)) { init_irq_work(&rdp->rcu_iw, rcu_iw_handler); rdp->rcu_iw_pending = true; - rdp->rcu_iw_gpnum = rnp->gpnum; + rdp->rcu_iw_gp_seq = rnp->gp_seq; irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); } } @@ -3575,7 +3575,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu); rdp->core_needs_qs = false; rdp->rcu_iw_pending = false; - rdp->rcu_iw_gpnum = rnp->gpnum - 1; + rdp->rcu_iw_gp_seq = rnp->gp_seq - 1; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 50a28d1cf5a1..6d6cbc8b3a9c 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -286,7 +286,7 @@ struct rcu_data { /* ->rcu_iw* fields protected by leaf rcu_node ->lock. */ struct irq_work rcu_iw; /* Check for non-irq activity. */ bool rcu_iw_pending; /* Is ->rcu_iw pending? */ - unsigned long rcu_iw_gpnum; /* ->gpnum associated with ->rcu_iw. */ + unsigned long rcu_iw_gp_seq; /* ->gp_seq associated with ->rcu_iw. 
*/ int cpu; struct rcu_state *rsp; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 5b10904669c5..bc32e1f434a6 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1763,7 +1763,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) ticks_value = rsp->gpnum - rdp->gpnum; } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); - delta = rdp->mynode->gpnum - rdp->rcu_iw_gpnum; + delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq); pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%ld softirq=%u/%u fqs=%ld %s\n", cpu, "O."[!!cpu_online(cpu)], -- cgit v1.2.3 From d43a5d32e125db1e34641922e52baaa4fee3510a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 28 Apr 2018 18:50:06 -0700 Subject: rcu: Convert ->completedqs to ->gp_seq This commit switches the quiescent-state no-backtracking checks from ->gpnum and ->completed to ->gp_seq. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 4 ++-- kernel/rcu/tree_plugin.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2ddbd1cfb31a..da2ca9cc078b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2278,7 +2278,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; } - rnp->completedqs = rnp->gpnum; + rnp->completedqs = rnp->gp_seq; mask = rnp->grpmask; if (rnp->parent == NULL) { @@ -3951,7 +3951,7 @@ static void __init rcu_init_one(struct rcu_state *rsp) rnp->gpnum = rsp->gpnum; rnp->completed = rsp->completed; rnp->gp_seq = rsp->gp_seq; - rnp->completedqs = rsp->completed; + rnp->completedqs = rsp->gp_seq; rnp->qsmask = 0; rnp->qsmaskinit = 0; rnp->grplo = j * cpustride; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index bc32e1f434a6..2a81d243521f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -262,7 +262,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) */ if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) { rnp->gp_tasks = &t->rcu_node_entry; - WARN_ON_ONCE(rnp->completedqs == rnp->gpnum); + WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq); } if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) rnp->exp_tasks = &t->rcu_node_entry; @@ -537,7 +537,7 @@ void rcu_read_unlock_special(struct task_struct *t) WARN_ON_ONCE(rnp != t->rcu_blocked_node); WARN_ON_ONCE(!rcu_is_leaf_node(rnp)); empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); - WARN_ON_ONCE(rnp->completedqs == rnp->gpnum && + WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq && (!empty_norm || rnp->qsmask)); empty_exp = sync_rcu_preempt_exp_done(rnp); smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ -- cgit v1.2.3 From 29365e563b1e4e5bfde211280d37dc6127c019ed Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 30 Apr 2018 10:57:36 -0700 Subject: rcu: Convert grace-period requests to ->gp_seq This commit converts the grace-period request code paths from ->completed and ->gpnum to ->gp_seq. The need_future_gp_element() macro encapsulates the shift operation required to use ->gp_seq as an index to the ->need_future_gp[] array. The rcu_cbs_completed() function is removed in favor of the rcu_seq_snap() function. The rcu_start_this_gp() gets some temporary consistency checks and uses rcu_seq_done(), rcu_seq_current(), rcu_seq_state(), and rcu_gp_in_progress() in place of the earlier open-coded comparisons of ->gpnum and ->completed. 
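As an aside for readers following this series: a ->gp_seq counter packs the grace-period state into its low-order bits and the grace-period count into the remaining bits, so a single unsigned long answers both "which grace period?" and "is one in progress?". The sketch below of the helpers named above is modeled on kernel/rcu/rcu.h, but it is a user-space illustration: the two-bit state width and the exact macro bodies are assumptions for illustration rather than the authoritative kernel definitions, and the kernel versions add READ_ONCE() and memory barriers omitted here.

#include <limits.h>
#include <stdbool.h>

#define RCU_SEQ_CTR_SHIFT	2	/* Assumed: low two bits hold GP state. */
#define RCU_SEQ_STATE_MASK	((1UL << RCU_SEQ_CTR_SHIFT) - 1)

/* Wrap-safe "a >= b" for free-running unsigned long counters. */
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

/* Grace-period count with the state bits shifted away. */
static unsigned long rcu_seq_ctr(unsigned long s)
{
	return s >> RCU_SEQ_CTR_SHIFT;
}

/* Nonzero exactly when a grace period is in progress. */
static unsigned long rcu_seq_state(unsigned long s)
{
	return s & RCU_SEQ_STATE_MASK;
}

/* Current ->gp_seq value; the kernel version uses READ_ONCE(). */
static unsigned long rcu_seq_current(unsigned long *sp)
{
	return *sp;
}

/*
 * Smallest ->gp_seq value at which a full grace period starting after
 * this call is guaranteed to have completed.
 */
static unsigned long rcu_seq_snap(unsigned long *sp)
{
	return (*sp + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK;
}

/* Has the grace period captured by an earlier rcu_seq_snap() ended? */
static bool rcu_seq_done(unsigned long *sp, unsigned long s)
{
	return ULONG_CMP_GE(*sp, s);
}

With this encoding, "is my grace period done?" is a single wrap-safe comparison, which is what allows the open-coded ->gpnum/->completed arithmetic below to be deleted.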
The rcu_future_gp_cleanup() function replaces use of ->completed with ->gp_seq. The rcu_accelerate_cbs() function replaces a call to rcu_cbs_completed() with one to rcu_seq_snap(). The rcu_advance_cbs() function replaces an access to ->completed with one to ->gp_seq and adds some temporary warnings. The rcu_nocb_wait_gp() function replaces a call to rcu_cbs_completed() with one to rcu_seq_snap() and an open-coded comparison with rcu_seq_done(). The temporary warnings will be removed when the various ->gpnum and ->completed fields are removed. Their purpose is to locate code that might still be using ->gpnum and ->completed. (Much easier that way than trying to trace down the causes of too-short grace periods and grace-period hangs!) Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 84 ++++++++++++------------------------------------ kernel/rcu/tree.h | 2 +- kernel/rcu/tree_plugin.h | 6 ++-- 3 files changed, 24 insertions(+), 68 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index da2ca9cc078b..ffa45b6175d9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1548,52 +1548,6 @@ void rcu_cpu_stall_reset(void) WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2); } -/* - * Determine the value that ->completed will have at the end of the - * next subsequent grace period. This is used to tag callbacks so that - * a CPU can invoke callbacks in a timely fashion even if that CPU has - * been dyntick-idle for an extended period with callbacks under the - * influence of RCU_FAST_NO_HZ. - * - * The caller must hold rnp->lock with interrupts disabled. - */ -static unsigned long rcu_cbs_completed(struct rcu_state *rsp, - struct rcu_node *rnp) -{ - raw_lockdep_assert_held_rcu_node(rnp); - - /* - * If RCU is idle, we just wait for the next grace period. - * But we can only be sure that RCU is idle if we are looking - * at the root rcu_node structure -- otherwise, a new grace - * period might have started, but just not yet gotten around - * to initializing the current non-root rcu_node structure. - */ - if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed) - return rnp->completed + 1; - - /* - * If the current rcu_node structure believes that RCU is - * idle, and if the rcu_state structure does not yet reflect - * the start of a new grace period, then the next grace period - * will suffice. The memory barrier is needed to accurately - * sample the rsp->gpnum, and pairs with the second lock - * acquisition in rcu_gp_init(), which is augmented with - * smp_mb__after_unlock_lock() for this purpose. - */ - if (rnp->gpnum == rnp->completed) { - smp_mb(); /* See above block comment. */ - if (READ_ONCE(rsp->gpnum) == rnp->completed) - return rnp->completed + 1; - } - - /* - * Otherwise, wait for a possible partial grace period and - * then the subsequent full grace period. - */ - return rnp->completed + 2; -} - /* Trace-event wrapper function for trace_rcu_future_grace_period. */ static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, unsigned long c, const char *s) @@ -1629,16 +1583,16 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, * not be released. */ raw_lockdep_assert_held_rcu_node(rnp); + WARN_ON_ONCE(c & 0x2); /* Catch any lingering use of ->gpnum. */ + WARN_ON_ONCE(((rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT) != rcu_seq_ctr(rnp->gp_seq)); /* Catch any ->completed/->gp_seq mismatches.
*/ trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf")); for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) { if (rnp_root != rnp) raw_spin_lock_rcu_node(rnp_root); - WARN_ON_ONCE(ULONG_CMP_LT(rnp_root->gpnum + - need_future_gp_mask(), c)); if (need_future_gp_element(rnp_root, c) || - ULONG_CMP_GE(rnp_root->gpnum, c) || + rcu_seq_done(&rnp_root->gp_seq, c) || (rnp != rnp_root && - rnp_root->gpnum != rnp_root->completed)) { + rcu_seq_state(rcu_seq_current(&rnp_root->gp_seq)))) { trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted")); goto unlock_out; } @@ -1650,7 +1604,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, } /* If GP already in progress, just leave, otherwise start one. */ - if (rnp_root->gpnum != rnp_root->completed) { + if (rcu_gp_in_progress(rsp)) { trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot")); goto unlock_out; } @@ -1675,7 +1629,7 @@ unlock_out: */ static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) { - unsigned long c = rnp->completed; + unsigned long c = rnp->gp_seq; bool needmore; struct rcu_data *rdp = this_cpu_ptr(rsp->rda); @@ -1703,14 +1657,14 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp) } /* - * If there is room, assign a ->completed number to any callbacks on - * this CPU that have not already been assigned. Also accelerate any - * callbacks that were previously assigned a ->completed number that has - * since proven to be too conservative, which can happen if callbacks get - * assigned a ->completed number while RCU is idle, but with reference to - * a non-root rcu_node structure. This function is idempotent, so it does - * not hurt to call it repeatedly. Returns an flag saying that we should - * awaken the RCU grace-period kthread. + * If there is room, assign a ->gp_seq number to any callbacks on this + * CPU that have not already been assigned. Also accelerate any callbacks + * that were previously assigned a ->gp_seq number that has since proven + * to be too conservative, which can happen if callbacks get assigned a + * ->gp_seq number while RCU is idle, but with reference to a non-root + * rcu_node structure. This function is idempotent, so it does not hurt + * to call it repeatedly. Returns an flag saying that we should awaken + * the RCU grace-period kthread. * * The caller must hold rnp->lock with interrupts disabled. */ @@ -1736,7 +1690,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, * accelerating callback invocation to an earlier grace-period * number. */ - c = rcu_cbs_completed(rsp, rnp); + c = rcu_seq_snap(&rsp->gp_seq); if (rcu_segcblist_accelerate(&rdp->cblist, c)) ret = rcu_start_this_gp(rnp, rdp, c); @@ -1751,7 +1705,7 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, /* * Move any callbacks whose grace period has completed to the * RCU_DONE_TAIL sublist, then compact the remaining sublists and - * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL + * assign ->gp_seq numbers to any callbacks in the RCU_NEXT_TAIL * sublist. This function is idempotent, so it does not hurt to * invoke it repeatedly. As long as it is not invoked -too- often... * Returns true if the RCU grace-period kthread needs to be awakened. 
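Before the rcu_advance_cbs() hunk below, it may help to picture what "accelerate" and "advance" mean for a single callback. The following toy queue is a deliberately simplified stand-in for the real rcu_segcblist (the type and function names here are invented for illustration), reusing the rcu_seq_snap()/rcu_seq_done() sketch shown earlier:

struct toy_cb {
	void (*func)(struct toy_cb *cb);
	unsigned long gp_seq;	/* 0 = untagged; else GP after which it is safe. */
	struct toy_cb *next;
};

/* "Accelerate": tag any untagged callback with the next full grace period. */
static void toy_accelerate(struct toy_cb *cb, unsigned long *gp_seq)
{
	if (!cb->gp_seq)
		cb->gp_seq = rcu_seq_snap(gp_seq);
}

/* "Advance": invoke every callback whose tagged grace period has ended. */
static void toy_advance(struct toy_cb *head, unsigned long *gp_seq)
{
	struct toy_cb *cb;

	for (cb = head; cb; cb = cb->next)
		if (cb->gp_seq && rcu_seq_done(gp_seq, cb->gp_seq))
			cb->func(cb);
}

The real code does this segment-wise under the leaf rcu_node structure's ->lock; the point is only that both operations reduce to rcu_seq_snap() tags and rcu_seq_done() comparisons, which is why they remain idempotent exactly as the comments above promise.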
@@ -1768,10 +1722,10 @@ static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, return false; /* - * Find all callbacks whose ->completed numbers indicate that they + * Find all callbacks whose ->gp_seq numbers indicate that they * are ready to invoke, and put them into the RCU_DONE_TAIL sublist. */ - rcu_segcblist_advance(&rdp->cblist, rnp->completed); + rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq); /* Classify any remaining callbacks. */ return rcu_accelerate_cbs(rsp, rnp, rdp); @@ -1889,6 +1843,8 @@ static bool rcu_gp_init(struct rcu_state *rsp) smp_store_release(&rsp->gpnum, rsp->gpnum + 1); smp_mb(); /* Pairs with barriers in stall-warning code. */ rcu_seq_start(&rsp->gp_seq); + if (WARN_ON_ONCE(((rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT) != rcu_seq_ctr(rnp->gp_seq))) /* Catch any ->completed/->gp_seq mismatches. */ + pr_info("%s ->completed: %#lx (%#lx) ->gp_seq %#lx (%#lx)\n", __func__, rnp->completed, (rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT, rnp->gp_seq, rcu_seq_ctr(rnp->gp_seq)); trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); raw_spin_unlock_irq_rcu_node(rnp); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 6d6cbc8b3a9c..a21d403a6010 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -174,7 +174,7 @@ struct rcu_node { #define need_future_gp_mask() \ (ARRAY_SIZE(((struct rcu_node *)NULL)->need_future_gp) - 1) #define need_future_gp_element(rnp, c) \ - ((rnp)->need_future_gp[(c) & need_future_gp_mask()]) + ((rnp)->need_future_gp[(c >> RCU_SEQ_CTR_SHIFT) & need_future_gp_mask()]) #define need_any_future_gp(rnp) \ ({ \ int __i; \ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 2a81d243521f..2036dc7426ac 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2105,7 +2105,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) struct rcu_node *rnp = rdp->mynode; raw_spin_lock_irqsave_rcu_node(rnp, flags); - c = rcu_cbs_completed(rdp->rsp, rnp); + c = rcu_seq_snap(&rdp->rsp->gp_seq); needwake = rcu_start_this_gp(rnp, rdp, c); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (needwake) @@ -2118,8 +2118,8 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait")); for (;;) { swait_event_interruptible( - rnp->nocb_gp_wq[c & 0x1], - (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c))); + rnp->nocb_gp_wq[rcu_seq_ctr(c) & 0x1], + (d = rcu_seq_done(&rnp->gp_seq, c))); if (likely(d)) break; WARN_ON(signal_pending(current)); -- cgit v1.2.3 From 471f87c3d91bd0884451c0e3071473476c165630 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 30 Apr 2018 13:09:17 -0700 Subject: rcu: Make RCU CPU stall warnings use ->gp_seq This commit makes the RCU CPU stall-warning code in print_other_cpu_stall(), print_cpu_stall(), and check_cpu_stall() use ->gp_seq instead of ->gpnum and ->completed. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 51 ++++++++++++++++++++++++------------------------ kernel/rcu/tree_plugin.h | 8 ++++---- 2 files changed, 30 insertions(+), 29 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ffa45b6175d9..9e619c4878d3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1340,7 +1340,7 @@ static inline void panic_on_rcu_stall(void) panic("RCU Stall\n"); } -static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) +static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gp_seq) { int cpu; unsigned long flags; @@ -1350,6 +1350,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) struct rcu_node *rnp = rcu_get_root(rsp); long totqlen = 0; + WARN_ON_ONCE(gp_seq & 0x2); /* Remove when ->gpnum removed. */ + /* Kick and suppress, if so configured. */ rcu_stall_kick_kthreads(rsp); if (rcu_cpu_stall_suppress) @@ -1380,17 +1382,16 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) for_each_possible_cpu(cpu) totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, cpu)->cblist); - pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", + pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start), - (long)rsp->gpnum, (long)rsp->completed, totqlen); + (long)rcu_seq_current(&rsp->gp_seq), totqlen); if (ndetected) { rcu_dump_cpu_stacks(rsp); /* Complain about tasks blocking the grace period. */ rcu_print_detail_task_stall(rsp); } else { - if (READ_ONCE(rsp->gpnum) != gpnum || - READ_ONCE(rsp->completed) == gpnum) { + if (rcu_seq_current(&rsp->gp_seq) != gp_seq) { pr_err("INFO: Stall ended before state dump start\n"); } else { j = jiffies; @@ -1442,9 +1443,9 @@ static void print_cpu_stall(struct rcu_state *rsp) for_each_possible_cpu(cpu) totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, cpu)->cblist); - pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", + pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n", jiffies - rsp->gp_start, - (long)rsp->gpnum, (long)rsp->completed, totqlen); + (long)rcu_seq_current(&rsp->gp_seq), totqlen); rcu_check_gp_kthread_starvation(rsp); @@ -1471,8 +1472,8 @@ static void print_cpu_stall(struct rcu_state *rsp) static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { - unsigned long completed; - unsigned long gpnum; + unsigned long gs1; + unsigned long gs2; unsigned long gps; unsigned long j; unsigned long jn; @@ -1488,28 +1489,28 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) /* * Lots of memory barriers to reject false positives. * - * The idea is to pick up rsp->gpnum, then rsp->jiffies_stall, - * then rsp->gp_start, and finally rsp->completed. These values - * are updated in the opposite order with memory barriers (or - * equivalent) during grace-period initialization and cleanup. - * Now, a false positive can occur if we get an new value of - * rsp->gp_start and a old value of rsp->jiffies_stall. But given - * the memory barriers, the only way that this can happen is if one - * grace period ends and another starts between these two fetches. - * Detect this by comparing rsp->completed with the previous fetch - * from rsp->gpnum. + * The idea is to pick up rsp->gp_seq, then rsp->jiffies_stall, + * then rsp->gp_start, and finally another copy of rsp->gp_seq. + * These values are updated in the opposite order with memory + * barriers (or equivalent) during grace-period initialization + * and cleanup. 
Now, a false positive can occur if we get an new + * value of rsp->gp_start and a old value of rsp->jiffies_stall. + * But given the memory barriers, the only way that this can happen + * is if one grace period ends and another starts between these + * two fetches. This is detected by comparing the second fetch + * of rsp->gp_seq with the previous fetch from rsp->gp_seq. * * Given this check, comparisons of jiffies, rsp->jiffies_stall, * and rsp->gp_start suffice to forestall false positives. */ - gpnum = READ_ONCE(rsp->gpnum); - smp_rmb(); /* Pick up ->gpnum first... */ + gs1 = READ_ONCE(rsp->gp_seq); + smp_rmb(); /* Pick up ->gp_seq first... */ js = READ_ONCE(rsp->jiffies_stall); smp_rmb(); /* ...then ->jiffies_stall before the rest... */ gps = READ_ONCE(rsp->gp_start); - smp_rmb(); /* ...and finally ->gp_start before ->completed. */ - completed = READ_ONCE(rsp->completed); - if (ULONG_CMP_GE(completed, gpnum) || + smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */ + gs2 = READ_ONCE(rsp->gp_seq); + if (gs1 != gs2 || ULONG_CMP_LT(j, js) || ULONG_CMP_GE(gps, js)) return; /* No stall or GP completed since entering function. */ @@ -1527,7 +1528,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) cmpxchg(&rsp->jiffies_stall, js, jn) == js) { /* They had a few time units to dump stack, so complain. */ - print_other_cpu_stall(rsp, gpnum); + print_other_cpu_stall(rsp, gs2); } } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 2036dc7426ac..f4a88e3c388d 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1755,12 +1755,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) */ touch_nmi_watchdog(); - if (rsp->gpnum == rdp->gpnum) { + ticks_value = rcu_seq_ctr(rsp->gp_seq - rdp->gp_seq); + if (ticks_value) { + ticks_title = "GPs behind"; + } else { ticks_title = "ticks this GP"; ticks_value = rdp->ticks_this_gp; - } else { - ticks_title = "GPs behind"; - ticks_value = rsp->gpnum - rdp->gpnum; } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq); -- cgit v1.2.3 From ab5e869c1f7aa30a1210f5e8a277758b0599609f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 May 2018 11:07:23 -0700 Subject: rcu: Make rcu_nocb_wait_gp() check if GP already requested This commit makes rcu_nocb_wait_gp() check rdp->gp_seq_needed to see if the current CPU already knows about the needed grace period having already been requested. If so, it avoids acquiring the corresponding leaf rcu_node structure's ->lock, thus decreasing contention. This optimization is intended for cases where either multiple leader rcuo kthreads are running on the same CPU or these kthreads are running on a non-offloaded (e.g., housekeeping) CPU. Signed-off-by: Paul E. McKenney [ paulmck: Move lock release past "if" as suggested by Joel Fernandes. ] [ paulmck: Fix caching of furthest-future requested grace period. ] --- kernel/rcu/tree.c | 5 +++++ kernel/rcu/tree_plugin.h | 15 ++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1ede51690e4a..4826598867c3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1618,6 +1618,11 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq")); ret = true; /* Caller must wake GP kthread. 
*/ unlock_out: + /* Push furthest requested GP to leaf node and rcu_data structure. */ + if (ULONG_CMP_LT(c, rnp_root->gp_seq_needed)) { + rnp->gp_seq_needed = rnp_root->gp_seq_needed; + rdp->gp_seq_needed = rnp_root->gp_seq_needed; + } if (rnp != rnp_root) raw_spin_unlock_rcu_node(rnp_root); return ret; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index f4a88e3c388d..ca73931f7b30 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2104,12 +2104,17 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) bool needwake; struct rcu_node *rnp = rdp->mynode; - raw_spin_lock_irqsave_rcu_node(rnp, flags); + local_irq_save(flags); c = rcu_seq_snap(&rdp->rsp->gp_seq); - needwake = rcu_start_this_gp(rnp, rdp, c); - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - if (needwake) - rcu_gp_kthread_wake(rdp->rsp); + if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) { + local_irq_restore(flags); + } else { + raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ + needwake = rcu_start_this_gp(rnp, rdp, c); + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + if (needwake) + rcu_gp_kthread_wake(rdp->rsp); + } /* * Wait for the grace period. Do so interruptibly to avoid messing -- cgit v1.2.3 From 477351f7829d2268769c5d545511081555066529 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 May 2018 12:54:11 -0700 Subject: rcu: Convert rcu_grace_period tracepoint to gp_seq This commit makes the rcu_grace_period tracepoint use gp_seq instead of ->gpnum or ->completed. It also introduces a "cpuofl-bgp" string to less obscurely indicate when a CPU has gone offline while a grace period is waiting on it. Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 13 +++++++------ kernel/rcu/tree.c | 43 +++++++++++++++++++++---------------------- kernel/rcu/tree_plugin.h | 2 +- 3 files changed, 29 insertions(+), 29 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 5936aac357ab..cd229e82ec8b 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -52,6 +52,7 @@ TRACE_EVENT(rcu_utilization, * "cpuqs": CPU passes through a quiescent state. * "cpuonl": CPU comes online. * "cpuofl": CPU goes offline. + * "cpuofl-bgp": CPU goes offline while blocking a grace period. * "reqwait": GP kthread sleeps waiting for grace-period request. * "reqwaitsig": GP kthread awakened by signal from reqwait state. * "fqswait": GP kthread waiting until time to force quiescent states. 
@@ -63,24 +64,24 @@ TRACE_EVENT(rcu_utilization, */ TRACE_EVENT(rcu_grace_period, - TP_PROTO(const char *rcuname, unsigned long gpnum, const char *gpevent), + TP_PROTO(const char *rcuname, unsigned long gp_seq, const char *gpevent), - TP_ARGS(rcuname, gpnum, gpevent), + TP_ARGS(rcuname, gp_seq, gpevent), TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpnum) + __field(unsigned long, gp_seq) __field(const char *, gpevent) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpnum = gpnum; + __entry->gp_seq = gp_seq; __entry->gpevent = gpevent; ), TP_printk("%s %lu %s", - __entry->rcuname, __entry->gpnum, __entry->gpevent) + __entry->rcuname, __entry->gp_seq, __entry->gpevent) ); /* @@ -753,7 +754,7 @@ TRACE_EVENT(rcu_barrier, #else /* #ifdef CONFIG_RCU_TRACE */ -#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) +#define trace_rcu_grace_period(rcuname, gp_seq, gpevent) do { } while (0) #define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \ level, grplo, grphi, event) \ do { } while (0) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 4826598867c3..7ce85ad39af6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -234,7 +234,7 @@ void rcu_sched_qs(void) if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) return; trace_rcu_grace_period(TPS("rcu_sched"), - __this_cpu_read(rcu_sched_data.gpnum), + __this_cpu_read(rcu_sched_data.gp_seq), TPS("cpuqs")); __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false); if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) @@ -249,7 +249,7 @@ void rcu_bh_qs(void) RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!"); if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { trace_rcu_grace_period(TPS("rcu_bh"), - __this_cpu_read(rcu_bh_data.gpnum), + __this_cpu_read(rcu_bh_data.gp_seq), TPS("cpuqs")); __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); } @@ -1615,7 +1615,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread")); goto unlock_out; } - trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq")); + trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq), TPS("newreq")); ret = true; /* Caller must wake GP kthread. */ unlock_out: /* Push furthest requested GP to leaf node and rcu_data structure. */ @@ -1702,9 +1702,9 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, /* Trace depending on how much we were able to accelerate. */ if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) - trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB")); + trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccWaitCB")); else - trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB")); + trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccReadyCB")); return ret; } @@ -1774,7 +1774,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, * go looking for one. */ rdp->gpnum = rnp->gpnum; - trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); + trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpustart")); need_gp = !!(rnp->qsmask & rdp->grpmask); rdp->cpu_no_qs.b.norm = need_gp; rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr); @@ -1851,7 +1851,7 @@ static bool rcu_gp_init(struct rcu_state *rsp) rcu_seq_start(&rsp->gp_seq); if (WARN_ON_ONCE(((rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT) != rcu_seq_ctr(rnp->gp_seq))) /* Catch any ->completed/->gp_seq mismatches. 
*/ pr_info("%s ->completed: %#lx (%#lx) ->gp_seq %#lx (%#lx)\n", __func__, rnp->completed, (rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT, rnp->gp_seq, rcu_seq_ctr(rnp->gp_seq)); - trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); + trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("start")); raw_spin_unlock_irq_rcu_node(rnp); /* @@ -1928,7 +1928,7 @@ static bool rcu_gp_init(struct rcu_state *rsp) if (rnp == rdp->mynode) (void)__note_gp_changes(rsp, rnp, rdp); rcu_preempt_boost_start_gp(rnp); - trace_rcu_grace_period_init(rsp->name, rnp->gpnum, + trace_rcu_grace_period_init(rsp->name, rnp->gp_seq, rnp->level, rnp->grplo, rnp->grphi, rnp->qsmask); raw_spin_unlock_irq_rcu_node(rnp); @@ -2048,7 +2048,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) /* Declare grace period done. */ WRITE_ONCE(rsp->completed, rsp->gpnum); rcu_seq_end(&rsp->gp_seq); - trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); + trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("end")); rsp->gp_state = RCU_GP_IDLE; /* Check for GP requests since above loop. */ rdp = this_cpu_ptr(rsp->rda); @@ -2061,7 +2061,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) { WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); rsp->gp_req_activity = jiffies; - trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), + trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq), TPS("newreq")); } else { WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT); @@ -2087,7 +2087,7 @@ static int __noreturn rcu_gp_kthread(void *arg) /* Handle grace-period start. */ for (;;) { trace_rcu_grace_period(rsp->name, - READ_ONCE(rsp->gpnum), + READ_ONCE(rsp->gp_seq), TPS("reqwait")); rsp->gp_state = RCU_GP_WAIT_GPS; swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) & @@ -2100,7 +2100,7 @@ static int __noreturn rcu_gp_kthread(void *arg) WRITE_ONCE(rsp->gp_activity, jiffies); WARN_ON(signal_pending(current)); trace_rcu_grace_period(rsp->name, - READ_ONCE(rsp->gpnum), + READ_ONCE(rsp->gp_seq), TPS("reqwaitsig")); } @@ -2119,7 +2119,7 @@ static int __noreturn rcu_gp_kthread(void *arg) jiffies + 3 * j); } trace_rcu_grace_period(rsp->name, - READ_ONCE(rsp->gpnum), + READ_ONCE(rsp->gp_seq), TPS("fqswait")); rsp->gp_state = RCU_GP_WAIT_FQS; ret = swait_event_idle_timeout(rsp->gp_wq, @@ -2134,12 +2134,12 @@ static int __noreturn rcu_gp_kthread(void *arg) if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) || (gf & RCU_GP_FLAG_FQS)) { trace_rcu_grace_period(rsp->name, - READ_ONCE(rsp->gpnum), + READ_ONCE(rsp->gp_seq), TPS("fqsstart")); rcu_gp_fqs(rsp, first_gp_fqs); first_gp_fqs = false; trace_rcu_grace_period(rsp->name, - READ_ONCE(rsp->gpnum), + READ_ONCE(rsp->gp_seq), TPS("fqsend")); cond_resched_tasks_rcu_qs(); WRITE_ONCE(rsp->gp_activity, jiffies); @@ -2158,7 +2158,7 @@ static int __noreturn rcu_gp_kthread(void *arg) WRITE_ONCE(rsp->gp_activity, jiffies); WARN_ON(signal_pending(current)); trace_rcu_grace_period(rsp->name, - READ_ONCE(rsp->gpnum), + READ_ONCE(rsp->gp_seq), TPS("fqswaitsig")); ret = 1; /* Keep old FQS timing. 
*/ j = jiffies; @@ -2388,17 +2388,16 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) */ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) { - RCU_TRACE(unsigned long mask;) + RCU_TRACE(bool blkd;) RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda);) RCU_TRACE(struct rcu_node *rnp = rdp->mynode;) if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) return; - RCU_TRACE(mask = rdp->grpmask;) - trace_rcu_grace_period(rsp->name, - rnp->gpnum + 1 - !!(rnp->qsmask & mask), - TPS("cpuofl")); + RCU_TRACE(blkd = !!(rnp->qsmask & rdp->grpmask);) + trace_rcu_grace_period(rsp->name, rnp->gp_seq, + blkd ? TPS("cpuofl") : TPS("cpuofl-bgp")); } /* @@ -3538,7 +3537,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->core_needs_qs = false; rdp->rcu_iw_pending = false; rdp->rcu_iw_gp_seq = rnp->gp_seq - 1; - trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); + trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpuonl")); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index ca73931f7b30..aca9d187c509 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -301,7 +301,7 @@ static void rcu_preempt_qs(void) RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n"); if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) { trace_rcu_grace_period(TPS("rcu_preempt"), - __this_cpu_read(rcu_data_p->gpnum), + __this_cpu_read(rcu_data_p->gp_seq), TPS("cpuqs")); __this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false); barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ -- cgit v1.2.3 From 598ce09480efb6b48799df60c66bac70bea5ef54 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 May 2018 13:35:20 -0700 Subject: rcu: Convert rcu_preempt_task tracepoint to ->gp_seq This commit makes the rcu_preempt_task tracepoint use ->gp_seq instead of ->gpnum. Signed-off-by: Paul E. 
McKenney --- include/trace/events/rcu.h | 12 ++++++------ kernel/rcu/tree_plugin.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 892016ad0647..38dbd97d65a3 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -298,24 +298,24 @@ TRACE_EVENT(rcu_nocb_wake, */ TRACE_EVENT(rcu_preempt_task, - TP_PROTO(const char *rcuname, int pid, unsigned long gpnum), + TP_PROTO(const char *rcuname, int pid, unsigned long gp_seq), - TP_ARGS(rcuname, pid, gpnum), + TP_ARGS(rcuname, pid, gp_seq), TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpnum) + __field(unsigned long, gp_seq) __field(int, pid) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpnum = gpnum; + __entry->gp_seq = gp_seq; __entry->pid = pid; ), TP_printk("%s %lu %d", - __entry->rcuname, __entry->gpnum, __entry->pid) + __entry->rcuname, __entry->gp_seq, __entry->pid) ); /* @@ -761,7 +761,7 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \ do { } while (0) #define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0) -#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) +#define trace_rcu_preempt_task(rcuname, pid, gp_seq) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ grplo, grphi, gp_tasks) do { } \ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index aca9d187c509..02ca3b4e6a8f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -350,8 +350,8 @@ static void rcu_preempt_note_context_switch(bool preempt) trace_rcu_preempt_task(rdp->rsp->name, t->pid, (rnp->qsmask & rdp->grpmask) - ? rnp->gpnum - : rnp->gpnum + 1); + ? rnp->gp_seq + : rcu_seq_snap(&rnp->gp_seq)); rcu_preempt_ctxt_queue(rnp, rdp); } else if (t->rcu_read_lock_nesting < 0 && t->rcu_read_unlock_special.s) { -- cgit v1.2.3 From 865aa1e08d8aefdfd1f5d30ecfce1b8ef8cd520a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 May 2018 13:35:20 -0700 Subject: rcu: Convert rcu_unlock_preempted_task tracepoint to ->gp_seq This commit makes the rcu_unlock_preempted_task tracepoint use ->gp_seq instead of ->gpnum. Signed-off-by: Paul E. 
McKenney --- include/trace/events/rcu.h | 12 ++++++------ kernel/rcu/tree_plugin.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 38dbd97d65a3..95b7491196aa 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -325,23 +325,23 @@ TRACE_EVENT(rcu_preempt_task, */ TRACE_EVENT(rcu_unlock_preempted_task, - TP_PROTO(const char *rcuname, unsigned long gpnum, int pid), + TP_PROTO(const char *rcuname, unsigned long gp_seq, int pid), - TP_ARGS(rcuname, gpnum, pid), + TP_ARGS(rcuname, gp_seq, pid), TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpnum) + __field(unsigned long, gp_seq) __field(int, pid) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpnum = gpnum; + __entry->gp_seq = gp_seq; __entry->pid = pid; ), - TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) + TP_printk("%s %lu %d", __entry->rcuname, __entry->gp_seq, __entry->pid) ); /* @@ -762,7 +762,7 @@ TRACE_EVENT(rcu_barrier, do { } while (0) #define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gp_seq) do { } while (0) -#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) +#define trace_rcu_unlock_preempted_task(rcuname, gp_seq, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ grplo, grphi, gp_tasks) do { } \ while (0) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 02ca3b4e6a8f..a10b0e26ce19 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -545,7 +545,7 @@ void rcu_read_unlock_special(struct task_struct *t) list_del_init(&t->rcu_node_entry); t->rcu_blocked_node = NULL; trace_rcu_unlock_preempted_task(TPS("rcu_preempt"), - rnp->gpnum, t->pid); + rnp->gp_seq, t->pid); if (&t->rcu_node_entry == rnp->gp_tasks) rnp->gp_tasks = np; if (&t->rcu_node_entry == rnp->exp_tasks) @@ -708,7 +708,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) t = container_of(rnp->gp_tasks, struct task_struct, rcu_node_entry); trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"), - rnp->gpnum, t->pid); + rnp->gp_seq, t->pid); } WARN_ON_ONCE(rnp->qsmask); } -- cgit v1.2.3 From db023296f0115d2fe01fdabad54678f2b806da23 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 May 2018 13:35:20 -0700 Subject: rcu: Convert rcu_quiescent_state_report tracepoint to ->gp_seq This commit makes the rcu_quiescent_state_report tracepoint use ->gp_seq instead of ->gpnum. Signed-off-by: Paul E. 
McKenney --- include/trace/events/rcu.h | 12 ++++++------ kernel/rcu/tree.c | 2 +- kernel/rcu/tree_plugin.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 95b7491196aa..ac4d9d4a1ebf 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -354,15 +354,15 @@ TRACE_EVENT(rcu_unlock_preempted_task, */ TRACE_EVENT(rcu_quiescent_state_report, - TP_PROTO(const char *rcuname, unsigned long gpnum, + TP_PROTO(const char *rcuname, unsigned long gp_seq, unsigned long mask, unsigned long qsmask, u8 level, int grplo, int grphi, int gp_tasks), - TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), + TP_ARGS(rcuname, gp_seq, mask, qsmask, level, grplo, grphi, gp_tasks), TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpnum) + __field(unsigned long, gp_seq) __field(unsigned long, mask) __field(unsigned long, qsmask) __field(u8, level) @@ -373,7 +373,7 @@ TRACE_EVENT(rcu_quiescent_state_report, TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpnum = gpnum; + __entry->gp_seq = gp_seq; __entry->mask = mask; __entry->qsmask = qsmask; __entry->level = level; @@ -383,7 +383,7 @@ TRACE_EVENT(rcu_quiescent_state_report, ), TP_printk("%s %lu %lx>%lx %u %d %d %u", - __entry->rcuname, __entry->gpnum, + __entry->rcuname, __entry->gp_seq, __entry->mask, __entry->qsmask, __entry->level, __entry->grplo, __entry->grphi, __entry->gp_tasks) ); @@ -763,7 +763,7 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gp_seq) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gp_seq, pid) do { } while (0) -#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ +#define trace_rcu_quiescent_state_report(rcuname, gp_seq, mask, qsmask, level, \ grplo, grphi, gp_tasks) do { } \ while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 066dbaacec30..7c6c11d5479c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2229,7 +2229,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, WARN_ON_ONCE(!rcu_is_leaf_node(rnp) && rcu_preempt_blocked_readers_cgp(rnp)); rnp->qsmask &= ~mask; - trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, + trace_rcu_quiescent_state_report(rsp->name, rnp->gp_seq, mask, rnp->qsmask, rnp->level, rnp->grplo, rnp->grphi, !!rnp->gp_tasks); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a10b0e26ce19..c8a2c7760121 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -566,7 +566,7 @@ void rcu_read_unlock_special(struct task_struct *t) empty_exp_now = sync_rcu_preempt_exp_done(rnp); if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) { trace_rcu_quiescent_state_report(TPS("preempt_rcu"), - rnp->gpnum, + rnp->gp_seq, 0, rnp->qsmask, rnp->level, rnp->grplo, -- cgit v1.2.3 From ff3bb6f4d06247508489345ee90a8a9b6f3ffd3b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 May 2018 14:34:08 -0700 Subject: rcu: Remove ->gpnum and ->completed Now that everything has been converted to use ->gp_seq instead of ->gpnum and ->completed, this commit removes ->gpnum and ->completed. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 45 +++++++++++---------------------------------- kernel/rcu/tree.h | 14 +------------- kernel/rcu/tree_plugin.h | 2 +- 3 files changed, 13 insertions(+), 48 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 42e89d4f1e33..0aeddc908181 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -95,8 +95,6 @@ struct rcu_state sname##_state = { \ .rda = &sname##_data, \ .call = cr, \ .gp_state = RCU_GP_IDLE, \ - .gpnum = 0UL - 300UL, \ - .completed = 0UL - 300UL, \ .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .name = RCU_STATE_NAME(sname), \ @@ -1349,8 +1347,6 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gp_seq) struct rcu_node *rnp = rcu_get_root(rsp); long totqlen = 0; - WARN_ON_ONCE(gp_seq & 0x2); /* Remove when ->gpnum removed. */ - /* Kick and suppress, if so configured. */ rcu_stall_kick_kthreads(rsp); if (rcu_cpu_stall_suppress) @@ -1582,8 +1578,6 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, * not be released. */ raw_lockdep_assert_held_rcu_node(rnp); - WARN_ON_ONCE(c & 0x2); /* Catch any lingering use of ->gpnum. */ - WARN_ON_ONCE(((rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT) != rcu_seq_ctr(rnp->gp_seq)); /* Catch any ->completed/->gp_seq mismatches. */ trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf")); for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) { if (rnp_root != rnp) @@ -1757,8 +1751,6 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) || unlikely(READ_ONCE(rdp->gpwrap))) { ret = rcu_advance_cbs(rsp, rnp, rdp); /* Advance callbacks. */ - /* Remember that we saw this grace-period completion. */ - rdp->completed = rnp->completed; trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpuend")); } else { ret = rcu_accelerate_cbs(rsp, rnp, rdp); /* Recent callbacks. */ @@ -1772,7 +1764,6 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, * set up to detect a quiescent state, otherwise don't * go looking for one. */ - rdp->gpnum = rnp->gpnum; trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpustart")); need_gp = !!(rnp->qsmask & rdp->grpmask); rdp->cpu_no_qs.b.norm = need_gp; @@ -1843,13 +1834,8 @@ static bool rcu_gp_init(struct rcu_state *rsp) /* Advance to a new grace period and initialize state. */ record_gp_stall_check_time(rsp); - /* Record GP times before starting GP, hence smp_store_release(). */ - WARN_ON_ONCE(rsp->gpnum << RCU_SEQ_CTR_SHIFT != rsp->gp_seq); - smp_store_release(&rsp->gpnum, rsp->gpnum + 1); - smp_mb(); /* Pairs with barriers in stall-warning code. */ + /* Record GP times before starting GP, hence rcu_seq_start(). */ rcu_seq_start(&rsp->gp_seq); - if (WARN_ON_ONCE(((rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT) != rcu_seq_ctr(rnp->gp_seq))) /* Catch any ->completed/->gp_seq mismatches. 
*/ - pr_info("%s ->completed: %#lx (%#lx) ->gp_seq %#lx (%#lx)\n", __func__, rnp->completed, (rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT, rnp->gp_seq, rcu_seq_ctr(rnp->gp_seq)); trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("start")); raw_spin_unlock_irq_rcu_node(rnp); @@ -1920,9 +1906,6 @@ static bool rcu_gp_init(struct rcu_state *rsp) rdp = this_cpu_ptr(rsp->rda); rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; - WRITE_ONCE(rnp->gpnum, rsp->gpnum); - if (WARN_ON_ONCE(rnp->completed != rsp->completed)) - WRITE_ONCE(rnp->completed, rsp->completed); WRITE_ONCE(rnp->gp_seq, rsp->gp_seq); if (rnp == rdp->mynode) (void)__note_gp_changes(rsp, rnp, rdp); @@ -2012,13 +1995,13 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) raw_spin_unlock_irq_rcu_node(rnp); /* - * Propagate new ->completed value to rcu_node structures so - * that other CPUs don't have to wait until the start of the next - * grace period to process their callbacks. This also avoids - * some nasty RCU grace-period initialization races by forcing - * the end of the current grace period to be completely recorded in - * all of the rcu_node structures before the beginning of the next - * grace period is recorded in any of the rcu_node structures. + * Propagate new ->gp_seq value to rcu_node structures so that + * other CPUs don't have to wait until the start of the next grace + * period to process their callbacks. This also avoids some nasty + * RCU grace-period initialization races by forcing the end of + * the current grace period to be completely recorded in all of + * the rcu_node structures before the beginning of the next grace + * period is recorded in any of the rcu_node structures. */ new_gp_seq = rsp->gp_seq; rcu_seq_end(&new_gp_seq); @@ -2027,7 +2010,6 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp))) dump_blkd_tasks(rnp, 10); WARN_ON_ONCE(rnp->qsmask); - WRITE_ONCE(rnp->completed, rsp->gpnum); WRITE_ONCE(rnp->gp_seq, new_gp_seq); rdp = this_cpu_ptr(rsp->rda); if (rnp == rdp->mynode) @@ -2045,7 +2027,6 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) raw_spin_lock_irq_rcu_node(rnp); /* GP before rsp->gp_seq update. */ /* Declare grace period done. */ - WRITE_ONCE(rsp->completed, rsp->gpnum); rcu_seq_end(&rsp->gp_seq); trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("end")); rsp->gp_state = RCU_GP_IDLE; @@ -3496,9 +3477,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) /* * Initialize a CPU's per-CPU RCU data. Note that only one online or - * offline event can be happening at a given time. Note also that we - * can accept some slop in the rsp->completed access due to the fact - * that this CPU cannot possibly have any RCU callbacks in flight yet. + * offline event can be happening at a given time. Note also that we can + * accept some slop in the rsp->gp_seq access due to the fact that this + * CPU cannot possibly have any RCU callbacks in flight yet. */ static void rcu_init_percpu_data(int cpu, struct rcu_state *rsp) @@ -3527,8 +3508,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rnp = rdp->mynode; raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ rdp->beenonline = true; /* We have now been online. */ - rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. 
*/ - rdp->completed = rnp->completed; rdp->gp_seq = rnp->gp_seq; rdp->gp_seq_needed = rnp->gp_seq; rdp->cpu_no_qs.b.norm = true; @@ -3908,8 +3887,6 @@ static void __init rcu_init_one(struct rcu_state *rsp) raw_spin_lock_init(&rnp->fqslock); lockdep_set_class_and_name(&rnp->fqslock, &rcu_fqs_class[i], fqs[i]); - rnp->gpnum = rsp->gpnum; - rnp->completed = rsp->completed; rnp->gp_seq = rsp->gp_seq; rnp->gp_seq_needed = rsp->gp_seq; rnp->completedqs = rsp->gp_seq; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 9329c1ff695f..3def94fc9c74 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -81,12 +81,6 @@ struct rcu_node { raw_spinlock_t __private lock; /* Root rcu_node's lock protects */ /* some rcu_state fields as well as */ /* following. */ - unsigned long gpnum; /* Current grace period for this node. */ - /* This will either be equal to or one */ - /* behind the root rcu_node's gpnum. */ - unsigned long completed; /* Last GP completed for this node. */ - /* This will either be equal to or one */ - /* behind the root rcu_node's gpnum. */ unsigned long gp_seq; /* Track rsp->rcu_gp_seq. */ unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed. */ unsigned long completedqs; /* All QSes done for this node. */ @@ -192,10 +186,6 @@ union rcu_noqs { /* Per-CPU data for read-copy update. */ struct rcu_data { /* 1) quiescent-state and grace-period handling : */ - unsigned long completed; /* Track rsp->completed gp number */ - /* in order to detect GP end. */ - unsigned long gpnum; /* Highest gp number that this CPU */ - /* is aware of having started. */ unsigned long gp_seq; /* Track rsp->rcu_gp_seq counter. */ unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed ctr. */ unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */ @@ -203,7 +193,7 @@ struct rcu_data { union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */ bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ - bool gpwrap; /* Possible gpnum/completed wrap. */ + bool gpwrap; /* Possible ->gp_seq wrap. */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ unsigned long ticks_this_gp; /* The number of scheduling-clock */ @@ -328,8 +318,6 @@ struct rcu_state { u8 boost ____cacheline_internodealigned_in_smp; /* Subject to priority boost. */ - unsigned long gpnum; /* Current gp number. */ - unsigned long completed; /* # of last completed gp. */ unsigned long gp_seq; /* Grace-period sequence #. */ struct task_struct *gp_kthread; /* Task for grace periods. */ struct swait_queue_head gp_wq; /* Where GP task waits. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c8a2c7760121..3a6e04103de1 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -690,7 +690,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp) * Check that the list of blocked tasks for the newly completed grace * period is in fact empty. It is a serious bug to complete a grace * period that still has RCU readers blocked! This function must be - * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock + * invoked -before- updating this rnp's ->gp_seq, and the rnp's ->lock * must be held by the caller. * * Also, if there are blocked tasks on the list, they automatically -- cgit v1.2.3 From 77cfc7bf24ba0ba37be54e224007847d485a860f Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 1 May 2018 15:00:10 -0700 Subject: rcu: Fix typo and add additional debug This commit fixes a typo and adds some additional debugging to the message emitted when a task blocking the current grace period is listed as blocking it when either that grace period ends or the next grace period begins. This commit also reformats the console message for readability. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 1 + kernel/rcu/tree_plugin.h | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 85417584ffd0..a4277c1087d9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2316,6 +2316,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp, return; /* Still need more quiescent states! */ } + rnp->completedqs = rnp->gp_seq; rnp_p = rnp->parent; if (rnp_p == NULL) { /* diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 3a6e04103de1..677b0c9f548d 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -856,8 +856,14 @@ static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck) struct list_head *lhp; raw_lockdep_assert_held_rcu_node(rnp); - pr_info("%s: grp: %d-%d level: %d ->qamask %#lx ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p &->blkd_tasks: %p offset: %u\n", __func__, rnp->grplo, rnp->grphi, rnp->level, rnp->qsmask, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks, &rnp->blkd_tasks, (unsigned int)offsetof(typeof(*rnp), blkd_tasks)); - pr_cont("\t->blkd_tasks"); + pr_info("%s: grp: %d-%d level: %d ->gp_seq %#lx ->completedqs %#lx\n", + __func__, rnp->grplo, rnp->grphi, rnp->level, + rnp->gp_seq, rnp->completedqs); + pr_info("%s: ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n", + __func__, rnp->qsmask, rnp->qsmaskinit, rnp->qsmaskinitnext); + pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n", + __func__, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks); + pr_info("%s: ->blkd_tasks", __func__); i = 0; list_for_each(lhp, &rnp->blkd_tasks) { pr_cont(" %p", lhp); -- cgit v1.2.3 From 0b107d24d9361132758374a7b007c7c74efa007f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 8 May 2018 16:18:28 -0700 Subject: rcu: Suppress false-positive splats from mid-init task resume Consider the following sequence of events in a PREEMPT=y kernel: 1. All CPUs corresponding to a given leaf rcu_node structure are offline. 2. The first phase of the rcu_gp_init() function's grace-period initialization runs, and sets that rcu_node structure's ->qsmaskinit to zero, as it should. 3. One of the CPUs corresponding to that rcu_node structure comes back online. Note that because this CPU came online after the grace period started, this grace period can safely ignore this newly onlined CPU. 4. A task running on the newly onlined CPU enters an RCU-preempt read-side critical section, and is then preempted. Because the corresponding rcu_node structure's ->qsmask is zero, rcu_preempt_ctxt_queue() leaves the rcu_node structure's ->gp_tasks field NULL, as it should. 5. The rcu_gp_init() function continues running the second phase of grace-period initialization. The ->qsmask field of the parent of the aforementioned leaf rcu_node structure is set to not expect a quiescent state from the leaf, as is only right and proper. 
However, when rcu_gp_init() reaches the leaf, it invokes rcu_preempt_check_blocked_tasks(), which sees that the leaf's ->blkd_tasks list is non-empty, and therefore sets the leaf's ->gp_tasks field to reference the first task on that list. 6. The grace period ends before the preempted task resumes, which is perfectly fine, given that this grace period was under no obligation to wait for that task to exit its late-starting RCU-preempt read-side critical section. Unfortunately, the leaf's ->gp_tasks field is non-NULL, so rcu_gp_cleanup() splats. After all, it appears to rcu_gp_cleanup() that the grace period failed to wait for a task that was supposed to be blocking that grace period. This commit avoids this false-positive splat by adding a check of both ->qsmaskinit and ->wait_blkd_tasks to rcu_preempt_check_blocked_tasks(). If both ->qsmaskinit and ->wait_blkd_tasks are zero, then the task must have entered its RCU-preempt read-side critical section late (after all, the CPU that it is running on was not online at that time), which means that the upper-level rcu_node structure won't be waiting for anything on the leaf anyway. If ->wait_blkd_tasks is non-zero, then there is at least one task on ths rcu_node structure's ->blkd_tasks list whose RCU read-side critical section predates the current grace period. If ->qsmaskinit is non-zero, there is at least one CPU that was online at the start of the current grace period. Thus, if both are zero, there is nothing to wait for. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 677b0c9f548d..1c9a836af5b6 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -703,7 +703,8 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n"); if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp))) dump_blkd_tasks(rnp, 10); - if (rcu_preempt_has_tasks(rnp)) { + if (rcu_preempt_has_tasks(rnp) && + (rnp->qsmaskinit || rnp->wait_blkd_tasks)) { rnp->gp_tasks = rnp->blkd_tasks.next; t = container_of(rnp->gp_tasks, struct task_struct, rcu_node_entry); -- cgit v1.2.3 From 1f3e5f51b933cbc25e3da0cdbdac40716df04ddb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 3 May 2018 10:35:33 -0700 Subject: rcu: Add RCU-preempt check for waiting on newly onlined CPU RCU should only be waiting on CPUs that were online at the time that the current grace period started. Failure to abide by this rule can result in confusing splats during grace-period cleanup and initialization. This commit therefore adds a check to RCU-preempt's preempted-task queuing that checks for waiting on newly onlined CPUs. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 1c9a836af5b6..6b85ce936ad4 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -183,6 +183,9 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) raw_lockdep_assert_held_rcu_node(rnp); WARN_ON_ONCE(rdp->mynode != rnp); WARN_ON_ONCE(!rcu_is_leaf_node(rnp)); + /* RCU better not be waiting on newly onlined CPUs! 
+	WARN_ON_ONCE(rnp->qsmaskinitnext & ~rnp->qsmaskinit & rnp->qsmask &
+		     rdp->grpmask);
 
 	/*
 	 * Decide where to queue the newly blocked task.  In theory,
-- cgit v1.2.3

From ff3cee39088b1931a432587059d66cd505f785dc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Tue, 8 May 2018 12:50:14 -0700
Subject: rcu: Add up-tree information to dump_blkd_tasks() diagnostics

This commit updates dump_blkd_tasks() to print out quiescent-state bitmasks for the rcu_node structures further up the tree. This information aids debugging of interactions between CPU-hotplug operations and RCU grace-period initialization.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_plugin.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'kernel/rcu/tree_plugin.h')

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 6b85ce936ad4..f45ff97b0d51 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -858,13 +858,15 @@ static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
 {
 	int i;
 	struct list_head *lhp;
+	struct rcu_node *rnp1;
 
 	raw_lockdep_assert_held_rcu_node(rnp);
-	pr_info("%s: grp: %d-%d level: %d ->gp_seq %#lx ->completedqs %#lx\n",
+	pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
 		__func__, rnp->grplo, rnp->grphi, rnp->level,
-		rnp->gp_seq, rnp->completedqs);
-	pr_info("%s: ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
-		__func__, rnp->qsmask, rnp->qsmaskinit, rnp->qsmaskinitnext);
+		(long)rnp->gp_seq, (long)rnp->completedqs);
+	for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
+		pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
+			__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
 	pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
 		__func__, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks);
 	pr_info("%s: ->blkd_tasks", __func__);
-- cgit v1.2.3

From 577389423187d8b51dfe6199297e579a3419b72b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Tue, 8 May 2018 14:18:57 -0700
Subject: rcu: Add CPU online/offline state to dump_blkd_tasks()

Interactions between CPU-hotplug operations and grace-period initialization can result in dump_blkd_tasks() being invoked. One of the first debugging actions in this case is to search back in dmesg to work out which of the affected rcu_node structure's CPUs are online and to determine the last CPU-hotplug operation affecting any of those CPUs. This can be laborious and error-prone, especially when console output is lost. This commit therefore causes dump_blkd_tasks() to dump the state of the affected rcu_node structure's CPUs and the last grace period during which the last offline and online operation affected each of these CPUs.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree.c        | 12 ++++++++++--
 kernel/rcu/tree.h        | 12 +++++++++---
 kernel/rcu/tree_plugin.h | 25 ++++++++++++++++++++-----
 3 files changed, 39 insertions(+), 10 deletions(-)

(limited to 'kernel/rcu/tree_plugin.h')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5f1a11f1f7bc..a2503ef1bbe2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1954,7 +1954,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
 		rcu_gp_slow(rsp, gp_init_delay);
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		rdp = this_cpu_ptr(rsp->rda);
-		rcu_preempt_check_blocked_tasks(rnp);
+		rcu_preempt_check_blocked_tasks(rsp, rnp);
 		rnp->qsmask = rnp->qsmaskinit;
 		WRITE_ONCE(rnp->gp_seq, rsp->gp_seq);
 		if (rnp == rdp->mynode)
@@ -2063,7 +2063,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	rcu_for_each_node_breadth_first(rsp, rnp) {
 		raw_spin_lock_irq_rcu_node(rnp);
 		if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
-			dump_blkd_tasks(rnp, 10);
+			dump_blkd_tasks(rsp, rnp, 10);
 		WARN_ON_ONCE(rnp->qsmask);
 		WRITE_ONCE(rnp->gp_seq, new_gp_seq);
 		rdp = this_cpu_ptr(rsp->rda);
@@ -3516,6 +3516,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
 	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1);
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks)));
+	rdp->rcu_ofl_gp_seq = rsp->gp_seq;
+	rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
+	rdp->rcu_onl_gp_seq = rsp->gp_seq;
+	rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
 	rcu_boot_init_nocb_percpu_data(rdp);
@@ -3711,6 +3715,8 @@ void rcu_cpu_starting(unsigned int cpu)
 		/* Allow lockless access for expedited grace periods. */
 		smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */
 		rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
+		rdp->rcu_onl_gp_seq = READ_ONCE(rsp->gp_seq);
+		rdp->rcu_onl_gp_flags = READ_ONCE(rsp->gp_flags);
 		if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
 			/* Report QS -after- changing ->qsmaskinitnext! */
 			rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
@@ -3738,6 +3744,8 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
 	mask = rdp->grpmask;
 	spin_lock(&rsp->ofl_lock);
 	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
+	rdp->rcu_ofl_gp_seq = READ_ONCE(rsp->gp_seq);
+	rdp->rcu_ofl_gp_flags = READ_ONCE(rsp->gp_flags);
 	if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */
 		/* Report quiescent state -before- changing ->qsmaskinitnext! */
 		rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 795d469c6f67..f52bc059bfec 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -255,12 +255,16 @@ struct rcu_data {
					/* Leader CPU takes GP-end wakeups. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
-	/* 7) RCU CPU stall data. */
+	/* 7) Diagnostic data, including RCU CPU stall warnings. */
 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
 	/* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
 	struct irq_work rcu_iw;		/* Check for non-irq activity. */
 	bool rcu_iw_pending;		/* Is ->rcu_iw pending? */
 	unsigned long rcu_iw_gp_seq;	/* ->gp_seq associated with ->rcu_iw. */
+	unsigned long rcu_ofl_gp_seq;	/* ->gp_seq at last offline. */
+	short rcu_ofl_gp_flags;		/* ->gp_flags at last offline. */
+	unsigned long rcu_onl_gp_seq;	/* ->gp_seq at last online. */
+	short rcu_onl_gp_flags;		/* ->gp_flags at last online. */
 
 	int cpu;
 	struct rcu_state *rsp;
@@ -431,11 +435,13 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
 static void rcu_print_detail_task_stall(struct rcu_state *rsp);
 static int rcu_print_task_stall(struct rcu_node *rnp);
 static int rcu_print_task_exp_stall(struct rcu_node *rnp);
-static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
+static void rcu_preempt_check_blocked_tasks(struct rcu_state *rsp,
+					    struct rcu_node *rnp);
 static void rcu_preempt_check_callbacks(void);
 void call_rcu(struct rcu_head *head, rcu_callback_t func);
 static void __init __rcu_init_preempt(void);
-static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
+static void dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp,
+			    int ncheck);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
 static void invoke_rcu_callbacks_kthread(void);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index f45ff97b0d51..613372246a07 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -699,13 +699,14 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
 * Also, if there are blocked tasks on the list, they automatically
 * block the newly created grace period, so set up ->gp_tasks accordingly.
 */
-static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
+static void
+rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
 {
 	struct task_struct *t;
 
 	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
 	if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
-		dump_blkd_tasks(rnp, 10);
+		dump_blkd_tasks(rsp, rnp, 10);
 	if (rcu_preempt_has_tasks(rnp) &&
 	    (rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
 		rnp->gp_tasks = rnp->blkd_tasks.next;
@@ -854,10 +855,14 @@ void exit_rcu(void)
 * Dump the blocked-tasks state, but limit the list dump to the
 * specified number of elements.
 */
-static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
+static void
+dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
 {
+	int cpu;
 	int i;
 	struct list_head *lhp;
+	bool onl;
+	struct rcu_data *rdp;
 	struct rcu_node *rnp1;
 
 	raw_lockdep_assert_held_rcu_node(rnp);
@@ -877,6 +882,14 @@ static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
 			break;
 	}
 	pr_cont("\n");
+	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
+		rdp = per_cpu_ptr(rsp->rda, cpu);
+		onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
+		pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
+			cpu, ".o"[onl],
+			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
+			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
+	}
 }
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
@@ -949,7 +962,8 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
 * so there is no need to check for blocked tasks.  So check only for
 * bogus qsmask values.
 */
-static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
+static void
+rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
 {
 	WARN_ON_ONCE(rnp->qsmask);
 }
@@ -990,7 +1004,8 @@ void exit_rcu(void)
 /*
 * Dump the guaranteed-empty blocked-tasks state.  Trust but verify.
 */
-static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
+static void
+dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
 {
 	WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));
 }
-- cgit v1.2.3

From 3949fa9bac090ad217534c30bc3b6572289abf21 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
McKenney" Date: Tue, 8 May 2018 15:29:10 -0700 Subject: rcu: Make rcu_read_unlock_special() static Because rcu_read_unlock_special() is no longer used outside of kernel/rcu/tree_plugin.h, this commit makes it static. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 - kernel/rcu/tree_plugin.h | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 65163aa0bb04..67ec077c7ee5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -64,7 +64,6 @@ void rcu_barrier_tasks(void); void __rcu_read_lock(void); void __rcu_read_unlock(void); -void rcu_read_unlock_special(struct task_struct *t); void synchronize_rcu(void); /* diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 613372246a07..54a251640f53 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -127,6 +127,7 @@ static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data; static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, bool wake); +static void rcu_read_unlock_special(struct task_struct *t); /* * Tell them what RCU they are running. @@ -461,7 +462,7 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp) * notify RCU core processing or task having blocked during the RCU * read-side critical section. */ -void rcu_read_unlock_special(struct task_struct *t) +static void rcu_read_unlock_special(struct task_struct *t) { bool empty_exp; bool empty_norm; -- cgit v1.2.3 From a7538352da722fae5cc95ae6656ea2013f5b8b21 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 14 May 2018 13:27:33 -0700 Subject: rcu: Use pr_fmt to prefix "rcu: " to logging output This commit also adjusts some whitespace while in the area. Signed-off-by: Joe Perches Signed-off-by: Paul E. McKenney [ paulmck: Revert string-breaking %s as requested by Andy Shevchenko. 
] --- kernel/rcu/rcuperf.c | 7 +++---- kernel/rcu/rcutorture.c | 4 ++-- kernel/rcu/srcutree.c | 5 ++++- kernel/rcu/tree.c | 8 +++++--- kernel/rcu/tree_plugin.h | 10 ++++++---- 5 files changed, 20 insertions(+), 14 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index b080bc4a4f45..00e395c0d7d0 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -680,12 +680,11 @@ rcu_perf_init(void) break; } if (i == ARRAY_SIZE(perf_ops)) { - pr_alert("rcu-perf: invalid perf type: \"%s\"\n", - perf_type); + pr_alert("rcu-perf: invalid perf type: \"%s\"\n", perf_type); pr_alert("rcu-perf types:"); for (i = 0; i < ARRAY_SIZE(perf_ops); i++) - pr_alert(" %s", perf_ops[i]->name); - pr_alert("\n"); + pr_cont(" %s", perf_ops[i]->name); + pr_cont("\n"); firsterr = -EINVAL; goto unwind; } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 0481c7286875..90a94fecdd73 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1755,8 +1755,8 @@ rcu_torture_init(void) torture_type); pr_alert("rcu-torture types:"); for (i = 0; i < ARRAY_SIZE(torture_ops); i++) - pr_alert(" %s", torture_ops[i]->name); - pr_alert("\n"); + pr_cont(" %s", torture_ops[i]->name); + pr_cont("\n"); firsterr = -EINVAL; goto unwind; } diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index d6d6ea9738c0..e526b56998af 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -26,6 +26,8 @@ * */ +#define pr_fmt(fmt) "rcu: " fmt + #include #include #include @@ -390,7 +392,8 @@ void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) } if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || WARN_ON(srcu_readers_active(sp))) { - pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); + pr_info("%s: Active srcu_struct %p state: %d\n", + __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); return; /* Caller forgot to stop doing call_srcu()? */ } free_percpu(sp->sda); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 19beabe73629..6f2922168216 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -27,6 +27,9 @@ * For detailed explanation of Read-Copy Update mechanism see - * Documentation/RCU */ + +#define pr_fmt(fmt) "rcu: " fmt + #include #include #include @@ -1374,8 +1377,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gp_seq) * See Documentation/RCU/stallwarn.txt for info on how to debug * RCU CPU stall warnings. 
*/ - pr_err("INFO: %s detected stalls on CPUs/tasks:", - rsp->name); + pr_err("INFO: %s detected stalls on CPUs/tasks:", rsp->name); print_cpu_stall_info_begin(); rcu_for_each_leaf_node(rsp, rnp) { raw_spin_lock_irqsave_rcu_node(rnp, flags); @@ -4048,7 +4050,7 @@ static void __init rcu_init_geometry(void) if (rcu_fanout_leaf == RCU_FANOUT_LEAF && nr_cpu_ids == NR_CPUS) return; - pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n", + pr_info("Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n", rcu_fanout_leaf, nr_cpu_ids); /* diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 54a251640f53..dbfe90191e19 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -74,8 +74,8 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tRCU event tracing is enabled.\n"); if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) || (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32)) - pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", - RCU_FANOUT); + pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d.\n", + RCU_FANOUT); if (rcu_fanout_exact) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) @@ -88,11 +88,13 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tBuild-time adjustment of leaf fanout to %d.\n", RCU_FANOUT_LEAF); if (rcu_fanout_leaf != RCU_FANOUT_LEAF) - pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); + pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", + rcu_fanout_leaf); if (nr_cpu_ids != NR_CPUS) pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%u.\n", NR_CPUS, nr_cpu_ids); #ifdef CONFIG_RCU_BOOST - pr_info("\tRCU priority boosting: priority %d delay %d ms.\n", kthread_prio, CONFIG_RCU_BOOST_DELAY); + pr_info("\tRCU priority boosting: priority %d delay %d ms.\n", + kthread_prio, CONFIG_RCU_BOOST_DELAY); #endif if (blimit != DEFAULT_RCU_BLIMIT) pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit); -- cgit v1.2.3 From 15651201fa055ec81d3669b36ab7c2fb12c3ce36 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 May 2018 14:41:41 -0700 Subject: rcu: Mark task as .need_qs less aggressively If any scheduling-clock interrupt interrupts an RCU-preempt read-side critical section, the interrupted task's ->rcu_read_unlock_special.b.need_qs field is set. This causes the outermost rcu_read_unlock() to incur the extra overhead of calling into rcu_read_unlock_special(). This commit reduces that overhead by setting ->rcu_read_unlock_special.b.need_qs only if the grace period has been in effect for more than one second. Why one second? Because this is comfortably smaller than the minimum RCU CPU stall-warning timeout of three seconds, but long enough that the .need_qs marking should happen quite rarely. And if your RCU read-side critical section has run on-CPU for a full second, it is not unreasonable to invest some CPU time in ending the grace period quickly. Signed-off-by: Paul E. 
---
 kernel/rcu/tree_plugin.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'kernel/rcu/tree_plugin.h')

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index dbfe90191e19..0239cf8a4be6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -730,6 +730,7 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
 */
 static void rcu_preempt_check_callbacks(void)
 {
+	struct rcu_state *rsp = &rcu_preempt_state;
 	struct task_struct *t = current;
 
 	if (t->rcu_read_lock_nesting == 0) {
@@ -738,7 +739,9 @@ static void rcu_preempt_check_callbacks(void)
 	}
 	if (t->rcu_read_lock_nesting > 0 &&
 	    __this_cpu_read(rcu_data_p->core_needs_qs) &&
-	    __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm))
+	    __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm) &&
+	    !t->rcu_read_unlock_special.b.need_qs &&
+	    time_after(jiffies, rsp->gp_start + HZ))
 		t->rcu_read_unlock_special.b.need_qs = true;
 }
 
-- cgit v1.2.3

From c7037ff5249cee237b8c8a6f99998ae4f3916b60 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Wed, 16 May 2018 18:00:17 -0700
Subject: rcu: Clarify and correct the rcu_preempt_qs() header comment

The rcu_preempt_qs() function only applies to the CPU, not the task. A task really is allowed to invoke this function while in an RCU-preempt read-side critical section, but only if it has first added itself to some leaf rcu_node structure's ->blkd_tasks list.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_plugin.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'kernel/rcu/tree_plugin.h')

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0239cf8a4be6..07d1ad175994 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -294,13 +294,17 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
 }
 
 /*
- * Record a preemptible-RCU quiescent state for the specified CPU.  Note
- * that this just means that the task currently running on the CPU is
- * not in a quiescent state.  There might be any number of tasks blocked
- * while in an RCU read-side critical section.
+ * Record a preemptible-RCU quiescent state for the specified CPU.
+ * Note that this does not necessarily mean that the task currently running
+ * on the CPU is in a quiescent state:  Instead, it means that the current
+ * grace period need not wait on any RCU read-side critical section that
+ * starts later on this CPU.  It also means that if the current task is
+ * in an RCU read-side critical section, it has already added itself to
+ * some leaf rcu_node structure's ->blkd_tasks list.  In addition to the
+ * current task, there might be any number of other tasks blocked while
+ * in an RCU read-side critical section.
 *
- * As with the other rcu_*_qs() functions, callers to this function
- * must disable preemption.
+ * Callers to this function must disable preemption.
 */
 static void rcu_preempt_qs(void)
 {
-- cgit v1.2.3

From 164ba3fc4864346dbc365f8b89d8888e1b6cd38c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Wed, 16 May 2018 20:41:36 -0700
Subject: rcu: Remove unused rcu_kick_nohz_cpu() function

The rcu_kick_nohz_cpu() function is no longer used, and the functionality it used to provide is now provided by a call to resched_cpu() in the force-quiescent-state function rcu_implicit_dynticks_qs(). This commit therefore removes rcu_kick_nohz_cpu().

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree.h        |  1 -
 kernel/rcu/tree_plugin.h | 17 -----------------
 2 files changed, 18 deletions(-)

(limited to 'kernel/rcu/tree_plugin.h')

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index d51e6edc8e83..4e74df768c57 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -483,7 +483,6 @@ static void __init rcu_spawn_nocb_kthreads(void);
 #ifdef CONFIG_RCU_NOCB_CPU
 static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp);
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
-static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
 static bool init_nocb_callback_list(struct rcu_data *rdp);
 static void rcu_bind_gp_kthread(void);
 static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 07d1ad175994..75a91d58b8f7 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2645,23 +2645,6 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
 
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
 
-/*
- * An adaptive-ticks CPU can potentially execute in kernel mode for an
- * arbitrarily long period of time with the scheduling-clock tick turned
- * off.  RCU will be paying attention to this CPU because it is in the
- * kernel, but the CPU cannot be guaranteed to be executing the RCU state
- * machine because the scheduling-clock tick has been disabled.  Therefore,
- * if an adaptive-ticks CPU is failing to respond to the current grace
- * period and has not be idle from an RCU perspective, kick it.
- */
-static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
-{
-#ifdef CONFIG_NO_HZ_FULL
-	if (tick_nohz_full_cpu(cpu))
-		smp_send_reschedule(cpu);
-#endif /* #ifdef CONFIG_NO_HZ_FULL */
-}
-
 /*
 * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
 * grace-period kthread will do force_quiescent_state() processing?
-- cgit v1.2.3

From ab6b82147f471e31d9397c95d523631b6c8953f2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Thu, 17 May 2018 11:21:20 -0700
Subject: rcu: Remove unused local variable "cpu"

One danger of using __maybe_unused is that the compiler doesn't yell at you when you remove the last reference, witness rcu_bind_gp_kthread() and its local variable "cpu". This commit removes this local variable.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_plugin.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'kernel/rcu/tree_plugin.h')

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 75a91d58b8f7..2cc9bf0d363a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2670,8 +2670,6 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
 */
 static void rcu_bind_gp_kthread(void)
 {
-	int __maybe_unused cpu;
-
 	if (!tick_nohz_full_enabled())
 		return;
 	housekeeping_affine(current, HK_FLAG_RCU);
-- cgit v1.2.3

From 89b4cd4b9ebf15b01f70b85105ea5f5c6b2a3788 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Mon, 11 Jun 2018 12:29:16 -0700
Subject: rcu: Print stall-warning NMI dyntick state in hexadecimal

The ->dynticks_nmi_nesting field records the nesting depth of both interrupt and NMI handlers. Because the kernel can enter interrupts and never leave them (and vice versa) and because NMIs can interrupt manipulation of the ->dynticks_nmi_nesting field, the values in this field must be both chosen and manipulated very carefully.
As a result, although the value is zero when the corresponding CPU is executing neither an interrupt nor an NMI handler, it is 4,611,686,018,427,387,906 on 64-bit systems when there is a single level of interrupt/NMI handling in progress. This number is difficult to remember and interpret, so this commit switches the output to hexadecimal, resulting in the much nicer 0x4000000000000002.

Signed-off-by: Paul E. McKenney
---
 kernel/rcu/tree_plugin.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/rcu/tree_plugin.h')

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 2cc9bf0d363a..c1b17f5b9361 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1801,7 +1801,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
 	}
 	print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
 	delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
-	pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%ld softirq=%u/%u fqs=%ld %s\n",
+	pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
	       cpu,
	       "O."[!!cpu_online(cpu)],
	       "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
-- cgit v1.2.3
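
For readers wondering where 4,611,686,018,427,387,906 comes from: in kernels of this era, the non-idle baseline of ->dynticks_nmi_nesting is assumed here to be DYNTICK_IRQ_NONIDLE, that is, (LONG_MAX / 2) + 1 or 0x4000000000000000 on 64-bit systems, with each level of interrupt/NMI handling assumed to add 2. The user-space sketch below merely mimics those kernel constants rather than being kernel code, and shows why the hexadecimal form decodes at a glance:

#include <limits.h>
#include <stdio.h>

/* Assumed to mirror kernel/rcu/tree.h's DYNTICK_IRQ_NONIDLE. */
#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1)

int main(void)
{
	/* Non-idle baseline plus one interrupt/NMI nesting level (+2). */
	long nesting = DYNTICK_IRQ_NONIDLE + 2;

	printf("decimal: %ld\n", nesting);                 /* 4611686018427387906 */
	printf("hex:     %#lx\n", (unsigned long)nesting); /* 0x4000000000000002 */
	return 0;
}

In the hexadecimal form, the high-order baseline (the leading 0x4) and the low-order nesting count (the trailing 2) are visually separate, which is exactly the readability win the commit above is after.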
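
More broadly, several of the conversions in this series (->gpnum/->completed to ->gp_seq, rcu_seq_ctr(), RCU_SEQ_CTR_SHIFT) lean on a single counter that packs a grace-period count and a phase indicator into one unsigned long. The minimal user-space model below assumes the kernel's layout of two low-order state bits (an RCU_SEQ_CTR_SHIFT of 2); the helper names echo kernel/rcu/rcu.h, but this is an illustrative sketch, not the kernel implementation:

#include <stdio.h>

/* Assumed to mirror kernel/rcu/rcu.h: two low-order state bits. */
#define RCU_SEQ_CTR_SHIFT  2
#define RCU_SEQ_STATE_MASK ((1UL << RCU_SEQ_CTR_SHIFT) - 1)

/* Grace-period count with the state bits shifted away. */
static unsigned long rcu_seq_ctr(unsigned long s)
{
	return s >> RCU_SEQ_CTR_SHIFT;
}

/* Low-order state bits: zero means idle, nonzero means a GP is in flight. */
static unsigned long rcu_seq_state(unsigned long s)
{
	return s & RCU_SEQ_STATE_MASK;
}

int main(void)
{
	unsigned long gp_seq = 0;

	gp_seq++;	/* Grace-period start: state goes nonzero. */
	printf("ctr=%lu state=%lu\n", rcu_seq_ctr(gp_seq), rcu_seq_state(gp_seq));

	/* Grace-period end: advance the count, clear the state bits. */
	gp_seq = (rcu_seq_ctr(gp_seq) + 1) << RCU_SEQ_CTR_SHIFT;
	printf("ctr=%lu state=%lu\n", rcu_seq_ctr(gp_seq), rcu_seq_state(gp_seq));
	return 0;
}

Because the count lives in the high-order bits, grace-period comparisons reduce to unsigned comparisons on the packed value, and shifting with rcu_seq_ctr() yields a plain grace-period number for display, as in the print_cpu_stall_info() change just above.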