From fed31a4dd3adb5455df7c704de2abb639a1dc1c0 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Tue, 13 Jul 2021 08:56:45 +0800 Subject: rcu: Fix macro name CONFIG_TASKS_RCU_TRACE This commit fixes several typos where CONFIG_TASKS_RCU_TRACE should instead be CONFIG_TASKS_TRACE_RCU. Among other things, these typos could cause CONFIG_TASKS_TRACE_RCU_READ_MB=y kernels to suffer from memory-ordering bugs that could result in false-positive quiescent states and too-short grace periods. Signed-off-by: Zhouyi Zhou Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d9680b798b21..955c82b4737c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -167,7 +167,7 @@ void synchronize_rcu_tasks(void); # define synchronize_rcu_tasks synchronize_rcu # endif -# ifdef CONFIG_TASKS_RCU_TRACE +# ifdef CONFIG_TASKS_TRACE_RCU # define rcu_tasks_trace_qs(t) \ do { \ if (!likely(READ_ONCE((t)->trc_reader_checked)) && \ -- cgit v1.2.3 From a86baa69c2b7b85bab41692fa3ec188a5aae1d27 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Tue, 18 May 2021 19:17:16 -0700 Subject: rcu: Remove special bit at the bottom of the ->dynticks counter Commit b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter") reserved a bit at the bottom of the ->dynticks counter to defer flushing of TLBs, but this facility never has been used. This commit therefore removes this capability along with the rcu_eqs_special_set() function used to trigger it. Link: https://lore.kernel.org/linux-doc/CALCETrWNPOOdTrFabTDd=H7+wc6xJ9rJceg6OL1S0rTV5pfSsA@mail.gmail.com/ Suggested-by: Andy Lutomirski Signed-off-by: "Joel Fernandes (Google)" [ paulmck: Forward-port to v5.13-rc1. ] Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 3 -- kernel/rcu/tree.c | 77 +++++++++---------------------------------------- 2 files changed, 14 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 953e70fafe38..9be015305f9f 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -14,9 +14,6 @@ #include /* for HZ */ -/* Never flag non-existent other CPUs! */ -static inline bool rcu_eqs_special_set(int cpu) { return false; } - unsigned long get_state_synchronize_rcu(void); unsigned long start_poll_synchronize_rcu(void); bool poll_state_synchronize_rcu(unsigned long oldstate); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 51f24ecd94b2..42a0032dd99f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -74,17 +74,10 @@ /* Data structures. */ -/* - * Steal a bit from the bottom of ->dynticks for idle entry/exit - * control. Initially this is for TLB flushing. - */ -#define RCU_DYNTICK_CTRL_MASK 0x1 -#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1) - static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { .dynticks_nesting = 1, .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, - .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), + .dynticks = ATOMIC_INIT(1), #ifdef CONFIG_RCU_NOCB_CPU .cblist.flags = SEGCBLIST_SOFTIRQ_ONLY, #endif @@ -266,7 +259,6 @@ void rcu_softirq_qs(void) */ static noinstr void rcu_dynticks_eqs_enter(void) { - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); int seq; /* @@ -275,13 +267,9 @@ static noinstr void rcu_dynticks_eqs_enter(void) * next idle sojourn. */ rcu_dynticks_task_trace_enter(); // Before ->dynticks update! - seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); + seq = arch_atomic_inc_return(&this_cpu_ptr(&rcu_data)->dynticks); // RCU is no longer watching. Better be in extended quiescent state! - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - (seq & RCU_DYNTICK_CTRL_CTR)); - /* Better not have special action (TLB flush) pending! */ - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - (seq & RCU_DYNTICK_CTRL_MASK)); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1)); } /* @@ -291,7 +279,6 @@ static noinstr void rcu_dynticks_eqs_enter(void) */ static noinstr void rcu_dynticks_eqs_exit(void) { - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); int seq; /* @@ -299,15 +286,10 @@ static noinstr void rcu_dynticks_eqs_exit(void) * and we also must force ordering with the next RCU read-side * critical section. */ - seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); + seq = arch_atomic_inc_return(&this_cpu_ptr(&rcu_data)->dynticks); // RCU is now watching. Better not be in an extended quiescent state! rcu_dynticks_task_trace_exit(); // After ->dynticks update! - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !(seq & RCU_DYNTICK_CTRL_CTR)); - if (seq & RCU_DYNTICK_CTRL_MASK) { - arch_atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks); - smp_mb__after_atomic(); /* _exit after clearing mask. */ - } + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1)); } /* @@ -324,9 +306,9 @@ static void rcu_dynticks_eqs_online(void) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR) + if (atomic_read(&rdp->dynticks) & 0x1) return; - atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); + atomic_inc(&rdp->dynticks); } /* @@ -336,9 +318,7 @@ static void rcu_dynticks_eqs_online(void) */ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) { - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - - return !(arch_atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR); + return !(arch_atomic_read(&this_cpu_ptr(&rcu_data)->dynticks) & 0x1); } /* @@ -347,9 +327,7 @@ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) */ static int rcu_dynticks_snap(struct rcu_data *rdp) { - int snap = atomic_add_return(0, &rdp->dynticks); - - return snap & ~RCU_DYNTICK_CTRL_MASK; + return atomic_add_return(0, &rdp->dynticks); } /* @@ -358,7 +336,7 @@ static int rcu_dynticks_snap(struct rcu_data *rdp) */ static bool rcu_dynticks_in_eqs(int snap) { - return !(snap & RCU_DYNTICK_CTRL_CTR); + return !(snap & 0x1); } /* Return true if the specified CPU is currently idle from an RCU viewpoint. */ @@ -389,8 +367,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) int snap; // If not quiescent, force back to earlier extended quiescent state. - snap = atomic_read(&rdp->dynticks) & ~(RCU_DYNTICK_CTRL_MASK | - RCU_DYNTICK_CTRL_CTR); + snap = atomic_read(&rdp->dynticks) & ~0x1; smp_rmb(); // Order ->dynticks and *vp reads. if (READ_ONCE(*vp)) @@ -398,32 +375,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) smp_rmb(); // Order *vp read and ->dynticks re-read. // If still in the same extended quiescent state, we are good! - return snap == (atomic_read(&rdp->dynticks) & ~RCU_DYNTICK_CTRL_MASK); -} - -/* - * Set the special (bottom) bit of the specified CPU so that it - * will take special action (such as flushing its TLB) on the - * next exit from an extended quiescent state. Returns true if - * the bit was successfully set, or false if the CPU was not in - * an extended quiescent state. - */ -bool rcu_eqs_special_set(int cpu) -{ - int old; - int new; - int new_old; - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - - new_old = atomic_read(&rdp->dynticks); - do { - old = new_old; - if (old & RCU_DYNTICK_CTRL_CTR) - return false; - new = old | RCU_DYNTICK_CTRL_MASK; - new_old = atomic_cmpxchg(&rdp->dynticks, old, new); - } while (new_old != old); - return true; + return snap == atomic_read(&rdp->dynticks); } /* @@ -442,10 +394,9 @@ notrace void rcu_momentary_dyntick_idle(void) int special; raw_cpu_write(rcu_data.rcu_need_heavy_qs, false); - special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, - &this_cpu_ptr(&rcu_data)->dynticks); + special = atomic_add_return(2, &this_cpu_ptr(&rcu_data)->dynticks); /* It is illegal to call this from idle state. */ - WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR)); + WARN_ON_ONCE(!(special & 0x1)); rcu_preempt_deferred_qs(current); } EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle); -- cgit v1.2.3 From 5fcb3a5f04ee6422714adb02f5364042228bfc2e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 20 May 2021 13:35:50 -0700 Subject: rcu: Mark accesses to ->rcu_read_lock_nesting KCSAN flags accesses to ->rcu_read_lock_nesting as data races, but in the past, the overhead of marked accesses was excessive. However, that was long ago, and much has changed since then, both in terms of hardware and of compilers. Here is data taken on an eight-core laptop using Intel(R) Core(TM) i9-10885H CPU @ 2.40GHz with a kernel built using gcc version 9.3.0, with all data in nanoseconds. Unmarked accesses (status quo), measured by three refscale runs: Minimum reader duration: 3.286 2.851 3.395 Median reader duration: 3.698 3.531 3.4695 Maximum reader duration: 4.481 5.215 5.157 Marked accesses, also measured by three refscale runs: Minimum reader duration: 3.501 3.677 3.580 Median reader duration: 4.053 3.723 3.895 Maximum reader duration: 7.307 4.999 5.511 This focused microbenhmark shows only sub-nanosecond differences which are unlikely to be visible at the system level. This commit therefore marks data-racing accesses to ->rcu_read_lock_nesting. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- kernel/rcu/tree_plugin.h | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d9680b798b21..cfeb43bfc719 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -53,7 +53,7 @@ void __rcu_read_unlock(void); * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. */ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) +#define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting) #else /* #ifdef CONFIG_PREEMPT_RCU */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index de1dc3bb7f70..83a702a4e296 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -405,17 +405,20 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) static void rcu_preempt_read_enter(void) { - current->rcu_read_lock_nesting++; + WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1); } static int rcu_preempt_read_exit(void) { - return --current->rcu_read_lock_nesting; + int ret = READ_ONCE(current->rcu_read_lock_nesting) - 1; + + WRITE_ONCE(current->rcu_read_lock_nesting, ret); + return ret; } static void rcu_preempt_depth_set(int val) { - current->rcu_read_lock_nesting = val; + WRITE_ONCE(current->rcu_read_lock_nesting, val); } /* -- cgit v1.2.3 From 751b1710eb09b0ba21718add68cd340707ca2446 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 21 May 2021 12:08:29 +0200 Subject: rculist: Unify documentation about missing list_empty_rcu() We have two separate sections that talk about why list_empty_rcu() is not needed, so this commit consolidates them. Signed-off-by: Julian Wiedmann [ paulmck: The usual wordsmithing. ] Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index f8633d37e358..d29740be4833 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -10,15 +10,6 @@ #include #include -/* - * Why is there no list_empty_rcu()? Because list_empty() serves this - * purpose. The list_empty() function fetches the RCU-protected pointer - * and compares it to the address of the list head, but neither dereferences - * this pointer itself nor provides this pointer to the caller. Therefore, - * it is not necessary to use rcu_dereference(), so that list_empty() can - * be used anywhere you would want to use a list_empty_rcu(). - */ - /* * INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers * @list: list to be initialized @@ -318,21 +309,29 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, /* * Where are list_empty_rcu() and list_first_entry_rcu()? * - * Implementing those functions following their counterparts list_empty() and - * list_first_entry() is not advisable because they lead to subtle race - * conditions as the following snippet shows: + * They do not exist because they would lead to subtle race conditions: * * if (!list_empty_rcu(mylist)) { * struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member); * do_something(bar); * } * - * The list may not be empty when list_empty_rcu checks it, but it may be when - * list_first_entry_rcu rereads the ->next pointer. - * - * Rereading the ->next pointer is not a problem for list_empty() and - * list_first_entry() because they would be protected by a lock that blocks - * writers. + * The list might be non-empty when list_empty_rcu() checks it, but it + * might have become empty by the time that list_first_entry_rcu() rereads + * the ->next pointer, which would result in a SEGV. + * + * When not using RCU, it is OK for list_first_entry() to re-read that + * pointer because both functions should be protected by some lock that + * blocks writers. + * + * When using RCU, list_empty() uses READ_ONCE() to fetch the + * RCU-protected ->next pointer and then compares it to the address of the + * list head. However, it neither dereferences this pointer nor provides + * this pointer to its caller. Thus, READ_ONCE() suffices (that is, + * rcu_dereference() is not needed), which means that list_empty() can be + * used anywhere you would want to use list_empty_rcu(). Just don't + * expect anything useful to happen if you do a subsequent lockless + * call to list_first_entry_rcu()!!! * * See list_first_or_null_rcu for an alternative. */ -- cgit v1.2.3 From 65bfdd36c113f5d579a382d8f2847210ea4cdca6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Jun 2021 16:31:38 -0700 Subject: srcutiny: Mark read-side data races This commit marks some interrupt-induced read-side data races in __srcu_read_lock(), __srcu_read_unlock(), and srcu_torture_stats_print(). Signed-off-by: Paul E. McKenney --- include/linux/srcutiny.h | 8 ++++---- kernel/rcu/srcutiny.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 0e0cf4d6a72a..6cfaa0a9a9b9 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -61,7 +61,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp) int idx; idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1; - WRITE_ONCE(ssp->srcu_lock_nesting[idx], ssp->srcu_lock_nesting[idx] + 1); + WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1); return idx; } @@ -81,11 +81,11 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp, { int idx; - idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1; + idx = ((data_race(READ_ONCE(ssp->srcu_idx)) + 1) & 0x2) >> 1; pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", tt, tf, idx, - READ_ONCE(ssp->srcu_lock_nesting[!idx]), - READ_ONCE(ssp->srcu_lock_nesting[idx])); + data_race(READ_ONCE(ssp->srcu_lock_nesting[!idx])), + data_race(READ_ONCE(ssp->srcu_lock_nesting[idx]))); } #endif diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 26344dc6483b..a0ba2ed49bc6 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct); */ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { - int newval = ssp->srcu_lock_nesting[idx] - 1; + int newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1; WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval); if (!newval && READ_ONCE(ssp->srcu_gp_waiting)) -- cgit v1.2.3