From 1b27291b1ea4f1f2090fb07c3425db474cdb99ba Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 18 Jul 2018 14:32:31 -0700
Subject: rcutorture: Add forward-progress tests for RCU grace periods

This commit adds a kthread that loops going into and out of RCU
read-side critical sections, but also including a cond_resched(),
optionally guarded by a check of need_resched(), in that same loop.
This commit relies solely on rcu_torture_writer() progress to judge
the forward progress of grace periods.

Note that Tasks RCU and SRCU are exempted from forward-progress testing
due their (intentionally) less-robust forward-progress guarantees.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutiny.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 8d9a0ea8f0b5..a6353f3d6094 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -108,6 +108,7 @@ static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
  */
 static inline void rcu_virt_note_context_switch(int cpu) { }
 static inline void rcu_cpu_stall_reset(void) { }
+static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; }
 static inline void rcu_idle_enter(void) { }
 static inline void rcu_idle_exit(void) { }
 static inline void rcu_irq_enter(void) { }
-- 
cgit v1.2.3


From 474e59b476b3390ef9f730515439f21640b61623 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 7 Aug 2018 14:34:44 -0700
Subject: rcutorture: Check GP completion at stutter end

The rcu_torture_writer() function invokes stutter_wait() at the end of
each writer pass, which occasionally blocks for an extended time period
in order to ensure that RCU can handle intermittent loads.  But part of
handling a busy period is invoking all the callbacks before the end of
the idle period induced by stutter_wait().

This commit therefore adds a return value to stutter_wait() indicating
whether stutter_wait() actually waited.  In addition, this commit causes
rcu_torture_writer() to test this value and if set, checks that all the
elements of the rcu_tortures[] array have been freed up.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/torture.h | 2 +-
 kernel/rcu/rcutorture.c | 5 ++++-
 kernel/torture.c        | 3 ++-
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index 61dfd93b6ee4..48fad21109fc 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -77,7 +77,7 @@ void torture_shutdown_absorb(const char *title);
 int torture_shutdown_init(int ssecs, void (*cleanup)(void));
 
 /* Task stuttering, which forces load/no-load transitions. */
-void stutter_wait(const char *title);
+bool stutter_wait(const char *title);
 int torture_stutter_init(int s);
 
 /* Initialization and cleanup. */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 7df8142a6a22..ae10ad531993 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1144,7 +1144,10 @@ rcu_torture_writer(void *arg)
 				       !rcu_gp_is_normal();
 		}
 		rcu_torture_writer_state = RTWS_STUTTER;
-		stutter_wait("rcu_torture_writer");
+		if (stutter_wait("rcu_torture_writer"))
+			for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
+				if (list_empty(&rcu_tortures[i].rtort_free))
+					WARN_ON_ONCE(1);
 	} while (!torture_must_stop());
 	/* Reset expediting back to unexpedited. */
 	if (expediting > 0)
diff --git a/kernel/torture.c b/kernel/torture.c
index 1ac24a826589..17d91f5fba2a 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -573,7 +573,7 @@ static int stutter;
  * Block until the stutter interval ends.  This must be called periodically
  * by all running kthreads that need to be subject to stuttering.
  */
-void stutter_wait(const char *title)
+bool stutter_wait(const char *title)
 {
 	int spt;
 
@@ -590,6 +590,7 @@ void stutter_wait(const char *title)
 		}
 		torture_shutdown_absorb(title);
 	}
+	return !!spt;
 }
 EXPORT_SYMBOL_GPL(stutter_wait);
 
-- 
cgit v1.2.3


From 3e31009898699dfca823893054748d85048dc7b3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 21 Jun 2018 12:50:01 -0700
Subject: rcu: Defer reporting RCU-preempt quiescent states when disabled

This commit defers reporting of RCU-preempt quiescent states at
rcu_read_unlock_special() time when any of interrupts, softirq, or
preemption are disabled.  These deferred quiescent states are reported
at a later RCU_SOFTIRQ, context switch, idle entry, or CPU-hotplug
offline operation.  Of course, if another RCU read-side critical
section has started in the meantime, the reporting of the quiescent
state will be further deferred.

This also means that disabling preemption, interrupts, and/or
softirqs will act as an RCU-preempt read-side critical section.
This is enforced by checking preempt_count() as needed.

Some special cases must be handled on an ad-hoc basis, for example,
context switch is a quiescent state even though both the scheduler and
do_exit() disable preemption.  In these cases, additional calls to
rcu_preempt_deferred_qs() override the preemption disabling.  Similar
logic overrides disabled interrupts in rcu_preempt_check_callbacks()
because in this case the quiescent state happened just before the
corresponding scheduling-clock interrupt.

In theory, this change lifts a long-standing restriction that required
that if interrupts were disabled across a call to rcu_read_unlock()
that the matching rcu_read_lock() also be contained within that
interrupts-disabled region of code.  Because the reporting of the
corresponding RCU-preempt quiescent state is now deferred until
after interrupts have been enabled, it is no longer possible for this
situation to result in deadlocks involving the scheduler's runqueue and
priority-inheritance locks.  This may allow some code simplification that
might reduce interrupt latency a bit.  Unfortunately, in practice this
would also defer deboosting a low-priority task that had been subjected
to RCU priority boosting, so real-time-response considerations might
well force this restriction to remain in place.

Because RCU-preempt grace periods are now blocked not only by RCU
read-side critical sections, but also by disabling of interrupts,
preemption, and softirqs, it will be possible to eliminate RCU-bh and
RCU-sched in favor of RCU-preempt in CONFIG_PREEMPT=y kernels.  This may
require some additional plumbing to provide the network denial-of-service
guarantees that have been traditionally provided by RCU-bh.  Once these
are in place, CONFIG_PREEMPT=n kernels will be able to fold RCU-bh
into RCU-sched.  This would mean that all kernels would have but
one flavor of RCU, which would open the door to significant code
cleanup.

Moving to a single flavor of RCU would also have the beneficial effect
of reducing the NOCB kthreads by at least a factor of two.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Apply rcu_read_unlock_special() preempt_count() feedback
  from Joel Fernandes. ]
[ paulmck: Adjust rcu_eqs_enter() call to rcu_preempt_deferred_qs() in
  response to bug reports from kbuild test robot. ]
[ paulmck: Fix bug located by kbuild test robot involving recursion
  via rcu_preempt_deferred_qs(). ]
---
 .../RCU/Design/Requirements/Requirements.html      |  50 ++++---
 include/linux/rcutiny.h                            |   5 +
 kernel/rcu/tree.c                                  |   9 ++
 kernel/rcu/tree.h                                  |   3 +
 kernel/rcu/tree_exp.h                              |  71 +++++++---
 kernel/rcu/tree_plugin.h                           | 144 ++++++++++++++++-----
 6 files changed, 205 insertions(+), 77 deletions(-)

(limited to 'include')

diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index 49690228b1c6..038714475edb 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -2394,30 +2394,9 @@ when invoked from a CPU-hotplug notifier.
 <p>
 RCU depends on the scheduler, and the scheduler uses RCU to
 protect some of its data structures.
-This means the scheduler is forbidden from acquiring
-the runqueue locks and the priority-inheritance locks
-in the middle of an outermost RCU read-side critical section unless either
-(1)&nbsp;it releases them before exiting that same
-RCU read-side critical section, or
-(2)&nbsp;interrupts are disabled across
-that entire RCU read-side critical section.
-This same prohibition also applies (recursively!) to any lock that is acquired
-while holding any lock to which this prohibition applies.
-Adhering to this rule prevents preemptible RCU from invoking
-<tt>rcu_read_unlock_special()</tt> while either runqueue or
-priority-inheritance locks are held, thus avoiding deadlock.
-
-<p>
-Prior to v4.4, it was only necessary to disable preemption across
-RCU read-side critical sections that acquired scheduler locks.
-In v4.4, expedited grace periods started using IPIs, and these
-IPIs could force a <tt>rcu_read_unlock()</tt> to take the slowpath.
-Therefore, this expedited-grace-period change required disabling of
-interrupts, not just preemption.
-
-<p>
-For RCU's part, the preemptible-RCU <tt>rcu_read_unlock()</tt>
-implementation must be written carefully to avoid similar deadlocks.
+The preemptible-RCU <tt>rcu_read_unlock()</tt>
+implementation must therefore be written carefully to avoid deadlocks
+involving the scheduler's runqueue and priority-inheritance locks.
 In particular, <tt>rcu_read_unlock()</tt> must tolerate an
 interrupt where the interrupt handler invokes both
 <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
@@ -2426,7 +2405,7 @@ negative nesting levels to avoid destructive recursion via
 interrupt handler's use of RCU.
 
 <p>
-This pair of mutual scheduler-RCU requirements came as a
+This scheduler-RCU requirement came as a
 <a href="https://lwn.net/Articles/453002/">complete surprise</a>.
 
 <p>
@@ -2437,9 +2416,28 @@ when running context-switch-heavy workloads when built with
 <tt>CONFIG_NO_HZ_FULL=y</tt>
 <a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
 RCU has made good progress towards meeting this requirement, even
-for context-switch-have <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
+for context-switch-heavy <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
 but there is room for further improvement.
 
+<p>
+In the past, it was forbidden to disable interrupts across an
+<tt>rcu_read_unlock()</tt> unless that interrupt-disabled region
+of code also included the matching <tt>rcu_read_lock()</tt>.
+Violating this restriction could result in deadlocks involving the
+scheduler's runqueue and priority-inheritance spinlocks.
+This restriction was lifted when interrupt-disabled calls to
+<tt>rcu_read_unlock()</tt> started deferring the reporting of
+the resulting RCU-preempt quiescent state until the end of that
+interrupts-disabled region.
+This deferred reporting means that the scheduler's runqueue and
+priority-inheritance locks cannot be held while reporting an RCU-preempt
+quiescent state, which lifts the earlier restriction, at least from
+a deadlock perspective.
+Unfortunately, real-time systems using RCU priority boosting may
+need this restriction to remain in effect because deferred
+quiescent-state reporting also defers deboosting, which in turn
+degrades real-time latencies.
+
 <h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
 
 <p>
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 8d9a0ea8f0b5..f617ab19bb51 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -115,6 +115,11 @@ static inline void rcu_irq_exit_irqson(void) { }
 static inline void rcu_irq_enter_irqson(void) { }
 static inline void rcu_irq_exit(void) { }
 static inline void exit_rcu(void) { }
+static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
+{
+	return false;
+}
+static inline void rcu_preempt_deferred_qs(struct task_struct *t) { }
 #ifdef CONFIG_SRCU
 void rcu_scheduler_starting(void);
 #else /* #ifndef CONFIG_SRCU */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 36786789b625..346624716d6e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -422,6 +422,7 @@ static void rcu_momentary_dyntick_idle(void)
 	special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
 	/* It is illegal to call this from idle state. */
 	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
+	rcu_preempt_deferred_qs(current);
 }
 
 /*
@@ -729,6 +730,7 @@ static void rcu_eqs_enter(bool user)
 		do_nocb_deferred_wakeup(rdp);
 	}
 	rcu_prepare_for_idle();
+	rcu_preempt_deferred_qs(current);
 	WRITE_ONCE(rdtp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
 	rcu_dynticks_eqs_enter();
 	rcu_dynticks_task_enter();
@@ -2850,6 +2852,12 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 
 	WARN_ON_ONCE(!rdp->beenonline);
 
+	/* Report any deferred quiescent states if preemption enabled. */
+	if (!(preempt_count() & PREEMPT_MASK))
+		rcu_preempt_deferred_qs(current);
+	else if (rcu_preempt_need_deferred_qs(current))
+		resched_cpu(rdp->cpu); /* Provoke future context switch. */
+
 	/* Update RCU state based on any recent quiescent states. */
 	rcu_check_quiescent_state(rsp, rdp);
 
@@ -3823,6 +3831,7 @@ void rcu_report_dead(unsigned int cpu)
 	rcu_report_exp_rdp(&rcu_sched_state,
 			   this_cpu_ptr(rcu_sched_state.rda), true);
 	preempt_enable();
+	rcu_preempt_deferred_qs(current);
 	for_each_rcu_flavor(rsp)
 		rcu_cleanup_dying_idle_cpu(cpu, rsp);
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4e74df768c57..025bd2e5592b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -195,6 +195,7 @@ struct rcu_data {
 	bool		core_needs_qs;	/* Core waits for quiesc state. */
 	bool		beenonline;	/* CPU online at least once. */
 	bool		gpwrap;		/* Possible ->gp_seq wrap. */
+	bool		deferred_qs;	/* This CPU awaiting a deferred QS? */
 	struct rcu_node *mynode;	/* This CPU's leaf of hierarchy */
 	unsigned long grpmask;		/* Mask to apply to leaf qsmask. */
 	unsigned long	ticks_this_gp;	/* The number of scheduling-clock */
@@ -461,6 +462,8 @@ static void rcu_cleanup_after_idle(void);
 static void rcu_prepare_for_idle(void);
 static void rcu_idle_count_callbacks_posted(void);
 static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
+static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
+static void rcu_preempt_deferred_qs(struct task_struct *t);
 static void print_cpu_stall_info_begin(void);
 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
 static void print_cpu_stall_info_end(void);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 0b2c2ad69629..f9d5bbd8adce 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -262,6 +262,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
 static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
 			       bool wake)
 {
+	WRITE_ONCE(rdp->deferred_qs, false);
 	rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
 }
 
@@ -735,32 +736,70 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
  */
 static void sync_rcu_exp_handler(void *info)
 {
-	struct rcu_data *rdp;
+	unsigned long flags;
 	struct rcu_state *rsp = info;
+	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+	struct rcu_node *rnp = rdp->mynode;
 	struct task_struct *t = current;
 
 	/*
-	 * Within an RCU read-side critical section, request that the next
-	 * rcu_read_unlock() report.  Unless this RCU read-side critical
-	 * section has already blocked, in which case it is already set
-	 * up for the expedited grace period to wait on it.
+	 * First, the common case of not being in an RCU read-side
+	 * critical section.  If also enabled or idle, immediately
+	 * report the quiescent state, otherwise defer.
 	 */
-	if (t->rcu_read_lock_nesting > 0 &&
-	    !t->rcu_read_unlock_special.b.blocked) {
-		t->rcu_read_unlock_special.b.exp_need_qs = true;
+	if (!t->rcu_read_lock_nesting) {
+		if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
+		    rcu_dynticks_curr_cpu_in_eqs()) {
+			rcu_report_exp_rdp(rsp, rdp, true);
+		} else {
+			rdp->deferred_qs = true;
+			resched_cpu(rdp->cpu);
+		}
 		return;
 	}
 
 	/*
-	 * We are either exiting an RCU read-side critical section (negative
-	 * values of t->rcu_read_lock_nesting) or are not in one at all
-	 * (zero value of t->rcu_read_lock_nesting).  Or we are in an RCU
-	 * read-side critical section that blocked before this expedited
-	 * grace period started.  Either way, we can immediately report
-	 * the quiescent state.
+	 * Second, the less-common case of being in an RCU read-side
+	 * critical section.  In this case we can count on a future
+	 * rcu_read_unlock().  However, this rcu_read_unlock() might
+	 * execute on some other CPU, but in that case there will be
+	 * a future context switch.  Either way, if the expedited
+	 * grace period is still waiting on this CPU, set ->deferred_qs
+	 * so that the eventual quiescent state will be reported.
+	 * Note that there is a large group of race conditions that
+	 * can have caused this quiescent state to already have been
+	 * reported, so we really do need to check ->expmask.
 	 */
-	rdp = this_cpu_ptr(rsp->rda);
-	rcu_report_exp_rdp(rsp, rdp, true);
+	if (t->rcu_read_lock_nesting > 0) {
+		raw_spin_lock_irqsave_rcu_node(rnp, flags);
+		if (rnp->expmask & rdp->grpmask)
+			rdp->deferred_qs = true;
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	}
+
+	/*
+	 * The final and least likely case is where the interrupted
+	 * code was just about to or just finished exiting the RCU-preempt
+	 * read-side critical section, and no, we can't tell which.
+	 * So either way, set ->deferred_qs to flag later code that
+	 * a quiescent state is required.
+	 *
+	 * If the CPU is fully enabled (or if some buggy RCU-preempt
+	 * read-side critical section is being used from idle), just
+	 * invoke rcu_preempt_defer_qs() to immediately report the
+	 * quiescent state.  We cannot use rcu_read_unlock_special()
+	 * because we are in an interrupt handler, which will cause that
+	 * function to take an early exit without doing anything.
+	 *
+	 * Otherwise, use resched_cpu() to force a context switch after
+	 * the CPU enables everything.
+	 */
+	rdp->deferred_qs = true;
+	if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
+	    WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()))
+		rcu_preempt_deferred_qs(t);
+	else
+		resched_cpu(rdp->cpu);
 }
 
 /**
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a97c20ea9bce..542791361908 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -371,6 +371,9 @@ static void rcu_preempt_note_context_switch(bool preempt)
 		 * behalf of preempted instance of __rcu_read_unlock().
 		 */
 		rcu_read_unlock_special(t);
+		rcu_preempt_deferred_qs(t);
+	} else {
+		rcu_preempt_deferred_qs(t);
 	}
 
 	/*
@@ -464,54 +467,51 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
 }
 
 /*
- * Handle special cases during rcu_read_unlock(), such as needing to
- * notify RCU core processing or task having blocked during the RCU
- * read-side critical section.
+ * Report deferred quiescent states.  The deferral time can
+ * be quite short, for example, in the case of the call from
+ * rcu_read_unlock_special().
  */
-static void rcu_read_unlock_special(struct task_struct *t)
+static void
+rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 {
 	bool empty_exp;
 	bool empty_norm;
 	bool empty_exp_now;
-	unsigned long flags;
 	struct list_head *np;
 	bool drop_boost_mutex = false;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 	union rcu_special special;
 
-	/* NMI handlers cannot block and cannot safely manipulate state. */
-	if (in_nmi())
-		return;
-
-	local_irq_save(flags);
-
 	/*
 	 * If RCU core is waiting for this CPU to exit its critical section,
 	 * report the fact that it has exited.  Because irqs are disabled,
 	 * t->rcu_read_unlock_special cannot change.
 	 */
 	special = t->rcu_read_unlock_special;
+	rdp = this_cpu_ptr(rcu_state_p->rda);
+	if (!special.s && !rdp->deferred_qs) {
+		local_irq_restore(flags);
+		return;
+	}
 	if (special.b.need_qs) {
 		rcu_preempt_qs();
 		t->rcu_read_unlock_special.b.need_qs = false;
-		if (!t->rcu_read_unlock_special.s) {
+		if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
 			local_irq_restore(flags);
 			return;
 		}
 	}
 
 	/*
-	 * Respond to a request for an expedited grace period, but only if
-	 * we were not preempted, meaning that we were running on the same
-	 * CPU throughout.  If we were preempted, the exp_need_qs flag
-	 * would have been cleared at the time of the first preemption,
-	 * and the quiescent state would be reported when we were dequeued.
+	 * Respond to a request by an expedited grace period for a
+	 * quiescent state from this CPU.  Note that requests from
+	 * tasks are handled when removing the task from the
+	 * blocked-tasks list below.
 	 */
-	if (special.b.exp_need_qs) {
-		WARN_ON_ONCE(special.b.blocked);
+	if (special.b.exp_need_qs || rdp->deferred_qs) {
 		t->rcu_read_unlock_special.b.exp_need_qs = false;
-		rdp = this_cpu_ptr(rcu_state_p->rda);
+		rdp->deferred_qs = false;
 		rcu_report_exp_rdp(rcu_state_p, rdp, true);
 		if (!t->rcu_read_unlock_special.s) {
 			local_irq_restore(flags);
@@ -519,19 +519,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		}
 	}
 
-	/* Hardware IRQ handlers cannot block, complain if they get here. */
-	if (in_irq() || in_serving_softirq()) {
-		lockdep_rcu_suspicious(__FILE__, __LINE__,
-				       "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
-		pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
-			 t->rcu_read_unlock_special.s,
-			 t->rcu_read_unlock_special.b.blocked,
-			 t->rcu_read_unlock_special.b.exp_need_qs,
-			 t->rcu_read_unlock_special.b.need_qs);
-		local_irq_restore(flags);
-		return;
-	}
-
 	/* Clean up if blocked during RCU read-side critical section. */
 	if (special.b.blocked) {
 		t->rcu_read_unlock_special.b.blocked = false;
@@ -602,6 +589,72 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	}
 }
 
+/*
+ * Is a deferred quiescent-state pending, and are we also not in
+ * an RCU read-side critical section?  It is the caller's responsibility
+ * to ensure it is otherwise safe to report any deferred quiescent
+ * states.  The reason for this is that it is safe to report a
+ * quiescent state during context switch even though preemption
+ * is disabled.  This function cannot be expected to understand these
+ * nuances, so the caller must handle them.
+ */
+static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
+{
+	return (this_cpu_ptr(&rcu_preempt_data)->deferred_qs ||
+		READ_ONCE(t->rcu_read_unlock_special.s)) &&
+	       !t->rcu_read_lock_nesting;
+}
+
+/*
+ * Report a deferred quiescent state if needed and safe to do so.
+ * As with rcu_preempt_need_deferred_qs(), "safe" involves only
+ * not being in an RCU read-side critical section.  The caller must
+ * evaluate safety in terms of interrupt, softirq, and preemption
+ * disabling.
+ */
+static void rcu_preempt_deferred_qs(struct task_struct *t)
+{
+	unsigned long flags;
+	bool couldrecurse = t->rcu_read_lock_nesting >= 0;
+
+	if (!rcu_preempt_need_deferred_qs(t))
+		return;
+	if (couldrecurse)
+		t->rcu_read_lock_nesting -= INT_MIN;
+	local_irq_save(flags);
+	rcu_preempt_deferred_qs_irqrestore(t, flags);
+	if (couldrecurse)
+		t->rcu_read_lock_nesting += INT_MIN;
+}
+
+/*
+ * Handle special cases during rcu_read_unlock(), such as needing to
+ * notify RCU core processing or task having blocked during the RCU
+ * read-side critical section.
+ */
+static void rcu_read_unlock_special(struct task_struct *t)
+{
+	unsigned long flags;
+	bool preempt_bh_were_disabled =
+			!!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));
+	bool irqs_were_disabled;
+
+	/* NMI handlers cannot block and cannot safely manipulate state. */
+	if (in_nmi())
+		return;
+
+	local_irq_save(flags);
+	irqs_were_disabled = irqs_disabled_flags(flags);
+	if ((preempt_bh_were_disabled || irqs_were_disabled) &&
+	    t->rcu_read_unlock_special.b.blocked) {
+		/* Need to defer quiescent state until everything is enabled. */
+		raise_softirq_irqoff(RCU_SOFTIRQ);
+		local_irq_restore(flags);
+		return;
+	}
+	rcu_preempt_deferred_qs_irqrestore(t, flags);
+}
+
 /*
  * Dump detailed information for all tasks blocking the current RCU
  * grace period on the specified rcu_node structure.
@@ -737,10 +790,20 @@ static void rcu_preempt_check_callbacks(void)
 	struct rcu_state *rsp = &rcu_preempt_state;
 	struct task_struct *t = current;
 
-	if (t->rcu_read_lock_nesting == 0) {
-		rcu_preempt_qs();
+	if (t->rcu_read_lock_nesting > 0 ||
+	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
+		/* No QS, force context switch if deferred. */
+		if (rcu_preempt_need_deferred_qs(t))
+			resched_cpu(smp_processor_id());
+	} else if (rcu_preempt_need_deferred_qs(t)) {
+		rcu_preempt_deferred_qs(t); /* Report deferred QS. */
+		return;
+	} else if (!t->rcu_read_lock_nesting) {
+		rcu_preempt_qs(); /* Report immediate QS. */
 		return;
 	}
+
+	/* If GP is oldish, ask for help from rcu_read_unlock_special(). */
 	if (t->rcu_read_lock_nesting > 0 &&
 	    __this_cpu_read(rcu_data_p->core_needs_qs) &&
 	    __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm) &&
@@ -859,6 +922,7 @@ void exit_rcu(void)
 	barrier();
 	t->rcu_read_unlock_special.b.blocked = true;
 	__rcu_read_unlock();
+	rcu_preempt_deferred_qs(current);
 }
 
 /*
@@ -940,6 +1004,16 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
 	return false;
 }
 
+/*
+ * Because there is no preemptible RCU, there can be no deferred quiescent
+ * states.
+ */
+static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
+{
+	return false;
+}
+static void rcu_preempt_deferred_qs(struct task_struct *t) { }
+
 /*
  * Because preemptible RCU does not exist, we never have to check for
  * tasks blocked within RCU read-side critical sections.
-- 
cgit v1.2.3


From fcc878e4dfb70128a73857c609d70570629b0d9e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2018 07:39:59 -0700
Subject: rcu: Remove now-unused ->b.exp_need_qs field from the rcu_special
 union

The ->b.exp_need_qs field is now set only to false, so this commit
removes it.  The job this field used to do is now done by the rcu_data
structure's ->deferred_qs field, which is a consequence of a better
split between task-based (the rcu_node structure's ->exp_tasks field) and
CPU-based (the aforementioned rcu_data structure's ->deferred_qs field)
tracking of quiescent states for RCU-preempt expedited grace periods.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/sched.h    |  6 +-----
 kernel/rcu/tree_plugin.h | 13 ++++---------
 2 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 977cb57d7bc9..004ca21f7e80 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -571,12 +571,8 @@ union rcu_special {
 	struct {
 		u8			blocked;
 		u8			need_qs;
-		u8			exp_need_qs;
-
-		/* Otherwise the compiler can store garbage here: */
-		u8			pad;
 	} b; /* Bits. */
-	u32 s; /* Set of bits. */
+	u16 s; /* Set of bits. */
 };
 
 enum perf_event_task_context {
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 24c209676d20..527a52792dce 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -284,13 +284,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
 	 * no need to check for a subsequent expedited GP.  (Though we are
 	 * still in a quiescent state in any case.)
 	 */
-	if (blkd_state & RCU_EXP_BLKD &&
-	    t->rcu_read_unlock_special.b.exp_need_qs) {
-		t->rcu_read_unlock_special.b.exp_need_qs = false;
+	if (blkd_state & RCU_EXP_BLKD && rdp->deferred_qs)
 		rcu_report_exp_rdp(rdp->rsp, rdp, true);
-	} else {
-		WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs);
-	}
+	else
+		WARN_ON_ONCE(rdp->deferred_qs);
 }
 
 /*
@@ -509,9 +506,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 	 * tasks are handled when removing the task from the
 	 * blocked-tasks list below.
 	 */
-	if (special.b.exp_need_qs || rdp->deferred_qs) {
-		t->rcu_read_unlock_special.b.exp_need_qs = false;
-		rdp->deferred_qs = false;
+	if (rdp->deferred_qs) {
 		rcu_report_exp_rdp(rcu_state_p, rdp, true);
 		if (!t->rcu_read_unlock_special.s) {
 			local_irq_restore(flags);
-- 
cgit v1.2.3


From d28139c4e96713d52a300fb9036c5be2f45e0741 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2018 14:45:25 -0700
Subject: rcu: Apply RCU-bh QSes to RCU-sched and RCU-preempt when safe

One necessary step towards consolidating the three flavors of RCU is to
make sure that the resulting consolidated "one flavor to rule them all"
correctly handles networking denial-of-service attacks.  One thing that
allows RCU-bh to do so is that __do_softirq() invokes rcu_bh_qs() every
so often, and so something similar has to happen for consolidated RCU.

This must be done carefully.  For example, if a preemption-disabled
region of code takes an interrupt which does softirq processing before
returning, consolidated RCU must ignore the resulting rcu_bh_qs()
invocations -- preemption is still disabled, and that means an RCU
reader for the consolidated flavor.

This commit therefore creates a new rcu_softirq_qs() that is called only
from the ksoftirqd task, thus avoiding the interrupted-a-preempted-region
problem.  This new rcu_softirq_qs() function invokes rcu_sched_qs(),
rcu_preempt_qs(), and rcu_preempt_deferred_qs().  The latter call handles
any deferred quiescent states.

Note that __do_softirq() still invokes rcu_bh_qs().  It will continue to
do so until a later stage of cleanup when the RCU-bh flavor is removed.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Fix !SMP issue located by kbuild test robot. ]
---
 include/linux/rcutiny.h  | 5 +++++
 include/linux/rcutree.h  | 1 +
 kernel/rcu/tree.c        | 7 +++++++
 kernel/rcu/tree.h        | 1 +
 kernel/rcu/tree_plugin.h | 5 +++++
 kernel/softirq.c         | 2 ++
 6 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index f617ab19bb51..bcfbc40a7239 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -90,6 +90,11 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 	call_rcu(head, func);
 }
 
+static inline void rcu_softirq_qs(void)
+{
+	rcu_sched_qs();
+}
+
 #define rcu_note_context_switch(preempt) \
 	do { \
 		rcu_sched_qs(); \
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 914655848ef6..664b580695d6 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,6 +30,7 @@
 #ifndef __LINUX_RCUTREE_H
 #define __LINUX_RCUTREE_H
 
+void rcu_softirq_qs(void);
 void rcu_note_context_switch(bool preempt);
 int rcu_needs_cpu(u64 basem, u64 *nextevt);
 void rcu_cpu_stall_reset(void);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0b42249e2e40..cb35a417d947 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -255,6 +255,13 @@ void rcu_bh_qs(void)
 	}
 }
 
+void rcu_softirq_qs(void)
+{
+	rcu_sched_qs();
+	rcu_preempt_qs();
+	rcu_preempt_deferred_qs(current);
+}
+
 /*
  * Steal a bit from the bottom of ->dynticks for idle entry/exit
  * control.  Initially this is for TLB flushing.
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 025bd2e5592b..e02c882861eb 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -433,6 +433,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
 
 /* Forward declarations for rcutree_plugin.h */
 static void rcu_bootup_announce(void);
+static void rcu_preempt_qs(void);
 static void rcu_preempt_note_context_switch(bool preempt);
 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 527a52792dce..c686bf63bba5 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -974,6 +974,11 @@ static void __init rcu_bootup_announce(void)
 	rcu_bootup_announce_oddness();
 }
 
+/* Because preemptible RCU does not exist, we can ignore its QSes. */
+static void rcu_preempt_qs(void)
+{
+}
+
 /*
  * Because preemptible RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 6f584861d329..ebd69694144a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -302,6 +302,8 @@ restart:
 	}
 
 	rcu_bh_qs();
+	if (__this_cpu_read(ksoftirqd) == current)
+		rcu_softirq_qs();
 	local_irq_disable();
 
 	pending = local_softirq_pending();
-- 
cgit v1.2.3


From 65cfe3583b612a22e12fba9a7bbd2d37ca5ad941 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 1 Jul 2018 07:40:52 -0700
Subject: rcu: Define RCU-bh update API in terms of RCU

Now that the main RCU API knows about softirq disabling and softirq's
quiescent states, the RCU-bh update code can be dispensed with.
This commit therefore removes the RCU-bh update-side implementation and
defines RCU-bh's update-side API in terms of that of either RCU-preempt or
RCU-sched, depending on the setting of the CONFIG_PREEMPT Kconfig option.

In kernels built with CONFIG_RCU_NOCB_CPU=y this has the knock-on effect
of reducing by one the number of rcuo kthreads per CPU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  10 +++--
 include/linux/rcutiny.h  |  10 +++--
 include/linux/rcutree.h  |   8 +++-
 kernel/rcu/tiny.c        | 115 +++++++++--------------------------------------
 kernel/rcu/tree.c        |  97 ++++-----------------------------------
 kernel/rcu/tree_plugin.h |   1 -
 kernel/softirq.c         |   1 -
 7 files changed, 48 insertions(+), 194 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 75e5b393cf44..9ebfd436cec7 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -55,11 +55,15 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
 #define	call_rcu	call_rcu_sched
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
-void call_rcu_bh(struct rcu_head *head, rcu_callback_t func);
 void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
 void synchronize_sched(void);
 void rcu_barrier_tasks(void);
 
+static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
+{
+	call_rcu(head, func);
+}
+
 #ifdef CONFIG_PREEMPT_RCU
 
 void __rcu_read_lock(void);
@@ -104,7 +108,6 @@ static inline int rcu_preempt_depth(void)
 void rcu_init(void);
 extern int rcu_scheduler_active __read_mostly;
 void rcu_sched_qs(void);
-void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcutree_migrate_callbacks(int cpu);
@@ -326,8 +329,7 @@ static inline void rcu_preempt_sleep_check(void) { }
  * and rcu_assign_pointer().  Some of these could be folded into their
  * callers, but they are left separate in order to ease introduction of
  * multiple flavors of pointers to match the multiple flavors of RCU
- * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in
- * the future.
+ * (e.g., __rcu_sched, and __srcu), should this make sense in the future.
  */
 
 #ifdef __CHECKER__
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index bcfbc40a7239..ac26c27ccde8 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -56,19 +56,23 @@ static inline void cond_synchronize_sched(unsigned long oldstate)
 	might_sleep();
 }
 
-extern void rcu_barrier_bh(void);
-extern void rcu_barrier_sched(void);
-
 static inline void synchronize_rcu_expedited(void)
 {
 	synchronize_sched();	/* Only one CPU, so pretty fast anyway!!! */
 }
 
+extern void rcu_barrier_sched(void);
+
 static inline void rcu_barrier(void)
 {
 	rcu_barrier_sched();  /* Only one CPU, so only one list of callbacks! */
 }
 
+static inline void rcu_barrier_bh(void)
+{
+	rcu_barrier();
+}
+
 static inline void synchronize_rcu_bh(void)
 {
 	synchronize_sched();
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 664b580695d6..c789c302a2c9 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,7 +45,11 @@ static inline void rcu_virt_note_context_switch(int cpu)
 	rcu_note_context_switch(false);
 }
 
-void synchronize_rcu_bh(void);
+static inline void synchronize_rcu_bh(void)
+{
+	synchronize_rcu();
+}
+
 void synchronize_sched_expedited(void);
 void synchronize_rcu_expedited(void);
 
@@ -69,7 +73,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
  */
 static inline void synchronize_rcu_bh_expedited(void)
 {
-	synchronize_sched_expedited();
+	synchronize_rcu_expedited();
 }
 
 void rcu_barrier(void);
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index befc9321a89c..cadcf63c4889 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -51,64 +51,22 @@ static struct rcu_ctrlblk rcu_sched_ctrlblk = {
 	.curtail	= &rcu_sched_ctrlblk.rcucblist,
 };
 
-static struct rcu_ctrlblk rcu_bh_ctrlblk = {
-	.donetail	= &rcu_bh_ctrlblk.rcucblist,
-	.curtail	= &rcu_bh_ctrlblk.rcucblist,
-};
-
-void rcu_barrier_bh(void)
-{
-	wait_rcu_gp(call_rcu_bh);
-}
-EXPORT_SYMBOL(rcu_barrier_bh);
-
 void rcu_barrier_sched(void)
 {
 	wait_rcu_gp(call_rcu_sched);
 }
 EXPORT_SYMBOL(rcu_barrier_sched);
 
-/*
- * Helper function for rcu_sched_qs() and rcu_bh_qs().
- * Also irqs are disabled to avoid confusion due to interrupt handlers
- * invoking call_rcu().
- */
-static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
-{
-	if (rcp->donetail != rcp->curtail) {
-		rcp->donetail = rcp->curtail;
-		return 1;
-	}
-
-	return 0;
-}
-
-/*
- * Record an rcu quiescent state.  And an rcu_bh quiescent state while we
- * are at it, given that any rcu quiescent state is also an rcu_bh
- * quiescent state.  Use "+" instead of "||" to defeat short circuiting.
- */
+/* Record an rcu quiescent state.  */
 void rcu_sched_qs(void)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
-	    rcu_qsctr_help(&rcu_bh_ctrlblk))
-		raise_softirq(RCU_SOFTIRQ);
-	local_irq_restore(flags);
-}
-
-/*
- * Record an rcu_bh quiescent state.
- */
-void rcu_bh_qs(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	if (rcu_qsctr_help(&rcu_bh_ctrlblk))
+	if (rcu_sched_ctrlblk.donetail != rcu_sched_ctrlblk.curtail) {
+		rcu_sched_ctrlblk.donetail = rcu_sched_ctrlblk.curtail;
 		raise_softirq(RCU_SOFTIRQ);
+	}
 	local_irq_restore(flags);
 }
 
@@ -122,32 +80,27 @@ void rcu_check_callbacks(int user)
 {
 	if (user)
 		rcu_sched_qs();
-	if (user || !in_softirq())
-		rcu_bh_qs();
 }
 
-/*
- * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure
- * whose grace period has elapsed.
- */
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
+/* Invoke the RCU callbacks whose grace period has elapsed.  */
+static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
 {
 	struct rcu_head *next, *list;
 	unsigned long flags;
 
 	/* Move the ready-to-invoke callbacks to a local list. */
 	local_irq_save(flags);
-	if (rcp->donetail == &rcp->rcucblist) {
+	if (rcu_sched_ctrlblk.donetail == &rcu_sched_ctrlblk.rcucblist) {
 		/* No callbacks ready, so just leave. */
 		local_irq_restore(flags);
 		return;
 	}
-	list = rcp->rcucblist;
-	rcp->rcucblist = *rcp->donetail;
-	*rcp->donetail = NULL;
-	if (rcp->curtail == rcp->donetail)
-		rcp->curtail = &rcp->rcucblist;
-	rcp->donetail = &rcp->rcucblist;
+	list = rcu_sched_ctrlblk.rcucblist;
+	rcu_sched_ctrlblk.rcucblist = *rcu_sched_ctrlblk.donetail;
+	*rcu_sched_ctrlblk.donetail = NULL;
+	if (rcu_sched_ctrlblk.curtail == rcu_sched_ctrlblk.donetail)
+		rcu_sched_ctrlblk.curtail = &rcu_sched_ctrlblk.rcucblist;
+	rcu_sched_ctrlblk.donetail = &rcu_sched_ctrlblk.rcucblist;
 	local_irq_restore(flags);
 
 	/* Invoke the callbacks on the local list. */
@@ -162,19 +115,13 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 	}
 }
 
-static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
-{
-	__rcu_process_callbacks(&rcu_sched_ctrlblk);
-	__rcu_process_callbacks(&rcu_bh_ctrlblk);
-}
-
 /*
  * Wait for a grace period to elapse.  But it is illegal to invoke
  * synchronize_sched() from within an RCU read-side critical section.
  * Therefore, any legal call to synchronize_sched() is a quiescent
  * state, and so on a UP system, synchronize_sched() need do nothing.
- * Ditto for synchronize_rcu_bh().  (But Lai Jiangshan points out the
- * benefits of doing might_sleep() to reduce latency.)
+ * (But Lai Jiangshan points out the benefits of doing might_sleep()
+ * to reduce latency.)
  *
  * Cool, huh?  (Due to Josh Triplett.)
  */
@@ -188,11 +135,11 @@ void synchronize_sched(void)
 EXPORT_SYMBOL_GPL(synchronize_sched);
 
 /*
- * Helper function for call_rcu() and call_rcu_bh().
+ * Post an RCU callback to be invoked after the end of an RCU-sched grace
+ * period.  But since we have but one CPU, that would be after any
+ * quiescent state.
  */
-static void __call_rcu(struct rcu_head *head,
-		       rcu_callback_t func,
-		       struct rcu_ctrlblk *rcp)
+void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 {
 	unsigned long flags;
 
@@ -201,8 +148,8 @@ static void __call_rcu(struct rcu_head *head,
 	head->next = NULL;
 
 	local_irq_save(flags);
-	*rcp->curtail = head;
-	rcp->curtail = &head->next;
+	*rcu_sched_ctrlblk.curtail = head;
+	rcu_sched_ctrlblk.curtail = &head->next;
 	local_irq_restore(flags);
 
 	if (unlikely(is_idle_task(current))) {
@@ -210,28 +157,8 @@ static void __call_rcu(struct rcu_head *head,
 		resched_cpu(0);
 	}
 }
-
-/*
- * Post an RCU callback to be invoked after the end of an RCU-sched grace
- * period.  But since we have but one CPU, that would be after any
- * quiescent state.
- */
-void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func, &rcu_sched_ctrlblk);
-}
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
-/*
- * Post an RCU bottom-half callback to be invoked after any subsequent
- * quiescent state.
- */
-void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func, &rcu_bh_ctrlblk);
-}
-EXPORT_SYMBOL_GPL(call_rcu_bh);
-
 void __init rcu_init(void)
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index cb35a417d947..aedf81a0abd8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -108,7 +108,6 @@ struct rcu_state sname##_state = { \
 }
 
 RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
-RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
 
 static struct rcu_state *const rcu_state_p;
 LIST_HEAD(rcu_struct_flavors);
@@ -244,17 +243,6 @@ void rcu_sched_qs(void)
 			   this_cpu_ptr(&rcu_sched_data), true);
 }
 
-void rcu_bh_qs(void)
-{
-	RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!");
-	if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
-		trace_rcu_grace_period(TPS("rcu_bh"),
-				       __this_cpu_read(rcu_bh_data.gp_seq),
-				       TPS("cpuqs"));
-		__this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
-	}
-}
-
 void rcu_softirq_qs(void)
 {
 	rcu_sched_qs();
@@ -581,7 +569,7 @@ EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);
  */
 unsigned long rcu_bh_get_gp_seq(void)
 {
-	return READ_ONCE(rcu_bh_state.gp_seq);
+	return READ_ONCE(rcu_state_p->gp_seq);
 }
 EXPORT_SYMBOL_GPL(rcu_bh_get_gp_seq);
 
@@ -621,7 +609,7 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  */
 void rcu_bh_force_quiescent_state(void)
 {
-	force_quiescent_state(&rcu_bh_state);
+	force_quiescent_state(rcu_state_p);
 }
 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
 
@@ -680,10 +668,8 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
 
 	switch (test_type) {
 	case RCU_FLAVOR:
-		rsp = rcu_state_p;
-		break;
 	case RCU_BH_FLAVOR:
-		rsp = &rcu_bh_state;
+		rsp = rcu_state_p;
 		break;
 	case RCU_SCHED_FLAVOR:
 		rsp = &rcu_sched_state;
@@ -2673,26 +2659,15 @@ void rcu_check_callbacks(int user)
 		 * nested interrupt.  In this case, the CPU is in
 		 * a quiescent state, so note it.
 		 *
-		 * No memory barrier is required here because both
-		 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
-		 * variables that other CPUs neither access nor modify,
-		 * at least not while the corresponding CPU is online.
+		 * No memory barrier is required here because
+		 * rcu_sched_qs() references only CPU-local variables
+		 * that other CPUs neither access nor modify, at least
+		 * not while the corresponding CPU is online.
 		 */
 
 		rcu_sched_qs();
-		rcu_bh_qs();
 		rcu_note_voluntary_context_switch(current);
 
-	} else if (!in_softirq()) {
-
-		/*
-		 * Get here if this CPU did not take its interrupt from
-		 * softirq, in other words, if it is not interrupting
-		 * a rcu_bh read-side critical section.  This is an _bh
-		 * critical section, so note it.
-		 */
-
-		rcu_bh_qs();
 	}
 	rcu_preempt_check_callbacks();
 	if (rcu_pending())
@@ -3079,34 +3054,6 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
-/**
- * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_bh() assumes
- * that the read-side critical sections end on completion of a softirq
- * handler. This means that read-side critical sections in process
- * context must not be interrupted by softirqs. This interface is to be
- * used when most of the read-side critical sections are in softirq context.
- * RCU read-side critical sections are delimited by:
- *
- * - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context, OR
- * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
- *
- * These may be nested.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
-void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func, &rcu_bh_state, -1, 0);
-}
-EXPORT_SYMBOL_GPL(call_rcu_bh);
-
 /*
  * Queue an RCU callback for lazy invocation after a grace period.
  * This will likely be later named something like "call_rcu_lazy()",
@@ -3191,33 +3138,6 @@ void synchronize_sched(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_sched);
 
-/**
- * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
- *
- * Control will return to the caller some time after a full rcu_bh grace
- * period has elapsed, in other words after all currently executing rcu_bh
- * read-side critical sections have completed.  RCU read-side critical
- * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
- * and may be nested.
- *
- * See the description of synchronize_sched() for more detailed information
- * on memory ordering guarantees.
- */
-void synchronize_rcu_bh(void)
-{
-	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
-			 lock_is_held(&rcu_lock_map) ||
-			 lock_is_held(&rcu_sched_lock_map),
-			 "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
-	if (rcu_blocking_is_gp())
-		return;
-	if (rcu_gp_is_expedited())
-		synchronize_rcu_bh_expedited();
-	else
-		wait_rcu_gp(call_rcu_bh);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
-
 /**
  * get_state_synchronize_rcu - Snapshot current RCU state
  *
@@ -3529,7 +3449,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
  */
 void rcu_barrier_bh(void)
 {
-	_rcu_barrier(&rcu_bh_state);
+	_rcu_barrier(rcu_state_p);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
 
@@ -4180,7 +4100,6 @@ void __init rcu_init(void)
 
 	rcu_bootup_announce();
 	rcu_init_geometry();
-	rcu_init_one(&rcu_bh_state);
 	rcu_init_one(&rcu_sched_state);
 	if (dump_tree)
 		rcu_dump_rcu_node_tree(&rcu_sched_state);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0d7107fb3dec..1ff742a3c8d1 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1320,7 +1320,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 static void rcu_kthread_do_work(void)
 {
 	rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
-	rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
 	rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ebd69694144a..7a0720a20003 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -301,7 +301,6 @@ restart:
 		pending >>= softirq_bit;
 	}
 
-	rcu_bh_qs();
 	if (__this_cpu_read(ksoftirqd) == current)
 		rcu_softirq_qs();
 	local_irq_disable();
-- 
cgit v1.2.3


From 82fcecfa81855924cc69f3078113cf63dd6c2964 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 2 Jul 2018 09:04:27 -0700
Subject: rcu: Update comments and help text for no more RCU-bh updaters

This commit updates comments and help text to account for the fact that
RCU-bh update-side functions are now simple wrappers for their RCU or
RCU-sched counterparts.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h      | 12 ++++--------
 include/linux/rcupdate_wait.h |  6 +++---
 include/linux/rcutree.h       | 14 ++------------
 kernel/rcu/Kconfig            | 10 +++++-----
 kernel/rcu/tree.c             | 17 +++++++++--------
 kernel/rcu/update.c           |  2 +-
 6 files changed, 24 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9ebfd436cec7..8d5740edd63c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -688,14 +688,10 @@ static inline void rcu_read_unlock(void)
 /**
  * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
  *
- * This is equivalent of rcu_read_lock(), but to be used when updates
- * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since
- * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a
- * softirq handler to be a quiescent state, a process in RCU read-side
- * critical section must be protected by disabling softirqs. Read-side
- * critical sections in interrupt context can use just rcu_read_lock(),
- * though this should at least be commented to avoid confusing people
- * reading the code.
+ * This is equivalent of rcu_read_lock(), but also disables softirqs.
+ * Note that synchronize_rcu() and friends may be used for the update
+ * side, although synchronize_rcu_bh() is available as a wrapper in the
+ * short term.  Longer term, the _bh update-side API will be eliminated.
  *
  * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
  * must occur in the same context, for example, it is illegal to invoke
diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index 57f371344152..bc104699560e 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -36,13 +36,13 @@ do {									\
  * @...: List of call_rcu() functions for the flavors to wait on.
  *
  * This macro waits concurrently for multiple flavors of RCU grace periods.
- * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait
- * on concurrent RCU and RCU-bh grace periods.  Waiting on a give SRCU
+ * For example, synchronize_rcu_mult(call_rcu, call_rcu_sched) would wait
+ * on concurrent RCU and RCU-sched grace periods.  Waiting on a give SRCU
  * domain requires you to write a wrapper function for that SRCU domain's
  * call_srcu() function, supplying the corresponding srcu_struct.
  *
  * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU
- * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called
+ * or RCU-sched, given that anywhere synchronize_rcu_mult() can be called
  * is automatically a grace period.
  */
 #define synchronize_rcu_mult(...) \
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c789c302a2c9..f7a41323aa54 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -58,18 +58,8 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
 /**
  * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
  *
- * Wait for an RCU-bh grace period to elapse, but use a "big hammer"
- * approach to force the grace period to end quickly.  This consumes
- * significant time on all CPUs and is unfriendly to real-time workloads,
- * so is thus not recommended for any sort of common-case code.  In fact,
- * if you are using synchronize_rcu_bh_expedited() in a loop, please
- * restructure your code to batch your updates, and then use a single
- * synchronize_rcu_bh() instead.
- *
- * Note that it is illegal to call this function while holding any lock
- * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
- * to call this function from a CPU-hotplug notifier.  Failing to observe
- * these restriction will result in deadlock.
+ * This is a transitional API and will soon be removed, with all
+ * callers converted to synchronize_rcu_expedited().
  */
 static inline void synchronize_rcu_bh_expedited(void)
 {
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 9210379c0353..a0b7f0103ca9 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -229,11 +229,11 @@ config RCU_NOCB_CPU
 	  CPUs specified at boot time by the rcu_nocbs parameter.
 	  For each such CPU, a kthread ("rcuox/N") will be created to
 	  invoke callbacks, where the "N" is the CPU being offloaded,
-	  and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and
-	  "s" for RCU-sched.  Nothing prevents this kthread from running
-	  on the specified CPUs, but (1) the kthreads may be preempted
-	  between each callback, and (2) affinity or cgroups can be used
-	  to force the kthreads to run on whatever set of CPUs is desired.
+	  and where the "p" for RCU-preempt and "s" for RCU-sched.
+	  Nothing prevents this kthread from running on the specified
+	  CPUs, but (1) the kthreads may be preempted between each
+	  callback, and (2) affinity or cgroups can be used to force
+	  the kthreads to run on whatever set of CPUs is desired.
 
 	  Say Y here if you want to help to debug reduced OS jitter.
 	  Say N here if you are unsure.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index aedf81a0abd8..158c58d47b07 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -565,7 +565,8 @@ unsigned long rcu_sched_get_gp_seq(void)
 EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);
 
 /*
- * Return the number of RCU-bh GPs completed thus far for debug & stats.
+ * Return the number of RCU GPs completed thus far for debug & stats.
+ * This is a transitional API and will soon be removed.
  */
 unsigned long rcu_bh_get_gp_seq(void)
 {
@@ -3069,13 +3070,13 @@ void kfree_call_rcu(struct rcu_head *head,
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
 /*
- * Because a context switch is a grace period for RCU-sched and RCU-bh,
- * any blocking grace-period wait automatically implies a grace period
- * if there is only one CPU online at any point time during execution
- * of either synchronize_sched() or synchronize_rcu_bh().  It is OK to
- * occasionally incorrectly indicate that there are multiple CPUs online
- * when there was in fact only one the whole time, as this just adds
- * some overhead: RCU still operates correctly.
+ * Because a context switch is a grace period for RCU-sched, any blocking
+ * grace-period wait automatically implies a grace period if there
+ * is only one CPU online at any point time during execution of either
+ * synchronize_sched() or synchronize_rcu_bh().  It is OK to occasionally
+ * incorrectly indicate that there are multiple CPUs online when there
+ * was in fact only one the whole time, as this just adds some overhead:
+ * RCU still operates correctly.
  */
 static int rcu_blocking_is_gp(void)
 {
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 39cb23d22109..9ea87d0aa386 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -298,7 +298,7 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_held);
  *
  * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
  *
- * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * Note that rcu_read_lock_bh() is disallowed if the CPU is either idle or
  * offline from an RCU perspective, so check for those as well.
  */
 int rcu_read_lock_bh_held(void)
-- 
cgit v1.2.3


From 45975c7d21a1c0aba97e3d8007e2a7c123145748 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 2 Jul 2018 14:30:37 -0700
Subject: rcu: Define RCU-sched API in terms of RCU for Tree RCU PREEMPT builds

Now that RCU-preempt knows about preemption disabling, its implementation
of synchronize_rcu() works for synchronize_sched(), and likewise for the
other RCU-sched update-side API members.  This commit therefore confines
the RCU-sched update-side code to CONFIG_PREEMPT=n builds, and defines
RCU-sched's update-side API members in terms of those of RCU-preempt.

This means that any given build of the Linux kernel has only one
update-side flavor of RCU, namely RCU-preempt for CONFIG_PREEMPT=y builds
and RCU-sched for CONFIG_PREEMPT=n builds.  This in turn means that kernels
built with CONFIG_RCU_NOCB_CPU=y have only one rcuo kthread per CPU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
---
 include/linux/rcupdate.h |  14 +--
 include/linux/rcutiny.h  |   7 ++
 include/linux/rcutree.h  |   7 +-
 kernel/rcu/tree.c        | 301 ++++++++++++++++-------------------------------
 kernel/rcu/tree.h        |   9 +-
 kernel/rcu/tree_exp.h    | 153 ++++++++++++------------
 kernel/rcu/tree_plugin.h | 297 +++++++++++++++++-----------------------------
 7 files changed, 308 insertions(+), 480 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8d5740edd63c..94474bb6b5c4 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -49,11 +49,11 @@
 
 /* Exported common interfaces */
 
-#ifdef CONFIG_PREEMPT_RCU
-void call_rcu(struct rcu_head *head, rcu_callback_t func);
-#else /* #ifdef CONFIG_PREEMPT_RCU */
+#ifdef CONFIG_TINY_RCU
 #define	call_rcu	call_rcu_sched
-#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+#else
+void call_rcu(struct rcu_head *head, rcu_callback_t func);
+#endif
 
 void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
 void synchronize_sched(void);
@@ -92,11 +92,6 @@ static inline void __rcu_read_unlock(void)
 		preempt_enable();
 }
 
-static inline void synchronize_rcu(void)
-{
-	synchronize_sched();
-}
-
 static inline int rcu_preempt_depth(void)
 {
 	return 0;
@@ -107,7 +102,6 @@ static inline int rcu_preempt_depth(void)
 /* Internal to kernel */
 void rcu_init(void);
 extern int rcu_scheduler_active __read_mostly;
-void rcu_sched_qs(void);
 void rcu_check_callbacks(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcutree_migrate_callbacks(int cpu);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ac26c27ccde8..df2c0895c5e7 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -36,6 +36,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 /* Never flag non-existent other CPUs! */
 static inline bool rcu_eqs_special_set(int cpu) { return false; }
 
+static inline void synchronize_rcu(void)
+{
+	synchronize_sched();
+}
+
 static inline unsigned long get_state_synchronize_rcu(void)
 {
 	return 0;
@@ -94,6 +99,8 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 	call_rcu(head, func);
 }
 
+void rcu_sched_qs(void);
+
 static inline void rcu_softirq_qs(void)
 {
 	rcu_sched_qs();
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index f7a41323aa54..0c44720f0e84 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,14 +45,19 @@ static inline void rcu_virt_note_context_switch(int cpu)
 	rcu_note_context_switch(false);
 }
 
+void synchronize_rcu(void);
 static inline void synchronize_rcu_bh(void)
 {
 	synchronize_rcu();
 }
 
-void synchronize_sched_expedited(void);
 void synchronize_rcu_expedited(void);
 
+static inline void synchronize_sched_expedited(void)
+{
+	synchronize_rcu_expedited();
+}
+
 void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
 
 /**
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 61c15de884b0..5f79315f094e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -92,24 +92,29 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var
 
 #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
 DEFINE_RCU_TPS(sname) \
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \
-struct rcu_state sname##_state = { \
-	.level = { &sname##_state.node[0] }, \
-	.rda = &sname##_data, \
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data); \
+struct rcu_state rcu_state = { \
+	.level = { &rcu_state.node[0] }, \
+	.rda = &rcu_data, \
 	.call = cr, \
 	.gp_state = RCU_GP_IDLE, \
 	.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, \
-	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
+	.barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex), \
 	.name = RCU_STATE_NAME(sname), \
 	.abbr = sabbr, \
-	.exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
-	.exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
-	.ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \
+	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex), \
+	.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex), \
+	.ofl_lock = __SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock), \
 }
 
-RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
+#ifdef CONFIG_PREEMPT_RCU
+RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
+#else
+RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu);
+#endif
 
-static struct rcu_state *const rcu_state_p;
+static struct rcu_state *const rcu_state_p = &rcu_state;
+static struct rcu_data __percpu *const rcu_data_p = &rcu_data;
 LIST_HEAD(rcu_struct_flavors);
 
 /* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -220,31 +225,9 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
 	return rcu_seq_state(rcu_seq_current(&rsp->gp_seq));
 }
 
-/*
- * Note a quiescent state.  Because we do not need to know
- * how many quiescent states passed, just if there was at least
- * one since the start of the grace period, this just sets a flag.
- * The caller must have disabled preemption.
- */
-void rcu_sched_qs(void)
-{
-	RCU_LOCKDEP_WARN(preemptible(), "rcu_sched_qs() invoked with preemption enabled!!!");
-	if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
-		return;
-	trace_rcu_grace_period(TPS("rcu_sched"),
-			       __this_cpu_read(rcu_sched_data.gp_seq),
-			       TPS("cpuqs"));
-	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
-	if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
-		return;
-	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
-	rcu_report_exp_rdp(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
-}
-
 void rcu_softirq_qs(void)
 {
-	rcu_sched_qs();
-	rcu_preempt_qs();
+	rcu_qs();
 	rcu_preempt_deferred_qs(current);
 }
 
@@ -418,31 +401,18 @@ static void rcu_momentary_dyntick_idle(void)
 	rcu_preempt_deferred_qs(current);
 }
 
-/*
- * Note a context switch.  This is a quiescent state for RCU-sched,
- * and requires special handling for preemptible RCU.
- * The caller must have disabled interrupts.
+/**
+ * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
+ *
+ * If the current CPU is idle or running at a first-level (not nested)
+ * interrupt from idle, return true.  The caller must have at least
+ * disabled preemption.
  */
-void rcu_note_context_switch(bool preempt)
+static int rcu_is_cpu_rrupt_from_idle(void)
 {
-	barrier(); /* Avoid RCU read-side critical sections leaking down. */
-	trace_rcu_utilization(TPS("Start context switch"));
-	rcu_sched_qs();
-	rcu_preempt_note_context_switch(preempt);
-	/* Load rcu_urgent_qs before other flags. */
-	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
-		goto out;
-	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
-	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
-		rcu_momentary_dyntick_idle();
-	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
-	if (!preempt)
-		rcu_tasks_qs(current);
-out:
-	trace_rcu_utilization(TPS("End context switch"));
-	barrier(); /* Avoid RCU read-side critical sections leaking up. */
+	return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&
+	       __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
 }
-EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 /*
  * Register a quiescent state for all RCU flavors.  If there is an
@@ -476,8 +446,8 @@ void rcu_all_qs(void)
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);
 	}
-	if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)))
-		rcu_sched_qs();
+	if (unlikely(raw_cpu_read(rcu_data.cpu_no_qs.b.exp)))
+		rcu_qs();
 	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 	preempt_enable();
@@ -558,7 +528,7 @@ EXPORT_SYMBOL_GPL(rcu_get_gp_seq);
  */
 unsigned long rcu_sched_get_gp_seq(void)
 {
-	return READ_ONCE(rcu_sched_state.gp_seq);
+	return rcu_get_gp_seq();
 }
 EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);
 
@@ -590,7 +560,7 @@ EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
  */
 unsigned long rcu_exp_batches_completed_sched(void)
 {
-	return rcu_sched_state.expedited_sequence;
+	return rcu_state.expedited_sequence;
 }
 EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);
 
@@ -617,7 +587,7 @@ EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
  */
 void rcu_sched_force_quiescent_state(void)
 {
-	force_quiescent_state(&rcu_sched_state);
+	rcu_force_quiescent_state();
 }
 EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
 
@@ -668,10 +638,8 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
 	switch (test_type) {
 	case RCU_FLAVOR:
 	case RCU_BH_FLAVOR:
-		rsp = rcu_state_p;
-		break;
 	case RCU_SCHED_FLAVOR:
-		rsp = &rcu_sched_state;
+		rsp = rcu_state_p;
 		break;
 	default:
 		break;
@@ -1107,19 +1075,6 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
 
 #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
 
-/**
- * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
- *
- * If the current CPU is idle or running at a first-level (not nested)
- * interrupt from idle, return true.  The caller must have at least
- * disabled preemption.
- */
-static int rcu_is_cpu_rrupt_from_idle(void)
-{
-	return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&
-	       __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
-}
-
 /*
  * We are reporting a quiescent state on behalf of some other CPU, so
  * it is our responsibility to check for and handle potential overflow
@@ -2364,7 +2319,7 @@ rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
 	struct rcu_node *rnp_p;
 
 	raw_lockdep_assert_held_rcu_node(rnp);
-	if (WARN_ON_ONCE(rcu_state_p == &rcu_sched_state) ||
+	if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)) ||
 	    WARN_ON_ONCE(rsp != rcu_state_p) ||
 	    WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) ||
 	    rnp->qsmask != 0) {
@@ -2650,25 +2605,7 @@ void rcu_check_callbacks(int user)
 {
 	trace_rcu_utilization(TPS("Start scheduler-tick"));
 	increment_cpu_stall_ticks();
-	if (user || rcu_is_cpu_rrupt_from_idle()) {
-
-		/*
-		 * Get here if this CPU took its interrupt from user
-		 * mode or from the idle loop, and if this is not a
-		 * nested interrupt.  In this case, the CPU is in
-		 * a quiescent state, so note it.
-		 *
-		 * No memory barrier is required here because
-		 * rcu_sched_qs() references only CPU-local variables
-		 * that other CPUs neither access nor modify, at least
-		 * not while the corresponding CPU is online.
-		 */
-
-		rcu_sched_qs();
-		rcu_note_voluntary_context_switch(current);
-
-	}
-	rcu_preempt_check_callbacks();
+	rcu_flavor_check_callbacks(user);
 	if (rcu_pending())
 		invoke_rcu_core();
 
@@ -2694,7 +2631,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
 		mask = 0;
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		if (rnp->qsmask == 0) {
-			if (rcu_state_p == &rcu_sched_state ||
+			if (!IS_ENABLED(CONFIG_PREEMPT) ||
 			    rsp != rcu_state_p ||
 			    rcu_preempt_blocked_readers_cgp(rnp)) {
 				/*
@@ -3028,28 +2965,56 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 }
 
 /**
- * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
  * @func: actual callback function to be invoked after the grace period
  *
  * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_sched() assumes
- * that the read-side critical sections end on enabling of preemption
- * or on voluntary preemption.
- * RCU read-side critical sections are delimited by:
- *
- * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
- * - anything that disables preemption.
- *
- *  These may be nested.
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.  However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(), and
+ * may be nested.  In addition, regions of code across which interrupts,
+ * preemption, or softirqs have been disabled also serve as RCU read-side
+ * critical sections.  This includes hardware interrupt handlers, softirq
+ * handlers, and NMI handlers.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section.  On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu().  It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section.  Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
+ */
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
+{
+	__call_rcu(head, func, rcu_state_p, -1, 0);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+
+/**
+ * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
  *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
+ * This is transitional.
  */
 void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 {
-	__call_rcu(head, func, &rcu_sched_state, -1, 0);
+	call_rcu(head, func);
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
@@ -3067,73 +3032,14 @@ void kfree_call_rcu(struct rcu_head *head,
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
-/*
- * Because a context switch is a grace period for RCU-sched, any blocking
- * grace-period wait automatically implies a grace period if there
- * is only one CPU online at any point time during execution of either
- * synchronize_sched() or synchronize_rcu_bh().  It is OK to occasionally
- * incorrectly indicate that there are multiple CPUs online when there
- * was in fact only one the whole time, as this just adds some overhead:
- * RCU still operates correctly.
- */
-static int rcu_blocking_is_gp(void)
-{
-	int ret;
-
-	might_sleep();  /* Check for RCU read-side critical section. */
-	preempt_disable();
-	ret = num_online_cpus() <= 1;
-	preempt_enable();
-	return ret;
-}
-
 /**
  * synchronize_sched - wait until an rcu-sched grace period has elapsed.
  *
- * Control will return to the caller some time after a full rcu-sched
- * grace period has elapsed, in other words after all currently executing
- * rcu-sched read-side critical sections have completed.   These read-side
- * critical sections are delimited by rcu_read_lock_sched() and
- * rcu_read_unlock_sched(), and may be nested.  Note that preempt_disable(),
- * local_irq_disable(), and so on may be used in place of
- * rcu_read_lock_sched().
- *
- * This means that all preempt_disable code sequences, including NMI and
- * non-threaded hardware-interrupt handlers, in progress on entry will
- * have completed before this primitive returns.  However, this does not
- * guarantee that softirq handlers will have completed, since in some
- * kernels, these handlers can run in process context, and can block.
- *
- * Note that this guarantee implies further memory-ordering guarantees.
- * On systems with more than one CPU, when synchronize_sched() returns,
- * each CPU is guaranteed to have executed a full memory barrier since the
- * end of its last RCU-sched read-side critical section whose beginning
- * preceded the call to synchronize_sched().  In addition, each CPU having
- * an RCU read-side critical section that extends beyond the return from
- * synchronize_sched() is guaranteed to have executed a full memory barrier
- * after the beginning of synchronize_sched() and before the beginning of
- * that RCU read-side critical section.  Note that these guarantees include
- * CPUs that are offline, idle, or executing in user mode, as well as CPUs
- * that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked synchronize_sched(), which returned
- * to its caller on CPU B, then both CPU A and CPU B are guaranteed
- * to have executed a full memory barrier during the execution of
- * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
- * again only if the system has more than one CPU).
+ * This is transitional.
  */
 void synchronize_sched(void)
 {
-	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
-			 lock_is_held(&rcu_lock_map) ||
-			 lock_is_held(&rcu_sched_lock_map),
-			 "Illegal synchronize_sched() in RCU-sched read-side critical section");
-	if (rcu_blocking_is_gp())
-		return;
-	if (rcu_gp_is_expedited())
-		synchronize_sched_expedited();
-	else
-		wait_rcu_gp(call_rcu_sched);
+	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(synchronize_sched);
 
@@ -3181,41 +3087,23 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
 /**
  * get_state_synchronize_sched - Snapshot current RCU-sched state
  *
- * Returns a cookie that is used by a later call to cond_synchronize_sched()
- * to determine whether or not a full grace period has elapsed in the
- * meantime.
+ * This is transitional, and only used by rcutorture.
  */
 unsigned long get_state_synchronize_sched(void)
 {
-	/*
-	 * Any prior manipulation of RCU-protected data must happen
-	 * before the load from ->gp_seq.
-	 */
-	smp_mb();  /* ^^^ */
-	return rcu_seq_snap(&rcu_sched_state.gp_seq);
+	return get_state_synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
 
 /**
  * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period
- *
  * @oldstate: return value from earlier call to get_state_synchronize_sched()
  *
- * If a full RCU-sched grace period has elapsed since the earlier call to
- * get_state_synchronize_sched(), just return.  Otherwise, invoke
- * synchronize_sched() to wait for a full grace period.
- *
- * Yes, this function does not take counter wrap into account.  But
- * counter wrap is harmless.  If the counter wraps, we have waited for
- * more than 2 billion grace periods (and way more on a 64-bit system!),
- * so waiting for one additional grace period should be just fine.
+ * This is transitional and only used by rcutorture.
  */
 void cond_synchronize_sched(unsigned long oldstate)
 {
-	if (!rcu_seq_done(&rcu_sched_state.gp_seq, oldstate))
-		synchronize_sched();
-	else
-		smp_mb(); /* Ensure GP ends before subsequent accesses. */
+	cond_synchronize_rcu(oldstate);
 }
 EXPORT_SYMBOL_GPL(cond_synchronize_sched);
 
@@ -3452,12 +3340,28 @@ void rcu_barrier_bh(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
 
+/**
+ * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ *
+ * Note that this primitive does not necessarily wait for an RCU grace period
+ * to complete.  For example, if there are no RCU callbacks queued anywhere
+ * in the system, then rcu_barrier() is within its rights to return
+ * immediately, without waiting for anything, much less an RCU grace period.
+ */
+void rcu_barrier(void)
+{
+	_rcu_barrier(rcu_state_p);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
 /**
  * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
+ *
+ * This is transitional.
  */
 void rcu_barrier_sched(void)
 {
-	_rcu_barrier(&rcu_sched_state);
+	rcu_barrier();
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
@@ -3756,7 +3660,7 @@ void rcu_report_dead(unsigned int cpu)
 
 	/* QS for any half-done expedited RCU-sched GP. */
 	preempt_disable();
-	rcu_report_exp_rdp(&rcu_sched_state, this_cpu_ptr(rcu_sched_state.rda));
+	rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(rcu_state.rda));
 	preempt_enable();
 	rcu_preempt_deferred_qs(current);
 	for_each_rcu_flavor(rsp)
@@ -4098,10 +4002,9 @@ void __init rcu_init(void)
 
 	rcu_bootup_announce();
 	rcu_init_geometry();
-	rcu_init_one(&rcu_sched_state);
+	rcu_init_one(&rcu_state);
 	if (dump_tree)
-		rcu_dump_rcu_node_tree(&rcu_sched_state);
-	__rcu_init_preempt();
+		rcu_dump_rcu_node_tree(&rcu_state);
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
 	/*
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e02c882861eb..38658ca87dcb 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -225,9 +225,6 @@ struct rcu_data {
 
 	/* 5) _rcu_barrier(), OOM callbacks, and expediting. */
 	struct rcu_head barrier_head;
-#ifdef CONFIG_RCU_FAST_NO_HZ
-	struct rcu_head oom_head;
-#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 	int exp_dynticks_snap;		/* Double-check need for IPI. */
 
 	/* 6) Callback offloading. */
@@ -433,8 +430,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
 
 /* Forward declarations for rcutree_plugin.h */
 static void rcu_bootup_announce(void);
-static void rcu_preempt_qs(void);
-static void rcu_preempt_note_context_switch(bool preempt);
+static void rcu_qs(void);
 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
 static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
@@ -444,9 +440,8 @@ static int rcu_print_task_stall(struct rcu_node *rnp);
 static int rcu_print_task_exp_stall(struct rcu_node *rnp);
 static void rcu_preempt_check_blocked_tasks(struct rcu_state *rsp,
 					    struct rcu_node *rnp);
-static void rcu_preempt_check_callbacks(void);
+static void rcu_flavor_check_callbacks(int user);
 void call_rcu(struct rcu_head *head, rcu_callback_t func);
-static void __init __rcu_init_preempt(void);
 static void dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp,
 			    int ncheck);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 0f8f225c1b46..5619edfd414e 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -265,7 +265,7 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp)
 	rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, true);
 }
 
-/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
+/* Common code for work-done checking. */
 static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
 {
 	if (rcu_exp_gp_seq_done(rsp, s)) {
@@ -337,45 +337,6 @@ fastpath:
 	return false;
 }
 
-/* Invoked on each online non-idle CPU for expedited quiescent state. */
-static void sync_sched_exp_handler(void *data)
-{
-	struct rcu_data *rdp;
-	struct rcu_node *rnp;
-	struct rcu_state *rsp = data;
-
-	rdp = this_cpu_ptr(rsp->rda);
-	rnp = rdp->mynode;
-	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
-	    __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
-		return;
-	if (rcu_is_cpu_rrupt_from_idle()) {
-		rcu_report_exp_rdp(&rcu_sched_state,
-				   this_cpu_ptr(&rcu_sched_data));
-		return;
-	}
-	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
-	/* Store .exp before .rcu_urgent_qs. */
-	smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
-	resched_cpu(smp_processor_id());
-}
-
-/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
-static void sync_sched_exp_online_cleanup(int cpu)
-{
-	struct rcu_data *rdp;
-	int ret;
-	struct rcu_node *rnp;
-	struct rcu_state *rsp = &rcu_sched_state;
-
-	rdp = per_cpu_ptr(rsp->rda, cpu);
-	rnp = rdp->mynode;
-	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
-		return;
-	ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
-	WARN_ON_ONCE(ret);
-}
-
 /*
  * Select the CPUs within the specified rcu_node that the upcoming
  * expedited grace period needs to wait for.
@@ -691,39 +652,6 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
 	mutex_unlock(&rsp->exp_mutex);
 }
 
-/**
- * synchronize_sched_expedited - Brute-force RCU-sched grace period
- *
- * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
- * approach to force the grace period to end quickly.  This consumes
- * significant time on all CPUs and is unfriendly to real-time workloads,
- * so is thus not recommended for any sort of common-case code.  In fact,
- * if you are using synchronize_sched_expedited() in a loop, please
- * restructure your code to batch your updates, and then use a single
- * synchronize_sched() instead.
- *
- * This implementation can be thought of as an application of sequence
- * locking to expedited grace periods, but using the sequence counter to
- * determine when someone else has already done the work instead of for
- * retrying readers.
- */
-void synchronize_sched_expedited(void)
-{
-	struct rcu_state *rsp = &rcu_sched_state;
-
-	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
-			 lock_is_held(&rcu_lock_map) ||
-			 lock_is_held(&rcu_sched_lock_map),
-			 "Illegal synchronize_sched_expedited() in RCU read-side critical section");
-
-	/* If only one CPU, this is automatically a grace period. */
-	if (rcu_blocking_is_gp())
-		return;
-
-	_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
 #ifdef CONFIG_PREEMPT_RCU
 
 /*
@@ -801,6 +729,11 @@ static void sync_rcu_exp_handler(void *info)
 		resched_cpu(rdp->cpu);
 }
 
+/* PREEMPT=y, so no RCU-sched to clean up after. */
+static void sync_sched_exp_online_cleanup(int cpu)
+{
+}
+
 /**
  * synchronize_rcu_expedited - Brute-force RCU grace period
  *
@@ -818,6 +751,8 @@ static void sync_rcu_exp_handler(void *info)
  * you are using synchronize_rcu_expedited() in a loop, please restructure
  * your code to batch your updates, and then Use a single synchronize_rcu()
  * instead.
+ *
+ * This has the same semantics as (but is more brutal than) synchronize_rcu().
  */
 void synchronize_rcu_expedited(void)
 {
@@ -836,13 +771,79 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
+/* Invoked on each online non-idle CPU for expedited quiescent state. */
+static void sync_sched_exp_handler(void *data)
+{
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp = data;
+
+	rdp = this_cpu_ptr(rsp->rda);
+	rnp = rdp->mynode;
+	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
+	    __this_cpu_read(rcu_data.cpu_no_qs.b.exp))
+		return;
+	if (rcu_is_cpu_rrupt_from_idle()) {
+		rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(&rcu_data));
+		return;
+	}
+	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
+	/* Store .exp before .rcu_urgent_qs. */
+	smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
+	resched_cpu(smp_processor_id());
+}
+
+/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
+static void sync_sched_exp_online_cleanup(int cpu)
+{
+	struct rcu_data *rdp;
+	int ret;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp = &rcu_state;
+
+	rdp = per_cpu_ptr(rsp->rda, cpu);
+	rnp = rdp->mynode;
+	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
+		return;
+	ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
+	WARN_ON_ONCE(ret);
+}
+
 /*
- * Wait for an rcu-preempt grace period, but make it happen quickly.
- * But because preemptible RCU does not exist, map to rcu-sched.
+ * Because a context switch is a grace period for RCU-sched, any blocking
+ * grace-period wait automatically implies a grace period if there
+ * is only one CPU online at any point time during execution of either
+ * synchronize_sched() or synchronize_rcu_bh().  It is OK to occasionally
+ * incorrectly indicate that there are multiple CPUs online when there
+ * was in fact only one the whole time, as this just adds some overhead:
+ * RCU still operates correctly.
  */
+static int rcu_blocking_is_gp(void)
+{
+	int ret;
+
+	might_sleep();  /* Check for RCU read-side critical section. */
+	preempt_disable();
+	ret = num_online_cpus() <= 1;
+	preempt_enable();
+	return ret;
+}
+
+/* PREEMPT=n implementation of synchronize_rcu_expedited(). */
 void synchronize_rcu_expedited(void)
 {
-	synchronize_sched_expedited();
+	struct rcu_state *rsp = &rcu_state;
+
+	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
+			 lock_is_held(&rcu_lock_map) ||
+			 lock_is_held(&rcu_sched_lock_map),
+			 "Illegal synchronize_sched_expedited() in RCU read-side critical section");
+
+	/* If only one CPU, this is automatically a grace period. */
+	if (rcu_blocking_is_gp())
+		return;
+
+	_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 9f0d054e6c20..2c81f8dd63b4 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -123,10 +123,6 @@ static void __init rcu_bootup_announce_oddness(void)
 
 #ifdef CONFIG_PREEMPT_RCU
 
-RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
-static struct rcu_state *const rcu_state_p = &rcu_preempt_state;
-static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
-
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 			       bool wake);
 static void rcu_read_unlock_special(struct task_struct *t);
@@ -303,15 +299,15 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
  *
  * Callers to this function must disable preemption.
  */
-static void rcu_preempt_qs(void)
+static void rcu_qs(void)
 {
-	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n");
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
 	if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
 		trace_rcu_grace_period(TPS("rcu_preempt"),
 				       __this_cpu_read(rcu_data_p->gp_seq),
 				       TPS("cpuqs"));
 		__this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false);
-		barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
+		barrier(); /* Coordinate with rcu_flavor_check_callbacks(). */
 		current->rcu_read_unlock_special.b.need_qs = false;
 	}
 }
@@ -329,12 +325,14 @@ static void rcu_preempt_qs(void)
  *
  * Caller must disable interrupts.
  */
-static void rcu_preempt_note_context_switch(bool preempt)
+void rcu_note_context_switch(bool preempt)
 {
 	struct task_struct *t = current;
 	struct rcu_data *rdp = this_cpu_ptr(rcu_state_p->rda);
 	struct rcu_node *rnp;
 
+	barrier(); /* Avoid RCU read-side critical sections leaking down. */
+	trace_rcu_utilization(TPS("Start context switch"));
 	lockdep_assert_irqs_disabled();
 	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
 	if (t->rcu_read_lock_nesting > 0 &&
@@ -381,10 +379,13 @@ static void rcu_preempt_note_context_switch(bool preempt)
 	 * grace period, then the fact that the task has been enqueued
 	 * means that we continue to block the current grace period.
 	 */
-	rcu_preempt_qs();
+	rcu_qs();
 	if (rdp->deferred_qs)
 		rcu_report_exp_rdp(rcu_state_p, rdp);
+	trace_rcu_utilization(TPS("End context switch"));
+	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
+EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 /*
  * Check for preempted RCU readers blocking the current grace period
@@ -493,7 +494,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 		return;
 	}
 	if (special.b.need_qs) {
-		rcu_preempt_qs();
+		rcu_qs();
 		t->rcu_read_unlock_special.b.need_qs = false;
 		if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
 			local_irq_restore(flags);
@@ -596,7 +597,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
  */
 static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
 {
-	return (this_cpu_ptr(&rcu_preempt_data)->deferred_qs ||
+	return (this_cpu_ptr(&rcu_data)->deferred_qs ||
 		READ_ONCE(t->rcu_read_unlock_special.s)) &&
 	       t->rcu_read_lock_nesting <= 0;
 }
@@ -781,11 +782,14 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
  *
  * Caller must disable hard irqs.
  */
-static void rcu_preempt_check_callbacks(void)
+static void rcu_flavor_check_callbacks(int user)
 {
-	struct rcu_state *rsp = &rcu_preempt_state;
+	struct rcu_state *rsp = &rcu_state;
 	struct task_struct *t = current;
 
+	if (user || rcu_is_cpu_rrupt_from_idle()) {
+		rcu_note_voluntary_context_switch(current);
+	}
 	if (t->rcu_read_lock_nesting > 0 ||
 	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
 		/* No QS, force context switch if deferred. */
@@ -795,7 +799,7 @@ static void rcu_preempt_check_callbacks(void)
 		rcu_preempt_deferred_qs(t); /* Report deferred QS. */
 		return;
 	} else if (!t->rcu_read_lock_nesting) {
-		rcu_preempt_qs(); /* Report immediate QS. */
+		rcu_qs(); /* Report immediate QS. */
 		return;
 	}
 
@@ -808,44 +812,6 @@ static void rcu_preempt_check_callbacks(void)
 		t->rcu_read_unlock_special.b.need_qs = true;
 }
 
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed.  However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.  RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section.  On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu().  It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section.  Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- */
-void call_rcu(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func, rcu_state_p, -1, 0);
-}
-EXPORT_SYMBOL_GPL(call_rcu);
-
 /**
  * synchronize_rcu - wait until a grace period has elapsed.
  *
@@ -856,14 +822,28 @@ EXPORT_SYMBOL_GPL(call_rcu);
  * concurrently with new RCU read-side critical sections that began while
  * synchronize_rcu() was waiting.  RCU read-side critical sections are
  * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
+ * In addition, regions of code across which interrupts, preemption, or
+ * softirqs have been disabled also serve as RCU read-side critical
+ * sections.  This includes hardware interrupt handlers, softirq handlers,
+ * and NMI handlers.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_rcu() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-sched read-side critical section whose beginning
+ * preceded the call to synchronize_rcu().  In addition, each CPU having
+ * an RCU read-side critical section that extends beyond the return from
+ * synchronize_rcu() is guaranteed to have executed a full memory barrier
+ * after the beginning of synchronize_rcu() and before the beginning of
+ * that RCU read-side critical section.  Note that these guarantees include
+ * CPUs that are offline, idle, or executing in user mode, as well as CPUs
+ * that are executing in the kernel.
  *
- * See the description of synchronize_sched() for more detailed
- * information on memory-ordering guarantees.  However, please note
- * that -only- the memory-ordering guarantees apply.  For example,
- * synchronize_rcu() is -not- guaranteed to wait on things like code
- * protected by preempt_disable(), instead, synchronize_rcu() is -only-
- * guaranteed to wait on RCU read-side critical sections, that is, sections
- * of code protected by rcu_read_lock().
+ * Furthermore, if CPU A invoked synchronize_rcu(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
  */
 void synchronize_rcu(void)
 {
@@ -880,28 +860,6 @@ void synchronize_rcu(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
-/**
- * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
- *
- * Note that this primitive does not necessarily wait for an RCU grace period
- * to complete.  For example, if there are no RCU callbacks queued anywhere
- * in the system, then rcu_barrier() is within its rights to return
- * immediately, without waiting for anything, much less an RCU grace period.
- */
-void rcu_barrier(void)
-{
-	_rcu_barrier(rcu_state_p);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
-/*
- * Initialize preemptible RCU's state structures.
- */
-static void __init __rcu_init_preempt(void)
-{
-	rcu_init_one(rcu_state_p);
-}
-
 /*
  * Check for a task exiting while in a preemptible-RCU read-side
  * critical section, clean up if so.  No need to issue warnings,
@@ -964,8 +922,6 @@ dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
-static struct rcu_state *const rcu_state_p = &rcu_sched_state;
-
 /*
  * Tell them what RCU they are running.
  */
@@ -975,18 +931,48 @@ static void __init rcu_bootup_announce(void)
 	rcu_bootup_announce_oddness();
 }
 
-/* Because preemptible RCU does not exist, we can ignore its QSes. */
-static void rcu_preempt_qs(void)
+/*
+ * Note a quiescent state for PREEMPT=n.  Because we do not need to know
+ * how many quiescent states passed, just if there was at least one since
+ * the start of the grace period, this just sets a flag.  The caller must
+ * have disabled preemption.
+ */
+static void rcu_qs(void)
 {
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
+	if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
+		return;
+	trace_rcu_grace_period(TPS("rcu_sched"),
+			       __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
+	__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
+	if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
+		return;
+	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
+	rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(&rcu_data));
 }
 
 /*
- * Because preemptible RCU does not exist, we never have to check for
- * CPUs being in quiescent states.
+ * Note a PREEMPT=n context switch.  The caller must have disabled interrupts.
  */
-static void rcu_preempt_note_context_switch(bool preempt)
+void rcu_note_context_switch(bool preempt)
 {
+	barrier(); /* Avoid RCU read-side critical sections leaking down. */
+	trace_rcu_utilization(TPS("Start context switch"));
+	rcu_qs();
+	/* Load rcu_urgent_qs before other flags. */
+	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
+		goto out;
+	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
+		rcu_momentary_dyntick_idle();
+	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
+	if (!preempt)
+		rcu_tasks_qs(current);
+out:
+	trace_rcu_utilization(TPS("End context switch"));
+	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
+EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 /*
  * Because preemptible RCU does not exist, there are never any preempted
@@ -1054,29 +1040,48 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
 }
 
 /*
- * Because preemptible RCU does not exist, it never has any callbacks
- * to check.
+ * Check to see if this CPU is in a non-context-switch quiescent state
+ * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
+ * Also schedule RCU core processing.
+ *
+ * This function must be called from hardirq context.  It is normally
+ * invoked from the scheduling-clock interrupt.
  */
-static void rcu_preempt_check_callbacks(void)
+static void rcu_flavor_check_callbacks(int user)
 {
-}
+	if (user || rcu_is_cpu_rrupt_from_idle()) {
 
-/*
- * Because preemptible RCU does not exist, rcu_barrier() is just
- * another name for rcu_barrier_sched().
- */
-void rcu_barrier(void)
-{
-	rcu_barrier_sched();
+		/*
+		 * Get here if this CPU took its interrupt from user
+		 * mode or from the idle loop, and if this is not a
+		 * nested interrupt.  In this case, the CPU is in
+		 * a quiescent state, so note it.
+		 *
+		 * No memory barrier is required here because rcu_qs()
+		 * references only CPU-local variables that other CPUs
+		 * neither access nor modify, at least not while the
+		 * corresponding CPU is online.
+		 */
+
+		rcu_qs();
+	}
 }
-EXPORT_SYMBOL_GPL(rcu_barrier);
 
-/*
- * Because preemptible RCU does not exist, it need not be initialized.
- */
-static void __init __rcu_init_preempt(void)
+/* PREEMPT=n implementation of synchronize_rcu(). */
+void synchronize_rcu(void)
 {
+	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
+			 lock_is_held(&rcu_lock_map) ||
+			 lock_is_held(&rcu_sched_lock_map),
+			 "Illegal synchronize_rcu() in RCU-sched read-side critical section");
+	if (rcu_blocking_is_gp())
+		return;
+	if (rcu_gp_is_expedited())
+		synchronize_rcu_expedited();
+	else
+		wait_rcu_gp(call_rcu);
 }
+EXPORT_SYMBOL_GPL(synchronize_rcu);
 
 /*
  * Because preemptible RCU does not exist, tasks cannot possibly exit
@@ -1319,8 +1324,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 
 static void rcu_kthread_do_work(void)
 {
-	rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
-	rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
+	rcu_do_batch(&rcu_state, this_cpu_ptr(&rcu_data));
 }
 
 static void rcu_cpu_kthread_setup(unsigned int cpu)
@@ -1727,87 +1731,6 @@ static void rcu_idle_count_callbacks_posted(void)
 	__this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
 }
 
-/*
- * Data for flushing lazy RCU callbacks at OOM time.
- */
-static atomic_t oom_callback_count;
-static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
-
-/*
- * RCU OOM callback -- decrement the outstanding count and deliver the
- * wake-up if we are the last one.
- */
-static void rcu_oom_callback(struct rcu_head *rhp)
-{
-	if (atomic_dec_and_test(&oom_callback_count))
-		wake_up(&oom_callback_wq);
-}
-
-/*
- * Post an rcu_oom_notify callback on the current CPU if it has at
- * least one lazy callback.  This will unnecessarily post callbacks
- * to CPUs that already have a non-lazy callback at the end of their
- * callback list, but this is an infrequent operation, so accept some
- * extra overhead to keep things simple.
- */
-static void rcu_oom_notify_cpu(void *unused)
-{
-	struct rcu_state *rsp;
-	struct rcu_data *rdp;
-
-	for_each_rcu_flavor(rsp) {
-		rdp = raw_cpu_ptr(rsp->rda);
-		if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) {
-			atomic_inc(&oom_callback_count);
-			rsp->call(&rdp->oom_head, rcu_oom_callback);
-		}
-	}
-}
-
-/*
- * If low on memory, ensure that each CPU has a non-lazy callback.
- * This will wake up CPUs that have only lazy callbacks, in turn
- * ensuring that they free up the corresponding memory in a timely manner.
- * Because an uncertain amount of memory will be freed in some uncertain
- * timeframe, we do not claim to have freed anything.
- */
-static int rcu_oom_notify(struct notifier_block *self,
-			  unsigned long notused, void *nfreed)
-{
-	int cpu;
-
-	/* Wait for callbacks from earlier instance to complete. */
-	wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
-	smp_mb(); /* Ensure callback reuse happens after callback invocation. */
-
-	/*
-	 * Prevent premature wakeup: ensure that all increments happen
-	 * before there is a chance of the counter reaching zero.
-	 */
-	atomic_set(&oom_callback_count, 1);
-
-	for_each_online_cpu(cpu) {
-		smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
-		cond_resched_tasks_rcu_qs();
-	}
-
-	/* Unconditionally decrement: no need to wake ourselves up. */
-	atomic_dec(&oom_callback_count);
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block rcu_oom_nb = {
-	.notifier_call = rcu_oom_notify
-};
-
-static int __init rcu_register_oom_notifier(void)
-{
-	register_oom_notifier(&rcu_oom_nb);
-	return 0;
-}
-early_initcall(rcu_register_oom_notifier);
-
 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 
 #ifdef CONFIG_RCU_FAST_NO_HZ
-- 
cgit v1.2.3


From 709fdce7545c978e69f52eb19082ea3af44332f5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 3 Jul 2018 10:44:44 -0700
Subject: rcu: Express Tiny RCU updates in terms of RCU rather than RCU-sched

This commit renames Tiny RCU functions so that the lowest level of
functionality is RCU (e.g., synchronize_rcu()) rather than RCU-sched
(e.g., synchronize_sched()).  This provides greater naming compatibility
with Tree RCU, which will in turn permit more LoC removal once
the RCU-sched and RCU-bh update-side API is removed.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Fix Tiny call_rcu()'s EXPORT_SYMBOL() in response to a bug
  report from kbuild test robot. ]
---
 include/linux/rcupdate.h | 12 +++++-------
 include/linux/rcutiny.h  | 34 +++++++++++++++++++---------------
 include/linux/rcutree.h  |  1 -
 kernel/rcu/tiny.c        | 48 ++++++++++++++++++++++++------------------------
 4 files changed, 48 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 94474bb6b5c4..1207c6c9bd8b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -49,15 +49,14 @@
 
 /* Exported common interfaces */
 
-#ifdef CONFIG_TINY_RCU
-#define	call_rcu	call_rcu_sched
-#else
-void call_rcu(struct rcu_head *head, rcu_callback_t func);
+#ifndef CONFIG_TINY_RCU
+void synchronize_sched(void);
+void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
 #endif
 
-void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
-void synchronize_sched(void);
+void call_rcu(struct rcu_head *head, rcu_callback_t func);
 void rcu_barrier_tasks(void);
+void synchronize_rcu(void);
 
 static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
 {
@@ -68,7 +67,6 @@ static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
 
 void __rcu_read_lock(void);
 void __rcu_read_unlock(void);
-void synchronize_rcu(void);
 
 /*
  * Defined as a macro as it is a very low level header included from
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index df2c0895c5e7..e66fb8bc2127 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -36,9 +36,9 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 /* Never flag non-existent other CPUs! */
 static inline bool rcu_eqs_special_set(int cpu) { return false; }
 
-static inline void synchronize_rcu(void)
+static inline void synchronize_sched(void)
 {
-	synchronize_sched();
+	synchronize_rcu();
 }
 
 static inline unsigned long get_state_synchronize_rcu(void)
@@ -61,16 +61,11 @@ static inline void cond_synchronize_sched(unsigned long oldstate)
 	might_sleep();
 }
 
-static inline void synchronize_rcu_expedited(void)
-{
-	synchronize_sched();	/* Only one CPU, so pretty fast anyway!!! */
-}
+extern void rcu_barrier(void);
 
-extern void rcu_barrier_sched(void);
-
-static inline void rcu_barrier(void)
+static inline void rcu_barrier_sched(void)
 {
-	rcu_barrier_sched();  /* Only one CPU, so only one list of callbacks! */
+	rcu_barrier();  /* Only one CPU, so only one list of callbacks! */
 }
 
 static inline void rcu_barrier_bh(void)
@@ -88,27 +83,36 @@ static inline void synchronize_rcu_bh_expedited(void)
 	synchronize_sched();
 }
 
+static inline void synchronize_rcu_expedited(void)
+{
+	synchronize_sched();
+}
+
 static inline void synchronize_sched_expedited(void)
 {
 	synchronize_sched();
 }
 
-static inline void kfree_call_rcu(struct rcu_head *head,
-				  rcu_callback_t func)
+static inline void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
+{
+	call_rcu(head, func);
+}
+
+static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
 	call_rcu(head, func);
 }
 
-void rcu_sched_qs(void);
+void rcu_qs(void);
 
 static inline void rcu_softirq_qs(void)
 {
-	rcu_sched_qs();
+	rcu_qs();
 }
 
 #define rcu_note_context_switch(preempt) \
 	do { \
-		rcu_sched_qs(); \
+		rcu_qs(); \
 		rcu_tasks_qs(current); \
 	} while (0)
 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 0c44720f0e84..6d30a0809300 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,7 +45,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
 	rcu_note_context_switch(false);
 }
 
-void synchronize_rcu(void);
 static inline void synchronize_rcu_bh(void)
 {
 	synchronize_rcu();
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index cadcf63c4889..30826fb6e438 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -46,25 +46,25 @@ struct rcu_ctrlblk {
 };
 
 /* Definition for rcupdate control block. */
-static struct rcu_ctrlblk rcu_sched_ctrlblk = {
-	.donetail	= &rcu_sched_ctrlblk.rcucblist,
-	.curtail	= &rcu_sched_ctrlblk.rcucblist,
+static struct rcu_ctrlblk rcu_ctrlblk = {
+	.donetail	= &rcu_ctrlblk.rcucblist,
+	.curtail	= &rcu_ctrlblk.rcucblist,
 };
 
-void rcu_barrier_sched(void)
+void rcu_barrier(void)
 {
-	wait_rcu_gp(call_rcu_sched);
+	wait_rcu_gp(call_rcu);
 }
-EXPORT_SYMBOL(rcu_barrier_sched);
+EXPORT_SYMBOL(rcu_barrier);
 
 /* Record an rcu quiescent state.  */
-void rcu_sched_qs(void)
+void rcu_qs(void)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	if (rcu_sched_ctrlblk.donetail != rcu_sched_ctrlblk.curtail) {
-		rcu_sched_ctrlblk.donetail = rcu_sched_ctrlblk.curtail;
+	if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
+		rcu_ctrlblk.donetail = rcu_ctrlblk.curtail;
 		raise_softirq(RCU_SOFTIRQ);
 	}
 	local_irq_restore(flags);
@@ -79,7 +79,7 @@ void rcu_sched_qs(void)
 void rcu_check_callbacks(int user)
 {
 	if (user)
-		rcu_sched_qs();
+		rcu_qs();
 }
 
 /* Invoke the RCU callbacks whose grace period has elapsed.  */
@@ -90,17 +90,17 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused
 
 	/* Move the ready-to-invoke callbacks to a local list. */
 	local_irq_save(flags);
-	if (rcu_sched_ctrlblk.donetail == &rcu_sched_ctrlblk.rcucblist) {
+	if (rcu_ctrlblk.donetail == &rcu_ctrlblk.rcucblist) {
 		/* No callbacks ready, so just leave. */
 		local_irq_restore(flags);
 		return;
 	}
-	list = rcu_sched_ctrlblk.rcucblist;
-	rcu_sched_ctrlblk.rcucblist = *rcu_sched_ctrlblk.donetail;
-	*rcu_sched_ctrlblk.donetail = NULL;
-	if (rcu_sched_ctrlblk.curtail == rcu_sched_ctrlblk.donetail)
-		rcu_sched_ctrlblk.curtail = &rcu_sched_ctrlblk.rcucblist;
-	rcu_sched_ctrlblk.donetail = &rcu_sched_ctrlblk.rcucblist;
+	list = rcu_ctrlblk.rcucblist;
+	rcu_ctrlblk.rcucblist = *rcu_ctrlblk.donetail;
+	*rcu_ctrlblk.donetail = NULL;
+	if (rcu_ctrlblk.curtail == rcu_ctrlblk.donetail)
+		rcu_ctrlblk.curtail = &rcu_ctrlblk.rcucblist;
+	rcu_ctrlblk.donetail = &rcu_ctrlblk.rcucblist;
 	local_irq_restore(flags);
 
 	/* Invoke the callbacks on the local list. */
@@ -125,21 +125,21 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused
  *
  * Cool, huh?  (Due to Josh Triplett.)
  */
-void synchronize_sched(void)
+void synchronize_rcu(void)
 {
 	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
 			 lock_is_held(&rcu_lock_map) ||
 			 lock_is_held(&rcu_sched_lock_map),
 			 "Illegal synchronize_sched() in RCU read-side critical section");
 }
-EXPORT_SYMBOL_GPL(synchronize_sched);
+EXPORT_SYMBOL_GPL(synchronize_rcu);
 
 /*
  * Post an RCU callback to be invoked after the end of an RCU-sched grace
  * period.  But since we have but one CPU, that would be after any
  * quiescent state.
  */
-void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
 	unsigned long flags;
 
@@ -148,16 +148,16 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 	head->next = NULL;
 
 	local_irq_save(flags);
-	*rcu_sched_ctrlblk.curtail = head;
-	rcu_sched_ctrlblk.curtail = &head->next;
+	*rcu_ctrlblk.curtail = head;
+	rcu_ctrlblk.curtail = &head->next;
 	local_irq_restore(flags);
 
 	if (unlikely(is_idle_task(current))) {
-		/* force scheduling for rcu_sched_qs() */
+		/* force scheduling for rcu_qs() */
 		resched_cpu(0);
 	}
 }
-EXPORT_SYMBOL_GPL(call_rcu_sched);
+EXPORT_SYMBOL_GPL(call_rcu);
 
 void __init rcu_init(void)
 {
-- 
cgit v1.2.3


From 4c7e9c1434c6fc960774a5475f2fbccbf557fdeb Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 6 Jul 2018 09:54:25 -0700
Subject: rcu: Consolidate RCU-bh update-side function definitions

This commit saves a few lines by consolidating the RCU-bh function
definitions at the end of include/linux/rcupdate.h.  This consolidation
also makes it easier to remove them all when the time comes.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 27 ++++++++++++++++++++++-----
 include/linux/rcutiny.h  | 15 ---------------
 include/linux/rcutree.h  | 17 -----------------
 kernel/rcu/tree.c        |  9 ---------
 4 files changed, 22 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 1207c6c9bd8b..e530f5739033 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -58,11 +58,6 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
 void rcu_barrier_tasks(void);
 void synchronize_rcu(void);
 
-static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
-{
-	call_rcu(head, func);
-}
-
 #ifdef CONFIG_PREEMPT_RCU
 
 void __rcu_read_lock(void);
@@ -875,4 +870,26 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
 
 
+/* Transitional pre-consolidation compatibility definitions. */
+
+static inline void synchronize_rcu_bh(void)
+{
+	synchronize_rcu();
+}
+
+static inline void synchronize_rcu_bh_expedited(void)
+{
+	synchronize_rcu_expedited();
+}
+
+static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
+{
+	call_rcu(head, func);
+}
+
+static inline void rcu_barrier_bh(void)
+{
+	rcu_barrier();
+}
+
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index e66fb8bc2127..df82bada9b19 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -68,21 +68,6 @@ static inline void rcu_barrier_sched(void)
 	rcu_barrier();  /* Only one CPU, so only one list of callbacks! */
 }
 
-static inline void rcu_barrier_bh(void)
-{
-	rcu_barrier();
-}
-
-static inline void synchronize_rcu_bh(void)
-{
-	synchronize_sched();
-}
-
-static inline void synchronize_rcu_bh_expedited(void)
-{
-	synchronize_sched();
-}
-
 static inline void synchronize_rcu_expedited(void)
 {
 	synchronize_sched();
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 6d30a0809300..94820156aa62 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,11 +45,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
 	rcu_note_context_switch(false);
 }
 
-static inline void synchronize_rcu_bh(void)
-{
-	synchronize_rcu();
-}
-
 void synchronize_rcu_expedited(void);
 
 static inline void synchronize_sched_expedited(void)
@@ -59,19 +54,7 @@ static inline void synchronize_sched_expedited(void)
 
 void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
 
-/**
- * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
- *
- * This is a transitional API and will soon be removed, with all
- * callers converted to synchronize_rcu_expedited().
- */
-static inline void synchronize_rcu_bh_expedited(void)
-{
-	synchronize_rcu_expedited();
-}
-
 void rcu_barrier(void);
-void rcu_barrier_bh(void);
 void rcu_barrier_sched(void);
 bool rcu_eqs_special_set(int cpu);
 unsigned long get_state_synchronize_rcu(void);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 53ba7747878c..8d5dadaf3c53 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3252,15 +3252,6 @@ static void _rcu_barrier(void)
 	mutex_unlock(&rcu_state.barrier_mutex);
 }
 
-/**
- * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
- */
-void rcu_barrier_bh(void)
-{
-	_rcu_barrier();
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_bh);
-
 /**
  * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
  *
-- 
cgit v1.2.3


From a8bb74acd8efe2eb934d524ae20859980975b602 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 6 Jul 2018 11:46:47 -0700
Subject: rcu: Consolidate RCU-sched update-side function definitions

This commit saves a few lines by consolidating the RCU-sched function
definitions at the end of include/linux/rcupdate.h.  This consolidation
also makes it easier to remove them all when the time comes.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 38 +++++++++++++++++++++++++------
 include/linux/rcutiny.h  | 32 +-------------------------
 include/linux/rcutree.h  |  9 --------
 kernel/rcu/tree.c        | 58 ------------------------------------------------
 4 files changed, 32 insertions(+), 105 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e530f5739033..12103e1bbe67 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -48,12 +48,6 @@
 #define ulong2long(a)		(*(long *)(&(a)))
 
 /* Exported common interfaces */
-
-#ifndef CONFIG_TINY_RCU
-void synchronize_sched(void);
-void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
-#endif
-
 void call_rcu(struct rcu_head *head, rcu_callback_t func);
 void rcu_barrier_tasks(void);
 void synchronize_rcu(void);
@@ -170,7 +164,7 @@ void exit_tasks_rcu_finish(void);
 #define rcu_tasks_qs(t)	do { } while (0)
 #define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
 #define call_rcu_tasks call_rcu_sched
-#define synchronize_rcu_tasks synchronize_sched
+#define synchronize_rcu_tasks synchronize_rcu
 static inline void exit_tasks_rcu_start(void) { }
 static inline void exit_tasks_rcu_finish(void) { }
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
@@ -892,4 +886,34 @@ static inline void rcu_barrier_bh(void)
 	rcu_barrier();
 }
 
+static inline void synchronize_sched(void)
+{
+	synchronize_rcu();
+}
+
+static inline void synchronize_sched_expedited(void)
+{
+	synchronize_rcu_expedited();
+}
+
+static inline void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
+{
+	call_rcu(head, func);
+}
+
+static inline void rcu_barrier_sched(void)
+{
+	rcu_barrier();
+}
+
+static inline unsigned long get_state_synchronize_sched(void)
+{
+	return get_state_synchronize_rcu();
+}
+
+static inline void cond_synchronize_sched(unsigned long oldstate)
+{
+	cond_synchronize_rcu(oldstate);
+}
+
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index df82bada9b19..7fa4fb9e899e 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -36,11 +36,6 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 /* Never flag non-existent other CPUs! */
 static inline bool rcu_eqs_special_set(int cpu) { return false; }
 
-static inline void synchronize_sched(void)
-{
-	synchronize_rcu();
-}
-
 static inline unsigned long get_state_synchronize_rcu(void)
 {
 	return 0;
@@ -51,36 +46,11 @@ static inline void cond_synchronize_rcu(unsigned long oldstate)
 	might_sleep();
 }
 
-static inline unsigned long get_state_synchronize_sched(void)
-{
-	return 0;
-}
-
-static inline void cond_synchronize_sched(unsigned long oldstate)
-{
-	might_sleep();
-}
-
 extern void rcu_barrier(void);
 
-static inline void rcu_barrier_sched(void)
-{
-	rcu_barrier();  /* Only one CPU, so only one list of callbacks! */
-}
-
 static inline void synchronize_rcu_expedited(void)
 {
-	synchronize_sched();
-}
-
-static inline void synchronize_sched_expedited(void)
-{
-	synchronize_sched();
-}
-
-static inline void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
-{
-	call_rcu(head, func);
+	synchronize_rcu();
 }
 
 static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 94820156aa62..d09a9abe9440 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -46,21 +46,12 @@ static inline void rcu_virt_note_context_switch(int cpu)
 }
 
 void synchronize_rcu_expedited(void);
-
-static inline void synchronize_sched_expedited(void)
-{
-	synchronize_rcu_expedited();
-}
-
 void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
 
 void rcu_barrier(void);
-void rcu_barrier_sched(void);
 bool rcu_eqs_special_set(int cpu);
 unsigned long get_state_synchronize_rcu(void);
 void cond_synchronize_rcu(unsigned long oldstate);
-unsigned long get_state_synchronize_sched(void);
-void cond_synchronize_sched(unsigned long oldstate);
 
 void rcu_idle_enter(void);
 void rcu_idle_exit(void);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8d5dadaf3c53..1a2551a4d583 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2950,19 +2950,6 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
-/**
- * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * This is transitional.
- */
-void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
-{
-	call_rcu(head, func);
-}
-EXPORT_SYMBOL_GPL(call_rcu_sched);
-
 /*
  * Queue an RCU callback for lazy invocation after a grace period.
  * This will likely be later named something like "call_rcu_lazy()",
@@ -2976,17 +2963,6 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
-/**
- * synchronize_sched - wait until an rcu-sched grace period has elapsed.
- *
- * This is transitional.
- */
-void synchronize_sched(void)
-{
-	synchronize_rcu();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched);
-
 /**
  * get_state_synchronize_rcu - Snapshot current RCU state
  *
@@ -3028,29 +3004,6 @@ void cond_synchronize_rcu(unsigned long oldstate)
 }
 EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
 
-/**
- * get_state_synchronize_sched - Snapshot current RCU-sched state
- *
- * This is transitional, and only used by rcutorture.
- */
-unsigned long get_state_synchronize_sched(void)
-{
-	return get_state_synchronize_rcu();
-}
-EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
-
-/**
- * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period
- * @oldstate: return value from earlier call to get_state_synchronize_sched()
- *
- * This is transitional and only used by rcutorture.
- */
-void cond_synchronize_sched(unsigned long oldstate)
-{
-	cond_synchronize_rcu(oldstate);
-}
-EXPORT_SYMBOL_GPL(cond_synchronize_sched);
-
 /*
  * Check to see if there is any immediate RCU-related work to be done by
  * the current CPU, for the specified type of RCU, returning 1 if so and
@@ -3266,17 +3219,6 @@ void rcu_barrier(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
-/**
- * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
- *
- * This is transitional.
- */
-void rcu_barrier_sched(void)
-{
-	rcu_barrier();
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_sched);
-
 /*
  * Propagate ->qsinitmask bits up the rcu_node tree to account for the
  * first CPU in a given leaf rcu_node structure coming online.  The caller
-- 
cgit v1.2.3


From 2bd8b1a2afc4463cc503665e98faa5909d1ac462 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 7 Jul 2018 18:12:26 -0700
Subject: rcu: Clean up flavor-related definitions and comments in rcupdate.h

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 12103e1bbe67..d6d543b60a9f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -119,11 +119,10 @@ static inline void rcu_init_nohz(void) { }
  * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
  * @a: Code that RCU needs to pay attention to.
  *
- * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden
- * in the inner idle loop, that is, between the rcu_idle_enter() and
- * the rcu_idle_exit() -- RCU will happily ignore any such read-side
- * critical sections.  However, things like powertop need tracepoints
- * in the inner idle loop.
+ * RCU read-side critical sections are forbidden in the inner idle loop,
+ * that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU
+ * will happily ignore any such read-side critical sections.  However,
+ * things like powertop need tracepoints in the inner idle loop.
  *
  * This macro provides the way out:  RCU_NONIDLE(do_something_with_RCU())
  * will tell RCU that it needs to pay attention, invoke its argument
@@ -163,7 +162,7 @@ void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define rcu_tasks_qs(t)	do { } while (0)
 #define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
-#define call_rcu_tasks call_rcu_sched
+#define call_rcu_tasks call_rcu
 #define synchronize_rcu_tasks synchronize_rcu
 static inline void exit_tasks_rcu_start(void) { }
 static inline void exit_tasks_rcu_finish(void) { }
@@ -309,8 +308,8 @@ static inline void rcu_preempt_sleep_check(void) { }
  * Helper functions for rcu_dereference_check(), rcu_dereference_protected()
  * and rcu_assign_pointer().  Some of these could be folded into their
  * callers, but they are left separate in order to ease introduction of
- * multiple flavors of pointers to match the multiple flavors of RCU
- * (e.g., __rcu_sched, and __srcu), should this make sense in the future.
+ * multiple pointers markings to match different RCU implementations
+ * (e.g., __srcu), should this make sense in the future.
  */
 
 #ifdef __CHECKER__
@@ -670,9 +669,8 @@ static inline void rcu_read_unlock(void)
  * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
  *
  * This is equivalent of rcu_read_lock(), but also disables softirqs.
- * Note that synchronize_rcu() and friends may be used for the update
- * side, although synchronize_rcu_bh() is available as a wrapper in the
- * short term.  Longer term, the _bh update-side API will be eliminated.
+ * Note that anything else that disables softirqs can also serve as
+ * an RCU read-side critical section.
  *
  * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
  * must occur in the same context, for example, it is illegal to invoke
@@ -705,10 +703,9 @@ static inline void rcu_read_unlock_bh(void)
 /**
  * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section
  *
- * This is equivalent of rcu_read_lock(), but to be used when updates
- * are being done using call_rcu_sched() or synchronize_rcu_sched().
- * Read-side critical sections can also be introduced by anything that
- * disables preemption, including local_irq_disable() and friends.
+ * This is equivalent of rcu_read_lock(), but disables preemption.
+ * Read-side critical sections can also be introduced by anything else
+ * that disables preemption, including local_irq_disable() and friends.
  *
  * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched()
  * must occur in the same context, for example, it is illegal to invoke
-- 
cgit v1.2.3


From aff5f0369e312b0ab0ca7a2a12dd64b7e39c7091 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 7 Jul 2018 18:12:26 -0700
Subject: rcu: Clean up flavor-related definitions and comments in rculist.h

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rculist.h | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 4786c2235b98..e91ec9ddcd30 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -182,7 +182,7 @@ static inline void list_replace_rcu(struct list_head *old,
  * @list:	the RCU-protected list to splice
  * @prev:	points to the last element of the existing list
  * @next:	points to the first element of the existing list
- * @sync:	function to sync: synchronize_rcu(), synchronize_sched(), ...
+ * @sync:	synchronize_rcu, synchronize_rcu_expedited, ...
  *
  * The list pointed to by @prev and @next can be RCU-read traversed
  * concurrently with this function.
@@ -240,7 +240,7 @@ static inline void __list_splice_init_rcu(struct list_head *list,
  *                        designed for stacks.
  * @list:	the RCU-protected list to splice
  * @head:	the place in the existing list to splice the first list into
- * @sync:	function to sync: synchronize_rcu(), synchronize_sched(), ...
+ * @sync:	synchronize_rcu, synchronize_rcu_expedited, ...
  */
 static inline void list_splice_init_rcu(struct list_head *list,
 					struct list_head *head,
@@ -255,7 +255,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
  *                             list, designed for queues.
  * @list:	the RCU-protected list to splice
  * @head:	the place in the existing list to splice the first list into
- * @sync:	function to sync: synchronize_rcu(), synchronize_sched(), ...
+ * @sync:	synchronize_rcu, synchronize_rcu_expedited, ...
  */
 static inline void list_splice_tail_init_rcu(struct list_head *list,
 					     struct list_head *head,
@@ -359,13 +359,12 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  * @type:       the type of the struct this is embedded in.
  * @member:     the name of the list_head within the struct.
  *
- * This primitive may safely run concurrently with the _rcu list-mutation
- * primitives such as list_add_rcu(), but requires some implicit RCU
- * read-side guarding.  One example is running within a special
- * exception-time environment where preemption is disabled and where
- * lockdep cannot be invoked (in which case updaters must use RCU-sched,
- * as in synchronize_sched(), call_rcu_sched(), and friends).  Another
- * example is when items are added to the list, but never deleted.
+ * This primitive may safely run concurrently with the _rcu
+ * list-mutation primitives such as list_add_rcu(), but requires some
+ * implicit RCU read-side guarding.  One example is running within a special
+ * exception-time environment where preemption is disabled and where lockdep
+ * cannot be invoked.  Another example is when items are added to the list,
+ * but never deleted.
  */
 #define list_entry_lockless(ptr, type, member) \
 	container_of((typeof(ptr))READ_ONCE(ptr), type, member)
@@ -376,13 +375,12 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  * @head:	the head for your list.
  * @member:	the name of the list_struct within the struct.
  *
- * This primitive may safely run concurrently with the _rcu list-mutation
- * primitives such as list_add_rcu(), but requires some implicit RCU
- * read-side guarding.  One example is running within a special
- * exception-time environment where preemption is disabled and where
- * lockdep cannot be invoked (in which case updaters must use RCU-sched,
- * as in synchronize_sched(), call_rcu_sched(), and friends).  Another
- * example is when items are added to the list, but never deleted.
+ * This primitive may safely run concurrently with the _rcu
+ * list-mutation primitives such as list_add_rcu(), but requires some
+ * implicit RCU read-side guarding.  One example is running within a special
+ * exception-time environment where preemption is disabled and where lockdep
+ * cannot be invoked.  Another example is when items are added to the list,
+ * but never deleted.
  */
 #define list_for_each_entry_lockless(pos, head, member) \
 	for (pos = list_entry_lockless((head)->next, typeof(*pos), member); \
-- 
cgit v1.2.3


From df8561a0d7e4f5cb72d0aa6be43e154b2027bba6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 7 Jul 2018 18:12:26 -0700
Subject: rcu: Clean up flavor-related definitions and comments in
 rcupdate_wait.h

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate_wait.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index bc104699560e..8a16c3eb3dd0 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -33,17 +33,17 @@ do {									\
 
 /**
  * synchronize_rcu_mult - Wait concurrently for multiple grace periods
- * @...: List of call_rcu() functions for the flavors to wait on.
+ * @...: List of call_rcu() functions for different grace periods to wait on
  *
- * This macro waits concurrently for multiple flavors of RCU grace periods.
- * For example, synchronize_rcu_mult(call_rcu, call_rcu_sched) would wait
- * on concurrent RCU and RCU-sched grace periods.  Waiting on a give SRCU
+ * This macro waits concurrently for multiple types of RCU grace periods.
+ * For example, synchronize_rcu_mult(call_rcu, call_rcu_tasks) would wait
+ * on concurrent RCU and RCU-tasks grace periods.  Waiting on a give SRCU
  * domain requires you to write a wrapper function for that SRCU domain's
  * call_srcu() function, supplying the corresponding srcu_struct.
  *
- * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU
- * or RCU-sched, given that anywhere synchronize_rcu_mult() can be called
- * is automatically a grace period.
+ * If Tiny RCU, tell _wait_rcu_gp() does not bother waiting for RCU,
+ * given that anywhere synchronize_rcu_mult() can be called is automatically
+ * a grace period.
  */
 #define synchronize_rcu_mult(...) \
 	_wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__)
-- 
cgit v1.2.3


From 4d232dfe1df35254298e7986c4de8c9f63f58c79 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 10 Jul 2018 12:53:40 -0700
Subject: rcu: Remove !PREEMPT code from rcu_note_voluntary_context_switch()

Because RCU-tasks exists only in PREEMPT kernels and because RCU-sched
no longer exists in PREEMPT kernels, it is no longer necessary for the
rcu_note_voluntary_context_switch() macro to do anything for !PREEMPT
kernels.  This commit therefore removes !PREEMPT-related code from
this macro, namely, the rcu_all_qs().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d6d543b60a9f..e4f821165d0b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -150,18 +150,14 @@ static inline void rcu_init_nohz(void) { }
 		if (READ_ONCE((t)->rcu_tasks_holdout)) \
 			WRITE_ONCE((t)->rcu_tasks_holdout, false); \
 	} while (0)
-#define rcu_note_voluntary_context_switch(t) \
-	do { \
-		rcu_all_qs(); \
-		rcu_tasks_qs(t); \
-	} while (0)
+#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t)
 void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
 void synchronize_rcu_tasks(void);
 void exit_tasks_rcu_start(void);
 void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define rcu_tasks_qs(t)	do { } while (0)
-#define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
+#define rcu_note_voluntary_context_switch(t) do { } while (0)
 #define call_rcu_tasks call_rcu
 #define synchronize_rcu_tasks synchronize_rcu
 static inline void exit_tasks_rcu_start(void) { }
-- 
cgit v1.2.3


From 395a2f097ebdddf2bfa286b6119f1b231025c2f1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 10 Jul 2018 14:00:14 -0700
Subject: rcu: Define rcu_all_qs() only in !PREEMPT builds

Now that rcu_all_qs() is used only in !PREEMPT builds, move it to
tree_plugin.h so that it is defined only in those builds.  This in
turn means that rcu_momentary_dyntick_idle() is only used in !PREEMPT
builds, but it is simply marked __maybe_unused in order to keep it
near the rest of the dyntick-idle code.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutree.h  |  2 ++
 kernel/rcu/tree.c        | 41 +----------------------------------------
 kernel/rcu/tree_plugin.h | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index d09a9abe9440..7f83179177d1 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -66,7 +66,9 @@ void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 void rcu_end_inkernel_boot(void);
 bool rcu_is_watching(void);
+#ifndef CONFIG_PREEMPT
 void rcu_all_qs(void);
+#endif
 
 /* RCUtree hotplug events */
 int rcutree_prepare_cpu(unsigned int cpu);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c8761e7c7c00..e140aaa78527 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -356,7 +356,7 @@ bool rcu_eqs_special_set(int cpu)
  *
  * The caller must have disabled interrupts and must not be idle.
  */
-static void rcu_momentary_dyntick_idle(void)
+static void __maybe_unused rcu_momentary_dyntick_idle(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 	int special;
@@ -381,45 +381,6 @@ static int rcu_is_cpu_rrupt_from_idle(void)
 	       __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
 }
 
-/*
- * Register an urgently needed quiescent state.  If there is an
- * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
- * dyntick-idle quiescent state visible to other CPUs, which will in
- * some cases serve for expedited as well as normal grace periods.
- * Either way, register a lightweight quiescent state.
- *
- * The barrier() calls are redundant in the common case when this is
- * called externally, but just in case this is called from within this
- * file.
- *
- */
-void rcu_all_qs(void)
-{
-	unsigned long flags;
-
-	if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs))
-		return;
-	preempt_disable();
-	/* Load rcu_urgent_qs before other flags. */
-	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) {
-		preempt_enable();
-		return;
-	}
-	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
-	barrier(); /* Avoid RCU read-side critical sections leaking down. */
-	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) {
-		local_irq_save(flags);
-		rcu_momentary_dyntick_idle();
-		local_irq_restore(flags);
-	}
-	if (unlikely(raw_cpu_read(rcu_data.cpu_no_qs.b.exp)))
-		rcu_qs();
-	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
-	barrier(); /* Avoid RCU read-side critical sections leaking up. */
-	preempt_enable();
-}
-EXPORT_SYMBOL_GPL(rcu_all_qs);
-
 #define DEFAULT_RCU_BLIMIT 10     /* Maximum callbacks per rcu_do_batch. */
 static long blimit = DEFAULT_RCU_BLIMIT;
 #define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index cd4c1b979446..7add1c297500 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -947,6 +947,45 @@ static void rcu_qs(void)
 	rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
 }
 
+/*
+ * Register an urgently needed quiescent state.  If there is an
+ * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
+ * dyntick-idle quiescent state visible to other CPUs, which will in
+ * some cases serve for expedited as well as normal grace periods.
+ * Either way, register a lightweight quiescent state.
+ *
+ * The barrier() calls are redundant in the common case when this is
+ * called externally, but just in case this is called from within this
+ * file.
+ *
+ */
+void rcu_all_qs(void)
+{
+	unsigned long flags;
+
+	if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs))
+		return;
+	preempt_disable();
+	/* Load rcu_urgent_qs before other flags. */
+	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) {
+		preempt_enable();
+		return;
+	}
+	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
+	barrier(); /* Avoid RCU read-side critical sections leaking down. */
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) {
+		local_irq_save(flags);
+		rcu_momentary_dyntick_idle();
+		local_irq_restore(flags);
+	}
+	if (unlikely(raw_cpu_read(rcu_data.cpu_no_qs.b.exp)))
+		rcu_qs();
+	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
+	barrier(); /* Avoid RCU read-side critical sections leaking up. */
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(rcu_all_qs);
+
 /*
  * Note a PREEMPT=n context switch.  The caller must have disabled interrupts.
  */
-- 
cgit v1.2.3


From dd46a7882c2c2006201e053ebf5e9ad761860cc0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 10 Jul 2018 18:37:30 -0700
Subject: rcu: Inline _rcu_barrier() into its sole remaining caller

Because rcu_barrier() is a one-line wrapper function for _rcu_barrier()
and because nothing else calls _rcu_barrier(), this commit inlines
_rcu_barrier() into rcu_barrier().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h | 20 ++++++++--------
 kernel/rcu/tree.c          | 58 +++++++++++++++++++++-------------------------
 kernel/rcu/tree.h          |  4 ++--
 kernel/rcu/tree_plugin.h   |  2 +-
 4 files changed, 39 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index a8d07feff6a0..175e0bce22bd 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -705,20 +705,20 @@ TRACE_EVENT(rcu_torture_read,
 );
 
 /*
- * Tracepoint for _rcu_barrier() execution.  The string "s" describes
- * the _rcu_barrier phase:
- *	"Begin": _rcu_barrier() started.
- *	"EarlyExit": _rcu_barrier() piggybacked, thus early exit.
- *	"Inc1": _rcu_barrier() piggyback check counter incremented.
- *	"OfflineNoCB": _rcu_barrier() found callback on never-online CPU
- *	"OnlineNoCB": _rcu_barrier() found online no-CBs CPU.
- *	"OnlineQ": _rcu_barrier() found online CPU with callbacks.
- *	"OnlineNQ": _rcu_barrier() found online CPU, no callbacks.
+ * Tracepoint for rcu_barrier() execution.  The string "s" describes
+ * the rcu_barrier phase:
+ *	"Begin": rcu_barrier() started.
+ *	"EarlyExit": rcu_barrier() piggybacked, thus early exit.
+ *	"Inc1": rcu_barrier() piggyback check counter incremented.
+ *	"OfflineNoCB": rcu_barrier() found callback on never-online CPU
+ *	"OnlineNoCB": rcu_barrier() found online no-CBs CPU.
+ *	"OnlineQ": rcu_barrier() found online CPU with callbacks.
+ *	"OnlineNQ": rcu_barrier() found online CPU, no callbacks.
  *	"IRQ": An rcu_barrier_callback() callback posted on remote CPU.
  *	"IRQNQ": An rcu_barrier_callback() callback found no callbacks.
  *	"CB": An rcu_barrier_callback() invoked a callback, not the last.
  *	"LastCB": An rcu_barrier_callback() invoked the last callback.
- *	"Inc2": _rcu_barrier() piggyback check counter incremented.
+ *	"Inc2": rcu_barrier() piggyback check counter incremented.
  * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument
  * is the count of remaining callbacks, and "done" is the piggybacking count.
  */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e140aaa78527..ce16b8da2c6f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2747,7 +2747,7 @@ static void rcu_leak_callback(struct rcu_head *rhp)
 /*
  * Helper function for call_rcu() and friends.  The cpu argument will
  * normally be -1, indicating "currently running CPU".  It may specify
- * a CPU only if that CPU is a no-CBs CPU.  Currently, only _rcu_barrier()
+ * a CPU only if that CPU is a no-CBs CPU.  Currently, only rcu_barrier()
  * is expected to specify a CPU.
  */
 static void
@@ -2981,27 +2981,27 @@ static bool rcu_cpu_has_callbacks(bool *all_lazy)
 }
 
 /*
- * Helper function for _rcu_barrier() tracing.  If tracing is disabled,
+ * Helper function for rcu_barrier() tracing.  If tracing is disabled,
  * the compiler is expected to optimize this away.
  */
-static void _rcu_barrier_trace(const char *s, int cpu, unsigned long done)
+static void rcu_barrier_trace(const char *s, int cpu, unsigned long done)
 {
 	trace_rcu_barrier(rcu_state.name, s, cpu,
 			  atomic_read(&rcu_state.barrier_cpu_count), done);
 }
 
 /*
- * RCU callback function for _rcu_barrier().  If we are last, wake
- * up the task executing _rcu_barrier().
+ * RCU callback function for rcu_barrier().  If we are last, wake
+ * up the task executing rcu_barrier().
  */
 static void rcu_barrier_callback(struct rcu_head *rhp)
 {
 	if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) {
-		_rcu_barrier_trace(TPS("LastCB"), -1,
+		rcu_barrier_trace(TPS("LastCB"), -1,
 				   rcu_state.barrier_sequence);
 		complete(&rcu_state.barrier_completion);
 	} else {
-		_rcu_barrier_trace(TPS("CB"), -1, rcu_state.barrier_sequence);
+		rcu_barrier_trace(TPS("CB"), -1, rcu_state.barrier_sequence);
 	}
 }
 
@@ -3012,33 +3012,40 @@ static void rcu_barrier_func(void *unused)
 {
 	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
 
-	_rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
+	rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
 	rdp->barrier_head.func = rcu_barrier_callback;
 	debug_rcu_head_queue(&rdp->barrier_head);
 	if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
 		atomic_inc(&rcu_state.barrier_cpu_count);
 	} else {
 		debug_rcu_head_unqueue(&rdp->barrier_head);
-		_rcu_barrier_trace(TPS("IRQNQ"), -1,
+		rcu_barrier_trace(TPS("IRQNQ"), -1,
 				   rcu_state.barrier_sequence);
 	}
 }
 
-/* Orchestrate an RCU barrier, waiting for all RCU callbacks to complete.  */
-static void _rcu_barrier(void)
+/**
+ * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ *
+ * Note that this primitive does not necessarily wait for an RCU grace period
+ * to complete.  For example, if there are no RCU callbacks queued anywhere
+ * in the system, then rcu_barrier() is within its rights to return
+ * immediately, without waiting for anything, much less an RCU grace period.
+ */
+void rcu_barrier(void)
 {
 	int cpu;
 	struct rcu_data *rdp;
 	unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
 
-	_rcu_barrier_trace(TPS("Begin"), -1, s);
+	rcu_barrier_trace(TPS("Begin"), -1, s);
 
 	/* Take mutex to serialize concurrent rcu_barrier() requests. */
 	mutex_lock(&rcu_state.barrier_mutex);
 
 	/* Did someone else do our work for us? */
 	if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
-		_rcu_barrier_trace(TPS("EarlyExit"), -1,
+		rcu_barrier_trace(TPS("EarlyExit"), -1,
 				   rcu_state.barrier_sequence);
 		smp_mb(); /* caller's subsequent code after above check. */
 		mutex_unlock(&rcu_state.barrier_mutex);
@@ -3047,7 +3054,7 @@ static void _rcu_barrier(void)
 
 	/* Mark the start of the barrier operation. */
 	rcu_seq_start(&rcu_state.barrier_sequence);
-	_rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
+	rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
 
 	/*
 	 * Initialize the count to one rather than to zero in order to
@@ -3070,10 +3077,10 @@ static void _rcu_barrier(void)
 		rdp = per_cpu_ptr(&rcu_data, cpu);
 		if (rcu_is_nocb_cpu(cpu)) {
 			if (!rcu_nocb_cpu_needs_barrier(cpu)) {
-				_rcu_barrier_trace(TPS("OfflineNoCB"), cpu,
+				rcu_barrier_trace(TPS("OfflineNoCB"), cpu,
 						   rcu_state.barrier_sequence);
 			} else {
-				_rcu_barrier_trace(TPS("OnlineNoCB"), cpu,
+				rcu_barrier_trace(TPS("OnlineNoCB"), cpu,
 						   rcu_state.barrier_sequence);
 				smp_mb__before_atomic();
 				atomic_inc(&rcu_state.barrier_cpu_count);
@@ -3081,11 +3088,11 @@ static void _rcu_barrier(void)
 					   rcu_barrier_callback, cpu, 0);
 			}
 		} else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
-			_rcu_barrier_trace(TPS("OnlineQ"), cpu,
+			rcu_barrier_trace(TPS("OnlineQ"), cpu,
 					   rcu_state.barrier_sequence);
 			smp_call_function_single(cpu, rcu_barrier_func, NULL, 1);
 		} else {
-			_rcu_barrier_trace(TPS("OnlineNQ"), cpu,
+			rcu_barrier_trace(TPS("OnlineNQ"), cpu,
 					   rcu_state.barrier_sequence);
 		}
 	}
@@ -3102,25 +3109,12 @@ static void _rcu_barrier(void)
 	wait_for_completion(&rcu_state.barrier_completion);
 
 	/* Mark the end of the barrier operation. */
-	_rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
+	rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
 	rcu_seq_end(&rcu_state.barrier_sequence);
 
 	/* Other rcu_barrier() invocations can now safely proceed. */
 	mutex_unlock(&rcu_state.barrier_mutex);
 }
-
-/**
- * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
- *
- * Note that this primitive does not necessarily wait for an RCU grace period
- * to complete.  For example, if there are no RCU callbacks queued anywhere
- * in the system, then rcu_barrier() is within its rights to return
- * immediately, without waiting for anything, much less an RCU grace period.
- */
-void rcu_barrier(void)
-{
-	_rcu_barrier();
-}
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
 /*
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 46452d3d0fad..8cf93ac277ec 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -222,7 +222,7 @@ struct rcu_data {
 					/* Grace period that needs help */
 					/*  from cond_resched(). */
 
-	/* 5) _rcu_barrier(), OOM callbacks, and expediting. */
+	/* 5) rcu_barrier(), OOM callbacks, and expediting. */
 	struct rcu_head barrier_head;
 	int exp_dynticks_snap;		/* Double-check need for IPI. */
 
@@ -328,7 +328,7 @@ struct rcu_state {
 	atomic_t barrier_cpu_count;		/* # CPUs waiting on. */
 	struct completion barrier_completion;	/* Wake at barrier end. */
 	unsigned long barrier_sequence;		/* ++ at start and end of */
-						/*  _rcu_barrier(). */
+						/*  rcu_barrier(). */
 	/* End of fields guarded by barrier_mutex. */
 
 	struct mutex exp_mutex;			/* Serialize expedited GP. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7add1c297500..beaaca7a11f4 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1993,7 +1993,7 @@ static bool rcu_nocb_cpu_needs_barrier(int cpu)
 	 * There needs to be a barrier before this function is called,
 	 * but associated with a prior determination that no more
 	 * callbacks would be posted.  In the worst case, the first
-	 * barrier in _rcu_barrier() suffices (but the caller cannot
+	 * barrier in rcu_barrier() suffices (but the caller cannot
 	 * necessarily rely on this, not a substitute for the caller
 	 * getting the concurrency design right!).  There must also be
 	 * a barrier between the following load an posting of a callback
-- 
cgit v1.2.3


From 7e28c5af4ef6b539334aa5de40feca0c041c94df Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 11 Jul 2018 08:09:28 -0700
Subject: rcu: Eliminate ->rcu_qs_ctr from the rcu_dynticks structure

The ->rcu_qs_ctr counter was intended to allow providing a lightweight
report of a quiescent state to all RCU flavors.  But now that there is
only one flavor of RCU in any one running kernel, there is no point in
having this feature.  This commit therefore removes the ->rcu_qs_ctr
field from the rcu_dynticks structure and the ->rcu_qs_ctr_snap field
from the rcu_data structure.  This results in the "rqc" option to the
rcu_fqs trace event no longer being used, so this commit also removes the
"rqc" description from the header comment.

While in the neighborhood, this commit also causes the forward-progress
request .rcu_need_heavy_qs be set one jiffies_till_sched_qs interval
later in the grace period than the first setting of .rcu_urgent_qs.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h |  5 ++---
 kernel/rcu/tree.c          | 52 +++++++++++++---------------------------------
 kernel/rcu/tree.h          |  3 ---
 kernel/rcu/tree_plugin.h   |  5 +----
 4 files changed, 17 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 175e0bce22bd..f0c4d10e614b 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -393,9 +393,8 @@ TRACE_EVENT(rcu_quiescent_state_report,
  * Tracepoint for quiescent states detected by force_quiescent_state().
  * These trace events include the type of RCU, the grace-period number
  * that was blocked by the CPU, the CPU itself, and the type of quiescent
- * state, which can be "dti" for dyntick-idle mode, "kick" when kicking
- * a CPU that has been in dyntick-idle mode for too long, or "rqc" if the
- * CPU got a quiescent state via its rcu_qs_ctr.
+ * state, which can be "dti" for dyntick-idle mode or "kick" when kicking
+ * a CPU that has been in dyntick-idle mode for too long.
  */
 TRACE_EVENT(rcu_fqs,
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 77d2cbf7c831..bc42c600027c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1018,25 +1018,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 		return 1;
 	}
 
-	/*
-	 * Has this CPU encountered a cond_resched() since the beginning
-	 * of the grace period?  For this to be the case, the CPU has to
-	 * have noticed the current grace period.  This might not be the
-	 * case for nohz_full CPUs looping in the kernel.
-	 */
-	jtsq = jiffies_till_sched_qs;
-	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
-	if (time_after(jiffies, rcu_state.gp_start + jtsq) &&
-	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
-	    rcu_seq_current(&rdp->gp_seq) == rnp->gp_seq && !rdp->gpwrap) {
-		trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("rqc"));
-		rcu_gpnum_ovf(rnp, rdp);
-		return 1;
-	} else if (time_after(jiffies, rcu_state.gp_start + jtsq)) {
-		/* Load rcu_qs_ctr before store to rcu_urgent_qs. */
-		smp_store_release(ruqp, true);
-	}
-
 	/* If waiting too long on an offline CPU, complain. */
 	if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) &&
 	    time_after(jiffies, rcu_state.gp_start + HZ)) {
@@ -1060,29 +1041,27 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 
 	/*
 	 * A CPU running for an extended time within the kernel can
-	 * delay RCU grace periods.  When the CPU is in NO_HZ_FULL mode,
-	 * even context-switching back and forth between a pair of
-	 * in-kernel CPU-bound tasks cannot advance grace periods.
-	 * So if the grace period is old enough, make the CPU pay attention.
-	 * Note that the unsynchronized assignments to the per-CPU
-	 * rcu_need_heavy_qs variable are safe.  Yes, setting of
-	 * bits can be lost, but they will be set again on the next
-	 * force-quiescent-state pass.  So lost bit sets do not result
-	 * in incorrect behavior, merely in a grace period lasting
-	 * a few jiffies longer than it might otherwise.  Because
-	 * there are at most four threads involved, and because the
-	 * updates are only once every few jiffies, the probability of
-	 * lossage (and thus of slight grace-period extension) is
-	 * quite low.
+	 * delay RCU grace periods: (1) At age jiffies_till_sched_qs,
+	 * set .rcu_urgent_qs, (2) At age 2*jiffies_till_sched_qs, set
+	 * both .rcu_need_heavy_qs and .rcu_urgent_qs.  Note that the
+	 * unsynchronized assignments to the per-CPU rcu_need_heavy_qs
+	 * variable are safe because the assignments are repeated if this
+	 * CPU failed to pass through a quiescent state.  This code
+	 * also checks .jiffies_resched in case jiffies_till_sched_qs
+	 * is set way high.
 	 */
+	jtsq = jiffies_till_sched_qs;
+	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
 	rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);
 	if (!READ_ONCE(*rnhqp) &&
-	    (time_after(jiffies, rcu_state.gp_start + jtsq) ||
+	    (time_after(jiffies, rcu_state.gp_start + jtsq * 2) ||
 	     time_after(jiffies, rcu_state.jiffies_resched))) {
 		WRITE_ONCE(*rnhqp, true);
 		/* Store rcu_need_heavy_qs before rcu_urgent_qs. */
 		smp_store_release(ruqp, true);
 		rcu_state.jiffies_resched += jtsq; /* Re-enable beating. */
+	} else if (time_after(jiffies, rcu_state.gp_start + jtsq)) {
+		WRITE_ONCE(*ruqp, true);
 	}
 
 	/*
@@ -1091,7 +1070,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	 * see if the CPU is getting hammered with interrupts, but only
 	 * once per grace period, just to keep the IPIs down to a dull roar.
 	 */
-	if (jiffies - rcu_state.gp_start > rcu_jiffies_till_stall_check() / 2) {
+	if (time_after(jiffies, rcu_state.jiffies_resched)) {
 		resched_cpu(rdp->cpu);
 		if (IS_ENABLED(CONFIG_IRQ_WORK) &&
 		    !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
@@ -1669,7 +1648,6 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 		trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart"));
 		need_gp = !!(rnp->qsmask & rdp->grpmask);
 		rdp->cpu_no_qs.b.norm = need_gp;
-		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
 		rdp->core_needs_qs = need_gp;
 		zero_cpu_stall_ticks(rdp);
 	}
@@ -2230,7 +2208,6 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 		 * within the current grace period.
 		 */
 		rdp->cpu_no_qs.b.norm = true;	/* need qs for new gp. */
-		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
@@ -3213,7 +3190,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
 	rdp->gp_seq = rnp->gp_seq;
 	rdp->gp_seq_needed = rnp->gp_seq;
 	rdp->cpu_no_qs.b.norm = true;
-	rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
 	rdp->core_needs_qs = false;
 	rdp->rcu_iw_pending = false;
 	rdp->rcu_iw_gp_seq = rnp->gp_seq - 1;
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 8cf93ac277ec..4866fa44ab0b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -42,7 +42,6 @@ struct rcu_dynticks {
 	long dynticks_nmi_nesting;  /* Track irq/NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
 	bool rcu_need_heavy_qs;     /* GP old, need heavy quiescent state. */
-	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */
 	bool rcu_urgent_qs;	    /* GP old need light quiescent state. */
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	bool all_lazy;		    /* Are all CPU's CBs lazy? */
@@ -188,8 +187,6 @@ struct rcu_data {
 	/* 1) quiescent-state and grace-period handling : */
 	unsigned long	gp_seq;		/* Track rsp->rcu_gp_seq counter. */
 	unsigned long	gp_seq_needed;	/* Track rsp->rcu_gp_seq_needed ctr. */
-	unsigned long	rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */
-					/*  for rcu_all_qs() invocations. */
 	union rcu_noqs	cpu_no_qs;	/* No QSes yet for this CPU. */
 	bool		core_needs_qs;	/* Core waits for quiesc state. */
 	bool		beenonline;	/* CPU online at least once. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index beaaca7a11f4..726d57708849 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -978,9 +978,7 @@ void rcu_all_qs(void)
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);
 	}
-	if (unlikely(raw_cpu_read(rcu_data.cpu_no_qs.b.exp)))
-		rcu_qs();
-	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
+	rcu_qs();
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 	preempt_enable();
 }
@@ -1000,7 +998,6 @@ void rcu_note_context_switch(bool preempt)
 	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
 	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
 		rcu_momentary_dyntick_idle();
-	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
 	if (!preempt)
 		rcu_tasks_qs(current);
 out:
-- 
cgit v1.2.3


From 74de6960c99d8df0d09fb29a7b014cb9c5571e2b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 24 Jul 2018 15:28:09 -0700
Subject: rcu: Provide functions for determining if call_rcu() has been invoked

This commit adds rcu_head_init() and rcu_head_after_call_rcu() functions
to help RCU users detect when another CPU has passed the specified
rcu_head structure and function to call_rcu().  The rcu_head_init()
should be invoked before making the structure visible to RCU readers,
and then the rcu_head_after_call_rcu() may be invoked from within
an RCU read-side critical section on an rcu_head structure that
was obtained during a traversal of the data structure in question.
The rcu_head_after_call_rcu() function will return true if the rcu_head
structure has already been passed (with the specified function) to
call_rcu(), otherwise it will return false.

If rcu_head_init() has not been invoked on the rcu_head structure
or if the rcu_head (AKA callback) has already been invoked, then
rcu_head_after_call_rcu() will do WARN_ON_ONCE().

Reported-by: NeilBrown <neilb@suse.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Apply neilb naming feedback. ]
---
 include/linux/rcupdate.h | 40 ++++++++++++++++++++++++++++++++++++++++
 kernel/rcu/rcu.h         |  5 ++++-
 2 files changed, 44 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e4f821165d0b..4db8bcacc51a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -857,6 +857,46 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
 
 
+/* Has the specified rcu_head structure been handed to call_rcu()? */
+
+/*
+ * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu()
+ * @rhp: The rcu_head structure to initialize.
+ *
+ * If you intend to invoke rcu_head_after_call_rcu() to test whether a
+ * given rcu_head structure has already been passed to call_rcu(), then
+ * you must also invoke this rcu_head_init() function on it just after
+ * allocating that structure.  Calls to this function must not race with
+ * calls to call_rcu(), rcu_head_after_call_rcu(), or callback invocation.
+ */
+static inline void rcu_head_init(struct rcu_head *rhp)
+{
+	rhp->func = (rcu_callback_t)~0L;
+}
+
+/*
+ * rcu_head_after_call_rcu - Has this rcu_head been passed to call_rcu()?
+ * @rhp: The rcu_head structure to test.
+ * @func: The function passed to call_rcu() along with @rhp.
+ *
+ * Returns @true if the @rhp has been passed to call_rcu() with @func,
+ * and @false otherwise.  Emits a warning in any other case, including
+ * the case where @rhp has already been invoked after a grace period.
+ * Calls to this function must not race with callback invocation.  One way
+ * to avoid such races is to enclose the call to rcu_head_after_call_rcu()
+ * in an RCU read-side critical section that includes a read-side fetch
+ * of the pointer to the structure containing @rhp.
+ */
+static inline bool
+rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f)
+{
+	if (READ_ONCE(rhp->func) == f)
+		return true;
+	WARN_ON_ONCE(READ_ONCE(rhp->func) != (rcu_callback_t)~0L);
+	return false;
+}
+
+
 /* Transitional pre-consolidation compatibility definitions. */
 
 static inline void synchronize_rcu_bh(void)
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 5dec94509a7e..4c56c1d98fb3 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -224,6 +224,7 @@ void kfree(const void *);
  */
 static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
 {
+	rcu_callback_t f;
 	unsigned long offset = (unsigned long)head->func;
 
 	rcu_lock_acquire(&rcu_callback_map);
@@ -234,7 +235,9 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
 		return true;
 	} else {
 		RCU_TRACE(trace_rcu_invoke_callback(rn, head);)
-		head->func(head);
+		f = head->func;
+		WRITE_ONCE(head->func, (rcu_callback_t)0L);
+		f(head);
 		rcu_lock_release(&rcu_callback_map);
 		return false;
 	}
-- 
cgit v1.2.3


From 31ab604bf3232374e6471a2df9a83c4e75538390 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 3 Aug 2018 16:15:54 -0700
Subject: rcu: Remove unused rcu_dynticks_snap() from Tiny RCU

The rcu_dynticks_snap() function is defined in include/linux/rcutiny.h,
but is no longer used by Tiny RCU.  This commit therefore removes it.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutiny.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 7fa4fb9e899e..f183683bdf79 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,12 +27,6 @@
 
 #include <linux/ktime.h>
 
-struct rcu_dynticks;
-static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
-{
-	return 0;
-}
-
 /* Never flag non-existent other CPUs! */
 static inline bool rcu_eqs_special_set(int cpu) { return false; }
 
-- 
cgit v1.2.3


From e0fcba9ac02af5aeb1e1c3e842eab987f817c309 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 14 Aug 2018 08:45:54 -0700
Subject: srcu: Make call_srcu() available during very early boot

Event tracing is moving to SRCU in order to take advantage of the fact
that SRCU may be safely used from idle and even offline CPUs.  However,
event tracing can invoke call_srcu() very early in the boot process,
even before workqueue_init_early() is invoked (let alone rcu_init()).
Therefore, call_srcu()'s attempts to queue work fail miserably.

This commit therefore detects this situation, and refrains from attempting
to queue work before rcu_init() time, but does everything else that it
would have done, and in addition, adds the srcu_struct to a global list.
The rcu_init() function now invokes a new srcu_init() function, which
is empty if CONFIG_SRCU=n.  Otherwise, srcu_init() queues work for
each srcu_struct on the list.  This all happens early enough in boot
that there is but a single CPU with interrupts disabled, which allows
synchronization to be dispensed with.

Of course, the queued work won't actually be invoked until after
workqueue_init() is invoked, which happens shortly after the scheduler
is up and running.  This means that although call_srcu() may be invoked
any time after per-CPU variables have been set up, there is still a very
narrow window when synchronize_srcu() won't work, and this window
extends from the time that the scheduler starts until the time that
workqueue_init() returns.  This can be fixed in a manner similar to
the fix for synchronize_rcu_expedited() and friends, but until someone
actually needs to use synchronize_srcu() during this window, this fix
is added churn for no benefit.

Finally, note that Tree SRCU's new srcu_init() function invokes
queue_work() rather than the queue_delayed_work() function that is
invoked post-boot.  The reason is that queue_delayed_work() will (as you
would expect) post a timer, and timers have not yet been initialized.
So use of queue_work() avoids the complaints about use of uninitialized
spinlocks that would otherwise result.  Besides, some delay is already
provide by the aforementioned fact that the queued work won't actually
be invoked until after the scheduler is up and running.

Requested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/srcutiny.h |  2 ++
 include/linux/srcutree.h | 14 ++++++++------
 kernel/rcu/rcu.h         |  6 ++++++
 kernel/rcu/srcutiny.c    | 29 +++++++++++++++++++++++++++--
 kernel/rcu/srcutree.c    | 26 ++++++++++++++++++++++++--
 kernel/rcu/tiny.c        |  1 +
 kernel/rcu/tree.c        |  1 +
 kernel/rcu/update.c      |  9 +++++++++
 8 files changed, 78 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index f41d2fb09f87..2b5c0822e683 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -36,6 +36,7 @@ struct srcu_struct {
 	struct rcu_head *srcu_cb_head;	/* Pending callbacks: Head. */
 	struct rcu_head **srcu_cb_tail;	/* Pending callbacks: Tail. */
 	struct work_struct srcu_work;	/* For driving grace periods. */
+	struct list_head srcu_boot_entry; /* Early-boot callbacks. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -48,6 +49,7 @@ void srcu_drive_gp(struct work_struct *wp);
 	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
 	.srcu_cb_tail = &name.srcu_cb_head,				\
 	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\
+	.srcu_boot_entry = LIST_HEAD_INIT(name.srcu_boot_entry),	\
 	__SRCU_DEP_MAP_INIT(name)					\
 }
 
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 745d4ca4dd50..9cfa4610113a 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -94,6 +94,7 @@ struct srcu_struct {
 						/*  callback for the barrier */
 						/*  operation. */
 	struct delayed_work work;
+	struct list_head srcu_boot_entry;	/* Early-boot callbacks. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -105,12 +106,13 @@ struct srcu_struct {
 #define SRCU_STATE_SCAN2	2
 
 #define __SRCU_STRUCT_INIT(name, pcpu_name)				\
-	{								\
-		.sda = &pcpu_name,					\
-		.lock = __SPIN_LOCK_UNLOCKED(name.lock),		\
-		.srcu_gp_seq_needed = 0 - 1,				\
-		__SRCU_DEP_MAP_INIT(name)				\
-	}
+{									\
+	.sda = &pcpu_name,						\
+	.lock = __SPIN_LOCK_UNLOCKED(name.lock),			\
+	.srcu_gp_seq_needed = -1UL,					\
+	.srcu_boot_entry = LIST_HEAD_INIT(name.srcu_boot_entry),	\
+	__SRCU_DEP_MAP_INIT(name)					\
+}
 
 /*
  * Define and initialize a srcu struct at build time.
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 4d04683c31b2..e1b5aec5ec1c 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -435,6 +435,12 @@ do {									\
 
 #endif /* #if defined(SRCU) || !defined(TINY_RCU) */
 
+#ifdef CONFIG_SRCU
+void srcu_init(void);
+#else /* #ifdef CONFIG_SRCU */
+static inline void srcu_init(void) { }
+#endif /* #else #ifdef CONFIG_SRCU */
+
 #ifdef CONFIG_TINY_RCU
 /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
 static inline bool rcu_gp_is_normal(void) { return true; }
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 04fc2ed71af8..d233f0c63f6f 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -34,6 +34,8 @@
 #include "rcu.h"
 
 int rcu_scheduler_active __read_mostly;
+static LIST_HEAD(srcu_boot_list);
+static bool srcu_init_done;
 
 static int init_srcu_struct_fields(struct srcu_struct *sp)
 {
@@ -46,6 +48,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
 	sp->srcu_gp_waiting = false;
 	sp->srcu_idx = 0;
 	INIT_WORK(&sp->srcu_work, srcu_drive_gp);
+	INIT_LIST_HEAD(&sp->srcu_boot_entry);
 	return 0;
 }
 
@@ -179,8 +182,12 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	*sp->srcu_cb_tail = rhp;
 	sp->srcu_cb_tail = &rhp->next;
 	local_irq_restore(flags);
-	if (!READ_ONCE(sp->srcu_gp_running))
-		schedule_work(&sp->srcu_work);
+	if (!READ_ONCE(sp->srcu_gp_running)) {
+		if (likely(srcu_init_done))
+			schedule_work(&sp->srcu_work);
+		else if (list_empty(&sp->srcu_boot_entry))
+			list_add(&sp->srcu_boot_entry, &srcu_boot_list);
+	}
 }
 EXPORT_SYMBOL_GPL(call_srcu);
 
@@ -204,3 +211,21 @@ void __init rcu_scheduler_starting(void)
 {
 	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
 }
+
+/*
+ * Queue work for srcu_struct structures with early boot callbacks.
+ * The work won't actually execute until the workqueue initialization
+ * phase that takes place after the scheduler starts.
+ */
+void __init srcu_init(void)
+{
+	struct srcu_struct *sp;
+
+	srcu_init_done = true;
+	while (!list_empty(&srcu_boot_list)) {
+		sp = list_first_entry(&srcu_boot_list,
+				      struct srcu_struct, srcu_boot_entry);
+		list_del_init(&sp->srcu_boot_entry);
+		schedule_work(&sp->srcu_work);
+	}
+}
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 6c9866a854b1..2e7f6b460150 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -51,6 +51,10 @@ module_param(exp_holdoff, ulong, 0444);
 static ulong counter_wrap_check = (ULONG_MAX >> 2);
 module_param(counter_wrap_check, ulong, 0444);
 
+/* Early-boot callback-management, so early that no lock is required! */
+static LIST_HEAD(srcu_boot_list);
+static bool __read_mostly srcu_init_done;
+
 static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
 static void process_srcu(struct work_struct *work);
@@ -182,6 +186,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
 	mutex_init(&sp->srcu_barrier_mutex);
 	atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
 	INIT_DELAYED_WORK(&sp->work, process_srcu);
+	INIT_LIST_HEAD(&sp->srcu_boot_entry);
 	if (!is_static)
 		sp->sda = alloc_percpu(struct srcu_data);
 	init_srcu_struct_nodes(sp, is_static);
@@ -235,7 +240,6 @@ static void check_init_srcu_struct(struct srcu_struct *sp)
 {
 	unsigned long flags;
 
-	WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT);
 	/* The smp_load_acquire() pairs with the smp_store_release(). */
 	if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
 		return; /* Already initialized. */
@@ -701,7 +705,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 	    rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
 		WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
 		srcu_gp_start(sp);
-		queue_delayed_work(rcu_gp_wq, &sp->work, srcu_get_delay(sp));
+		if (likely(srcu_init_done))
+			queue_delayed_work(rcu_gp_wq, &sp->work,
+					   srcu_get_delay(sp));
+		else if (list_empty(&sp->srcu_boot_entry))
+			list_add(&sp->srcu_boot_entry, &srcu_boot_list);
 	}
 	spin_unlock_irqrestore_rcu_node(sp, flags);
 }
@@ -1308,3 +1316,17 @@ static int __init srcu_bootup_announce(void)
 	return 0;
 }
 early_initcall(srcu_bootup_announce);
+
+void __init srcu_init(void)
+{
+	struct srcu_struct *sp;
+
+	srcu_init_done = true;
+	while (!list_empty(&srcu_boot_list)) {
+		sp = list_first_entry(&srcu_boot_list,
+				      struct srcu_struct, srcu_boot_entry);
+		check_init_srcu_struct(sp);
+		list_del_init(&sp->srcu_boot_entry);
+		queue_work(rcu_gp_wq, &sp->work.work);
+	}
+}
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index befc9321a89c..101ed5bb836c 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -236,4 +236,5 @@ void __init rcu_init(void)
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 	rcu_early_boot_tests();
+	srcu_init();
 }
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0b760c1369f7..43c806291208 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4164,6 +4164,7 @@ void __init rcu_init(void)
 	WARN_ON(!rcu_gp_wq);
 	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
 	WARN_ON(!rcu_par_gp_wq);
+	srcu_init();
 }
 
 #include "tree_exp.h"
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 39cb23d22109..7d057d0aaec4 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -888,11 +888,16 @@ static void test_callback(struct rcu_head *r)
 	pr_info("RCU test callback executed %d\n", rcu_self_test_counter);
 }
 
+DEFINE_STATIC_SRCU(early_srcu);
+
 static void early_boot_test_call_rcu(void)
 {
 	static struct rcu_head head;
+	static struct rcu_head shead;
 
 	call_rcu(&head, test_callback);
+	if (IS_ENABLED(CONFIG_SRCU))
+		call_srcu(&early_srcu, &shead, test_callback);
 }
 
 static void early_boot_test_call_rcu_bh(void)
@@ -930,6 +935,10 @@ static int rcu_verify_early_boot_tests(void)
 	if (rcu_self_test) {
 		early_boot_test_counter++;
 		rcu_barrier();
+		if (IS_ENABLED(CONFIG_SRCU)) {
+			early_boot_test_counter++;
+			srcu_barrier(&early_srcu);
+		}
 	}
 	if (rcu_self_test_bh) {
 		early_boot_test_counter++;
-- 
cgit v1.2.3


From 4e6ea4ef56f9425cd239ffdb6be45b3aeeb347fd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 14 Aug 2018 14:41:49 -0700
Subject: srcu: Make early-boot call_srcu() reuse workqueue lists

Allocating a list_head structure that is almost never used, and, when
used, is used only during early boot (rcu_init() and earlier), is a bit
wasteful.  This commit therefore eliminates that list_head in favor of
the one in the work_struct structure.  This is safe because the work_struct
structure cannot be used until after rcu_init() returns.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/srcutiny.h |  2 --
 include/linux/srcutree.h |  3 +--
 kernel/rcu/srcutiny.c    | 10 +++++-----
 kernel/rcu/srcutree.c    | 11 +++++------
 4 files changed, 11 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 2b5c0822e683..f41d2fb09f87 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -36,7 +36,6 @@ struct srcu_struct {
 	struct rcu_head *srcu_cb_head;	/* Pending callbacks: Head. */
 	struct rcu_head **srcu_cb_tail;	/* Pending callbacks: Tail. */
 	struct work_struct srcu_work;	/* For driving grace periods. */
-	struct list_head srcu_boot_entry; /* Early-boot callbacks. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -49,7 +48,6 @@ void srcu_drive_gp(struct work_struct *wp);
 	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
 	.srcu_cb_tail = &name.srcu_cb_head,				\
 	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\
-	.srcu_boot_entry = LIST_HEAD_INIT(name.srcu_boot_entry),	\
 	__SRCU_DEP_MAP_INIT(name)					\
 }
 
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 9cfa4610113a..0ae91b3a7406 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -94,7 +94,6 @@ struct srcu_struct {
 						/*  callback for the barrier */
 						/*  operation. */
 	struct delayed_work work;
-	struct list_head srcu_boot_entry;	/* Early-boot callbacks. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -110,7 +109,7 @@ struct srcu_struct {
 	.sda = &pcpu_name,						\
 	.lock = __SPIN_LOCK_UNLOCKED(name.lock),			\
 	.srcu_gp_seq_needed = -1UL,					\
-	.srcu_boot_entry = LIST_HEAD_INIT(name.srcu_boot_entry),	\
+	.work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0),		\
 	__SRCU_DEP_MAP_INIT(name)					\
 }
 
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index d233f0c63f6f..b46e6683f8c9 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -48,7 +48,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
 	sp->srcu_gp_waiting = false;
 	sp->srcu_idx = 0;
 	INIT_WORK(&sp->srcu_work, srcu_drive_gp);
-	INIT_LIST_HEAD(&sp->srcu_boot_entry);
+	INIT_LIST_HEAD(&sp->srcu_work.entry);
 	return 0;
 }
 
@@ -185,8 +185,8 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	if (!READ_ONCE(sp->srcu_gp_running)) {
 		if (likely(srcu_init_done))
 			schedule_work(&sp->srcu_work);
-		else if (list_empty(&sp->srcu_boot_entry))
-			list_add(&sp->srcu_boot_entry, &srcu_boot_list);
+		else if (list_empty(&sp->srcu_work.entry))
+			list_add(&sp->srcu_work.entry, &srcu_boot_list);
 	}
 }
 EXPORT_SYMBOL_GPL(call_srcu);
@@ -224,8 +224,8 @@ void __init srcu_init(void)
 	srcu_init_done = true;
 	while (!list_empty(&srcu_boot_list)) {
 		sp = list_first_entry(&srcu_boot_list,
-				      struct srcu_struct, srcu_boot_entry);
-		list_del_init(&sp->srcu_boot_entry);
+				      struct srcu_struct, srcu_work.entry);
+		list_del_init(&sp->srcu_work.entry);
 		schedule_work(&sp->srcu_work);
 	}
 }
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 2e7f6b460150..86c7fd0a1bfe 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -186,7 +186,6 @@ static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
 	mutex_init(&sp->srcu_barrier_mutex);
 	atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
 	INIT_DELAYED_WORK(&sp->work, process_srcu);
-	INIT_LIST_HEAD(&sp->srcu_boot_entry);
 	if (!is_static)
 		sp->sda = alloc_percpu(struct srcu_data);
 	init_srcu_struct_nodes(sp, is_static);
@@ -708,8 +707,8 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 		if (likely(srcu_init_done))
 			queue_delayed_work(rcu_gp_wq, &sp->work,
 					   srcu_get_delay(sp));
-		else if (list_empty(&sp->srcu_boot_entry))
-			list_add(&sp->srcu_boot_entry, &srcu_boot_list);
+		else if (list_empty(&sp->work.work.entry))
+			list_add(&sp->work.work.entry, &srcu_boot_list);
 	}
 	spin_unlock_irqrestore_rcu_node(sp, flags);
 }
@@ -1323,10 +1322,10 @@ void __init srcu_init(void)
 
 	srcu_init_done = true;
 	while (!list_empty(&srcu_boot_list)) {
-		sp = list_first_entry(&srcu_boot_list,
-				      struct srcu_struct, srcu_boot_entry);
+		sp = list_first_entry(&srcu_boot_list, struct srcu_struct,
+				      work.work.entry);
 		check_init_srcu_struct(sp);
-		list_del_init(&sp->srcu_boot_entry);
+		list_del_init(&sp->work.work.entry);
 		queue_work(rcu_gp_wq, &sp->work.work);
 	}
 }
-- 
cgit v1.2.3