From abb06b99484a9f5af05c7147c289faf835f68e8e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 26 Jan 2017 13:45:38 -0800
Subject: rcu: Pull rcu_sched_qs_mask into rcu_dynticks structure

The rcu_sched_qs_mask variable is yet another isolated per-CPU variable,
so this commit pulls it into the pre-existing rcu_dynticks per-CPU
structure.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/RCU/Design/Data-Structures/Data-Structures.html | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'Documentation')
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index d583c653a703..bf7f266e8888 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -1104,6 +1104,7 @@ Its fields are as follows:
   1   int dynticks_nesting;
   2   int dynticks_nmi_nesting;
   3   atomic_t dynticks;
+  4   int rcu_sched_qs_mask;
 </pre>
 
 <p>The <tt>-&gt;dynticks_nesting</tt> field counts the
@@ -1117,11 +1118,17 @@ NMIs are counted by the <tt>-&gt;dynticks_nmi_nesting</tt>
 field, except that NMIs that interrupt non-dyntick-idle execution
 are not counted.
 
-</p><p>Finally, the <tt>-&gt;dynticks</tt> field counts the corresponding
+</p><p>The <tt>-&gt;dynticks</tt> field counts the corresponding
 CPU's transitions to and from dyntick-idle mode, so that this counter
 has an even value when the CPU is in dyntick-idle mode and an odd
 value otherwise.
 
+</p><p>Finally, the  <tt>-&gt;rcu_sched_qs_mask</tt> field is used
+to record the fact that the RCU core code would really like to
+see a quiescent state from the corresponding CPU.
+This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
+code, which provide a momentary idle sojourn in response.
+
 <table>
 <tr><th>&nbsp;</th></tr>
 <tr><th align="left">Quick Quiz:</th></tr>
-- 
cgit v1.2.3


From 9577df9a3122af08fff84b8a1a60dccf524a3891 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 26 Jan 2017 16:18:07 -0800
Subject: rcu: Pull rcu_qs_ctr into rcu_dynticks structure

The rcu_qs_ctr variable is yet another isolated per-CPU variable,
so this commit pulls it into the pre-existing rcu_dynticks per-CPU
structure.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 .../RCU/Design/Data-Structures/Data-Structures.html       | 12 ++++++++++--
 kernel/rcu/tree.c                                         | 15 ++++++---------
 kernel/rcu/tree.h                                         |  3 ++-
 kernel/rcu/tree_trace.c                                   |  4 +---
 4 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index bf7f266e8888..3d0311657533 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -1105,6 +1105,7 @@ Its fields are as follows:
   2   int dynticks_nmi_nesting;
   3   atomic_t dynticks;
   4   int rcu_sched_qs_mask;
+  5   unsigned long rcu_qs_ctr;
 </pre>
 
 <p>The <tt>-&gt;dynticks_nesting</tt> field counts the
@@ -1123,12 +1124,19 @@ CPU's transitions to and from dyntick-idle mode, so that this counter
 has an even value when the CPU is in dyntick-idle mode and an odd
 value otherwise.
 
-</p><p>Finally, the  <tt>-&gt;rcu_sched_qs_mask</tt> field is used
+</p><p>The <tt>-&gt;rcu_sched_qs_mask</tt> field is used
 to record the fact that the RCU core code would really like to
-see a quiescent state from the corresponding CPU.
+see a quiescent state from the corresponding CPU, so much so that
+it is willing to call for heavy-weight dyntick-counter operations.
 This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
 code, which provide a momentary idle sojourn in response.
 
+</p><p>Finally the <tt>-&gt;rcu_qs_ctr</tt> field is used to record
+quiescent states from <tt>cond_resched()</tt>.
+Because <tt>cond_resched()</tt> can execute quite frequently, this
+must be quite lightweight, as in a non-atomic increment of this
+per-CPU field.
+
 <table>
 <tr><th>&nbsp;</th></tr>
 <tr><th align="left">Quick Quiz:</th></tr>
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3a0703035874..82a86a67c92a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -436,9 +436,6 @@ bool rcu_eqs_special_set(int cpu)
 	return true;
 }
 
-DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
-EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
-
 /*
  * Let the RCU core know that this CPU has gone through the scheduler,
  * which is a quiescent state.  This is called when the need for a
@@ -542,7 +539,7 @@ void rcu_all_qs(void)
 		rcu_sched_qs();
 		preempt_enable();
 	}
-	this_cpu_inc(rcu_qs_ctr);
+	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
 EXPORT_SYMBOL_GPL(rcu_all_qs);
@@ -1315,7 +1312,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 */
 	rnp = rdp->mynode;
 	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
-	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_qs_ctr, rdp->cpu) &&
+	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
 	    READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
 		return 1;
@@ -2024,7 +2021,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
 		need_gp = !!(rnp->qsmask & rdp->grpmask);
 		rdp->cpu_no_qs.b.norm = need_gp;
-		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
+		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
 		rdp->core_needs_qs = need_gp;
 		zero_cpu_stall_ticks(rdp);
 		WRITE_ONCE(rdp->gpwrap, false);
@@ -2622,7 +2619,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 		 * within the current grace period.
 		 */
 		rdp->cpu_no_qs.b.norm = true;	/* need qs for new gp. */
-		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
+		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
@@ -3620,7 +3617,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
 	if (rcu_scheduler_fully_active &&
 	    rdp->core_needs_qs && rdp->cpu_no_qs.b.norm &&
-	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
+	    rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_dynticks.rcu_qs_ctr)) {
 		rdp->n_rp_core_needs_qs++;
 	} else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) {
 		rdp->n_rp_report_qs++;
@@ -3933,7 +3930,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
 	rdp->completed = rnp->completed;
 	rdp->cpu_no_qs.b.norm = true;
-	rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu);
+	rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
 	rdp->core_needs_qs = false;
 	trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e298281984dc..76e4467bc765 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -113,7 +113,8 @@ struct rcu_dynticks {
 				    /* Process level is worth LLONG_MAX/2. */
 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
-	int rcu_sched_qs_mask;      /* GP old, need quiescent state. */
+	int rcu_sched_qs_mask;      /* GP old, need heavy quiescent state. */
+	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	long long dynticks_idle_nesting;
 				    /* irq/process nesting level from idle. */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 8751a748499a..65b43be38e68 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -45,8 +45,6 @@
 #define RCU_TREE_NONCORE
 #include "tree.h"
 
-DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
-
 static int r_open(struct inode *inode, struct file *file,
 					const struct seq_operations *op)
 {
@@ -121,7 +119,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
 		   ulong2long(rdp->completed), ulong2long(rdp->gpnum),
 		   rdp->cpu_no_qs.b.norm,
-		   rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu),
+		   rdp->rcu_qs_ctr_snap == per_cpu(rdp->dynticks->rcu_qs_ctr, rdp->cpu),
 		   rdp->core_needs_qs);
 	seq_printf(m, " dt=%d/%llx/%d df=%lu",
 		   rcu_dynticks_snap(rdp->dynticks),
-- 
cgit v1.2.3


From 0f9be8cabbc343218dd2807af7308656be113045 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 27 Jan 2017 13:17:02 -0800
Subject: rcu: Eliminate flavor scan in rcu_momentary_dyntick_idle()

The rcu_momentary_dyntick_idle() function scans the RCU flavors, checking
that one of them still needs a quiescent state before doing an expensive
atomic operation on the ->dynticks counter.  However, this check reduces
overhead only after a rare race condition, and increases complexity.  This
commit therefore removes the scan and the mechanism enabling the scan.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 .../Design/Data-Structures/Data-Structures.html    |  4 +-
 kernel/rcu/tree.c                                  | 62 +++++-----------------
 kernel/rcu/tree.h                                  |  3 +-
 3 files changed, 15 insertions(+), 54 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index 3d0311657533..e4bf20a68fa3 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -1104,7 +1104,7 @@ Its fields are as follows:
   1   int dynticks_nesting;
   2   int dynticks_nmi_nesting;
   3   atomic_t dynticks;
-  4   int rcu_sched_qs_mask;
+  4   bool rcu_need_heavy_qs;
   5   unsigned long rcu_qs_ctr;
 </pre>
 
@@ -1124,7 +1124,7 @@ CPU's transitions to and from dyntick-idle mode, so that this counter
 has an even value when the CPU is in dyntick-idle mode and an odd
 value otherwise.
 
-</p><p>The <tt>-&gt;rcu_sched_qs_mask</tt> field is used
+</p><p>The <tt>-&gt;rcu_need_heavy_qs</tt> field is used
 to record the fact that the RCU core code would really like to
 see a quiescent state from the corresponding CPU, so much so that
 it is willing to call for heavy-weight dyntick-counter operations.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 82a86a67c92a..c2cbc78a0625 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -443,44 +443,14 @@ bool rcu_eqs_special_set(int cpu)
  * memory barriers to let the RCU core know about it, regardless of what
  * this CPU might (or might not) do in the near future.
  *
- * We inform the RCU core by emulating a zero-duration dyntick-idle
- * period, which we in turn do by incrementing the ->dynticks counter
- * by two.
+ * We inform the RCU core by emulating a zero-duration dyntick-idle period.
  *
  * The caller must have disabled interrupts.
  */
 static void rcu_momentary_dyntick_idle(void)
 {
-	struct rcu_data *rdp;
-	int resched_mask;
-	struct rcu_state *rsp;
-
-	/*
-	 * Yes, we can lose flag-setting operations.  This is OK, because
-	 * the flag will be set again after some delay.
-	 */
-	resched_mask = raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask);
-	raw_cpu_write(rcu_dynticks.rcu_sched_qs_mask, 0);
-
-	/* Find the flavor that needs a quiescent state. */
-	for_each_rcu_flavor(rsp) {
-		rdp = raw_cpu_ptr(rsp->rda);
-		if (!(resched_mask & rsp->flavor_mask))
-			continue;
-		smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */
-		if (READ_ONCE(rdp->mynode->completed) !=
-		    READ_ONCE(rdp->cond_resched_completed))
-			continue;
-
-		/*
-		 * Pretend to be momentarily idle for the quiescent state.
-		 * This allows the grace-period kthread to record the
-		 * quiescent state, with no need for this CPU to do anything
-		 * further.
-		 */
-		rcu_dynticks_momentary_idle();
-		break;
-	}
+	raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false);
+	rcu_dynticks_momentary_idle();
 }
 
 /*
@@ -494,7 +464,7 @@ void rcu_note_context_switch(void)
 	trace_rcu_utilization(TPS("Start context switch"));
 	rcu_sched_qs();
 	rcu_preempt_note_context_switch();
-	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask)))
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
 		rcu_momentary_dyntick_idle();
 	trace_rcu_utilization(TPS("End context switch"));
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
@@ -519,7 +489,7 @@ void rcu_all_qs(void)
 	unsigned long flags;
 
 	barrier(); /* Avoid RCU read-side critical sections leaking down. */
-	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask))) {
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) {
 		local_irq_save(flags);
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);
@@ -1275,7 +1245,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 				    bool *isidle, unsigned long *maxj)
 {
 	unsigned long jtsq;
-	int *rcrmp;
+	bool *rnhqp;
 	unsigned long rjtsc;
 	struct rcu_node *rnp;
 
@@ -1332,7 +1302,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 * in-kernel CPU-bound tasks cannot advance grace periods.
 	 * So if the grace period is old enough, make the CPU pay attention.
 	 * Note that the unsynchronized assignments to the per-CPU
-	 * rcu_sched_qs_mask variable are safe.  Yes, setting of
+	 * rcu_need_heavy_qs variable are safe.  Yes, setting of
 	 * bits can be lost, but they will be set again on the next
 	 * force-quiescent-state pass.  So lost bit sets do not result
 	 * in incorrect behavior, merely in a grace period lasting
@@ -1346,16 +1316,11 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 * is set too high, we override with half of the RCU CPU stall
 	 * warning delay.
 	 */
-	rcrmp = &per_cpu(rcu_dynticks.rcu_sched_qs_mask, rdp->cpu);
-	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
-	    time_after(jiffies, rdp->rsp->jiffies_resched)) {
-		if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
-			WRITE_ONCE(rdp->cond_resched_completed,
-				   READ_ONCE(rdp->mynode->completed));
-			smp_mb(); /* ->cond_resched_completed before *rcrmp. */
-			WRITE_ONCE(*rcrmp,
-				   READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask);
-		}
+	rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);
+	if (!READ_ONCE(*rnhqp) &&
+	    (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
+	     time_after(jiffies, rdp->rsp->jiffies_resched))) {
+		WRITE_ONCE(*rnhqp, true);
 		rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
 	}
 
@@ -4169,7 +4134,6 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 	static const char * const fqs[] = RCU_FQS_NAME_INIT;
 	static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 	static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
-	static u8 fl_mask = 0x1;
 
 	int levelcnt[RCU_NUM_LVLS];		/* # nodes in each level. */
 	int levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
@@ -4191,8 +4155,6 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 	for (i = 1; i < rcu_num_lvls; i++)
 		rsp->level[i] = rsp->level[i - 1] + levelcnt[i - 1];
 	rcu_init_levelspread(levelspread, levelcnt);
-	rsp->flavor_mask = fl_mask;
-	fl_mask <<= 1;
 
 	/* Initialize the elements themselves, starting from the leaves. */
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 76e4467bc765..b212cd0f22c7 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -113,7 +113,7 @@ struct rcu_dynticks {
 				    /* Process level is worth LLONG_MAX/2. */
 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
-	int rcu_sched_qs_mask;      /* GP old, need heavy quiescent state. */
+	bool rcu_need_heavy_qs;      /* GP old, need heavy quiescent state. */
 	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	long long dynticks_idle_nesting;
@@ -484,7 +484,6 @@ struct rcu_state {
 	struct rcu_node *level[RCU_NUM_LVLS + 1];
 						/* Hierarchy levels (+1 to */
 						/*  shut bogus gcc warning) */
-	u8 flavor_mask;				/* bit in flavor mask. */
 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
 	call_rcu_func_t call;			/* call_rcu() flavor. */
 	int ncpus;				/* # CPUs seen so far. */
-- 
cgit v1.2.3


From 9226b10d78ffe7895549045fe388dc5e73b87eac Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 27 Jan 2017 14:17:50 -0800
Subject: rcu: Place guard on rcu_all_qs() and rcu_note_context_switch()
 actions

The rcu_all_qs() and rcu_note_context_switch() do a series of checks,
taking various actions to supply RCU with quiescent states, depending
on the outcomes of the various checks.  This is a bit much for scheduling
fastpaths, so this commit creates a separate ->rcu_urgent_qs field in
the rcu_dynticks structure that acts as a global guard for these checks.
Thus, in the common case, rcu_all_qs() and rcu_note_context_switch()
check the ->rcu_urgent_qs field, find it false, and simply return.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 .../Design/Data-Structures/Data-Structures.html    | 11 ++++++-
 kernel/rcu/tree.c                                  | 38 ++++++++++++++--------
 kernel/rcu/tree.h                                  |  3 +-
 kernel/rcu/tree_exp.h                              |  2 ++
 kernel/rcu/tree_plugin.h                           |  8 +++--
 5 files changed, 44 insertions(+), 18 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index e4bf20a68fa3..4dec89097559 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -1106,6 +1106,7 @@ Its fields are as follows:
   3   atomic_t dynticks;
   4   bool rcu_need_heavy_qs;
   5   unsigned long rcu_qs_ctr;
+  6   bool rcu_urgent_qs;
 </pre>
 
 <p>The <tt>-&gt;dynticks_nesting</tt> field counts the
@@ -1131,12 +1132,20 @@ it is willing to call for heavy-weight dyntick-counter operations.
 This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
 code, which provide a momentary idle sojourn in response.
 
-</p><p>Finally the <tt>-&gt;rcu_qs_ctr</tt> field is used to record
+</p><p>The <tt>-&gt;rcu_qs_ctr</tt> field is used to record
 quiescent states from <tt>cond_resched()</tt>.
 Because <tt>cond_resched()</tt> can execute quite frequently, this
 must be quite lightweight, as in a non-atomic increment of this
 per-CPU field.
 
+</p><p>Finally, the <tt>-&gt;rcu_urgent_qs</tt> field is used to record
+the fact that the RCU core code would really like to see a quiescent
+state from the corresponding CPU, with the various other fields indicating
+just how badly RCU wants this quiescent state.
+This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
+code, which, if nothing else, non-atomically increment <tt>-&gt;rcu_qs_ctr</tt>
+in response.
+
 <table>
 <tr><th>&nbsp;</th></tr>
 <tr><th align="left">Quick Quiz:</th></tr>
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c2cbc78a0625..530ab6cf7a0b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -464,8 +464,14 @@ void rcu_note_context_switch(void)
 	trace_rcu_utilization(TPS("Start context switch"));
 	rcu_sched_qs();
 	rcu_preempt_note_context_switch();
+	/* Load rcu_urgent_qs before other flags. */
+	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
+		goto out;
+	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
 	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
 		rcu_momentary_dyntick_idle();
+	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
+out:
 	trace_rcu_utilization(TPS("End context switch"));
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
@@ -488,29 +494,26 @@ void rcu_all_qs(void)
 {
 	unsigned long flags;
 
+	if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs))
+		return;
+	preempt_disable();
+	/* Load rcu_urgent_qs before other flags. */
+	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) {
+		preempt_enable();
+		return;
+	}
+	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
 	barrier(); /* Avoid RCU read-side critical sections leaking down. */
 	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) {
 		local_irq_save(flags);
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);
 	}
-	if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) {
-		/*
-		 * Yes, we just checked a per-CPU variable with preemption
-		 * enabled, so we might be migrated to some other CPU at
-		 * this point.  That is OK because in that case, the
-		 * migration will supply the needed quiescent state.
-		 * We might end up needlessly disabling preemption and
-		 * invoking rcu_sched_qs() on the destination CPU, but
-		 * the probability and cost are both quite low, so this
-		 * should not be a problem in practice.
-		 */
-		preempt_disable();
+	if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)))
 		rcu_sched_qs();
-		preempt_enable();
-	}
 	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(rcu_all_qs);
 
@@ -1246,6 +1249,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 {
 	unsigned long jtsq;
 	bool *rnhqp;
+	bool *ruqp;
 	unsigned long rjtsc;
 	struct rcu_node *rnp;
 
@@ -1281,11 +1285,15 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 * might not be the case for nohz_full CPUs looping in the kernel.
 	 */
 	rnp = rdp->mynode;
+	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
 	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
 	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
 	    READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
 		return 1;
+	} else {
+		/* Load rcu_qs_ctr before store to rcu_urgent_qs. */
+		smp_store_release(ruqp, true);
 	}
 
 	/* Check for the CPU being offline. */
@@ -1321,6 +1329,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	    (time_after(jiffies, rdp->rsp->gp_start + jtsq) ||
 	     time_after(jiffies, rdp->rsp->jiffies_resched))) {
 		WRITE_ONCE(*rnhqp, true);
+		/* Store rcu_need_heavy_qs before rcu_urgent_qs. */
+		smp_store_release(ruqp, true);
 		rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
 	}
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b212cd0f22c7..d2f276fc2edc 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -113,8 +113,9 @@ struct rcu_dynticks {
 				    /* Process level is worth LLONG_MAX/2. */
 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
-	bool rcu_need_heavy_qs;      /* GP old, need heavy quiescent state. */
+	bool rcu_need_heavy_qs;     /* GP old, need heavy quiescent state. */
 	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */
+	bool rcu_urgent_qs;	    /* GP old need light quiescent state. */
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	long long dynticks_idle_nesting;
 				    /* irq/process nesting level from idle. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index a7b639ccd46e..a1f52bbe9db6 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -331,6 +331,8 @@ static void sync_sched_exp_handler(void *data)
 		return;
 	}
 	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
+	/* Store .exp before .rcu_urgent_qs. */
+	smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
 	resched_cpu(smp_processor_id());
 }
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0a62a8f1caac..621296a6694b 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1860,7 +1860,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeEmpty"));
 		} else {
-			rdp->nocb_defer_wakeup = RCU_NOGP_WAKE;
+			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE);
+			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
+			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeEmptyIsDeferred"));
 		}
@@ -1872,7 +1874,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeOvf"));
 		} else {
-			rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE;
+			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_FORCE);
+			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
+			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeOvfIsDeferred"));
 		}
-- 
cgit v1.2.3


From 5f0d5a3ae7cff0d7fa943c199c3a2e44f23e1fac Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 18 Jan 2017 02:53:44 -0800
Subject: mm: Rename SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU

A group of Linux kernel hackers reported chasing a bug that resulted
from their assumption that SLAB_DESTROY_BY_RCU provided an existence
guarantee, that is, that no block from such a slab would be reallocated
during an RCU read-side critical section.  Of course, that is not the
case.  Instead, SLAB_DESTROY_BY_RCU only prevents freeing of an entire
slab of blocks.

However, there is a phrase for this, namely "type safety".  This commit
therefore renames SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU in order
to avoid future instances of this sort of confusion.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: <linux-mm@kvack.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
[ paulmck: Add comments mentioning the old name, as requested by Eric
  Dumazet, in order to help people familiar with the old name find
  the new one. ]
Acked-by: David Rientjes <rientjes@google.com>
---
 Documentation/RCU/00-INDEX                      |  2 +-
 Documentation/RCU/rculist_nulls.txt             |  6 +++---
 Documentation/RCU/whatisRCU.txt                 |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c                 |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.h         |  2 +-
 drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c |  2 +-
 fs/jbd2/journal.c                               |  2 +-
 fs/signalfd.c                                   |  2 +-
 include/linux/dma-fence.h                       |  4 ++--
 include/linux/slab.h                            |  6 ++++--
 include/net/sock.h                              |  2 +-
 kernel/fork.c                                   |  4 ++--
 kernel/signal.c                                 |  2 +-
 mm/kasan/kasan.c                                |  6 +++---
 mm/kmemcheck.c                                  |  2 +-
 mm/rmap.c                                       |  4 ++--
 mm/slab.c                                       |  6 +++---
 mm/slab.h                                       |  4 ++--
 mm/slab_common.c                                |  6 +++---
 mm/slob.c                                       |  6 +++---
 mm/slub.c                                       | 12 ++++++------
 net/dccp/ipv4.c                                 |  2 +-
 net/dccp/ipv6.c                                 |  2 +-
 net/ipv4/tcp_ipv4.c                             |  2 +-
 net/ipv6/tcp_ipv6.c                             |  2 +-
 net/llc/af_llc.c                                |  2 +-
 net/llc/llc_conn.c                              |  4 ++--
 net/llc/llc_sap.c                               |  2 +-
 net/netfilter/nf_conntrack_core.c               |  8 ++++----
 net/smc/af_smc.c                                |  2 +-
 30 files changed, 57 insertions(+), 54 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index f773a264ae02..1672573b037a 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -17,7 +17,7 @@ rcu_dereference.txt
 rcubarrier.txt
 	- RCU and Unloadable Modules
 rculist_nulls.txt
-	- RCU list primitives for use with SLAB_DESTROY_BY_RCU
+	- RCU list primitives for use with SLAB_TYPESAFE_BY_RCU
 rcuref.txt
 	- Reference-count design for elements of lists/arrays protected by RCU
 rcu.txt
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
index 18f9651ff23d..8151f0195f76 100644
--- a/Documentation/RCU/rculist_nulls.txt
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -1,5 +1,5 @@
 Using hlist_nulls to protect read-mostly linked lists and
-objects using SLAB_DESTROY_BY_RCU allocations.
+objects using SLAB_TYPESAFE_BY_RCU allocations.
 
 Please read the basics in Documentation/RCU/listRCU.txt
 
@@ -7,7 +7,7 @@ Using special makers (called 'nulls') is a convenient way
 to solve following problem :
 
 A typical RCU linked list managing objects which are
-allocated with SLAB_DESTROY_BY_RCU kmem_cache can
+allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can
 use following algos :
 
 1) Lookup algo
@@ -96,7 +96,7 @@ unlock_chain(); // typically a spin_unlock()
 3) Remove algo
 --------------
 Nothing special here, we can use a standard RCU hlist deletion.
-But thanks to SLAB_DESTROY_BY_RCU, beware a deleted object can be reused
+But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
 very very fast (before the end of RCU grace period)
 
 if (put_last_reference_on(obj) {
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 5cbd8b2395b8..91c912e86915 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -925,7 +925,8 @@ d.	Do you need RCU grace periods to complete even in the face
 
 e.	Is your workload too update-intensive for normal use of
 	RCU, but inappropriate for other synchronization mechanisms?
-	If so, consider SLAB_DESTROY_BY_RCU.  But please be careful!
+	If so, consider SLAB_TYPESAFE_BY_RCU (which was originally
+	named SLAB_DESTROY_BY_RCU).  But please be careful!
 
 f.	Do you need read-side critical sections that are respected
 	even though they are in the middle of the idle loop, during
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6908123162d1..3b668895ac24 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4552,7 +4552,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
 					SLAB_HWCACHE_ALIGN |
 					SLAB_RECLAIM_ACCOUNT |
-					SLAB_DESTROY_BY_RCU);
+					SLAB_TYPESAFE_BY_RCU);
 	if (!dev_priv->requests)
 		goto err_vmas;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index ea511f06efaf..9ee2750e1dde 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -493,7 +493,7 @@ static inline struct drm_i915_gem_request *
 __i915_gem_active_get_rcu(const struct i915_gem_active *active)
 {
 	/* Performing a lockless retrieval of the active request is super
-	 * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing
+	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
 	 * slab of request objects will not be freed whilst we hold the
 	 * RCU read lock. It does not guarantee that the request itself
 	 * will not be freed and then *reused*. Viz,
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
index 12647af5a336..e7fb47e84a93 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
@@ -1071,7 +1071,7 @@ int ldlm_init(void)
 	ldlm_lock_slab = kmem_cache_create("ldlm_locks",
 					   sizeof(struct ldlm_lock), 0,
 					   SLAB_HWCACHE_ALIGN |
-					   SLAB_DESTROY_BY_RCU, NULL);
+					   SLAB_TYPESAFE_BY_RCU, NULL);
 	if (!ldlm_lock_slab) {
 		kmem_cache_destroy(ldlm_resource_slab);
 		return -ENOMEM;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a1a359bfcc9c..7f8f962454e5 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2340,7 +2340,7 @@ static int jbd2_journal_init_journal_head_cache(void)
 	jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
 				sizeof(struct journal_head),
 				0,		/* offset */
-				SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
+				SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU,
 				NULL);		/* ctor */
 	retval = 0;
 	if (!jbd2_journal_head_cache) {
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 270221fcef42..7e3d71109f51 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -38,7 +38,7 @@ void signalfd_cleanup(struct sighand_struct *sighand)
 	/*
 	 * The lockless check can race with remove_wait_queue() in progress,
 	 * but in this case its caller should run under rcu_read_lock() and
-	 * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
+	 * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
 	 */
 	if (likely(!waitqueue_active(wqh)))
 		return;
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 6048fa404e57..a5195a7d6f77 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -229,7 +229,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence)
  *
  * Function returns NULL if no refcount could be obtained, or the fence.
  * This function handles acquiring a reference to a fence that may be
- * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU),
+ * reallocated within the RCU grace period (such as with SLAB_TYPESAFE_BY_RCU),
  * so long as the caller is using RCU on the pointer to the fence.
  *
  * An alternative mechanism is to employ a seqlock to protect a bunch of
@@ -257,7 +257,7 @@ dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep)
 		 * have successfully acquire a reference to it. If it no
 		 * longer matches, we are holding a reference to some other
 		 * reallocated pointer. This is possible if the allocator
-		 * is using a freelist like SLAB_DESTROY_BY_RCU where the
+		 * is using a freelist like SLAB_TYPESAFE_BY_RCU where the
 		 * fence remains valid for the RCU grace period, but it
 		 * may be reallocated. When using such allocators, we are
 		 * responsible for ensuring the reference we get is to
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 3c37a8c51921..04a7f7993e67 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -28,7 +28,7 @@
 #define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */
 #define SLAB_PANIC		0x00040000UL	/* Panic if kmem_cache_create() fails */
 /*
- * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS!
+ * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
  *
  * This delays freeing the SLAB page by a grace period, it does _NOT_
  * delay object freeing. This means that if you do kmem_cache_free()
@@ -61,8 +61,10 @@
  *
  * rcu_read_lock before reading the address, then rcu_read_unlock after
  * taking the spinlock within the structure expected at that address.
+ *
+ * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
  */
-#define SLAB_DESTROY_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
+#define SLAB_TYPESAFE_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
 #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
 #define SLAB_TRACE		0x00200000UL	/* Trace allocations and frees */
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 5e5997654db6..59cdccaa30e7 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -993,7 +993,7 @@ struct smc_hashinfo;
 struct module;
 
 /*
- * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
+ * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes
  * un-modified. Special care is taken when initializing object to zero.
  */
 static inline void sk_prot_clear_nulls(struct sock *sk, int size)
diff --git a/kernel/fork.c b/kernel/fork.c
index 6c463c80e93d..9330ce24f1bb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1313,7 +1313,7 @@ void __cleanup_sighand(struct sighand_struct *sighand)
 	if (atomic_dec_and_test(&sighand->count)) {
 		signalfd_cleanup(sighand);
 		/*
-		 * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it
+		 * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it
 		 * without an RCU grace period, see __lock_task_sighand().
 		 */
 		kmem_cache_free(sighand_cachep, sighand);
@@ -2144,7 +2144,7 @@ void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
 			SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_struct), 0,
diff --git a/kernel/signal.c b/kernel/signal.c
index 7e59ebc2c25e..6df5f72158e4 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1237,7 +1237,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 		}
 		/*
 		 * This sighand can be already freed and even reused, but
-		 * we rely on SLAB_DESTROY_BY_RCU and sighand_ctor() which
+		 * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which
 		 * initializes ->siglock: this slab can't go away, it has
 		 * the same object type, ->siglock can't be reinitialized.
 		 *
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 98b27195e38b..4b20061102f6 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -413,7 +413,7 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size,
 	*size += sizeof(struct kasan_alloc_meta);
 
 	/* Add free meta. */
-	if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor ||
+	if (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor ||
 	    cache->object_size < sizeof(struct kasan_free_meta)) {
 		cache->kasan_info.free_meta_offset = *size;
 		*size += sizeof(struct kasan_free_meta);
@@ -561,7 +561,7 @@ static void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
 	unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
 
 	/* RCU slabs could be legally used after free within the RCU period */
-	if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+	if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
 		return;
 
 	kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
@@ -572,7 +572,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object)
 	s8 shadow_byte;
 
 	/* RCU slabs could be legally used after free within the RCU period */
-	if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+	if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
 		return false;
 
 	shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object));
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
index 5bf191756a4a..2d5959c5f7c5 100644
--- a/mm/kmemcheck.c
+++ b/mm/kmemcheck.c
@@ -95,7 +95,7 @@ void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
 void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size)
 {
 	/* TODO: RCU freeing is unsupported for now; hide false positives. */
-	if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU))
+	if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU))
 		kmemcheck_mark_freed(object, size);
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 49ed681ccc7b..8ffd59df8a3f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -430,7 +430,7 @@ static void anon_vma_ctor(void *data)
 void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
-			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
+			0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
 			anon_vma_ctor);
 	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
 			SLAB_PANIC|SLAB_ACCOUNT);
@@ -481,7 +481,7 @@ struct anon_vma *page_get_anon_vma(struct page *page)
 	 * If this page is still mapped, then its anon_vma cannot have been
 	 * freed.  But if it has been unmapped, we have no security against the
 	 * anon_vma structure being freed and reused (for another anon_vma:
-	 * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
+	 * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
 	 * above cannot corrupt).
 	 */
 	if (!page_mapped(page)) {
diff --git a/mm/slab.c b/mm/slab.c
index 807d86c76908..93c827864862 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1728,7 +1728,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
 
 	freelist = page->freelist;
 	slab_destroy_debugcheck(cachep, page);
-	if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
+	if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
 		call_rcu(&page->rcu_head, kmem_rcu_free);
 	else
 		kmem_freepages(cachep, page);
@@ -1924,7 +1924,7 @@ static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
 
 	cachep->num = 0;
 
-	if (cachep->ctor || flags & SLAB_DESTROY_BY_RCU)
+	if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
 		return false;
 
 	left = calculate_slab_order(cachep, size,
@@ -2030,7 +2030,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
 						2 * sizeof(unsigned long long)))
 		flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
-	if (!(flags & SLAB_DESTROY_BY_RCU))
+	if (!(flags & SLAB_TYPESAFE_BY_RCU))
 		flags |= SLAB_POISON;
 #endif
 #endif
diff --git a/mm/slab.h b/mm/slab.h
index 65e7c3fcac72..9cfcf099709c 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -126,7 +126,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
 
 /* Legal flag mask for kmem_cache_create(), for various configurations */
 #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS )
+			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
 
 #if defined(CONFIG_DEBUG_SLAB)
 #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
@@ -415,7 +415,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
 	 * back there or track user information then we can
 	 * only use the space before that information.
 	 */
-	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
+	if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
 		return s->inuse;
 	/*
 	 * Else we can use all the padding etc for the allocation
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 09d0e849b07f..01a0fe2eb332 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -39,7 +39,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
  * Set of flags that will prevent slab merging
  */
 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
+		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
 		SLAB_FAILSLAB | SLAB_KASAN)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
@@ -500,7 +500,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
 	struct kmem_cache *s, *s2;
 
 	/*
-	 * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the
+	 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
 	 * @slab_caches_to_rcu_destroy list.  The slab pages are freed
 	 * through RCU and and the associated kmem_cache are dereferenced
 	 * while freeing the pages, so the kmem_caches should be freed only
@@ -537,7 +537,7 @@ static int shutdown_cache(struct kmem_cache *s)
 	memcg_unlink_cache(s);
 	list_del(&s->list);
 
-	if (s->flags & SLAB_DESTROY_BY_RCU) {
+	if (s->flags & SLAB_TYPESAFE_BY_RCU) {
 		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
 		schedule_work(&slab_caches_to_rcu_destroy_work);
 	} else {
diff --git a/mm/slob.c b/mm/slob.c
index eac04d4357ec..1bae78d71096 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -126,7 +126,7 @@ static inline void clear_slob_page_free(struct page *sp)
 
 /*
  * struct slob_rcu is inserted at the tail of allocated slob blocks, which
- * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
+ * were created with a SLAB_TYPESAFE_BY_RCU slab. slob_rcu is used to free
  * the block using call_rcu.
  */
 struct slob_rcu {
@@ -524,7 +524,7 @@ EXPORT_SYMBOL(ksize);
 
 int __kmem_cache_create(struct kmem_cache *c, unsigned long flags)
 {
-	if (flags & SLAB_DESTROY_BY_RCU) {
+	if (flags & SLAB_TYPESAFE_BY_RCU) {
 		/* leave room for rcu footer at the end of object */
 		c->size += sizeof(struct slob_rcu);
 	}
@@ -598,7 +598,7 @@ static void kmem_rcu_free(struct rcu_head *head)
 void kmem_cache_free(struct kmem_cache *c, void *b)
 {
 	kmemleak_free_recursive(b, c->flags);
-	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
+	if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) {
 		struct slob_rcu *slob_rcu;
 		slob_rcu = b + (c->size - sizeof(struct slob_rcu));
 		slob_rcu->size = c->size;
diff --git a/mm/slub.c b/mm/slub.c
index 7f4bc7027ed5..57e5156f02be 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1687,7 +1687,7 @@ static void rcu_free_slab(struct rcu_head *h)
 
 static void free_slab(struct kmem_cache *s, struct page *page)
 {
-	if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
+	if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
 		struct rcu_head *head;
 
 		if (need_reserve_slab_rcu) {
@@ -2963,7 +2963,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
 	 * slab_free_freelist_hook() could have put the items into quarantine.
 	 * If so, no need to free them.
 	 */
-	if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU))
+	if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU))
 		return;
 	do_slab_free(s, page, head, tail, cnt, addr);
 }
@@ -3433,7 +3433,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	 * the slab may touch the object after free or before allocation
 	 * then we should never poison the object itself.
 	 */
-	if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
+	if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
 			!s->ctor)
 		s->flags |= __OBJECT_POISON;
 	else
@@ -3455,7 +3455,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	 */
 	s->inuse = size;
 
-	if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
+	if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
 		s->ctor)) {
 		/*
 		 * Relocate free pointer after the object if it is not
@@ -3537,7 +3537,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
 	s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
 	s->reserved = 0;
 
-	if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
+	if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU))
 		s->reserved = sizeof(struct rcu_head);
 
 	if (!calculate_sizes(s, -1))
@@ -5042,7 +5042,7 @@ SLAB_ATTR_RO(cache_dma);
 
 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
 {
-	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
+	return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
 }
 SLAB_ATTR_RO(destroy_by_rcu);
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 409d0cfd3447..90210a0e3888 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -950,7 +950,7 @@ static struct proto dccp_v4_prot = {
 	.orphan_count		= &dccp_orphan_count,
 	.max_header		= MAX_DCCP_HEADER,
 	.obj_size		= sizeof(struct dccp_sock),
-	.slab_flags		= SLAB_DESTROY_BY_RCU,
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
 	.rsk_prot		= &dccp_request_sock_ops,
 	.twsk_prot		= &dccp_timewait_sock_ops,
 	.h.hashinfo		= &dccp_hashinfo,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 233b57367758..b4019a5e4551 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1012,7 +1012,7 @@ static struct proto dccp_v6_prot = {
 	.orphan_count	   = &dccp_orphan_count,
 	.max_header	   = MAX_DCCP_HEADER,
 	.obj_size	   = sizeof(struct dccp6_sock),
-	.slab_flags	   = SLAB_DESTROY_BY_RCU,
+	.slab_flags	   = SLAB_TYPESAFE_BY_RCU,
 	.rsk_prot	   = &dccp6_request_sock_ops,
 	.twsk_prot	   = &dccp6_timewait_sock_ops,
 	.h.hashinfo	   = &dccp_hashinfo,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9a89b8deafae..82c89abeb989 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2398,7 +2398,7 @@ struct proto tcp_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp_sock),
-	.slab_flags		= SLAB_DESTROY_BY_RCU,
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
 	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 60a5295a7de6..bdbc4327ebee 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1919,7 +1919,7 @@ struct proto tcpv6_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp6_sock),
-	.slab_flags		= SLAB_DESTROY_BY_RCU,
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
 	.twsk_prot		= &tcp6_timewait_sock_ops,
 	.rsk_prot		= &tcp6_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 06186d608a27..d096ca563054 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -142,7 +142,7 @@ static struct proto llc_proto = {
 	.name	  = "LLC",
 	.owner	  = THIS_MODULE,
 	.obj_size = sizeof(struct llc_sock),
-	.slab_flags = SLAB_DESTROY_BY_RCU,
+	.slab_flags = SLAB_TYPESAFE_BY_RCU,
 };
 
 /**
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 8bc5a1bd2d45..9b02c13d258b 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -506,7 +506,7 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap,
 again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_estab_match(sap, daddr, laddr, rc)) {
-			/* Extra checks required by SLAB_DESTROY_BY_RCU */
+			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
@@ -565,7 +565,7 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap,
 again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_listener_match(sap, laddr, rc)) {
-			/* Extra checks required by SLAB_DESTROY_BY_RCU */
+			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 5404d0d195cc..63b6ab056370 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -328,7 +328,7 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap,
 again:
 	sk_nulls_for_each_rcu(rc, node, laddr_hb) {
 		if (llc_dgram_match(sap, laddr, rc)) {
-			/* Extra checks required by SLAB_DESTROY_BY_RCU */
+			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
 				goto again;
 			if (unlikely(llc_sk(rc)->sap != sap ||
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 071b97fcbefb..fdcdac7916b2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -914,7 +914,7 @@ static unsigned int early_drop_list(struct net *net,
 			continue;
 
 		/* kill only if still in same netns -- might have moved due to
-		 * SLAB_DESTROY_BY_RCU rules.
+		 * SLAB_TYPESAFE_BY_RCU rules.
 		 *
 		 * We steal the timer reference.  If that fails timer has
 		 * already fired or someone else deleted it. Just drop ref
@@ -1069,7 +1069,7 @@ __nf_conntrack_alloc(struct net *net,
 
 	/*
 	 * Do not use kmem_cache_zalloc(), as this cache uses
-	 * SLAB_DESTROY_BY_RCU.
+	 * SLAB_TYPESAFE_BY_RCU.
 	 */
 	ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
 	if (ct == NULL)
@@ -1114,7 +1114,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 	struct net *net = nf_ct_net(ct);
 
 	/* A freed object has refcnt == 0, that's
-	 * the golden rule for SLAB_DESTROY_BY_RCU
+	 * the golden rule for SLAB_TYPESAFE_BY_RCU
 	 */
 	NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
 
@@ -1878,7 +1878,7 @@ int nf_conntrack_init_start(void)
 	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
 						sizeof(struct nf_conn),
 						NFCT_INFOMASK + 1,
-						SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
+						SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
 	if (!nf_conntrack_cachep)
 		goto err_cachep;
 
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 85837ab90e89..d34bbd6d8f38 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -101,7 +101,7 @@ struct proto smc_proto = {
 	.unhash		= smc_unhash_sk,
 	.obj_size	= sizeof(struct smc_sock),
 	.h.smc_hash	= &smc_v4_hashinfo,
-	.slab_flags	= SLAB_DESTROY_BY_RCU,
+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
 };
 EXPORT_SYMBOL_GPL(smc_proto);
 
-- 
cgit v1.2.3