From abb06b99484a9f5af05c7147c289faf835f68e8e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 26 Jan 2017 13:45:38 -0800 Subject: rcu: Pull rcu_sched_qs_mask into rcu_dynticks structure The rcu_sched_qs_mask variable is yet another isolated per-CPU variable, so this commit pulls it into the pre-existing rcu_dynticks per-CPU structure. Signed-off-by: Paul E. McKenney --- Documentation/RCU/Design/Data-Structures/Data-Structures.html | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html index d583c653a703..bf7f266e8888 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html @@ -1104,6 +1104,7 @@ Its fields are as follows: 1 int dynticks_nesting; 2 int dynticks_nmi_nesting; 3 atomic_t dynticks; + 4 int rcu_sched_qs_mask;

The ->dynticks_nesting field counts the @@ -1117,11 +1118,17 @@ NMIs are counted by the ->dynticks_nmi_nesting field, except that NMIs that interrupt non-dyntick-idle execution are not counted. -

Finally, the ->dynticks field counts the corresponding +

The ->dynticks field counts the corresponding CPU's transitions to and from dyntick-idle mode, so that this counter has an even value when the CPU is in dyntick-idle mode and an odd value otherwise. +

Finally, the ->rcu_sched_qs_mask field is used +to record the fact that the RCU core code would really like to +see a quiescent state from the corresponding CPU. +This flag is checked by RCU's context-switch and cond_resched() +code, which provide a momentary idle sojourn in response. + -- cgit v1.2.3 From 9577df9a3122af08fff84b8a1a60dccf524a3891 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 26 Jan 2017 16:18:07 -0800 Subject: rcu: Pull rcu_qs_ctr into rcu_dynticks structure The rcu_qs_ctr variable is yet another isolated per-CPU variable, so this commit pulls it into the pre-existing rcu_dynticks per-CPU structure. Signed-off-by: Paul E. McKenney --- .../RCU/Design/Data-Structures/Data-Structures.html | 12 ++++++++++-- kernel/rcu/tree.c | 15 ++++++--------- kernel/rcu/tree.h | 3 ++- kernel/rcu/tree_trace.c | 4 +--- 4 files changed, 19 insertions(+), 15 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html index bf7f266e8888..3d0311657533 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html @@ -1105,6 +1105,7 @@ Its fields are as follows: 2 int dynticks_nmi_nesting; 3 atomic_t dynticks; 4 int rcu_sched_qs_mask; + 5 unsigned long rcu_qs_ctr;

The ->dynticks_nesting field counts the @@ -1123,12 +1124,19 @@ CPU's transitions to and from dyntick-idle mode, so that this counter has an even value when the CPU is in dyntick-idle mode and an odd value otherwise. -

Finally, the ->rcu_sched_qs_mask field is used +

The ->rcu_sched_qs_mask field is used to record the fact that the RCU core code would really like to -see a quiescent state from the corresponding CPU. +see a quiescent state from the corresponding CPU, so much so that +it is willing to call for heavy-weight dyntick-counter operations. This flag is checked by RCU's context-switch and cond_resched() code, which provide a momentary idle sojourn in response. +

Finally the ->rcu_qs_ctr field is used to record +quiescent states from cond_resched(). +Because cond_resched() can execute quite frequently, this +must be quite lightweight, as in a non-atomic increment of this +per-CPU field. +

 
Quick Quiz:
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 3a0703035874..82a86a67c92a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -436,9 +436,6 @@ bool rcu_eqs_special_set(int cpu) return true; } -DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr); -EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr); - /* * Let the RCU core know that this CPU has gone through the scheduler, * which is a quiescent state. This is called when the need for a @@ -542,7 +539,7 @@ void rcu_all_qs(void) rcu_sched_qs(); preempt_enable(); } - this_cpu_inc(rcu_qs_ctr); + this_cpu_inc(rcu_dynticks.rcu_qs_ctr); barrier(); /* Avoid RCU read-side critical sections leaking up. */ } EXPORT_SYMBOL_GPL(rcu_all_qs); @@ -1315,7 +1312,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, */ rnp = rdp->mynode; if (time_after(jiffies, rdp->rsp->gp_start + jtsq) && - READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_qs_ctr, rdp->cpu) && + READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) && READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc")); return 1; @@ -2024,7 +2021,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); need_gp = !!(rnp->qsmask & rdp->grpmask); rdp->cpu_no_qs.b.norm = need_gp; - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); + rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr); rdp->core_needs_qs = need_gp; zero_cpu_stall_ticks(rdp); WRITE_ONCE(rdp->gpwrap, false); @@ -2622,7 +2619,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) * within the current grace period. */ rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */ - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); + rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; } @@ -3620,7 +3617,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Is the RCU core waiting for a quiescent state from this CPU? */ if (rcu_scheduler_fully_active && rdp->core_needs_qs && rdp->cpu_no_qs.b.norm && - rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) { + rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_dynticks.rcu_qs_ctr)) { rdp->n_rp_core_needs_qs++; } else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) { rdp->n_rp_report_qs++; @@ -3933,7 +3930,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ rdp->completed = rnp->completed; rdp->cpu_no_qs.b.norm = true; - rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu); + rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu); rdp->core_needs_qs = false; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index e298281984dc..76e4467bc765 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -113,7 +113,8 @@ struct rcu_dynticks { /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ - int rcu_sched_qs_mask; /* GP old, need quiescent state. */ + int rcu_sched_qs_mask; /* GP old, need heavy quiescent state. */ + unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */ #ifdef CONFIG_NO_HZ_FULL_SYSIDLE long long dynticks_idle_nesting; /* irq/process nesting level from idle. */ diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 8751a748499a..65b43be38e68 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -45,8 +45,6 @@ #define RCU_TREE_NONCORE #include "tree.h" -DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr); - static int r_open(struct inode *inode, struct file *file, const struct seq_operations *op) { @@ -121,7 +119,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) cpu_is_offline(rdp->cpu) ? '!' : ' ', ulong2long(rdp->completed), ulong2long(rdp->gpnum), rdp->cpu_no_qs.b.norm, - rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu), + rdp->rcu_qs_ctr_snap == per_cpu(rdp->dynticks->rcu_qs_ctr, rdp->cpu), rdp->core_needs_qs); seq_printf(m, " dt=%d/%llx/%d df=%lu", rcu_dynticks_snap(rdp->dynticks), -- cgit v1.2.3 From 0f9be8cabbc343218dd2807af7308656be113045 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 27 Jan 2017 13:17:02 -0800 Subject: rcu: Eliminate flavor scan in rcu_momentary_dyntick_idle() The rcu_momentary_dyntick_idle() function scans the RCU flavors, checking that one of them still needs a quiescent state before doing an expensive atomic operation on the ->dynticks counter. However, this check reduces overhead only after a rare race condition, and increases complexity. This commit therefore removes the scan and the mechanism enabling the scan. Signed-off-by: Paul E. McKenney --- .../Design/Data-Structures/Data-Structures.html | 4 +- kernel/rcu/tree.c | 62 +++++----------------- kernel/rcu/tree.h | 3 +- 3 files changed, 15 insertions(+), 54 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html index 3d0311657533..e4bf20a68fa3 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html @@ -1104,7 +1104,7 @@ Its fields are as follows: 1 int dynticks_nesting; 2 int dynticks_nmi_nesting; 3 atomic_t dynticks; - 4 int rcu_sched_qs_mask; + 4 bool rcu_need_heavy_qs; 5 unsigned long rcu_qs_ctr; @@ -1124,7 +1124,7 @@ CPU's transitions to and from dyntick-idle mode, so that this counter has an even value when the CPU is in dyntick-idle mode and an odd value otherwise. -

The ->rcu_sched_qs_mask field is used +

The ->rcu_need_heavy_qs field is used to record the fact that the RCU core code would really like to see a quiescent state from the corresponding CPU, so much so that it is willing to call for heavy-weight dyntick-counter operations. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 82a86a67c92a..c2cbc78a0625 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -443,44 +443,14 @@ bool rcu_eqs_special_set(int cpu) * memory barriers to let the RCU core know about it, regardless of what * this CPU might (or might not) do in the near future. * - * We inform the RCU core by emulating a zero-duration dyntick-idle - * period, which we in turn do by incrementing the ->dynticks counter - * by two. + * We inform the RCU core by emulating a zero-duration dyntick-idle period. * * The caller must have disabled interrupts. */ static void rcu_momentary_dyntick_idle(void) { - struct rcu_data *rdp; - int resched_mask; - struct rcu_state *rsp; - - /* - * Yes, we can lose flag-setting operations. This is OK, because - * the flag will be set again after some delay. - */ - resched_mask = raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask); - raw_cpu_write(rcu_dynticks.rcu_sched_qs_mask, 0); - - /* Find the flavor that needs a quiescent state. */ - for_each_rcu_flavor(rsp) { - rdp = raw_cpu_ptr(rsp->rda); - if (!(resched_mask & rsp->flavor_mask)) - continue; - smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */ - if (READ_ONCE(rdp->mynode->completed) != - READ_ONCE(rdp->cond_resched_completed)) - continue; - - /* - * Pretend to be momentarily idle for the quiescent state. - * This allows the grace-period kthread to record the - * quiescent state, with no need for this CPU to do anything - * further. - */ - rcu_dynticks_momentary_idle(); - break; - } + raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false); + rcu_dynticks_momentary_idle(); } /* @@ -494,7 +464,7 @@ void rcu_note_context_switch(void) trace_rcu_utilization(TPS("Start context switch")); rcu_sched_qs(); rcu_preempt_note_context_switch(); - if (unlikely(raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask))) + if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) rcu_momentary_dyntick_idle(); trace_rcu_utilization(TPS("End context switch")); barrier(); /* Avoid RCU read-side critical sections leaking up. */ @@ -519,7 +489,7 @@ void rcu_all_qs(void) unsigned long flags; barrier(); /* Avoid RCU read-side critical sections leaking down. */ - if (unlikely(raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask))) { + if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) { local_irq_save(flags); rcu_momentary_dyntick_idle(); local_irq_restore(flags); @@ -1275,7 +1245,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, bool *isidle, unsigned long *maxj) { unsigned long jtsq; - int *rcrmp; + bool *rnhqp; unsigned long rjtsc; struct rcu_node *rnp; @@ -1332,7 +1302,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, * in-kernel CPU-bound tasks cannot advance grace periods. * So if the grace period is old enough, make the CPU pay attention. * Note that the unsynchronized assignments to the per-CPU - * rcu_sched_qs_mask variable are safe. Yes, setting of + * rcu_need_heavy_qs variable are safe. Yes, setting of * bits can be lost, but they will be set again on the next * force-quiescent-state pass. So lost bit sets do not result * in incorrect behavior, merely in a grace period lasting @@ -1346,16 +1316,11 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, * is set too high, we override with half of the RCU CPU stall * warning delay. */ - rcrmp = &per_cpu(rcu_dynticks.rcu_sched_qs_mask, rdp->cpu); - if (time_after(jiffies, rdp->rsp->gp_start + jtsq) || - time_after(jiffies, rdp->rsp->jiffies_resched)) { - if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { - WRITE_ONCE(rdp->cond_resched_completed, - READ_ONCE(rdp->mynode->completed)); - smp_mb(); /* ->cond_resched_completed before *rcrmp. */ - WRITE_ONCE(*rcrmp, - READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask); - } + rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu); + if (!READ_ONCE(*rnhqp) && + (time_after(jiffies, rdp->rsp->gp_start + jtsq) || + time_after(jiffies, rdp->rsp->jiffies_resched))) { + WRITE_ONCE(*rnhqp, true); rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */ } @@ -4169,7 +4134,6 @@ static void __init rcu_init_one(struct rcu_state *rsp) static const char * const fqs[] = RCU_FQS_NAME_INIT; static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; - static u8 fl_mask = 0x1; int levelcnt[RCU_NUM_LVLS]; /* # nodes in each level. */ int levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ @@ -4191,8 +4155,6 @@ static void __init rcu_init_one(struct rcu_state *rsp) for (i = 1; i < rcu_num_lvls; i++) rsp->level[i] = rsp->level[i - 1] + levelcnt[i - 1]; rcu_init_levelspread(levelspread, levelcnt); - rsp->flavor_mask = fl_mask; - fl_mask <<= 1; /* Initialize the elements themselves, starting from the leaves. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 76e4467bc765..b212cd0f22c7 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -113,7 +113,7 @@ struct rcu_dynticks { /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ - int rcu_sched_qs_mask; /* GP old, need heavy quiescent state. */ + bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */ #ifdef CONFIG_NO_HZ_FULL_SYSIDLE long long dynticks_idle_nesting; @@ -484,7 +484,6 @@ struct rcu_state { struct rcu_node *level[RCU_NUM_LVLS + 1]; /* Hierarchy levels (+1 to */ /* shut bogus gcc warning) */ - u8 flavor_mask; /* bit in flavor mask. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ call_rcu_func_t call; /* call_rcu() flavor. */ int ncpus; /* # CPUs seen so far. */ -- cgit v1.2.3 From 9226b10d78ffe7895549045fe388dc5e73b87eac Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 27 Jan 2017 14:17:50 -0800 Subject: rcu: Place guard on rcu_all_qs() and rcu_note_context_switch() actions The rcu_all_qs() and rcu_note_context_switch() do a series of checks, taking various actions to supply RCU with quiescent states, depending on the outcomes of the various checks. This is a bit much for scheduling fastpaths, so this commit creates a separate ->rcu_urgent_qs field in the rcu_dynticks structure that acts as a global guard for these checks. Thus, in the common case, rcu_all_qs() and rcu_note_context_switch() check the ->rcu_urgent_qs field, find it false, and simply return. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra --- .../Design/Data-Structures/Data-Structures.html | 11 ++++++- kernel/rcu/tree.c | 38 ++++++++++++++-------- kernel/rcu/tree.h | 3 +- kernel/rcu/tree_exp.h | 2 ++ kernel/rcu/tree_plugin.h | 8 +++-- 5 files changed, 44 insertions(+), 18 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html index e4bf20a68fa3..4dec89097559 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html @@ -1106,6 +1106,7 @@ Its fields are as follows: 3 atomic_t dynticks; 4 bool rcu_need_heavy_qs; 5 unsigned long rcu_qs_ctr; + 6 bool rcu_urgent_qs;

The ->dynticks_nesting field counts the @@ -1131,12 +1132,20 @@ it is willing to call for heavy-weight dyntick-counter operations. This flag is checked by RCU's context-switch and cond_resched() code, which provide a momentary idle sojourn in response. -

Finally the ->rcu_qs_ctr field is used to record +

The ->rcu_qs_ctr field is used to record quiescent states from cond_resched(). Because cond_resched() can execute quite frequently, this must be quite lightweight, as in a non-atomic increment of this per-CPU field. +

Finally, the ->rcu_urgent_qs field is used to record +the fact that the RCU core code would really like to see a quiescent +state from the corresponding CPU, with the various other fields indicating +just how badly RCU wants this quiescent state. +This flag is checked by RCU's context-switch and cond_resched() +code, which, if nothing else, non-atomically increment ->rcu_qs_ctr +in response. +

 
Quick Quiz:
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c2cbc78a0625..530ab6cf7a0b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -464,8 +464,14 @@ void rcu_note_context_switch(void) trace_rcu_utilization(TPS("Start context switch")); rcu_sched_qs(); rcu_preempt_note_context_switch(); + /* Load rcu_urgent_qs before other flags. */ + if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) + goto out; + this_cpu_write(rcu_dynticks.rcu_urgent_qs, false); if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) rcu_momentary_dyntick_idle(); + this_cpu_inc(rcu_dynticks.rcu_qs_ctr); +out: trace_rcu_utilization(TPS("End context switch")); barrier(); /* Avoid RCU read-side critical sections leaking up. */ } @@ -488,29 +494,26 @@ void rcu_all_qs(void) { unsigned long flags; + if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs)) + return; + preempt_disable(); + /* Load rcu_urgent_qs before other flags. */ + if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) { + preempt_enable(); + return; + } + this_cpu_write(rcu_dynticks.rcu_urgent_qs, false); barrier(); /* Avoid RCU read-side critical sections leaking down. */ if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) { local_irq_save(flags); rcu_momentary_dyntick_idle(); local_irq_restore(flags); } - if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) { - /* - * Yes, we just checked a per-CPU variable with preemption - * enabled, so we might be migrated to some other CPU at - * this point. That is OK because in that case, the - * migration will supply the needed quiescent state. - * We might end up needlessly disabling preemption and - * invoking rcu_sched_qs() on the destination CPU, but - * the probability and cost are both quite low, so this - * should not be a problem in practice. - */ - preempt_disable(); + if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) rcu_sched_qs(); - preempt_enable(); - } this_cpu_inc(rcu_dynticks.rcu_qs_ctr); barrier(); /* Avoid RCU read-side critical sections leaking up. */ + preempt_enable(); } EXPORT_SYMBOL_GPL(rcu_all_qs); @@ -1246,6 +1249,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, { unsigned long jtsq; bool *rnhqp; + bool *ruqp; unsigned long rjtsc; struct rcu_node *rnp; @@ -1281,11 +1285,15 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, * might not be the case for nohz_full CPUs looping in the kernel. */ rnp = rdp->mynode; + ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); if (time_after(jiffies, rdp->rsp->gp_start + jtsq) && READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) && READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc")); return 1; + } else { + /* Load rcu_qs_ctr before store to rcu_urgent_qs. */ + smp_store_release(ruqp, true); } /* Check for the CPU being offline. */ @@ -1321,6 +1329,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, (time_after(jiffies, rdp->rsp->gp_start + jtsq) || time_after(jiffies, rdp->rsp->jiffies_resched))) { WRITE_ONCE(*rnhqp, true); + /* Store rcu_need_heavy_qs before rcu_urgent_qs. */ + smp_store_release(ruqp, true); rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */ } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index b212cd0f22c7..d2f276fc2edc 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -113,8 +113,9 @@ struct rcu_dynticks { /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ - bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ + bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */ + bool rcu_urgent_qs; /* GP old need light quiescent state. */ #ifdef CONFIG_NO_HZ_FULL_SYSIDLE long long dynticks_idle_nesting; /* irq/process nesting level from idle. */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index a7b639ccd46e..a1f52bbe9db6 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -331,6 +331,8 @@ static void sync_sched_exp_handler(void *data) return; } __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true); + /* Store .exp before .rcu_urgent_qs. */ + smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); resched_cpu(smp_processor_id()); } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0a62a8f1caac..621296a6694b 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1860,7 +1860,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty")); } else { - rdp->nocb_defer_wakeup = RCU_NOGP_WAKE; + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE); + /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ + smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmptyIsDeferred")); } @@ -1872,7 +1874,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf")); } else { - rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE; + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_FORCE); + /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ + smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvfIsDeferred")); } -- cgit v1.2.3 From 5f0d5a3ae7cff0d7fa943c199c3a2e44f23e1fac Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 18 Jan 2017 02:53:44 -0800 Subject: mm: Rename SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU A group of Linux kernel hackers reported chasing a bug that resulted from their assumption that SLAB_DESTROY_BY_RCU provided an existence guarantee, that is, that no block from such a slab would be reallocated during an RCU read-side critical section. Of course, that is not the case. Instead, SLAB_DESTROY_BY_RCU only prevents freeing of an entire slab of blocks. However, there is a phrase for this, namely "type safety". This commit therefore renames SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU in order to avoid future instances of this sort of confusion. Signed-off-by: Paul E. McKenney Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrew Morton Cc: Acked-by: Johannes Weiner Acked-by: Vlastimil Babka [ paulmck: Add comments mentioning the old name, as requested by Eric Dumazet, in order to help people familiar with the old name find the new one. ] Acked-by: David Rientjes --- Documentation/RCU/00-INDEX | 2 +- Documentation/RCU/rculist_nulls.txt | 6 +++--- Documentation/RCU/whatisRCU.txt | 3 ++- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.h | 2 +- drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c | 2 +- fs/jbd2/journal.c | 2 +- fs/signalfd.c | 2 +- include/linux/dma-fence.h | 4 ++-- include/linux/slab.h | 6 ++++-- include/net/sock.h | 2 +- kernel/fork.c | 4 ++-- kernel/signal.c | 2 +- mm/kasan/kasan.c | 6 +++--- mm/kmemcheck.c | 2 +- mm/rmap.c | 4 ++-- mm/slab.c | 6 +++--- mm/slab.h | 4 ++-- mm/slab_common.c | 6 +++--- mm/slob.c | 6 +++--- mm/slub.c | 12 ++++++------ net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/llc/af_llc.c | 2 +- net/llc/llc_conn.c | 4 ++-- net/llc/llc_sap.c | 2 +- net/netfilter/nf_conntrack_core.c | 8 ++++---- net/smc/af_smc.c | 2 +- 30 files changed, 57 insertions(+), 54 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX index f773a264ae02..1672573b037a 100644 --- a/Documentation/RCU/00-INDEX +++ b/Documentation/RCU/00-INDEX @@ -17,7 +17,7 @@ rcu_dereference.txt rcubarrier.txt - RCU and Unloadable Modules rculist_nulls.txt - - RCU list primitives for use with SLAB_DESTROY_BY_RCU + - RCU list primitives for use with SLAB_TYPESAFE_BY_RCU rcuref.txt - Reference-count design for elements of lists/arrays protected by RCU rcu.txt diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt index 18f9651ff23d..8151f0195f76 100644 --- a/Documentation/RCU/rculist_nulls.txt +++ b/Documentation/RCU/rculist_nulls.txt @@ -1,5 +1,5 @@ Using hlist_nulls to protect read-mostly linked lists and -objects using SLAB_DESTROY_BY_RCU allocations. +objects using SLAB_TYPESAFE_BY_RCU allocations. Please read the basics in Documentation/RCU/listRCU.txt @@ -7,7 +7,7 @@ Using special makers (called 'nulls') is a convenient way to solve following problem : A typical RCU linked list managing objects which are -allocated with SLAB_DESTROY_BY_RCU kmem_cache can +allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use following algos : 1) Lookup algo @@ -96,7 +96,7 @@ unlock_chain(); // typically a spin_unlock() 3) Remove algo -------------- Nothing special here, we can use a standard RCU hlist deletion. -But thanks to SLAB_DESTROY_BY_RCU, beware a deleted object can be reused +But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused very very fast (before the end of RCU grace period) if (put_last_reference_on(obj) { diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 5cbd8b2395b8..91c912e86915 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -925,7 +925,8 @@ d. Do you need RCU grace periods to complete even in the face e. Is your workload too update-intensive for normal use of RCU, but inappropriate for other synchronization mechanisms? - If so, consider SLAB_DESTROY_BY_RCU. But please be careful! + If so, consider SLAB_TYPESAFE_BY_RCU (which was originally + named SLAB_DESTROY_BY_RCU). But please be careful! f. Do you need read-side critical sections that are respected even though they are in the middle of the idle loop, during diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6908123162d1..3b668895ac24 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4552,7 +4552,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | - SLAB_DESTROY_BY_RCU); + SLAB_TYPESAFE_BY_RCU); if (!dev_priv->requests) goto err_vmas; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index ea511f06efaf..9ee2750e1dde 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -493,7 +493,7 @@ static inline struct drm_i915_gem_request * __i915_gem_active_get_rcu(const struct i915_gem_active *active) { /* Performing a lockless retrieval of the active request is super - * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing + * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing * slab of request objects will not be freed whilst we hold the * RCU read lock. It does not guarantee that the request itself * will not be freed and then *reused*. Viz, diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c index 12647af5a336..e7fb47e84a93 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c @@ -1071,7 +1071,7 @@ int ldlm_init(void) ldlm_lock_slab = kmem_cache_create("ldlm_locks", sizeof(struct ldlm_lock), 0, SLAB_HWCACHE_ALIGN | - SLAB_DESTROY_BY_RCU, NULL); + SLAB_TYPESAFE_BY_RCU, NULL); if (!ldlm_lock_slab) { kmem_cache_destroy(ldlm_resource_slab); return -ENOMEM; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index a1a359bfcc9c..7f8f962454e5 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2340,7 +2340,7 @@ static int jbd2_journal_init_journal_head_cache(void) jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", sizeof(struct journal_head), 0, /* offset */ - SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU, + SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU, NULL); /* ctor */ retval = 0; if (!jbd2_journal_head_cache) { diff --git a/fs/signalfd.c b/fs/signalfd.c index 270221fcef42..7e3d71109f51 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -38,7 +38,7 @@ void signalfd_cleanup(struct sighand_struct *sighand) /* * The lockless check can race with remove_wait_queue() in progress, * but in this case its caller should run under rcu_read_lock() and - * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return. + * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return. */ if (likely(!waitqueue_active(wqh))) return; diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 6048fa404e57..a5195a7d6f77 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -229,7 +229,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence) * * Function returns NULL if no refcount could be obtained, or the fence. * This function handles acquiring a reference to a fence that may be - * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU), + * reallocated within the RCU grace period (such as with SLAB_TYPESAFE_BY_RCU), * so long as the caller is using RCU on the pointer to the fence. * * An alternative mechanism is to employ a seqlock to protect a bunch of @@ -257,7 +257,7 @@ dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep) * have successfully acquire a reference to it. If it no * longer matches, we are holding a reference to some other * reallocated pointer. This is possible if the allocator - * is using a freelist like SLAB_DESTROY_BY_RCU where the + * is using a freelist like SLAB_TYPESAFE_BY_RCU where the * fence remains valid for the RCU grace period, but it * may be reallocated. When using such allocators, we are * responsible for ensuring the reference we get is to diff --git a/include/linux/slab.h b/include/linux/slab.h index 3c37a8c51921..04a7f7993e67 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -28,7 +28,7 @@ #define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */ #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */ /* - * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS! + * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * * This delays freeing the SLAB page by a grace period, it does _NOT_ * delay object freeing. This means that if you do kmem_cache_free() @@ -61,8 +61,10 @@ * * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. + * + * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ -#define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ +#define SLAB_TYPESAFE_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ #define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ diff --git a/include/net/sock.h b/include/net/sock.h index 5e5997654db6..59cdccaa30e7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -993,7 +993,7 @@ struct smc_hashinfo; struct module; /* - * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes + * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes * un-modified. Special care is taken when initializing object to zero. */ static inline void sk_prot_clear_nulls(struct sock *sk, int size) diff --git a/kernel/fork.c b/kernel/fork.c index 6c463c80e93d..9330ce24f1bb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1313,7 +1313,7 @@ void __cleanup_sighand(struct sighand_struct *sighand) if (atomic_dec_and_test(&sighand->count)) { signalfd_cleanup(sighand); /* - * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it + * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it * without an RCU grace period, see __lock_task_sighand(). */ kmem_cache_free(sighand_cachep, sighand); @@ -2144,7 +2144,7 @@ void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, diff --git a/kernel/signal.c b/kernel/signal.c index 7e59ebc2c25e..6df5f72158e4 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1237,7 +1237,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, } /* * This sighand can be already freed and even reused, but - * we rely on SLAB_DESTROY_BY_RCU and sighand_ctor() which + * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which * initializes ->siglock: this slab can't go away, it has * the same object type, ->siglock can't be reinitialized. * diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 98b27195e38b..4b20061102f6 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -413,7 +413,7 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size, *size += sizeof(struct kasan_alloc_meta); /* Add free meta. */ - if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor || + if (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor || cache->object_size < sizeof(struct kasan_free_meta)) { cache->kasan_info.free_meta_offset = *size; *size += sizeof(struct kasan_free_meta); @@ -561,7 +561,7 @@ static void kasan_poison_slab_free(struct kmem_cache *cache, void *object) unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); /* RCU slabs could be legally used after free within the RCU period */ - if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) + if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) return; kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); @@ -572,7 +572,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object) s8 shadow_byte; /* RCU slabs could be legally used after free within the RCU period */ - if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) + if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) return false; shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object)); diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index 5bf191756a4a..2d5959c5f7c5 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c @@ -95,7 +95,7 @@ void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) { /* TODO: RCU freeing is unsupported for now; hide false positives. */ - if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU)) + if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU)) kmemcheck_mark_freed(object, size); } diff --git a/mm/rmap.c b/mm/rmap.c index 49ed681ccc7b..8ffd59df8a3f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -430,7 +430,7 @@ static void anon_vma_ctor(void *data) void __init anon_vma_init(void) { anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), - 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT, + 0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT, anon_vma_ctor); anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC|SLAB_ACCOUNT); @@ -481,7 +481,7 @@ struct anon_vma *page_get_anon_vma(struct page *page) * If this page is still mapped, then its anon_vma cannot have been * freed. But if it has been unmapped, we have no security against the * anon_vma structure being freed and reused (for another anon_vma: - * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero() + * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero() * above cannot corrupt). */ if (!page_mapped(page)) { diff --git a/mm/slab.c b/mm/slab.c index 807d86c76908..93c827864862 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1728,7 +1728,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page) freelist = page->freelist; slab_destroy_debugcheck(cachep, page); - if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) + if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU)) call_rcu(&page->rcu_head, kmem_rcu_free); else kmem_freepages(cachep, page); @@ -1924,7 +1924,7 @@ static bool set_objfreelist_slab_cache(struct kmem_cache *cachep, cachep->num = 0; - if (cachep->ctor || flags & SLAB_DESTROY_BY_RCU) + if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU) return false; left = calculate_slab_order(cachep, size, @@ -2030,7 +2030,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN + 2 * sizeof(unsigned long long))) flags |= SLAB_RED_ZONE | SLAB_STORE_USER; - if (!(flags & SLAB_DESTROY_BY_RCU)) + if (!(flags & SLAB_TYPESAFE_BY_RCU)) flags |= SLAB_POISON; #endif #endif diff --git a/mm/slab.h b/mm/slab.h index 65e7c3fcac72..9cfcf099709c 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -126,7 +126,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, /* Legal flag mask for kmem_cache_create(), for various configurations */ #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \ - SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS ) + SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS ) #if defined(CONFIG_DEBUG_SLAB) #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) @@ -415,7 +415,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) * back there or track user information then we can * only use the space before that information. */ - if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) + if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) return s->inuse; /* * Else we can use all the padding etc for the allocation diff --git a/mm/slab_common.c b/mm/slab_common.c index 09d0e849b07f..01a0fe2eb332 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -39,7 +39,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, * Set of flags that will prevent slab merging */ #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ - SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ + SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \ SLAB_FAILSLAB | SLAB_KASAN) #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ @@ -500,7 +500,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) struct kmem_cache *s, *s2; /* - * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the + * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the * @slab_caches_to_rcu_destroy list. The slab pages are freed * through RCU and and the associated kmem_cache are dereferenced * while freeing the pages, so the kmem_caches should be freed only @@ -537,7 +537,7 @@ static int shutdown_cache(struct kmem_cache *s) memcg_unlink_cache(s); list_del(&s->list); - if (s->flags & SLAB_DESTROY_BY_RCU) { + if (s->flags & SLAB_TYPESAFE_BY_RCU) { list_add_tail(&s->list, &slab_caches_to_rcu_destroy); schedule_work(&slab_caches_to_rcu_destroy_work); } else { diff --git a/mm/slob.c b/mm/slob.c index eac04d4357ec..1bae78d71096 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -126,7 +126,7 @@ static inline void clear_slob_page_free(struct page *sp) /* * struct slob_rcu is inserted at the tail of allocated slob blocks, which - * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free + * were created with a SLAB_TYPESAFE_BY_RCU slab. slob_rcu is used to free * the block using call_rcu. */ struct slob_rcu { @@ -524,7 +524,7 @@ EXPORT_SYMBOL(ksize); int __kmem_cache_create(struct kmem_cache *c, unsigned long flags) { - if (flags & SLAB_DESTROY_BY_RCU) { + if (flags & SLAB_TYPESAFE_BY_RCU) { /* leave room for rcu footer at the end of object */ c->size += sizeof(struct slob_rcu); } @@ -598,7 +598,7 @@ static void kmem_rcu_free(struct rcu_head *head) void kmem_cache_free(struct kmem_cache *c, void *b) { kmemleak_free_recursive(b, c->flags); - if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) { + if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) { struct slob_rcu *slob_rcu; slob_rcu = b + (c->size - sizeof(struct slob_rcu)); slob_rcu->size = c->size; diff --git a/mm/slub.c b/mm/slub.c index 7f4bc7027ed5..57e5156f02be 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1687,7 +1687,7 @@ static void rcu_free_slab(struct rcu_head *h) static void free_slab(struct kmem_cache *s, struct page *page) { - if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { + if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { struct rcu_head *head; if (need_reserve_slab_rcu) { @@ -2963,7 +2963,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page, * slab_free_freelist_hook() could have put the items into quarantine. * If so, no need to free them. */ - if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU)) + if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU)) return; do_slab_free(s, page, head, tail, cnt, addr); } @@ -3433,7 +3433,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * the slab may touch the object after free or before allocation * then we should never poison the object itself. */ - if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) && + if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) && !s->ctor) s->flags |= __OBJECT_POISON; else @@ -3455,7 +3455,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) */ s->inuse = size; - if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || + if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || s->ctor)) { /* * Relocate free pointer after the object if it is not @@ -3537,7 +3537,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); s->reserved = 0; - if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) + if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU)) s->reserved = sizeof(struct rcu_head); if (!calculate_sizes(s, -1)) @@ -5042,7 +5042,7 @@ SLAB_ATTR_RO(cache_dma); static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) { - return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); + return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU)); } SLAB_ATTR_RO(destroy_by_rcu); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 409d0cfd3447..90210a0e3888 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -950,7 +950,7 @@ static struct proto dccp_v4_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 233b57367758..b4019a5e4551 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1012,7 +1012,7 @@ static struct proto dccp_v6_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .rsk_prot = &dccp6_request_sock_ops, .twsk_prot = &dccp6_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9a89b8deafae..82c89abeb989 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2398,7 +2398,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 60a5295a7de6..bdbc4327ebee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1919,7 +1919,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 06186d608a27..d096ca563054 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -142,7 +142,7 @@ static struct proto llc_proto = { .name = "LLC", .owner = THIS_MODULE, .obj_size = sizeof(struct llc_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, }; /** diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 8bc5a1bd2d45..9b02c13d258b 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -506,7 +506,7 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap, again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_estab_match(sap, daddr, laddr, rc)) { - /* Extra checks required by SLAB_DESTROY_BY_RCU */ + /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || @@ -565,7 +565,7 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap, again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_listener_match(sap, laddr, rc)) { - /* Extra checks required by SLAB_DESTROY_BY_RCU */ + /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 5404d0d195cc..63b6ab056370 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -328,7 +328,7 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap, again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc)) { - /* Extra checks required by SLAB_DESTROY_BY_RCU */ + /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 071b97fcbefb..fdcdac7916b2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -914,7 +914,7 @@ static unsigned int early_drop_list(struct net *net, continue; /* kill only if still in same netns -- might have moved due to - * SLAB_DESTROY_BY_RCU rules. + * SLAB_TYPESAFE_BY_RCU rules. * * We steal the timer reference. If that fails timer has * already fired or someone else deleted it. Just drop ref @@ -1069,7 +1069,7 @@ __nf_conntrack_alloc(struct net *net, /* * Do not use kmem_cache_zalloc(), as this cache uses - * SLAB_DESTROY_BY_RCU. + * SLAB_TYPESAFE_BY_RCU. */ ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); if (ct == NULL) @@ -1114,7 +1114,7 @@ void nf_conntrack_free(struct nf_conn *ct) struct net *net = nf_ct_net(ct); /* A freed object has refcnt == 0, that's - * the golden rule for SLAB_DESTROY_BY_RCU + * the golden rule for SLAB_TYPESAFE_BY_RCU */ NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0); @@ -1878,7 +1878,7 @@ int nf_conntrack_init_start(void) nf_conntrack_cachep = kmem_cache_create("nf_conntrack", sizeof(struct nf_conn), NFCT_INFOMASK + 1, - SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); + SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); if (!nf_conntrack_cachep) goto err_cachep; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 85837ab90e89..d34bbd6d8f38 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -101,7 +101,7 @@ struct proto smc_proto = { .unhash = smc_unhash_sk, .obj_size = sizeof(struct smc_sock), .h.smc_hash = &smc_v4_hashinfo, - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, }; EXPORT_SYMBOL_GPL(smc_proto); -- cgit v1.2.3
 
Quick Quiz: