From 0f9be8cabbc343218dd2807af7308656be113045 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 27 Jan 2017 13:17:02 -0800 Subject: rcu: Eliminate flavor scan in rcu_momentary_dyntick_idle() The rcu_momentary_dyntick_idle() function scans the RCU flavors, checking that one of them still needs a quiescent state before doing an expensive atomic operation on the ->dynticks counter. However, this check reduces overhead only after a rare race condition, and increases complexity. This commit therefore removes the scan and the mechanism enabling the scan. Signed-off-by: Paul E. McKenney --- .../Design/Data-Structures/Data-Structures.html | 4 +- kernel/rcu/tree.c | 62 +++++----------------- kernel/rcu/tree.h | 3 +- 3 files changed, 15 insertions(+), 54 deletions(-) diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html index 3d0311657533..e4bf20a68fa3 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html @@ -1104,7 +1104,7 @@ Its fields are as follows: 1 int dynticks_nesting; 2 int dynticks_nmi_nesting; 3 atomic_t dynticks; - 4 int rcu_sched_qs_mask; + 4 bool rcu_need_heavy_qs; 5 unsigned long rcu_qs_ctr; @@ -1124,7 +1124,7 @@ CPU's transitions to and from dyntick-idle mode, so that this counter has an even value when the CPU is in dyntick-idle mode and an odd value otherwise. -

The ->rcu_sched_qs_mask field is used +

The ->rcu_need_heavy_qs field is used to record the fact that the RCU core code would really like to see a quiescent state from the corresponding CPU, so much so that it is willing to call for heavy-weight dyntick-counter operations. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 82a86a67c92a..c2cbc78a0625 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -443,44 +443,14 @@ bool rcu_eqs_special_set(int cpu) * memory barriers to let the RCU core know about it, regardless of what * this CPU might (or might not) do in the near future. * - * We inform the RCU core by emulating a zero-duration dyntick-idle - * period, which we in turn do by incrementing the ->dynticks counter - * by two. + * We inform the RCU core by emulating a zero-duration dyntick-idle period. * * The caller must have disabled interrupts. */ static void rcu_momentary_dyntick_idle(void) { - struct rcu_data *rdp; - int resched_mask; - struct rcu_state *rsp; - - /* - * Yes, we can lose flag-setting operations. This is OK, because - * the flag will be set again after some delay. - */ - resched_mask = raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask); - raw_cpu_write(rcu_dynticks.rcu_sched_qs_mask, 0); - - /* Find the flavor that needs a quiescent state. */ - for_each_rcu_flavor(rsp) { - rdp = raw_cpu_ptr(rsp->rda); - if (!(resched_mask & rsp->flavor_mask)) - continue; - smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */ - if (READ_ONCE(rdp->mynode->completed) != - READ_ONCE(rdp->cond_resched_completed)) - continue; - - /* - * Pretend to be momentarily idle for the quiescent state. - * This allows the grace-period kthread to record the - * quiescent state, with no need for this CPU to do anything - * further. - */ - rcu_dynticks_momentary_idle(); - break; - } + raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false); + rcu_dynticks_momentary_idle(); } /* @@ -494,7 +464,7 @@ void rcu_note_context_switch(void) trace_rcu_utilization(TPS("Start context switch")); rcu_sched_qs(); rcu_preempt_note_context_switch(); - if (unlikely(raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask))) + if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) rcu_momentary_dyntick_idle(); trace_rcu_utilization(TPS("End context switch")); barrier(); /* Avoid RCU read-side critical sections leaking up. */ @@ -519,7 +489,7 @@ void rcu_all_qs(void) unsigned long flags; barrier(); /* Avoid RCU read-side critical sections leaking down. */ - if (unlikely(raw_cpu_read(rcu_dynticks.rcu_sched_qs_mask))) { + if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) { local_irq_save(flags); rcu_momentary_dyntick_idle(); local_irq_restore(flags); @@ -1275,7 +1245,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, bool *isidle, unsigned long *maxj) { unsigned long jtsq; - int *rcrmp; + bool *rnhqp; unsigned long rjtsc; struct rcu_node *rnp; @@ -1332,7 +1302,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, * in-kernel CPU-bound tasks cannot advance grace periods. * So if the grace period is old enough, make the CPU pay attention. * Note that the unsynchronized assignments to the per-CPU - * rcu_sched_qs_mask variable are safe. Yes, setting of + * rcu_need_heavy_qs variable are safe. Yes, setting of * bits can be lost, but they will be set again on the next * force-quiescent-state pass. So lost bit sets do not result * in incorrect behavior, merely in a grace period lasting @@ -1346,16 +1316,11 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, * is set too high, we override with half of the RCU CPU stall * warning delay. */ - rcrmp = &per_cpu(rcu_dynticks.rcu_sched_qs_mask, rdp->cpu); - if (time_after(jiffies, rdp->rsp->gp_start + jtsq) || - time_after(jiffies, rdp->rsp->jiffies_resched)) { - if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { - WRITE_ONCE(rdp->cond_resched_completed, - READ_ONCE(rdp->mynode->completed)); - smp_mb(); /* ->cond_resched_completed before *rcrmp. */ - WRITE_ONCE(*rcrmp, - READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask); - } + rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu); + if (!READ_ONCE(*rnhqp) && + (time_after(jiffies, rdp->rsp->gp_start + jtsq) || + time_after(jiffies, rdp->rsp->jiffies_resched))) { + WRITE_ONCE(*rnhqp, true); rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */ } @@ -4169,7 +4134,6 @@ static void __init rcu_init_one(struct rcu_state *rsp) static const char * const fqs[] = RCU_FQS_NAME_INIT; static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; - static u8 fl_mask = 0x1; int levelcnt[RCU_NUM_LVLS]; /* # nodes in each level. */ int levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ @@ -4191,8 +4155,6 @@ static void __init rcu_init_one(struct rcu_state *rsp) for (i = 1; i < rcu_num_lvls; i++) rsp->level[i] = rsp->level[i - 1] + levelcnt[i - 1]; rcu_init_levelspread(levelspread, levelcnt); - rsp->flavor_mask = fl_mask; - fl_mask <<= 1; /* Initialize the elements themselves, starting from the leaves. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 76e4467bc765..b212cd0f22c7 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -113,7 +113,7 @@ struct rcu_dynticks { /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ - int rcu_sched_qs_mask; /* GP old, need heavy quiescent state. */ + bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */ #ifdef CONFIG_NO_HZ_FULL_SYSIDLE long long dynticks_idle_nesting; @@ -484,7 +484,6 @@ struct rcu_state { struct rcu_node *level[RCU_NUM_LVLS + 1]; /* Hierarchy levels (+1 to */ /* shut bogus gcc warning) */ - u8 flavor_mask; /* bit in flavor mask. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ call_rcu_func_t call; /* call_rcu() flavor. */ int ncpus; /* # CPUs seen so far. */ -- cgit v1.2.3