diff options
author | Paul E. McKenney <paulmck@linux.ibm.com> | 2019-07-02 16:03:33 -0700 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.ibm.com> | 2019-08-13 14:37:32 -0700 |
commit | d1b222c6be1f8bfc77099e034219732ecaeaaf96 (patch) | |
tree | e9da6ce477c530ed6584bd922484ad992f994cda /kernel/rcu/tree.c | |
parent | eda669a6a2c517fd6db41d0fe3c95c1b749c60bd (diff) | |
download | linux-d1b222c6be1f8bfc77099e034219732ecaeaaf96.tar.bz2 |
rcu/nocb: Add bypass callback queueing
Use of the rcu_data structure's segmented ->cblist for no-CBs CPUs
takes advantage of unrelated grace periods, thus reducing the memory
footprint in the face of floods of call_rcu() invocations. However,
the ->cblist field is a more-complex rcu_segcblist structure which must
be protected via locking. Even though there are only three entities
which can acquire this lock (the CPU invoking call_rcu(), the no-CBs
grace-period kthread, and the no-CBs callbacks kthread), the contention
on this lock is excessive under heavy stress.
This commit therefore greatly reduces contention by provisioning
an rcu_cblist structure field named ->nocb_bypass within the
rcu_data structure. Each no-CBs CPU is permitted only a limited
number of enqueues onto the ->cblist per jiffy, controlled by a new
nocb_nobypass_lim_per_jiffy kernel boot parameter that defaults to
16 * 1000 / HZ enqueues per jiffy, which is about 16 enqueues per
millisecond. When that limit is exceeded, the CPU instead enqueues
onto the new ->nocb_bypass.
The ->nocb_bypass is flushed into the ->cblist every jiffy or when
the number of callbacks on ->nocb_bypass exceeds qhimark, whichever
happens first. During call_rcu() floods, this flushing is carried out
by the CPU during the course of its call_rcu() invocations. However,
a CPU could simply stop invoking call_rcu() at any time. The no-CBs
grace-period kthread therefore carries out less-aggressive flushing:
every few jiffies or when the number of callbacks on ->nocb_bypass
exceeds 2 * qhimark, whichever comes first. This means that the
no-CBs grace-period kthread cannot be permitted to do unbounded waits
while there are callbacks on ->nocb_bypass. A ->nocb_bypass_timer is
used to provide the needed wakeups.
[ paulmck: Apply Coverity feedback reported by Colin Ian King. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r-- | kernel/rcu/tree.c | 16 |
1 file changed, 12 insertions, 4 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ec320658aeef..457623100d12 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1251,6 +1251,7 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp) unsigned long gp_seq_req; bool ret = false; + rcu_lockdep_assert_cblist_protected(rdp); raw_lockdep_assert_held_rcu_node(rnp); /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ @@ -1292,7 +1293,7 @@ static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp, unsigned long c; bool needwake; - lockdep_assert_irqs_disabled(); + rcu_lockdep_assert_cblist_protected(rdp); c = rcu_seq_snap(&rcu_state.gp_seq); if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) { /* Old request still live, so mark recent callbacks. */ @@ -1318,6 +1319,7 @@ static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp, */ static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp) { + rcu_lockdep_assert_cblist_protected(rdp); raw_lockdep_assert_held_rcu_node(rnp); /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ @@ -1341,6 +1343,7 @@ static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp) static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp, struct rcu_data *rdp) { + rcu_lockdep_assert_cblist_protected(rdp); if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp)) return; @@ -2187,7 +2190,9 @@ static void rcu_do_batch(struct rcu_data *rdp) * The following usually indicates a double call_rcu(). To track * this down, try building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y. 
*/ - WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0)); + WARN_ON_ONCE(count == 0 && !rcu_segcblist_empty(&rdp->cblist)); + WARN_ON_ONCE(!IS_ENABLED(CONFIG_RCU_NOCB_CPU) && + count != 0 && rcu_segcblist_empty(&rdp->cblist)); rcu_nocb_unlock_irqrestore(rdp, flags); @@ -2564,8 +2569,9 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, bool lazy) if (rcu_segcblist_empty(&rdp->cblist)) rcu_segcblist_init(&rdp->cblist); } - rcu_nocb_lock(rdp); - was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); + if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags)) + return; // Enqueued onto ->nocb_bypass, so just leave. + /* If we get here, rcu_nocb_try_bypass() acquired ->nocb_lock. */ rcu_segcblist_enqueue(&rdp->cblist, head, lazy); if (__is_kfree_rcu_offset((unsigned long)func)) trace_rcu_kfree_callback(rcu_state.name, head, @@ -2839,6 +2845,7 @@ static void rcu_barrier_func(void *unused) rdp->barrier_head.func = rcu_barrier_callback; debug_rcu_head_queue(&rdp->barrier_head); rcu_nocb_lock(rdp); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) { atomic_inc(&rcu_state.barrier_cpu_count); } else { @@ -3192,6 +3199,7 @@ void rcutree_migrate_callbacks(int cpu) my_rdp = this_cpu_ptr(&rcu_data); my_rnp = my_rdp->mynode; rcu_nocb_lock(my_rdp); /* irqs already disabled. */ + WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies)); raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */ /* Leverage recent GPs and set GP for new callbacks. */ needwake = rcu_advance_cbs(my_rnp, rdp) || |