summaryrefslogtreecommitdiffstats
path: root/kernel/rcu/tree.c
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.ibm.com>2019-07-02 16:03:33 -0700
committerPaul E. McKenney <paulmck@linux.ibm.com>2019-08-13 14:37:32 -0700
commitd1b222c6be1f8bfc77099e034219732ecaeaaf96 (patch)
treee9da6ce477c530ed6584bd922484ad992f994cda /kernel/rcu/tree.c
parenteda669a6a2c517fd6db41d0fe3c95c1b749c60bd (diff)
downloadlinux-d1b222c6be1f8bfc77099e034219732ecaeaaf96.tar.bz2
rcu/nocb: Add bypass callback queueing
Use of the rcu_data structure's segmented ->cblist for no-CBs CPUs takes advantage of unrelated grace periods, thus reducing the memory footprint in the face of floods of call_rcu() invocations. However, the ->cblist field is a more-complex rcu_segcblist structure which must be protected via locking. Even though there are only three entities which can acquire this lock (the CPU invoking call_rcu(), the no-CBs grace-period kthread, and the no-CBs callbacks kthread), the contention on this lock is excessive under heavy stress. This commit therefore greatly reduces contention by provisioning an rcu_cblist structure field named ->nocb_bypass within the rcu_data structure. Each no-CBs CPU is permitted only a limited number of enqueues onto the ->cblist per jiffy, controlled by a new nocb_nobypass_lim_per_jiffy kernel boot parameter that defaults to about 16 enqueues per millisecond (16 * 1000 / HZ). When that limit is exceeded, the CPU instead enqueues onto the new ->nocb_bypass. The ->nocb_bypass is flushed into the ->cblist every jiffy or when the number of callbacks on ->nocb_bypass exceeds qhimark, whichever happens first. During call_rcu() floods, this flushing is carried out by the CPU during the course of its call_rcu() invocations. However, a CPU could simply stop invoking call_rcu() at any time. The no-CBs grace-period kthread therefore carries out less-aggressive flushing (every few jiffies or when the number of callbacks on ->nocb_bypass exceeds (2 * qhimark), whichever comes first). This means that the no-CBs grace-period kthread cannot be permitted to do unbounded waits while there are callbacks on ->nocb_bypass. A ->nocb_bypass_timer is used to provide the needed wakeups. [ paulmck: Apply Coverity feedback reported by Colin Ian King. ] Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--kernel/rcu/tree.c16
1 files changed, 12 insertions, 4 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index ec320658aeef..457623100d12 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1251,6 +1251,7 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
unsigned long gp_seq_req;
bool ret = false;
+ rcu_lockdep_assert_cblist_protected(rdp);
raw_lockdep_assert_held_rcu_node(rnp);
/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
@@ -1292,7 +1293,7 @@ static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp,
unsigned long c;
bool needwake;
- lockdep_assert_irqs_disabled();
+ rcu_lockdep_assert_cblist_protected(rdp);
c = rcu_seq_snap(&rcu_state.gp_seq);
if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {
/* Old request still live, so mark recent callbacks. */
@@ -1318,6 +1319,7 @@ static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp,
*/
static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
{
+ rcu_lockdep_assert_cblist_protected(rdp);
raw_lockdep_assert_held_rcu_node(rnp);
/* If no pending (not yet ready to invoke) callbacks, nothing to do. */
@@ -1341,6 +1343,7 @@ static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
struct rcu_data *rdp)
{
+ rcu_lockdep_assert_cblist_protected(rdp);
if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) ||
!raw_spin_trylock_rcu_node(rnp))
return;
@@ -2187,7 +2190,9 @@ static void rcu_do_batch(struct rcu_data *rdp)
* The following usually indicates a double call_rcu(). To track
* this down, try building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y.
*/
- WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0));
+ WARN_ON_ONCE(count == 0 && !rcu_segcblist_empty(&rdp->cblist));
+ WARN_ON_ONCE(!IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
+ count != 0 && rcu_segcblist_empty(&rdp->cblist));
rcu_nocb_unlock_irqrestore(rdp, flags);
@@ -2564,8 +2569,9 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, bool lazy)
if (rcu_segcblist_empty(&rdp->cblist))
rcu_segcblist_init(&rdp->cblist);
}
- rcu_nocb_lock(rdp);
- was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+ if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
+ return; // Enqueued onto ->nocb_bypass, so just leave.
+ /* If we get here, rcu_nocb_try_bypass() acquired ->nocb_lock. */
rcu_segcblist_enqueue(&rdp->cblist, head, lazy);
if (__is_kfree_rcu_offset((unsigned long)func))
trace_rcu_kfree_callback(rcu_state.name, head,
@@ -2839,6 +2845,7 @@ static void rcu_barrier_func(void *unused)
rdp->barrier_head.func = rcu_barrier_callback;
debug_rcu_head_queue(&rdp->barrier_head);
rcu_nocb_lock(rdp);
+ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
atomic_inc(&rcu_state.barrier_cpu_count);
} else {
@@ -3192,6 +3199,7 @@ void rcutree_migrate_callbacks(int cpu)
my_rdp = this_cpu_ptr(&rcu_data);
my_rnp = my_rdp->mynode;
rcu_nocb_lock(my_rdp); /* irqs already disabled. */
+ WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));
raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
/* Leverage recent GPs and set GP for new callbacks. */
needwake = rcu_advance_cbs(my_rnp, rdp) ||