Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r-- | kernel/rcu/tree.c | 213
1 file changed, 85 insertions, 128 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 51d4c3acf32d..84fe96641b2e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -97,9 +97,6 @@ struct rcu_state sname##_state = { \
         .gp_state = RCU_GP_IDLE, \
         .gpnum = 0UL - 300UL, \
         .completed = 0UL - 300UL, \
-        .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
-        .orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \
-        .orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \
         .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
         .name = RCU_STATE_NAME(sname), \
         .abbr = sabbr, \
@@ -843,13 +840,9 @@ static void rcu_eqs_enter(bool user)
  */
 void rcu_idle_enter(void)
 {
-        unsigned long flags;
-
-        local_irq_save(flags);
+        RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_idle_enter() invoked with irqs enabled!!!");
         rcu_eqs_enter(false);
-        local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
 #ifdef CONFIG_NO_HZ_FULL
 /**
@@ -862,7 +855,8 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
  */
 void rcu_user_enter(void)
 {
-        rcu_eqs_enter(1);
+        RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_user_enter() invoked with irqs enabled!!!");
+        rcu_eqs_enter(true);
 }
 #endif /* CONFIG_NO_HZ_FULL */
 
@@ -955,8 +949,10 @@ static void rcu_eqs_exit(bool user)
         if (oldval & DYNTICK_TASK_NEST_MASK) {
                 rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
         } else {
+                __this_cpu_inc(disable_rcu_irq_enter);
                 rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
                 rcu_eqs_exit_common(oldval, user);
+                __this_cpu_dec(disable_rcu_irq_enter);
         }
 }
 
@@ -979,7 +975,6 @@ void rcu_idle_exit(void)
         rcu_eqs_exit(false);
         local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_exit);
 
 #ifdef CONFIG_NO_HZ_FULL
 /**
@@ -1358,12 +1353,13 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
         j = jiffies;
         gpa = READ_ONCE(rsp->gp_activity);
         if (j - gpa > 2 * HZ) {
-                pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
+                pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
                        rsp->name, j - gpa,
                        rsp->gpnum, rsp->completed,
                        rsp->gp_flags,
                        gp_state_getname(rsp->gp_state), rsp->gp_state,
-                       rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
+                       rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
+                       rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);
                 if (rsp->gp_kthread) {
                         sched_show_task(rsp->gp_kthread);
                         wake_up_process(rsp->gp_kthread);
@@ -2067,8 +2063,8 @@ static bool rcu_gp_init(struct rcu_state *rsp)
 }
 
 /*
- * Helper function for wait_event_interruptible_timeout() wakeup
- * at force-quiescent-state time.
+ * Helper function for swait_event_idle() wakeup at force-quiescent-state
+ * time.
  */
 static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
 {
@@ -2206,9 +2202,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
                                                READ_ONCE(rsp->gpnum),
                                                TPS("reqwait"));
                         rsp->gp_state = RCU_GP_WAIT_GPS;
-                        swait_event_interruptible(rsp->gp_wq,
-                                                  READ_ONCE(rsp->gp_flags) &
-                                                  RCU_GP_FLAG_INIT);
+                        swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
+                                         RCU_GP_FLAG_INIT);
                         rsp->gp_state = RCU_GP_DONE_GPS;
                         /* Locking provides needed memory barrier. */
                         if (rcu_gp_init(rsp))
@@ -2239,7 +2234,7 @@
                                                READ_ONCE(rsp->gpnum),
                                                TPS("fqswait"));
                         rsp->gp_state = RCU_GP_WAIT_FQS;
-                        ret = swait_event_interruptible_timeout(rsp->gp_wq,
+                        ret = swait_event_idle_timeout(rsp->gp_wq,
                                         rcu_gp_fqs_check_wake(rsp, &gf), j);
                         rsp->gp_state = RCU_GP_DOING_FQS;
                         /* Locking provides needed memory barriers. */
@@ -2409,6 +2404,8 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
                         return;
                 }
                 WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
+                WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 &&
+                             rcu_preempt_blocked_readers_cgp(rnp));
                 rnp->qsmask &= ~mask;
                 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
                                                  mask, rnp->qsmask, rnp->level,
@@ -2563,85 +2560,6 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 }
 
 /*
- * Send the specified CPU's RCU callbacks to the orphanage. The
- * specified CPU must be offline, and the caller must hold the
- * ->orphan_lock.
- */
-static void
-rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
-                          struct rcu_node *rnp, struct rcu_data *rdp)
-{
-        lockdep_assert_held(&rsp->orphan_lock);
-
-        /* No-CBs CPUs do not have orphanable callbacks. */
-        if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
-                return;
-
-        /*
-         * Orphan the callbacks. First adjust the counts. This is safe
-         * because _rcu_barrier() excludes CPU-hotplug operations, so it
-         * cannot be running now. Thus no memory barrier is required.
-         */
-        rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist);
-        rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done);
-
-        /*
-         * Next, move those callbacks still needing a grace period to
-         * the orphanage, where some other CPU will pick them up.
-         * Some of the callbacks might have gone partway through a grace
-         * period, but that is too bad. They get to start over because we
-         * cannot assume that grace periods are synchronized across CPUs.
-         */
-        rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
-
-        /*
-         * Then move the ready-to-invoke callbacks to the orphanage,
-         * where some other CPU will pick them up. These will not be
-         * required to pass though another grace period: They are done.
-         */
-        rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done);
-
-        /* Finally, disallow further callbacks on this CPU. */
-        rcu_segcblist_disable(&rdp->cblist);
-}
-
-/*
- * Adopt the RCU callbacks from the specified rcu_state structure's
- * orphanage. The caller must hold the ->orphan_lock.
- */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
-{
-        struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
-
-        lockdep_assert_held(&rsp->orphan_lock);
-
-        /* No-CBs CPUs are handled specially. */
-        if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
-            rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
-                return;
-
-        /* Do the accounting first. */
-        rdp->n_cbs_adopted += rsp->orphan_done.len;
-        if (rsp->orphan_done.len_lazy != rsp->orphan_done.len)
-                rcu_idle_count_callbacks_posted();
-        rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
-
-        /*
-         * We do not need a memory barrier here because the only way we
-         * can get here if there is an rcu_barrier() in flight is if
-         * we are the task doing the rcu_barrier().
-         */
-
-        /* First adopt the ready-to-invoke callbacks, then the done ones. */
-        rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
-        WARN_ON_ONCE(rsp->orphan_done.head);
-        rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
-        WARN_ON_ONCE(rsp->orphan_pend.head);
-        WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
-                     !rcu_segcblist_n_cbs(&rdp->cblist));
-}
-
-/*
  * Trace the fact that this CPU is going offline.
  */
 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
@@ -2704,14 +2622,12 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
 
 /*
  * The CPU has been completely removed, and some other CPU is reporting
- * this fact from process context. Do the remainder of the cleanup,
- * including orphaning the outgoing CPU's RCU callbacks, and also
- * adopting them. There can only be one CPU hotplug operation at a time,
- * so no other CPU can be attempting to update rcu_cpu_kthread_task.
+ * this fact from process context. Do the remainder of the cleanup.
+ * There can only be one CPU hotplug operation at a time, so no need for
+ * explicit locking.
  */
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
-        unsigned long flags;
         struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
         struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
@@ -2720,18 +2636,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
         /* Adjust any no-longer-needed kthreads. */
         rcu_boost_kthread_setaffinity(rnp, -1);
-
-        /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
-        raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
-        rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
-        rcu_adopt_orphan_cbs(rsp, flags);
-        raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
-
-        WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
-                  !rcu_segcblist_empty(&rdp->cblist),
-                  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
-                  cpu, rcu_segcblist_n_cbs(&rdp->cblist),
-                  rcu_segcblist_first_cb(&rdp->cblist));
 }
 
 /*
@@ -3569,10 +3473,11 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
         struct rcu_state *rsp = rdp->rsp;
 
         if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
-                _rcu_barrier_trace(rsp, "LastCB", -1, rsp->barrier_sequence);
+                _rcu_barrier_trace(rsp, TPS("LastCB"), -1,
+                                   rsp->barrier_sequence);
                 complete(&rsp->barrier_completion);
         } else {
-                _rcu_barrier_trace(rsp, "CB", -1, rsp->barrier_sequence);
+                _rcu_barrier_trace(rsp, TPS("CB"), -1, rsp->barrier_sequence);
         }
 }
 
@@ -3584,14 +3489,15 @@ static void rcu_barrier_func(void *type)
         struct rcu_state *rsp = type;
         struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
-        _rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence);
+        _rcu_barrier_trace(rsp, TPS("IRQ"), -1, rsp->barrier_sequence);
         rdp->barrier_head.func = rcu_barrier_callback;
         debug_rcu_head_queue(&rdp->barrier_head);
         if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
                 atomic_inc(&rsp->barrier_cpu_count);
         } else {
                 debug_rcu_head_unqueue(&rdp->barrier_head);
-                _rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence);
+                _rcu_barrier_trace(rsp, TPS("IRQNQ"), -1,
+                                   rsp->barrier_sequence);
         }
 }
 
@@ -3605,14 +3511,15 @@ static void _rcu_barrier(struct rcu_state *rsp)
         struct rcu_data *rdp;
         unsigned long s = rcu_seq_snap(&rsp->barrier_sequence);
 
-        _rcu_barrier_trace(rsp, "Begin", -1, s);
+        _rcu_barrier_trace(rsp, TPS("Begin"), -1, s);
 
         /* Take mutex to serialize concurrent rcu_barrier() requests. */
         mutex_lock(&rsp->barrier_mutex);
 
         /* Did someone else do our work for us? */
         if (rcu_seq_done(&rsp->barrier_sequence, s)) {
-                _rcu_barrier_trace(rsp, "EarlyExit", -1, rsp->barrier_sequence);
+                _rcu_barrier_trace(rsp, TPS("EarlyExit"), -1,
+                                   rsp->barrier_sequence);
                 smp_mb(); /* caller's subsequent code after above check. */
                 mutex_unlock(&rsp->barrier_mutex);
                 return;
@@ -3620,7 +3527,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
         /* Mark the start of the barrier operation. */
         rcu_seq_start(&rsp->barrier_sequence);
-        _rcu_barrier_trace(rsp, "Inc1", -1, rsp->barrier_sequence);
+        _rcu_barrier_trace(rsp, TPS("Inc1"), -1, rsp->barrier_sequence);
 
         /*
          * Initialize the count to one rather than to zero in order to
@@ -3643,10 +3550,10 @@ static void _rcu_barrier(struct rcu_state *rsp)
                 rdp = per_cpu_ptr(rsp->rda, cpu);
                 if (rcu_is_nocb_cpu(cpu)) {
                         if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
-                                _rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
+                                _rcu_barrier_trace(rsp, TPS("OfflineNoCB"), cpu,
                                                    rsp->barrier_sequence);
                         } else {
-                                _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
+                                _rcu_barrier_trace(rsp, TPS("OnlineNoCB"), cpu,
                                                    rsp->barrier_sequence);
                                 smp_mb__before_atomic();
                                 atomic_inc(&rsp->barrier_cpu_count);
@@ -3654,11 +3561,11 @@ static void _rcu_barrier(struct rcu_state *rsp)
                                                rcu_barrier_callback, rsp, cpu, 0);
                         }
                 } else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
-                        _rcu_barrier_trace(rsp, "OnlineQ", cpu,
+                        _rcu_barrier_trace(rsp, TPS("OnlineQ"), cpu,
                                            rsp->barrier_sequence);
                         smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
                 } else {
-                        _rcu_barrier_trace(rsp, "OnlineNQ", cpu,
+                        _rcu_barrier_trace(rsp, TPS("OnlineNQ"), cpu,
                                            rsp->barrier_sequence);
                 }
         }
@@ -3675,7 +3582,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
         wait_for_completion(&rsp->barrier_completion);
 
         /* Mark the end of the barrier operation. */
-        _rcu_barrier_trace(rsp, "Inc2", -1, rsp->barrier_sequence);
+        _rcu_barrier_trace(rsp, TPS("Inc2"), -1, rsp->barrier_sequence);
         rcu_seq_end(&rsp->barrier_sequence);
 
         /* Other rcu_barrier() invocations can now safely proceed. */
@@ -3777,8 +3684,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
          */
        rnp = rdp->mynode;
        raw_spin_lock_rcu_node(rnp);            /* irqs already disabled. */
-       if (!rdp->beenonline)
-               WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
        rdp->beenonline = true;  /* We have now been online. */
        rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
        rdp->completed = rnp->completed;
@@ -3882,6 +3787,8 @@ void rcu_cpu_starting(unsigned int cpu)
 {
         unsigned long flags;
         unsigned long mask;
+        int nbits;
+        unsigned long oldmask;
         struct rcu_data *rdp;
         struct rcu_node *rnp;
         struct rcu_state *rsp;
@@ -3892,9 +3799,15 @@ void rcu_cpu_starting(unsigned int cpu)
                 mask = rdp->grpmask;
                 raw_spin_lock_irqsave_rcu_node(rnp, flags);
                 rnp->qsmaskinitnext |= mask;
+                oldmask = rnp->expmaskinitnext;
                 rnp->expmaskinitnext |= mask;
+                oldmask ^= rnp->expmaskinitnext;
+                nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
+                /* Allow lockless access for expedited grace periods. */
+                smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */
                 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
         }
+        smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -3937,6 +3850,50 @@ void rcu_report_dead(unsigned int cpu)
 
         for_each_rcu_flavor(rsp)
                 rcu_cleanup_dying_idle_cpu(cpu, rsp);
 }
+
+/* Migrate the dead CPU's callbacks to the current CPU. */
+static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
+{
+        unsigned long flags;
+        struct rcu_data *my_rdp;
+        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+        struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+
+        if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
+                return;  /* No callbacks to migrate. */
+
+        local_irq_save(flags);
+        my_rdp = this_cpu_ptr(rsp->rda);
+        if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) {
+                local_irq_restore(flags);
+                return;
+        }
+        raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
+        rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */
+        rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */
+        rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
+        WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
+                     !rcu_segcblist_n_cbs(&my_rdp->cblist));
+        raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
+        WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
+                  !rcu_segcblist_empty(&rdp->cblist),
+                  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
+                  cpu, rcu_segcblist_n_cbs(&rdp->cblist),
+                  rcu_segcblist_first_cb(&rdp->cblist));
+}
+
+/*
+ * The outgoing CPU has just passed through the dying-idle state,
+ * and we are being invoked from the CPU that was IPIed to continue the
+ * offline operation. We need to migrate the outgoing CPU's callbacks.
+ */
+void rcutree_migrate_callbacks(int cpu)
+{
+        struct rcu_state *rsp;
+
+        for_each_rcu_flavor(rsp)
+                rcu_migrate_callbacks(cpu, rsp);
+}
 
 #endif /*
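The central change in this diff is the removal of the per-rcu_state "orphanage" (->orphan_lock, ->orphan_pend, ->orphan_done) in favor of rcutree_migrate_callbacks(), which splices the outgoing CPU's callback list directly onto the list of the CPU that continues the offline operation. What follows is an illustrative, self-contained sketch of that splice-and-empty idea; the cb_list type and helper names here are hypothetical stand-ins, not the kernel's rcu_segcblist API, and the locking, no-CBs handling, and rcu_advance_cbs() grace-period bookkeeping of the real function are deliberately omitted.

/*
 * Sketch only: a simplified model of migrating a dead CPU's callbacks
 * onto a surviving CPU's list.  Names are invented for illustration.
 */
#include <stdio.h>
#include <stddef.h>

struct cb {
        struct cb *next;
        void (*func)(struct cb *);
};

struct cb_list {
        struct cb *head;
        struct cb **tail;       /* points at the final ->next pointer */
        long len;
};

static void cb_list_init(struct cb_list *l)
{
        l->head = NULL;
        l->tail = &l->head;
        l->len = 0;
}

static void cb_list_enqueue(struct cb_list *l, struct cb *c)
{
        c->next = NULL;
        *l->tail = c;
        l->tail = &c->next;
        l->len++;
}

/* Splice all callbacks from @from onto the end of @to, leaving @from empty. */
static void cb_list_merge(struct cb_list *to, struct cb_list *from)
{
        if (!from->head)
                return;                 /* Nothing to migrate. */
        *to->tail = from->head;
        to->tail = from->tail;
        to->len += from->len;
        cb_list_init(from);             /* Outgoing CPU ends up with no callbacks. */
}

static void show(struct cb *c)
{
        printf("callback %p invoked\n", (void *)c);
}

int main(void)
{
        struct cb_list survivor, outgoing;
        struct cb a, b;

        cb_list_init(&survivor);
        cb_list_init(&outgoing);
        a.func = b.func = show;
        cb_list_enqueue(&outgoing, &a);
        cb_list_enqueue(&outgoing, &b);

        /* Conceptually what rcutree_migrate_callbacks() does for the dead CPU. */
        cb_list_merge(&survivor, &outgoing);
        printf("survivor now has %ld callbacks, outgoing has %ld\n",
               survivor.len, outgoing.len);

        for (struct cb *c = survivor.head; c; c = c->next)
                c->func(c);
        return 0;
}

Keeping a tail pointer makes the splice constant-time in this sketch regardless of how many callbacks the outgoing CPU had queued.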
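The rcu_cpu_starting() hunks also change how ->ncpus is maintained: instead of incrementing it in rcu_init_percpu_data(), the incoming CPU XORs the old and new ->expmaskinitnext values to isolate the bits it just set, counts them with bitmap_weight(), and publishes the new total with smp_store_release() so that the expedited grace-period code can read it without locking. Below is a minimal user-space sketch of that count-and-publish pattern; C11 atomics stand in for the kernel's barrier primitives, and the single-word mask, the cpu_starting() helper, and the ncpus variable are assumptions made for illustration.

/*
 * Sketch only: count newly set mask bits and publish the updated CPU
 * count with a release store.  Updaters are assumed serialized, as the
 * kernel's are by the rcu_node lock.
 */
#include <stdio.h>
#include <stdatomic.h>

static unsigned long expmaskinitnext;   /* one bit per CPU in this "node" */
static atomic_long ncpus;               /* published CPU count */

static void cpu_starting(unsigned int cpu)
{
        unsigned long mask = 1UL << cpu;
        unsigned long oldmask = expmaskinitnext;
        int nbits;

        expmaskinitnext |= mask;
        oldmask ^= expmaskinitnext;                     /* bits that just turned on */
        nbits = __builtin_popcountl(oldmask);           /* bitmap_weight() equivalent */

        /* Release store: readers that see the new count also see the mask update. */
        atomic_store_explicit(&ncpus,
                              atomic_load_explicit(&ncpus, memory_order_relaxed) + nbits,
                              memory_order_release);
}

int main(void)
{
        cpu_starting(0);
        cpu_starting(3);
        cpu_starting(3);        /* already marked: contributes nothing */
        printf("ncpus = %ld\n",
               atomic_load_explicit(&ncpus, memory_order_acquire));
        return 0;
}

The XOR-then-popcount step is what keeps a repeated notification for an already-marked CPU from inflating the count, since only bits that actually transitioned from 0 to 1 survive the XOR.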