From a4fbe35a124526e6759be07bd9c7ea796ba1e00d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 7 Oct 2012 08:36:12 -0700 Subject: rcu: Grace-period initialization excludes only RCU notifier Kirill noted the following deadlock cycle on shutdown involving padata: > With commit 755609a9087fa983f567dc5452b2fa7b089b591f I've got deadlock on > poweroff. > > It guess it happens because of race for cpu_hotplug.lock: > > CPU A CPU B > disable_nonboot_cpus() > _cpu_down() > cpu_hotplug_begin() > mutex_lock(&cpu_hotplug.lock); > __cpu_notify() > padata_cpu_callback() > __padata_remove_cpu() > padata_replace() > synchronize_rcu() > rcu_gp_kthread() > get_online_cpus(); > mutex_lock(&cpu_hotplug.lock); It would of course be good to eliminate grace-period delays from CPU-hotplug notifiers, but that is a separate issue. Deadlock is not an appropriate diagnostic for excessive CPU-hotplug latency. Fortunately, grace-period initialization does not actually need to exclude all of the CPU-hotplug operation, but rather only RCU's own CPU_UP_PREPARE and CPU_DEAD CPU-hotplug notifiers. This commit therefore introduces a new per-rcu_state onoff_mutex that provides the required concurrency control in place of the get_online_cpus() that was previously in rcu_gp_init(). Reported-by: "Kirill A. Shutemov" Signed-off-by: Paul E. McKenney Tested-by: Kirill A. Shutemov --- kernel/rcutree.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4fb2376ddf06..74df86bd9204 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -74,6 +74,7 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; .orphan_nxttail = &sname##_state.orphan_nxtlist, \ .orphan_donetail = &sname##_state.orphan_donelist, \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ + .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ .name = #sname, \ } @@ -1197,7 +1198,7 @@ static int rcu_gp_init(struct rcu_state *rsp) raw_spin_unlock_irq(&rnp->lock); /* Exclude any concurrent CPU-hotplug operations. */ - get_online_cpus(); + mutex_lock(&rsp->onoff_mutex); /* * Set the quiescent-state-needed bits in all the rcu_node @@ -1234,7 +1235,7 @@ static int rcu_gp_init(struct rcu_state *rsp) cond_resched(); } - put_online_cpus(); + mutex_unlock(&rsp->onoff_mutex); return 1; } @@ -1700,6 +1701,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ /* Exclude any attempts to start a new grace period. */ + mutex_lock(&rsp->onoff_mutex); raw_spin_lock_irqsave(&rsp->onofflock, flags); /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ @@ -1744,6 +1746,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) init_callback_list(rdp); /* Disallow further callbacks on this CPU. */ rdp->nxttail[RCU_NEXT_TAIL] = NULL; + mutex_unlock(&rsp->onoff_mutex); } #else /* #ifdef CONFIG_HOTPLUG_CPU */ @@ -2648,6 +2651,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rcu_get_root(rsp); + /* Exclude new grace periods. */ + mutex_lock(&rsp->onoff_mutex); + /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave(&rnp->lock, flags); rdp->beenonline = 1; /* We have now been online. */ @@ -2662,14 +2668,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rcu_prepare_for_idle_init(cpu); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ - /* - * A new grace period might start here. If so, we won't be part - * of it, but that is OK, as we are currently in a quiescent state. - */ - - /* Exclude any attempts to start a new GP on large systems. */ - raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ - /* Add CPU to rcu_node bitmasks. */ rnp = rdp->mynode; mask = rdp->grpmask; @@ -2693,8 +2691,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ rnp = rnp->parent; } while (rnp != NULL && !(rnp->qsmaskinit & mask)); + local_irq_restore(flags); - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); + mutex_unlock(&rsp->onoff_mutex); } static void __cpuinit rcu_prepare_cpu(int cpu) -- cgit v1.2.3