From f885b7f2b2de70be266d2cecc476f773a1e2ca5d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 23 Apr 2012 15:52:53 -0700 Subject: rcu: Control RCU_FANOUT_LEAF from boot-time parameter Although making RCU_FANOUT_LEAF a kernel configuration parameter rather than a fixed constant makes it easier for people to decrease cache-miss overhead for large systems, it is of little help for people who must run a single pre-built kernel binary. This commit therefore allows the value of RCU_FANOUT_LEAF to be increased (but not decreased!) via a boot-time parameter named rcutree.rcu_fanout_leaf. Reported-by: Mike Galbraith Signed-off-by: Paul E. McKenney --- kernel/rcutree.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'kernel/rcutree.h') diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 19b61ac1079f..780a0195d35a 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -42,28 +42,28 @@ #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) #if NR_CPUS <= RCU_FANOUT_1 -# define NUM_RCU_LVLS 1 +# define RCU_NUM_LVLS 1 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 (NR_CPUS) # define NUM_RCU_LVL_2 0 # define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_2 -# define NUM_RCU_LVLS 2 +# define RCU_NUM_LVLS 2 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) # define NUM_RCU_LVL_2 (NR_CPUS) # define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_3 -# define NUM_RCU_LVLS 3 +# define RCU_NUM_LVLS 3 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) # define NUM_RCU_LVL_3 (NR_CPUS) # define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_4 -# define NUM_RCU_LVLS 4 +# define RCU_NUM_LVLS 4 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) @@ -76,6 +76,9 @@ #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) +extern int rcu_num_lvls; +extern int rcu_num_nodes; + /* * Dynticks per-CPU state. */ @@ -206,7 +209,7 @@ struct rcu_node { */ #define rcu_for_each_node_breadth_first(rsp, rnp) \ for ((rnp) = &(rsp)->node[0]; \ - (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) /* * Do a breadth-first scan of the non-leaf rcu_node structures for the @@ -215,7 +218,7 @@ struct rcu_node { */ #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ for ((rnp) = &(rsp)->node[0]; \ - (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) + (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) /* * Scan the leaves of the rcu_node hierarchy for the specified rcu_state @@ -224,8 +227,8 @@ struct rcu_node { * It is still a leaf node, even if it is also the root node. */ #define rcu_for_each_leaf_node(rsp, rnp) \ - for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ - (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) + for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) /* Index values for nxttail array in struct rcu_data. */ #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ @@ -357,9 +360,9 @@ do { \ */ struct rcu_state { struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ - struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ + struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */ u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ - u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ + u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ /* The following fields are guarded by the root rcu_node's lock. */ -- cgit v1.2.3 From 037b64ed0bf2405a1a01542164d3418564b44fff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 May 2012 23:26:01 -0700 Subject: rcu: Place pointer to call_rcu() in rcu_data structure This is a preparatory commit for increasing rcu_barrier()'s concurrency. It adds a pointer in the rcu_data structure to the corresponding call_rcu() function. This allows a pointer to the rcu_data structure to imply the function pointer, which allows _rcu_barrier() state to be placed in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 27 ++++++++++++--------------- kernel/rcutree.h | 2 ++ kernel/rcutree_plugin.h | 5 +++-- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'kernel/rcutree.h') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index dd7fd96c90c5..00c518fa34bb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -62,8 +62,9 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; -#define RCU_STATE_INITIALIZER(sname) { \ +#define RCU_STATE_INITIALIZER(sname, cr) { \ .level = { &sname##_state.node[0] }, \ + .call = cr, \ .fqs_state = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ @@ -76,10 +77,11 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; .name = #sname, \ } -struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); +struct rcu_state rcu_sched_state = + RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; @@ -2282,21 +2284,17 @@ static void rcu_barrier_func(void *type) { int cpu = smp_processor_id(); struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); - void (*call_rcu_func)(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + struct rcu_state *rsp = type; atomic_inc(&rcu_barrier_cpu_count); - call_rcu_func = type; - call_rcu_func(head, rcu_barrier_callback); + rsp->call(head, rcu_barrier_callback); } /* * Orchestrate the specified type of RCU barrier, waiting for all * RCU callbacks of the specified type to complete. */ -static void _rcu_barrier(struct rcu_state *rsp, - void (*call_rcu_func)(struct rcu_head *head, - void (*func)(struct rcu_head *head))) +static void _rcu_barrier(struct rcu_state *rsp) { int cpu; unsigned long flags; @@ -2348,8 +2346,7 @@ static void _rcu_barrier(struct rcu_state *rsp, while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) schedule_timeout_interruptible(1); } else if (ACCESS_ONCE(rdp->qlen)) { - smp_call_function_single(cpu, rcu_barrier_func, - (void *)call_rcu_func, 1); + smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); preempt_enable(); } else { preempt_enable(); @@ -2370,7 +2367,7 @@ static void _rcu_barrier(struct rcu_state *rsp, raw_spin_unlock_irqrestore(&rsp->onofflock, flags); atomic_inc(&rcu_barrier_cpu_count); smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ - call_rcu_func(&rh, rcu_barrier_callback); + rsp->call(&rh, rcu_barrier_callback); /* * Now that we have an rcu_barrier_callback() callback on each @@ -2393,7 +2390,7 @@ static void _rcu_barrier(struct rcu_state *rsp, */ void rcu_barrier_bh(void) { - _rcu_barrier(&rcu_bh_state, call_rcu_bh); + _rcu_barrier(&rcu_bh_state); } EXPORT_SYMBOL_GPL(rcu_barrier_bh); @@ -2402,7 +2399,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh); */ void rcu_barrier_sched(void) { - _rcu_barrier(&rcu_sched_state, call_rcu_sched); + _rcu_barrier(&rcu_sched_state); } EXPORT_SYMBOL_GPL(rcu_barrier_sched); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 780a0195d35a..049896a835d9 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -364,6 +364,8 @@ struct rcu_state { u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ + void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ + void (*func)(struct rcu_head *head)); /* The following fields are guarded by the root rcu_node's lock. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ef2b5231afa4..9cb3a68819fa 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -78,7 +78,8 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_TREE_PREEMPT_RCU -struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); +struct rcu_state rcu_preempt_state = + RCU_STATE_INITIALIZER(rcu_preempt, call_rcu); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); static struct rcu_state *rcu_state = &rcu_preempt_state; @@ -944,7 +945,7 @@ static int rcu_preempt_cpu_has_callbacks(int cpu) */ void rcu_barrier(void) { - _rcu_barrier(&rcu_preempt_state, call_rcu); + _rcu_barrier(&rcu_preempt_state); } EXPORT_SYMBOL_GPL(rcu_barrier); -- cgit v1.2.3 From 06668efa9180f4824fe846a8ff96338c18646bc7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 May 2012 23:57:46 -0700 Subject: rcu: Move _rcu_barrier()'s rcu_head structures to rcu_data structures In order for multiple flavors of RCU to each concurrently run one rcu_barrier(), each flavor needs its own per-CPU set of rcu_head structures. This commit therefore moves _rcu_barrier()'s set of per-CPU rcu_head structures from per-CPU variables to the existing per-CPU and per-RCU-flavor rcu_data structures. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 6 ++---- kernel/rcutree.h | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel/rcutree.h') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 00c518fa34bb..1e552598b55d 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -157,7 +157,6 @@ unsigned long rcutorture_vernum; /* State information for rcu_barrier() and friends. */ -static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; @@ -2282,12 +2281,11 @@ static void rcu_barrier_callback(struct rcu_head *notused) */ static void rcu_barrier_func(void *type) { - int cpu = smp_processor_id(); - struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); struct rcu_state *rsp = type; + struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); atomic_inc(&rcu_barrier_cpu_count); - rsp->call(head, rcu_barrier_callback); + rsp->call(&rdp->barrier_head, rcu_barrier_callback); } /* diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 049896a835d9..586d93c978f2 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -314,6 +314,9 @@ struct rcu_data { unsigned long n_rp_need_fqs; unsigned long n_rp_need_nothing; + /* 6) _rcu_barrier() callback. */ + struct rcu_head barrier_head; + int cpu; struct rcu_state *rsp; }; -- cgit v1.2.3 From 24ebbca8ecdd5129d7f829a7cb5146aaeb531f77 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 00:34:56 -0700 Subject: rcu: Move rcu_barrier_cpu_count to rcu_state structure In order to allow each RCU flavor to concurrently execute its rcu_barrier() function, it is necessary to move the relevant state to the rcu_state structure. This commit therefore moves the rcu_barrier_cpu_count global variable to a new ->barrier_cpu_count field in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 25 ++++++++++++++----------- kernel/rcutree.h | 1 + 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'kernel/rcutree.h') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 1e552598b55d..5929b021666d 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -157,7 +157,6 @@ unsigned long rcutorture_vernum; /* State information for rcu_barrier() and friends. */ -static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; @@ -2270,9 +2269,12 @@ static int rcu_cpu_has_callbacks(int cpu) * RCU callback function for _rcu_barrier(). If we are last, wake * up the task executing _rcu_barrier(). */ -static void rcu_barrier_callback(struct rcu_head *notused) +static void rcu_barrier_callback(struct rcu_head *rhp) { - if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); + struct rcu_state *rsp = rdp->rsp; + + if (atomic_dec_and_test(&rsp->barrier_cpu_count)) complete(&rcu_barrier_completion); } @@ -2284,7 +2286,7 @@ static void rcu_barrier_func(void *type) struct rcu_state *rsp = type; struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); - atomic_inc(&rcu_barrier_cpu_count); + atomic_inc(&rsp->barrier_cpu_count); rsp->call(&rdp->barrier_head, rcu_barrier_callback); } @@ -2297,9 +2299,9 @@ static void _rcu_barrier(struct rcu_state *rsp) int cpu; unsigned long flags; struct rcu_data *rdp; - struct rcu_head rh; + struct rcu_data rd; - init_rcu_head_on_stack(&rh); + init_rcu_head_on_stack(&rd.barrier_head); /* Take mutex to serialize concurrent rcu_barrier() requests. */ mutex_lock(&rcu_barrier_mutex); @@ -2324,7 +2326,7 @@ static void _rcu_barrier(struct rcu_state *rsp) * us -- but before CPU 1's orphaned callbacks are invoked!!! */ init_completion(&rcu_barrier_completion); - atomic_set(&rcu_barrier_cpu_count, 1); + atomic_set(&rsp->barrier_cpu_count, 1); raw_spin_lock_irqsave(&rsp->onofflock, flags); rsp->rcu_barrier_in_progress = current; raw_spin_unlock_irqrestore(&rsp->onofflock, flags); @@ -2363,15 +2365,16 @@ static void _rcu_barrier(struct rcu_state *rsp) rcu_adopt_orphan_cbs(rsp); rsp->rcu_barrier_in_progress = NULL; raw_spin_unlock_irqrestore(&rsp->onofflock, flags); - atomic_inc(&rcu_barrier_cpu_count); + atomic_inc(&rsp->barrier_cpu_count); smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ - rsp->call(&rh, rcu_barrier_callback); + rd.rsp = rsp; + rsp->call(&rd.barrier_head, rcu_barrier_callback); /* * Now that we have an rcu_barrier_callback() callback on each * CPU, and thus each counted, remove the initial count. */ - if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + if (atomic_dec_and_test(&rsp->barrier_cpu_count)) complete(&rcu_barrier_completion); /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ @@ -2380,7 +2383,7 @@ static void _rcu_barrier(struct rcu_state *rsp) /* Other rcu_barrier() invocations can now safely proceed. */ mutex_unlock(&rcu_barrier_mutex); - destroy_rcu_head_on_stack(&rh); + destroy_rcu_head_on_stack(&rd.barrier_head); } /** diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 586d93c978f2..c57ef0b7f097 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -400,6 +400,7 @@ struct rcu_state { struct task_struct *rcu_barrier_in_progress; /* Task doing rcu_barrier(), */ /* or NULL if no barrier. */ + atomic_t barrier_cpu_count; /* # CPUs waiting on. */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ -- cgit v1.2.3 From 7db74df88b52844f4e966901e2972bba725e6766 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 03:03:37 -0700 Subject: rcu: Move rcu_barrier_completion to rcu_state structure In order to allow each RCU flavor to concurrently execute its rcu_barrier() function, it is necessary to move the relevant state to the rcu_state structure. This commit therefore moves the rcu_barrier_completion global variable to a new ->barrier_completion field in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 9 ++++----- kernel/rcutree.h | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/rcutree.h') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 5929b021666d..ca7d1678ac79 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -158,7 +158,6 @@ unsigned long rcutorture_vernum; /* State information for rcu_barrier() and friends. */ static DEFINE_MUTEX(rcu_barrier_mutex); -static struct completion rcu_barrier_completion; /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s @@ -2275,7 +2274,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp) struct rcu_state *rsp = rdp->rsp; if (atomic_dec_and_test(&rsp->barrier_cpu_count)) - complete(&rcu_barrier_completion); + complete(&rsp->barrier_completion); } /* @@ -2325,7 +2324,7 @@ static void _rcu_barrier(struct rcu_state *rsp) * 6. Both rcu_barrier_callback() callbacks are invoked, awakening * us -- but before CPU 1's orphaned callbacks are invoked!!! */ - init_completion(&rcu_barrier_completion); + init_completion(&rsp->barrier_completion); atomic_set(&rsp->barrier_cpu_count, 1); raw_spin_lock_irqsave(&rsp->onofflock, flags); rsp->rcu_barrier_in_progress = current; @@ -2375,10 +2374,10 @@ static void _rcu_barrier(struct rcu_state *rsp) * CPU, and thus each counted, remove the initial count. */ if (atomic_dec_and_test(&rsp->barrier_cpu_count)) - complete(&rcu_barrier_completion); + complete(&rsp->barrier_completion); /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ - wait_for_completion(&rcu_barrier_completion); + wait_for_completion(&rsp->barrier_completion); /* Other rcu_barrier() invocations can now safely proceed. */ mutex_unlock(&rcu_barrier_mutex); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index c57ef0b7f097..d1ca4424122b 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -401,6 +401,7 @@ struct rcu_state { /* Task doing rcu_barrier(), */ /* or NULL if no barrier. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ + struct completion barrier_completion; /* Wake at barrier end. */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ -- cgit v1.2.3 From 7be7f0be907224445acc62b3884c892f38b7ff40 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 05:18:53 -0700 Subject: rcu: Move rcu_barrier_mutex to rcu_state structure In order to allow each RCU flavor to concurrently execute its rcu_barrier() function, it is necessary to move the relevant state to the rcu_state structure. This commit therefore moves the rcu_barrier_mutex global variable to a new ->barrier_mutex field in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 9 +++------ kernel/rcutree.h | 1 + 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'kernel/rcutree.h') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ca7d1678ac79..ff992ac24e73 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -71,6 +71,7 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \ .orphan_nxttail = &sname##_state.orphan_nxtlist, \ .orphan_donetail = &sname##_state.orphan_donelist, \ + .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ @@ -155,10 +156,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); unsigned long rcutorture_testseq; unsigned long rcutorture_vernum; -/* State information for rcu_barrier() and friends. */ - -static DEFINE_MUTEX(rcu_barrier_mutex); - /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -2303,7 +2300,7 @@ static void _rcu_barrier(struct rcu_state *rsp) init_rcu_head_on_stack(&rd.barrier_head); /* Take mutex to serialize concurrent rcu_barrier() requests. */ - mutex_lock(&rcu_barrier_mutex); + mutex_lock(&rsp->barrier_mutex); smp_mb(); /* Prevent any prior operations from leaking in. */ @@ -2380,7 +2377,7 @@ static void _rcu_barrier(struct rcu_state *rsp) wait_for_completion(&rsp->barrier_completion); /* Other rcu_barrier() invocations can now safely proceed. */ - mutex_unlock(&rcu_barrier_mutex); + mutex_unlock(&rsp->barrier_mutex); destroy_rcu_head_on_stack(&rd.barrier_head); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index d1ca4424122b..7641aec3e59c 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -400,6 +400,7 @@ struct rcu_state { struct task_struct *rcu_barrier_in_progress; /* Task doing rcu_barrier(), */ /* or NULL if no barrier. */ + struct mutex barrier_mutex; /* Guards barrier fields. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ struct completion barrier_completion; /* Wake at barrier end. */ raw_spinlock_t fqslock; /* Only one task forcing */ -- cgit v1.2.3 From cf3a9c4842b1e097dbe0854933c471d43dd24f69 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 14:56:46 -0700 Subject: rcu: Increase rcu_barrier() concurrency The traditional rcu_barrier() implementation has serialized all requests, regardless of RCU flavor, and also does not coalesce concurrent requests. In the past, this has been good and sufficient. However, systems are getting larger and use of rcu_barrier() has been increasing. This commit therefore introduces a counter-based scheme that allows _rcu_barrier() calls for the same flavor of RCU to take advantage of each others' work. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 36 +++++++++++++++++++++++++++++++++++- kernel/rcutree.h | 2 ++ 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'kernel/rcutree.h') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 44a8fda9be86..6bb5d562253f 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2294,13 +2294,41 @@ static void _rcu_barrier(struct rcu_state *rsp) unsigned long flags; struct rcu_data *rdp; struct rcu_data rd; + unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); + unsigned long snap_done; init_rcu_head_on_stack(&rd.barrier_head); /* Take mutex to serialize concurrent rcu_barrier() requests. */ mutex_lock(&rsp->barrier_mutex); - smp_mb(); /* Prevent any prior operations from leaking in. */ + /* + * Ensure that all prior references, including to ->n_barrier_done, + * are ordered before the _rcu_barrier() machinery. + */ + smp_mb(); /* See above block comment. */ + + /* + * Recheck ->n_barrier_done to see if others did our work for us. + * This means checking ->n_barrier_done for an even-to-odd-to-even + * transition. The "if" expression below therefore rounds the old + * value up to the next even number and adds two before comparing. + */ + snap_done = ACCESS_ONCE(rsp->n_barrier_done); + if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { + smp_mb(); /* caller's subsequent code after above check. */ + mutex_unlock(&rsp->barrier_mutex); + return; + } + + /* + * Increment ->n_barrier_done to avoid duplicate work. Use + * ACCESS_ONCE() to prevent the compiler from speculating + * the increment to precede the early-exit check. + */ + ACCESS_ONCE(rsp->n_barrier_done)++; + WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); + smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ /* * Initialize the count to one rather than to zero in order to @@ -2371,6 +2399,12 @@ static void _rcu_barrier(struct rcu_state *rsp) if (atomic_dec_and_test(&rsp->barrier_cpu_count)) complete(&rsp->barrier_completion); + /* Increment ->n_barrier_done to prevent duplicate work. */ + smp_mb(); /* Keep increment after above mechanism. */ + ACCESS_ONCE(rsp->n_barrier_done)++; + WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); + smp_mb(); /* Keep increment before caller's subsequent code. */ + /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ wait_for_completion(&rsp->barrier_completion); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7641aec3e59c..be10286ad380 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -403,6 +403,8 @@ struct rcu_state { struct mutex barrier_mutex; /* Guards barrier fields. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ struct completion barrier_completion; /* Wake at barrier end. */ + unsigned long n_barrier_done; /* ++ at start and end of */ + /* _rcu_barrier(). */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ -- cgit v1.2.3 From 6ce75a2326e6f8b3bdfb60e1de7934b89858e87b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jun 2012 11:01:13 -0700 Subject: rcu: Introduce for_each_rcu_flavor() and use it The arrival of TREE_PREEMPT_RCU some years back included some ugly code involving either #ifdef or #ifdef'ed wrapper functions to iterate over all non-SRCU flavors of RCU. This commit therefore introduces a for_each_rcu_flavor() iterator over the rcu_state structures for each flavor of RCU to clean up a bit of the ugliness. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 53 +++++++++++++--------- kernel/rcutree.h | 12 +++-- kernel/rcutree_plugin.h | 116 ------------------------------------------------ 3 files changed, 37 insertions(+), 144 deletions(-) (limited to 'kernel/rcutree.h') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 5376a156be8a..b61c3ffc80e9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -84,6 +84,7 @@ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; +LIST_HEAD(rcu_struct_flavors); /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; @@ -860,9 +861,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) */ void rcu_cpu_stall_reset(void) { - rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; - rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; - rcu_preempt_stall_reset(); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + rsp->jiffies_stall = jiffies + ULONG_MAX / 2; } static struct notifier_block rcu_panic_block = { @@ -1827,10 +1829,11 @@ __rcu_process_callbacks(struct rcu_state *rsp) */ static void rcu_process_callbacks(struct softirq_action *unused) { + struct rcu_state *rsp; + trace_rcu_utilization("Start RCU core"); - __rcu_process_callbacks(&rcu_sched_state); - __rcu_process_callbacks(&rcu_bh_state); - rcu_preempt_process_callbacks(); + for_each_rcu_flavor(rsp) + __rcu_process_callbacks(rsp); trace_rcu_utilization("End RCU core"); } @@ -2241,9 +2244,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) */ static int rcu_pending(int cpu) { - return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || - __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || - rcu_preempt_pending(cpu); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu))) + return 1; + return 0; } /* @@ -2253,10 +2259,13 @@ static int rcu_pending(int cpu) */ static int rcu_cpu_has_callbacks(int cpu) { + struct rcu_state *rsp; + /* RCU callbacks either ready or pending? */ - return per_cpu(rcu_sched_data, cpu).nxtlist || - per_cpu(rcu_bh_data, cpu).nxtlist || - rcu_preempt_cpu_has_callbacks(cpu); + for_each_rcu_flavor(rsp) + if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) + return 1; + return 0; } /* @@ -2551,9 +2560,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) static void __cpuinit rcu_prepare_cpu(int cpu) { - rcu_init_percpu_data(cpu, &rcu_sched_state, 0); - rcu_init_percpu_data(cpu, &rcu_bh_state, 0); - rcu_preempt_init_percpu_data(cpu); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + rcu_init_percpu_data(cpu, rsp, + strcmp(rsp->name, "rcu_preempt") == 0); } /* @@ -2565,6 +2576,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, long cpu = (long)hcpu; struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; + struct rcu_state *rsp; trace_rcu_utilization("Start CPU hotplug"); switch (action) { @@ -2589,18 +2601,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, * touch any data without introducing corruption. We send the * dying CPU's callbacks to an arbitrarily chosen online CPU. */ - rcu_cleanup_dying_cpu(&rcu_bh_state); - rcu_cleanup_dying_cpu(&rcu_sched_state); - rcu_preempt_cleanup_dying_cpu(); + for_each_rcu_flavor(rsp) + rcu_cleanup_dying_cpu(rsp); rcu_cleanup_after_idle(cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: - rcu_cleanup_dead_cpu(cpu, &rcu_bh_state); - rcu_cleanup_dead_cpu(cpu, &rcu_sched_state); - rcu_preempt_cleanup_dead_cpu(cpu); + for_each_rcu_flavor(rsp) + rcu_cleanup_dead_cpu(cpu, rsp); break; default: break; @@ -2717,6 +2727,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, per_cpu_ptr(rsp->rda, i)->mynode = rnp; rcu_boot_init_percpu_data(i, rsp); } + list_add(&rsp->flavors, &rcu_struct_flavors); } /* diff --git a/kernel/rcutree.h b/kernel/rcutree.h index be10286ad380..b92c4550a6e6 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -422,8 +422,13 @@ struct rcu_state { unsigned long gp_max; /* Maximum GP duration in */ /* jiffies. */ char *name; /* Name of structure. */ + struct list_head flavors; /* List of RCU flavors. */ }; +extern struct list_head rcu_struct_flavors; +#define for_each_rcu_flavor(rsp) \ + list_for_each_entry((rsp), &rcu_struct_flavors, flavors) + /* Return values for rcu_preempt_offline_tasks(). */ #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ @@ -466,25 +471,18 @@ static void rcu_stop_cpu_kthread(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); -static void rcu_preempt_stall_reset(void); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static int rcu_preempt_offline_tasks(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -static void rcu_preempt_cleanup_dead_cpu(int cpu); static void rcu_preempt_check_callbacks(int cpu); -static void rcu_preempt_process_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, bool wake); #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ -static int rcu_preempt_pending(int cpu); -static int rcu_preempt_cpu_has_callbacks(int cpu); -static void __cpuinit rcu_preempt_init_percpu_data(int cpu); -static void rcu_preempt_cleanup_dying_cpu(void); static void __init __rcu_init_preempt(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 5a80cdd9a0a3..d18b4d383afe 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -544,16 +544,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return ndetected; } -/* - * Suppress preemptible RCU's CPU stall warnings by pushing the - * time of the next stall-warning message comfortably far into the - * future. - */ -static void rcu_preempt_stall_reset(void) -{ - rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; -} - /* * Check that the list of blocked tasks for the newly completed grace * period is in fact empty. It is a serious bug to complete a grace @@ -654,14 +644,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -/* - * Do CPU-offline processing for preemptible RCU. - */ -static void rcu_preempt_cleanup_dead_cpu(int cpu) -{ - rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state); -} - /* * Check for a quiescent state from the current CPU. When a task blocks, * the task is recorded in the corresponding CPU's rcu_node structure, @@ -682,14 +664,6 @@ static void rcu_preempt_check_callbacks(int cpu) t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; } -/* - * Process callbacks for preemptible RCU. - */ -static void rcu_preempt_process_callbacks(void) -{ - __rcu_process_callbacks(&rcu_preempt_state); -} - #ifdef CONFIG_RCU_BOOST static void rcu_preempt_do_callbacks(void) @@ -921,24 +895,6 @@ mb_ret: } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -/* - * Check to see if there is any immediate preemptible-RCU-related work - * to be done. - */ -static int rcu_preempt_pending(int cpu) -{ - return __rcu_pending(&rcu_preempt_state, - &per_cpu(rcu_preempt_data, cpu)); -} - -/* - * Does preemptible RCU have callbacks on this CPU? - */ -static int rcu_preempt_cpu_has_callbacks(int cpu) -{ - return !!per_cpu(rcu_preempt_data, cpu).nxtlist; -} - /** * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. */ @@ -948,23 +904,6 @@ void rcu_barrier(void) } EXPORT_SYMBOL_GPL(rcu_barrier); -/* - * Initialize preemptible RCU's per-CPU data. - */ -static void __cpuinit rcu_preempt_init_percpu_data(int cpu) -{ - rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); -} - -/* - * Move preemptible RCU's callbacks from dying CPU to other online CPU - * and record a quiescent state. - */ -static void rcu_preempt_cleanup_dying_cpu(void) -{ - rcu_cleanup_dying_cpu(&rcu_preempt_state); -} - /* * Initialize preemptible RCU's state structures. */ @@ -1049,14 +988,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return 0; } -/* - * Because preemptible RCU does not exist, there is no need to suppress - * its CPU stall warnings. - */ -static void rcu_preempt_stall_reset(void) -{ -} - /* * Because there is no preemptible RCU, there can be no readers blocked, * so there is no need to check for blocked tasks. So check only for @@ -1084,14 +1015,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -/* - * Because preemptible RCU does not exist, it never needs CPU-offline - * processing. - */ -static void rcu_preempt_cleanup_dead_cpu(int cpu) -{ -} - /* * Because preemptible RCU does not exist, it never has any callbacks * to check. @@ -1100,14 +1023,6 @@ static void rcu_preempt_check_callbacks(int cpu) { } -/* - * Because preemptible RCU does not exist, it never has any callbacks - * to process. - */ -static void rcu_preempt_process_callbacks(void) -{ -} - /* * Queue an RCU callback for lazy invocation after a grace period. * This will likely be later named something like "call_rcu_lazy()", @@ -1148,22 +1063,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -/* - * Because preemptible RCU does not exist, it never has any work to do. - */ -static int rcu_preempt_pending(int cpu) -{ - return 0; -} - -/* - * Because preemptible RCU does not exist, it never has callbacks - */ -static int rcu_preempt_cpu_has_callbacks(int cpu) -{ - return 0; -} - /* * Because preemptible RCU does not exist, rcu_barrier() is just * another name for rcu_barrier_sched(). @@ -1174,21 +1073,6 @@ void rcu_barrier(void) } EXPORT_SYMBOL_GPL(rcu_barrier); -/* - * Because preemptible RCU does not exist, there is no per-CPU - * data to initialize. - */ -static void __cpuinit rcu_preempt_init_percpu_data(int cpu) -{ -} - -/* - * Because there is no preemptible RCU, there is no cleanup to do. - */ -static void rcu_preempt_cleanup_dying_cpu(void) -{ -} - /* * Because preemptible RCU does not exist, it need not be initialized. */ -- cgit v1.2.3 From 9d2ad24306f2fafc3612e5a216aab31f9e56e879 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 24 Jun 2012 10:15:02 -0700 Subject: rcu: Make RCU_FAST_NO_HZ respect nohz= boot parameter If the nohz= boot parameter disables nohz, then RCU_FAST_NO_HZ needs to also disable itself. This commit therefore checks for tick_nohz_enabled being zero, disabling rcu_prepare_for_idle() if so. This commit assumes that tick_nohz_enabled can change at runtime: If this is not the case, then a simpler approach suffices. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 15 +++++++++++++++ kernel/time/tick-sched.c | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'kernel/rcutree.h') diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 19b61ac1079f..e978845c6800 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -97,6 +97,7 @@ struct rcu_dynticks { /* # times non-lazy CBs posted to CPU. */ unsigned long nonlazy_posted_snap; /* idle-period nonlazy_posted snapshot. */ + int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c28d25522a81..3508000e077d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1971,6 +1971,8 @@ static void rcu_idle_count_callbacks_posted(void) #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ +extern int tick_nohz_enabled; + /* * Does the specified flavor of RCU have non-lazy callbacks pending on * the specified CPU? Both RCU flavor and CPU are specified by the @@ -2112,6 +2114,7 @@ static void rcu_cleanup_after_idle(int cpu) del_timer(&rdtp->idle_gp_timer); trace_rcu_prep_idle("Cleanup after idle"); + rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); } /* @@ -2137,6 +2140,18 @@ static void rcu_prepare_for_idle(int cpu) { struct timer_list *tp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + int tne; + + /* Handle nohz enablement switches conservatively. */ + tne = ACCESS_ONCE(tick_nohz_enabled); + if (tne != rdtp->tick_nohz_enabled_snap) { + if (rcu_cpu_has_callbacks(cpu)) + invoke_rcu_core(); /* force nohz to see update. */ + rdtp->tick_nohz_enabled_snap = tne; + return; + } + if (!tne) + return; /* * If this is an idle re-entry, for example, due to use of diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 869997833928..66ff07f6184c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -105,7 +105,7 @@ static ktime_t tick_init_jiffy_update(void) /* * NO HZ enabled ? */ -static int tick_nohz_enabled __read_mostly = 1; +int tick_nohz_enabled __read_mostly = 1; /* * Enable / Disable tickless mode -- cgit v1.2.3