From 810ce8b5df1c8338065f2ae1d2ec08cc566fbb8b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 20 Apr 2016 09:22:15 -0700 Subject: rcu: Document RCU_NONIDLE() restrictions in comment header Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5f1533e3d032..c61b6b9506e7 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -379,12 +379,13 @@ static inline void rcu_init_nohz(void) * in the inner idle loop. * * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU()) - * will tell RCU that it needs to pay attending, invoke its argument - * (in this example, a call to the do_something_with_RCU() function), + * will tell RCU that it needs to pay attention, invoke its argument + * (in this example, calling the do_something_with_RCU() function), * and then tell RCU to go back to ignoring this CPU. It is permissible - * to nest RCU_NONIDLE() wrappers, but the nesting level is currently - * quite limited. If deeper nesting is required, it will be necessary - * to adjust DYNTICK_TASK_NESTING_VALUE accordingly. + * to nest RCU_NONIDLE() wrappers, but not indefinitely (but the limit is + * on the order of a million or so, even on 32-bit systems). It is + * not legal to block within RCU_NONIDLE(), nor is it permissible to + * transfer control either into or out of RCU_NONIDLE()'s statement. */ #define RCU_NONIDLE(a) \ do { \ -- cgit v1.2.3 From d95f5ba90fa6043a366958897fdef705af968b70 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 20 Apr 2016 17:18:41 -0700 Subject: torture: Break online and offline functions out of torture_onoff() This commit breaks torture_online() and torture_offline() out of torture_onoff() in preparation for allowing waketorture finer-grained control of its CPU-hotplug workload. Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 4 ++ kernel/torture.c | 168 ++++++++++++++++++++++++++++++------------------ 2 files changed, 108 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/linux/torture.h b/include/linux/torture.h index 7759fc3c622d..6685a73736a2 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -50,6 +50,10 @@ do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s); } while (0) /* Definitions for online/offline exerciser. */ +bool torture_offline(int cpu, long *n_onl_attempts, long *n_onl_successes, + unsigned long *sum_offl, int *min_onl, int *max_onl); +bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes, + unsigned long *sum_onl, int *min_onl, int *max_onl); int torture_onoff_init(long ooholdoff, long oointerval); void torture_onoff_stats(void); bool torture_onoff_failures(void); diff --git a/kernel/torture.c b/kernel/torture.c index fa0bdeee17ac..fb39a06bbef5 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -81,6 +81,104 @@ static unsigned long sum_online; static int min_online = -1; static int max_online; +/* + * Attempt to take a CPU offline. Return false if the CPU is already + * offline or if it is not subject to CPU-hotplug operations. The + * caller can detect other failures by looking at the statistics. + */ +bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes, + unsigned long *sum_offl, int *min_offl, int *max_offl) +{ + unsigned long delta; + int ret; + unsigned long starttime; + + if (!cpu_online(cpu) || !cpu_is_hotpluggable(cpu)) + return false; + + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: offlining %d\n", + torture_type, cpu); + starttime = jiffies; + (*n_offl_attempts)++; + ret = cpu_down(cpu); + if (ret) { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: offline %d failed: errno %d\n", + torture_type, cpu, ret); + } else { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: offlined %d\n", + torture_type, cpu); + (*n_offl_successes)++; + delta = jiffies - starttime; + sum_offl += delta; + if (*min_offl < 0) { + *min_offl = delta; + *max_offl = delta; + } + if (*min_offl > delta) + *min_offl = delta; + if (*max_offl < delta) + *max_offl = delta; + } + + return true; +} +EXPORT_SYMBOL_GPL(torture_offline); + +/* + * Attempt to bring a CPU online. Return false if the CPU is already + * online or if it is not subject to CPU-hotplug operations. The + * caller can detect other failures by looking at the statistics. + */ +bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes, + unsigned long *sum_onl, int *min_onl, int *max_onl) +{ + unsigned long delta; + int ret; + unsigned long starttime; + + if (cpu_online(cpu) || !cpu_is_hotpluggable(cpu)) + return false; + + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: onlining %d\n", + torture_type, cpu); + starttime = jiffies; + (*n_onl_attempts)++; + ret = cpu_up(cpu); + if (ret) { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: online %d failed: errno %d\n", + torture_type, cpu, ret); + } else { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: onlined %d\n", + torture_type, cpu); + (*n_onl_successes)++; + delta = jiffies - starttime; + *sum_onl += delta; + if (*min_onl < 0) { + *min_onl = delta; + *max_onl = delta; + } + if (*min_onl > delta) + *min_onl = delta; + if (*max_onl < delta) + *max_onl = delta; + } + + return true; +} +EXPORT_SYMBOL_GPL(torture_online); + /* * Execute random CPU-hotplug operations at the interval specified * by the onoff_interval. @@ -89,11 +187,8 @@ static int torture_onoff(void *arg) { int cpu; - unsigned long delta; int maxcpu = -1; DEFINE_TORTURE_RANDOM(rand); - int ret; - unsigned long starttime; VERBOSE_TOROUT_STRING("torture_onoff task started"); for_each_online_cpu(cpu) @@ -106,67 +201,12 @@ torture_onoff(void *arg) } while (!torture_must_stop()) { cpu = (torture_random(&rand) >> 4) % (maxcpu + 1); - if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "torture_onoff task: offlining %d\n", - torture_type, cpu); - starttime = jiffies; - n_offline_attempts++; - ret = cpu_down(cpu); - if (ret) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "torture_onoff task: offline %d failed: errno %d\n", - torture_type, cpu, ret); - } else { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "torture_onoff task: offlined %d\n", - torture_type, cpu); - n_offline_successes++; - delta = jiffies - starttime; - sum_offline += delta; - if (min_offline < 0) { - min_offline = delta; - max_offline = delta; - } - if (min_offline > delta) - min_offline = delta; - if (max_offline < delta) - max_offline = delta; - } - } else if (cpu_is_hotpluggable(cpu)) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "torture_onoff task: onlining %d\n", - torture_type, cpu); - starttime = jiffies; - n_online_attempts++; - ret = cpu_up(cpu); - if (ret) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "torture_onoff task: online %d failed: errno %d\n", - torture_type, cpu, ret); - } else { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "torture_onoff task: onlined %d\n", - torture_type, cpu); - n_online_successes++; - delta = jiffies - starttime; - sum_online += delta; - if (min_online < 0) { - min_online = delta; - max_online = delta; - } - if (min_online > delta) - min_online = delta; - if (max_online < delta) - max_online = delta; - } - } + if (!torture_offline(cpu, + &n_offline_attempts, &n_offline_successes, + &sum_offline, &min_offline, &max_offline)) + torture_online(cpu, + &n_online_attempts, &n_online_successes, + &sum_online, &min_online, &max_online); schedule_timeout_interruptible(onoff_interval); } torture_kthread_stopping("torture_onoff"); -- cgit v1.2.3 From 3a37f7275cda5ad25c1fe9be8f20c76c60d175fa Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 1 May 2016 18:46:54 -0700 Subject: rcu: No ordering for rcu_assign_pointer() of NULL This commit does a compile-time check for rcu_assign_pointer() of NULL, and uses WRITE_ONCE() rather than smp_store_release() in that case. Reported-by: Christoph Hellwig Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c61b6b9506e7..a8af79738a0e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -650,7 +650,16 @@ static inline void rcu_preempt_sleep_check(void) * please be careful when making changes to rcu_assign_pointer() and the * other macros that it invokes. */ -#define rcu_assign_pointer(p, v) smp_store_release(&p, RCU_INITIALIZER(v)) +#define rcu_assign_pointer(p, v) \ +({ \ + uintptr_t _r_a_p__v = (uintptr_t)(v); \ + \ + if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ + WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ + else \ + smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ + _r_a_p__v; \ +}) /** * rcu_access_pointer() - fetch RCU pointer with no dereferencing -- cgit v1.2.3 From 4929c913bda505dbe44bb42c00da06011fee6c9d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 May 2016 11:58:56 -0700 Subject: rcu: Make call_rcu_tasks() tolerate first call with irqs disabled Currently, if the very first call to call_rcu_tasks() has irqs disabled, it will create the rcu_tasks_kthread with irqs disabled, which will result in a splat in the memory allocator, which kthread_run() invokes with the expectation that irqs are enabled. This commit fixes this problem by deferring kthread creation if called with irqs disabled. The first call to call_rcu_tasks() that has irqs enabled will create the kthread. This bug was detected by rcutorture changes that were motivated by Iftekhar Ahmed's mutation-testing efforts. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 + kernel/rcu/update.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a8af79738a0e..3bc5de08c0b7 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -45,6 +45,7 @@ #include #include #include +#include #include diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 3e888cd5a594..f0d8322bc3ec 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -528,6 +528,7 @@ static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10; module_param(rcu_task_stall_timeout, int, 0644); static void rcu_spawn_tasks_kthread(void); +static struct task_struct *rcu_tasks_kthread_ptr; /* * Post an RCU-tasks callback. First call must be from process context @@ -537,6 +538,7 @@ void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func) { unsigned long flags; bool needwake; + bool havetask = READ_ONCE(rcu_tasks_kthread_ptr); rhp->next = NULL; rhp->func = func; @@ -545,7 +547,9 @@ void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func) *rcu_tasks_cbs_tail = rhp; rcu_tasks_cbs_tail = &rhp->next; raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags); - if (needwake) { + /* We can't create the thread unless interrupts are enabled. */ + if ((needwake && havetask) || + (!havetask && !irqs_disabled_flags(flags))) { rcu_spawn_tasks_kthread(); wake_up(&rcu_tasks_cbs_wq); } @@ -790,7 +794,6 @@ static int __noreturn rcu_tasks_kthread(void *arg) static void rcu_spawn_tasks_kthread(void) { static DEFINE_MUTEX(rcu_tasks_kthread_mutex); - static struct task_struct *rcu_tasks_kthread_ptr; struct task_struct *t; if (READ_ONCE(rcu_tasks_kthread_ptr)) { -- cgit v1.2.3 From 088e9d253d3a4ab7e058dd84bb532c32dadf1882 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 2 Jun 2016 13:51:41 -0300 Subject: rcu: sysctl: Panic on RCU Stall It is not always easy to determine the cause of an RCU stall just by analysing the RCU stall messages, mainly when the problem is caused by the indirect starvation of rcu threads. For example, when preempt_rcu is not awakened due to the starvation of a timer softirq. We have been hard coding panic() in the RCU stall functions for some time while testing the kernel-rt. But this is not possible in some scenarios, like when supporting customers. This patch implements the sysctl kernel.panic_on_rcu_stall. If set to 1, the system will panic() when an RCU stall takes place, enabling the capture of a vmcore. The vmcore provides a way to analyze all kernel/tasks states, helping out to point to the culprit and the solution for the stall. The kernel.panic_on_rcu_stall sysctl is disabled by default. Changes from v1: - Fixed a typo in the git log - The if(sysctl_panic_on_rcu_stall) panic() is in a static function - Fixed the CONFIG_TINY_RCU compilation issue - The var sysctl_panic_on_rcu_stall is now __read_mostly Cc: Jonathan Corbet Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Lai Jiangshan Acked-by: Christian Borntraeger Reviewed-by: Josh Triplett Reviewed-by: Arnaldo Carvalho de Melo Tested-by: "Luis Claudio R. Goncalves" Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Paul E. McKenney --- Documentation/sysctl/kernel.txt | 12 ++++++++++++ include/linux/kernel.h | 1 + kernel/rcu/tree.c | 12 ++++++++++++ kernel/sysctl.c | 11 +++++++++++ 4 files changed, 36 insertions(+) (limited to 'include') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index a3683ce2a2f3..33204604de6c 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -58,6 +58,7 @@ show up in /proc/sys/kernel: - panic_on_stackoverflow - panic_on_unrecovered_nmi - panic_on_warn +- panic_on_rcu_stall - perf_cpu_time_max_percent - perf_event_paranoid - perf_event_max_stack @@ -618,6 +619,17 @@ a kernel rebuild when attempting to kdump at the location of a WARN(). ============================================================== +panic_on_rcu_stall: + +When set to 1, calls panic() after RCU stall detection messages. This +is useful to define the root cause of RCU stalls using a vmcore. + +0: do not panic() when RCU stall takes place, default behavior. + +1: panic() after printing RCU stall messages. + +============================================================== + perf_cpu_time_max_percent: Hints to the kernel how much CPU time it should be allowed to diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 94aa10ffe156..c42082112ec8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -451,6 +451,7 @@ extern int panic_on_oops; extern int panic_on_unrecovered_nmi; extern int panic_on_io_nmi; extern int panic_on_warn; +extern int sysctl_panic_on_rcu_stall; extern int sysctl_panic_on_stackoverflow; extern bool crash_kexec_post_notifiers; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c844b6142a86..e5ca15a461b9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -125,6 +125,8 @@ int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; /* Number of rcu_nodes at specified level. */ static int num_rcu_lvl[] = NUM_RCU_LVL_INIT; int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ +/* panic() on RCU Stall sysctl. */ +int sysctl_panic_on_rcu_stall __read_mostly; /* * The rcu_scheduler_active variable transitions from zero to one just @@ -1312,6 +1314,12 @@ static void rcu_stall_kick_kthreads(struct rcu_state *rsp) } } +static inline void panic_on_rcu_stall(void) +{ + if (sysctl_panic_on_rcu_stall) + panic("RCU Stall\n"); +} + static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) { int cpu; @@ -1391,6 +1399,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) rcu_check_gp_kthread_starvation(rsp); + panic_on_rcu_stall(); + force_quiescent_state(rsp); /* Kick them all. */ } @@ -1431,6 +1441,8 @@ static void print_cpu_stall(struct rcu_state *rsp) jiffies + 3 * rcu_jiffies_till_stall_check() + 3); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + panic_on_rcu_stall(); + /* * Attempt to revive the RCU machinery by forcing a context switch. * diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 87b2fc38398b..35f0dcb1cb4f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1204,6 +1204,17 @@ static struct ctl_table kern_table[] = { .extra1 = &one, .extra2 = &one, }, +#endif +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) + { + .procname = "panic_on_rcu_stall", + .data = &sysctl_panic_on_rcu_stall, + .maxlen = sizeof(sysctl_panic_on_rcu_stall), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, #endif { } }; -- cgit v1.2.3