summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@ezchip.com>2015-06-24 16:55:45 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-24 17:49:40 -0700
commitfe4ba3c34352b7e8068b7f18eb233444aed17011 (patch)
treed4b34e0d809e784c59eea68cb27c16dc795e371b /kernel
parentb5242e98c1cb834feb1e84026f09a4796b49eb4d (diff)
downloadlinux-fe4ba3c34352b7e8068b7f18eb233444aed17011.tar.bz2
watchdog: add watchdog_cpumask sysctl to assist nohz
Change the default behavior of watchdog so it only runs on the housekeeping cores when nohz_full is enabled at build and boot time. Allow modifying the set of cores the watchdog is currently running on with a new kernel.watchdog_cpumask sysctl. In the current system, the watchdog subsystem runs a periodic timer that schedules the watchdog kthread to run. However, nohz_full cores are designed to allow userspace application code running on those cores to have 100% access to the CPU. So the watchdog system prevents the nohz_full application code from being able to run the way it wants to, thus the motivation to suppress the watchdog on nohz_full cores, which this patchset provides by default. However, if we disable the watchdog globally, then the housekeeping cores can't benefit from the watchdog functionality. So we allow disabling it only on some cores. See Documentation/lockup-watchdogs.txt for more information. [jhubbard@nvidia.com: fix a watchdog crash in some configurations] Signed-off-by: Chris Metcalf <cmetcalf@ezchip.com> Acked-by: Don Zickus <dzickus@redhat.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Ulrich Obergfell <uobergfe@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Signed-off-by: John Hubbard <jhubbard@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/smpboot.c1
-rw-r--r--kernel/sysctl.c7
-rw-r--r--kernel/watchdog.c67
3 files changed, 70 insertions, 5 deletions
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 5e46c2a75d59..7c434c39f02a 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -338,6 +338,7 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
*
* The cpumask field in the smp_hotplug_thread must not be updated directly
* by the client, but only by calling this function.
+ * This function can only be called on a registered smp_hotplug_thread.
*/
int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
const struct cpumask *new)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b13e9d2de302..812fcc3fd390 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -872,6 +872,13 @@ static struct ctl_table kern_table[] = {
.extra2 = &one,
},
{
+ .procname = "watchdog_cpumask",
+ .data = &watchdog_cpumask_bits,
+ .maxlen = NR_CPUS,
+ .mode = 0644,
+ .proc_handler = proc_watchdog_cpumask,
+ },
+ {
.procname = "softlockup_panic",
.data = &softlockup_panic,
.maxlen = sizeof(int),
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 581a68a04c64..a6ffa43f2993 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -19,6 +19,7 @@
#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
+#include <linux/tick.h>
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
@@ -58,6 +59,12 @@ int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#else
#define sysctl_softlockup_all_cpu_backtrace 0
#endif
+static struct cpumask watchdog_cpumask __read_mostly;
+unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
+
+/* Helper for online, unparked cpus. */
+#define for_each_watchdog_cpu(cpu) \
+ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
static int __read_mostly watchdog_running;
static u64 __read_mostly sample_period;
@@ -207,7 +214,7 @@ void touch_all_softlockup_watchdogs(void)
* do we care if a 0 races with a timestamp?
* all it means is the softlock check starts one cycle later
*/
- for_each_online_cpu(cpu)
+ for_each_watchdog_cpu(cpu)
per_cpu(watchdog_touch_ts, cpu) = 0;
}
@@ -616,7 +623,7 @@ void watchdog_nmi_enable_all(void)
goto unlock;
get_online_cpus();
- for_each_online_cpu(cpu)
+ for_each_watchdog_cpu(cpu)
watchdog_nmi_enable(cpu);
put_online_cpus();
@@ -634,7 +641,7 @@ void watchdog_nmi_disable_all(void)
goto unlock;
get_online_cpus();
- for_each_online_cpu(cpu)
+ for_each_watchdog_cpu(cpu)
watchdog_nmi_disable(cpu);
put_online_cpus();
@@ -696,7 +703,7 @@ static void update_watchdog_all_cpus(void)
int cpu;
get_online_cpus();
- for_each_online_cpu(cpu)
+ for_each_watchdog_cpu(cpu)
update_watchdog(cpu);
put_online_cpus();
}
@@ -709,8 +716,12 @@ static int watchdog_enable_all_cpus(void)
err = smpboot_register_percpu_thread(&watchdog_threads);
if (err)
pr_err("Failed to create watchdog threads, disabled\n");
- else
+ else {
+ if (smpboot_update_cpumask_percpu_thread(
+ &watchdog_threads, &watchdog_cpumask))
+ pr_err("Failed to set cpumask for watchdog threads\n");
watchdog_running = 1;
+ }
} else {
/*
* Enable/disable the lockup detectors or
@@ -879,12 +890,58 @@ out:
mutex_unlock(&watchdog_proc_mutex);
return err;
}
+
+/*
+ * The cpumask is the mask of possible cpus that the watchdog can run
+ * on, not the mask of cpus it is actually running on. This allows the
+ * user to specify a mask that will include cpus that have not yet
+ * been brought online, if desired.
+ */
+int proc_watchdog_cpumask(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int err;
+
+ mutex_lock(&watchdog_proc_mutex);
+ err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
+ if (!err && write) {
+ /* Remove impossible cpus to keep sysctl output cleaner. */
+ cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
+ cpu_possible_mask);
+
+ if (watchdog_running) {
+ /*
+ * Failure would be due to being unable to allocate
+ * a temporary cpumask, so we are likely not in a
+ * position to do much else to make things better.
+ */
+ if (smpboot_update_cpumask_percpu_thread(
+ &watchdog_threads, &watchdog_cpumask) != 0)
+ pr_err("cpumask update failed\n");
+ }
+ }
+ mutex_unlock(&watchdog_proc_mutex);
+ return err;
+}
+
#endif /* CONFIG_SYSCTL */
void __init lockup_detector_init(void)
{
set_sample_period();
+#ifdef CONFIG_NO_HZ_FULL
+ if (tick_nohz_full_enabled()) {
+ if (!cpumask_empty(tick_nohz_full_mask))
+ pr_info("Disabling watchdog on nohz_full cores by default\n");
+ cpumask_andnot(&watchdog_cpumask, cpu_possible_mask,
+ tick_nohz_full_mask);
+ } else
+ cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
+#else
+ cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
+#endif
+
if (watchdog_enabled)
watchdog_enable_all_cpus();
}