From 665d92e38f65d70796aad2b8e49e42e80815d4a4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 25 Dec 2014 14:31:24 +0900 Subject: kbuild: do not add $(call ...) to invoke cc-version or cc-fullversion The macros cc-version, cc-fullversion and ld-version take no argument. It is not necessary to add $(call ...) to invoke them. Signed-off-by: Masahiro Yamada Acked-by: Helge Deller [parisc] Signed-off-by: Michal Marek --- kernel/gcov/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile index 52aa7e8de927..6f01fa3d9ca1 100644 --- a/kernel/gcov/Makefile +++ b/kernel/gcov/Makefile @@ -21,7 +21,7 @@ else # is not available. We can probably move if-lt to Kbuild.include, so it's also # not defined during clean or to include Kbuild.include in # scripts/Makefile.clean. But the following workaround seems least invasive. - cc-ver := $(if $(call cc-version),$(call cc-version),0) + cc-ver := $(if $(cc-version),$(cc-version),0) endif obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o -- cgit v1.2.3 From 842857dedc40df7c86e990b7153a069d0040e552 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 25 Dec 2014 14:31:25 +0900 Subject: kbuild,gcov: remove unnecessary workaround Since commit 371fdc77af44 (kbuild: collect shorthands into scripts/Kbuild.include), scripts/Makefile.clean includes scripts/Kbuild.include. The workaround and the comment block in kernel/gcov/Makefile are no longer necessary. Signed-off-by: Masahiro Yamada Cc: Peter Oberparleiter Signed-off-by: Michal Marek --- kernel/gcov/Makefile | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile index 6f01fa3d9ca1..42323c7ec106 100644 --- a/kernel/gcov/Makefile +++ b/kernel/gcov/Makefile @@ -10,18 +10,7 @@ ifeq ($(CONFIG_GCOV_FORMAT_3_4),y) else ifeq ($(CONFIG_GCOV_FORMAT_4_7),y) cc-ver := 0407 else -# Use cc-version if available, otherwise set 0 -# -# scripts/Kbuild.include, which contains cc-version function, is not included -# during make clean "make -f scripts/Makefile.clean obj=kernel/gcov" -# Meaning cc-ver is empty causing if-lt test to fail with -# "/bin/sh: line 0: [: -lt: unary operator expected" error mesage. -# This has no affect on the clean phase, but the error message could be -# confusing/annoying. So this dummy workaround sets cc-ver to zero if cc-version -# is not available. We can probably move if-lt to Kbuild.include, so it's also -# not defined during clean or to include Kbuild.include in -# scripts/Makefile.clean. But the following workaround seems least invasive. - cc-ver := $(if $(cc-version),$(cc-version),0) + cc-ver := $(cc-version) endif obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o -- cgit v1.2.3 From 3df8094727bd1972eb8e231e56ecdbd6e8fb2210 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 25 Dec 2014 14:31:26 +0900 Subject: kbuild,gcov: simplify kernel/gcov/Makefile Kbuild descends into kernel/gcov/ directory only when CONFIG_GCOV_KERNEL is enabled. (See kernel/Makefile) CONFIG_GCOV_KERNEL check can be omitted in kernel/gcov/Makefile. Signed-off-by: Masahiro Yamada Cc: Peter Oberparleiter Signed-off-by: Michal Marek --- kernel/gcov/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile index 42323c7ec106..69b3515c1806 100644 --- a/kernel/gcov/Makefile +++ b/kernel/gcov/Makefile @@ -13,10 +13,10 @@ else cc-ver := $(cc-version) endif -obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o +obj-y := base.o fs.o ifeq ($(call if-lt, $(cc-ver), 0407),1) - obj-$(CONFIG_GCOV_KERNEL) += gcc_3_4.o + obj-y += gcc_3_4.o else - obj-$(CONFIG_GCOV_KERNEL) += gcc_4_7.o + obj-y += gcc_4_7.o endif -- cgit v1.2.3 From a75f8b8dab0f73459fa47a1daa10c84c4e8400a8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 25 Dec 2014 14:31:28 +0900 Subject: kbuild,gcov: simplify kernel/gcov/Makefile more CONFIG_GCOV_FORMAT_3_4 / _4_7 / _AUTODETECT are exclusive. Compare the CC version only when _AUTODETECT is enabled. This change should have no impact. Signed-off-by: Masahiro Yamada Cc: Peter Oberparleiter Signed-off-by: Michal Marek --- kernel/gcov/Makefile | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile index 69b3515c1806..752d6486b67e 100644 --- a/kernel/gcov/Makefile +++ b/kernel/gcov/Makefile @@ -1,22 +1,7 @@ ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' -# if-lt -# Usage VAR := $(call if-lt, $(a), $(b)) -# Returns 1 if (a < b) -if-lt = $(shell [ $(1) -lt $(2) ] && echo 1) - -ifeq ($(CONFIG_GCOV_FORMAT_3_4),y) - cc-ver := 0304 -else ifeq ($(CONFIG_GCOV_FORMAT_4_7),y) - cc-ver := 0407 -else - cc-ver := $(cc-version) -endif - obj-y := base.o fs.o - -ifeq ($(call if-lt, $(cc-ver), 0407),1) - obj-y += gcc_3_4.o -else - obj-y += gcc_4_7.o -endif +obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_3_4.o +obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_4_7.o +obj-$(CONFIG_GCOV_FORMAT_AUTODETECT) += $(call cc-ifversion, -lt, 0407, \ + gcc_3_4.o, gcc_4_7.o) -- cgit v1.2.3 From c0135d07b013fa8f7ba9ec91b4369c372e6a28cb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 22 Jan 2015 22:47:14 -0800 Subject: rcu: Clear need_qs flag to prevent splat If the scheduling-clock interrupt sets the current tasks need_qs flag, but if the current CPU passes through a quiescent state in the meantime, then rcu_preempt_qs() will fail to clear the need_qs flag, which can fool RCU into thinking that additional rcu_read_unlock_special() processing is needed. This commit therefore clears the need_qs flag before checking for additional processing. For this problem to occur, we need rcu_preempt_data.passed_quiesce equal to true and current->rcu_read_unlock_special.b.need_qs also equal to true. This condition can occur as follows: 1. CPU 0 is aware of the current preemptible RCU grace period, but has not yet passed through a quiescent state. Among other things, this means that rcu_preempt_data.passed_quiesce is false. 2. Task A running on CPU 0 enters a preemptible RCU read-side critical section. 3. CPU 0 takes a scheduling-clock interrupt, which notices the RCU read-side critical section and the need for a quiescent state, and thus sets current->rcu_read_unlock_special.b.need_qs to true. 4. Task A is preempted, enters the scheduler, eventually invoking rcu_preempt_note_context_switch() which in turn invokes rcu_preempt_qs(). Because rcu_preempt_data.passed_quiesce is false, control enters the body of the "if" statement, which sets rcu_preempt_data.passed_quiesce to true. 5. At this point, CPU 0 takes an interrupt. The interrupt handler contains an RCU read-side critical section, and the rcu_read_unlock() notes that current->rcu_read_unlock_special is nonzero, and thus invokes rcu_read_unlock_special(). 6. Once in rcu_read_unlock_special(), the fact that current->rcu_read_unlock_special.b.need_qs is true becomes apparent, so rcu_read_unlock_special() invokes rcu_preempt_qs(). Recursively, given that we interrupted out of that same function in the preceding step. 7. Because rcu_preempt_data.passed_quiesce is now true, rcu_preempt_qs() does nothing, and simply returns. 8. Upon return to rcu_read_unlock_special(), it is noted that current->rcu_read_unlock_special is still nonzero (because the interrupted rcu_preempt_qs() had not yet gotten around to clearing current->rcu_read_unlock_special.b.need_qs). 9. Execution proceeds to the WARN_ON_ONCE(), which notes that we are in an interrupt handler and thus duly splats. The solution, as noted above, is to make rcu_read_unlock_special() clear out current->rcu_read_unlock_special.b.need_qs after calling rcu_preempt_qs(). The interrupted rcu_preempt_qs() will clear it again, but this is harmless. The worst that happens is that we clobber another attempt to set this field, but this is not a problem because we just got done reporting a quiescent state. Reported-by: Sasha Levin Signed-off-by: Paul E. McKenney [ paulmck: Fix embarrassing build bug noted by Sasha Levin. ] Tested-by: Sasha Levin --- kernel/rcu/tree_plugin.h | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 2e850a51bb8f..bca28b00f7e6 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -327,6 +327,7 @@ void rcu_read_unlock_special(struct task_struct *t) special = t->rcu_read_unlock_special; if (special.b.need_qs) { rcu_preempt_qs(); + t->rcu_read_unlock_special.b.need_qs = false; if (!t->rcu_read_unlock_special.s) { local_irq_restore(flags); return; -- cgit v1.2.3 From 9791554b45a2acc28247f66a5fd5bbc212a6b8c8 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 8 Jan 2015 12:17:37 +0000 Subject: MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS Userland code may be built using an ABI which permits linking to objects that have more restrictive floating point requirements. For example, userland code may be built to target the O32 FPXX ABI. Such code may be linked with other FPXX code, or code built for either one of the more restrictive FP32 or FP64. When linking with more restrictive code, the overall requirement of the process becomes that of the more restrictive code. The kernel has no way to know in advance which mode the process will need to be executed in, and indeed it may need to change during execution. The dynamic loader is the only code which will know the overall required mode, and so it needs to have a means to instruct the kernel to switch the FP mode of the process. This patch introduces 2 new options to the prctl syscall which provide such a capability. The FP mode of the process is represented as a simple bitmask combining a number of mode bits mirroring those present in the hardware. Userland can either retrieve the current FP mode of the process: mode = prctl(PR_GET_FP_MODE); or modify the current FP mode of the process: err = prctl(PR_SET_FP_MODE, new_mode); Signed-off-by: Paul Burton Cc: Matthew Fortune Cc: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8899/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mmu.h | 3 ++ arch/mips/include/asm/mmu_context.h | 2 + arch/mips/include/asm/processor.h | 11 +++++ arch/mips/kernel/process.c | 92 +++++++++++++++++++++++++++++++++++++ arch/mips/kernel/traps.c | 19 ++++++++ include/uapi/linux/prctl.h | 5 ++ kernel/sys.c | 12 +++++ 7 files changed, 144 insertions(+) (limited to 'kernel') diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h index c436138945a8..1afa1f986df8 100644 --- a/arch/mips/include/asm/mmu.h +++ b/arch/mips/include/asm/mmu.h @@ -1,9 +1,12 @@ #ifndef __ASM_MMU_H #define __ASM_MMU_H +#include + typedef struct { unsigned long asid[NR_CPUS]; void *vdso; + atomic_t fp_mode_switching; } mm_context_t; #endif /* __ASM_MMU_H */ diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h index 2f82568a3ee4..87f11072f557 100644 --- a/arch/mips/include/asm/mmu_context.h +++ b/arch/mips/include/asm/mmu_context.h @@ -132,6 +132,8 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) for_each_possible_cpu(i) cpu_context(i, mm) = 0; + atomic_set(&mm->context.fp_mode_switching, 0); + return 0; } diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index f1df4cb4a286..9daa38608cd8 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -399,4 +399,15 @@ unsigned long get_wchan(struct task_struct *p); #endif +/* + * Functions & macros implementing the PR_GET_FP_MODE & PR_SET_FP_MODE options + * to the prctl syscall. + */ +extern int mips_get_process_fp_mode(struct task_struct *task); +extern int mips_set_process_fp_mode(struct task_struct *task, + unsigned int value); + +#define GET_FP_MODE(task) mips_get_process_fp_mode(task) +#define SET_FP_MODE(task,value) mips_set_process_fp_mode(task, value) + #endif /* _ASM_PROCESSOR_H */ diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index eb76434828e8..4677b4c67da6 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -550,3 +551,94 @@ void arch_trigger_all_cpu_backtrace(bool include_self) { smp_call_function(arch_dump_stack, NULL, 1); } + +int mips_get_process_fp_mode(struct task_struct *task) +{ + int value = 0; + + if (!test_tsk_thread_flag(task, TIF_32BIT_FPREGS)) + value |= PR_FP_MODE_FR; + if (test_tsk_thread_flag(task, TIF_HYBRID_FPREGS)) + value |= PR_FP_MODE_FRE; + + return value; +} + +int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) +{ + const unsigned int known_bits = PR_FP_MODE_FR | PR_FP_MODE_FRE; + unsigned long switch_count; + struct task_struct *t; + + /* Check the value is valid */ + if (value & ~known_bits) + return -EOPNOTSUPP; + + /* Avoid inadvertently triggering emulation */ + if ((value & PR_FP_MODE_FR) && cpu_has_fpu && + !(current_cpu_data.fpu_id & MIPS_FPIR_F64)) + return -EOPNOTSUPP; + if ((value & PR_FP_MODE_FRE) && cpu_has_fpu && !cpu_has_fre) + return -EOPNOTSUPP; + + /* Save FP & vector context, then disable FPU & MSA */ + if (task->signal == current->signal) + lose_fpu(1); + + /* Prevent any threads from obtaining live FP context */ + atomic_set(&task->mm->context.fp_mode_switching, 1); + smp_mb__after_atomic(); + + /* + * If there are multiple online CPUs then wait until all threads whose + * FP mode is about to change have been context switched. This approach + * allows us to only worry about whether an FP mode switch is in + * progress when FP is first used in a tasks time slice. Pretty much all + * of the mode switch overhead can thus be confined to cases where mode + * switches are actually occuring. That is, to here. However for the + * thread performing the mode switch it may take a while... + */ + if (num_online_cpus() > 1) { + spin_lock_irq(&task->sighand->siglock); + + for_each_thread(task, t) { + if (t == current) + continue; + + switch_count = t->nvcsw + t->nivcsw; + + do { + spin_unlock_irq(&task->sighand->siglock); + cond_resched(); + spin_lock_irq(&task->sighand->siglock); + } while ((t->nvcsw + t->nivcsw) == switch_count); + } + + spin_unlock_irq(&task->sighand->siglock); + } + + /* + * There are now no threads of the process with live FP context, so it + * is safe to proceed with the FP mode switch. + */ + for_each_thread(task, t) { + /* Update desired FP register width */ + if (value & PR_FP_MODE_FR) { + clear_tsk_thread_flag(t, TIF_32BIT_FPREGS); + } else { + set_tsk_thread_flag(t, TIF_32BIT_FPREGS); + clear_tsk_thread_flag(t, TIF_MSA_CTX_LIVE); + } + + /* Update desired FP single layout */ + if (value & PR_FP_MODE_FRE) + set_tsk_thread_flag(t, TIF_HYBRID_FPREGS); + else + clear_tsk_thread_flag(t, TIF_HYBRID_FPREGS); + } + + /* Allow threads to use FP again */ + atomic_set(&task->mm->context.fp_mode_switching, 0); + + return 0; +} diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index ad3d2031c327..d5fbfb51b9da 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1134,10 +1134,29 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action, return NOTIFY_OK; } +static int wait_on_fp_mode_switch(atomic_t *p) +{ + /* + * The FP mode for this task is currently being switched. That may + * involve modifications to the format of this tasks FP context which + * make it unsafe to proceed with execution for the moment. Instead, + * schedule some other task. + */ + schedule(); + return 0; +} + static int enable_restore_fp_context(int msa) { int err, was_fpu_owner, prior_msa; + /* + * If an FP mode switch is currently underway, wait for it to + * complete before proceeding. + */ + wait_on_atomic_t(¤t->mm->context.fp_mode_switching, + wait_on_fp_mode_switch, TASK_KILLABLE); + if (!used_math()) { /* First time FP context user. */ preempt_disable(); diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 89f63503f903..31891d9535e2 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -185,4 +185,9 @@ struct prctl_mm_map { #define PR_MPX_ENABLE_MANAGEMENT 43 #define PR_MPX_DISABLE_MANAGEMENT 44 +#define PR_SET_FP_MODE 45 +#define PR_GET_FP_MODE 46 +# define PR_FP_MODE_FR (1 << 0) /* 64b FP registers */ +# define PR_FP_MODE_FRE (1 << 1) /* 32b compatibility */ + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index a8c9f5a7dda6..08b16bbdcdf0 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -97,6 +97,12 @@ #ifndef MPX_DISABLE_MANAGEMENT # define MPX_DISABLE_MANAGEMENT(a) (-EINVAL) #endif +#ifndef GET_FP_MODE +# define GET_FP_MODE(a) (-EINVAL) +#endif +#ifndef SET_FP_MODE +# define SET_FP_MODE(a,b) (-EINVAL) +#endif /* * this is where the system-wide overflow UID and GID are defined, for @@ -2215,6 +2221,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_MPX_DISABLE_MANAGEMENT: error = MPX_DISABLE_MANAGEMENT(me); break; + case PR_SET_FP_MODE: + error = SET_FP_MODE(me, arg2); + break; + case PR_GET_FP_MODE: + error = GET_FP_MODE(me); + break; default: error = -EINVAL; break; -- cgit v1.2.3 From e0b561ee78d82a4cc7792aa28fa4b1ea15325dcc Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Sun, 15 Feb 2015 10:03:20 +0100 Subject: livepatch: fix format string in kobject_init_and_add() kobject_init_and_add() takes expects format string for a name, so we better provide it in order to avoid infoleaks if modules craft their mod->name in a special way. Reported-by: Fengguang Wu Reported-by: Kees Cook Acked-by: Seth Jennings Signed-off-by: Jiri Kosina --- kernel/livepatch/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index ff7f47d026ac..69bf3aa3bde8 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -731,7 +731,7 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func) func->state = KLP_DISABLED; return kobject_init_and_add(&func->kobj, &klp_ktype_func, - obj->kobj, func->old_name); + obj->kobj, "%s", func->old_name); } /* parts of the initialization that is done only when the object is loaded */ @@ -807,7 +807,7 @@ static int klp_init_patch(struct klp_patch *patch) patch->state = KLP_DISABLED; ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch, - klp_root_kobj, patch->mod->name); + klp_root_kobj, "%s", patch->mod->name); if (ret) goto unlock; -- cgit v1.2.3 From 8d1e5a1a1ccf5ae9d8a5a0ee7960202ccb0c5429 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 17 Feb 2015 16:43:43 +0100 Subject: locking/rtmutex: Avoid a NULL pointer dereference on deadlock With task_blocks_on_rt_mutex() returning early -EDEADLK we never add the waiter to the waitqueue. Later, we try to remove it via remove_waiter() and go boom in rt_mutex_top_waiter() because rb_entry() gives a NULL pointer. ( Tested on v3.18-RT where rtmutex is used for regular mutex and I tried to get one twice in a row. ) Not sure when this started but I guess 397335f004f4 ("rtmutex: Fix deadlock detector for real") or commit 3d5c9340d194 ("rtmutex: Handle deadlock detection smarter"). Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra Cc: Thomas Gleixner Cc: # for v3.16 and later kernels Link: http://lkml.kernel.org/r/1424187823-19600-1-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- kernel/locking/rtmutex.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 3059bc2f022d..e16e5542bf13 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1193,7 +1193,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); if (unlikely(ret)) { - remove_waiter(lock, &waiter); + if (rt_mutex_has_waiters(lock)) + remove_waiter(lock, &waiter); rt_mutex_handle_deadlock(ret, chwalk, &waiter); } -- cgit v1.2.3 From 74b8a4cb6ce3685049ee124243a52238c5cabe55 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Feb 2015 13:07:38 +0100 Subject: sched: Clarify ordering between task_rq_lock() and move_queued_task() There was a wee bit of confusion around the exact ordering here; clarify things. Reported-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/20150217121258.GM5029@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1f37fe7f77a4..fd0a6ed21849 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -341,6 +341,22 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) raw_spin_lock_irqsave(&p->pi_lock, *flags); rq = task_rq(p); raw_spin_lock(&rq->lock); + /* + * move_queued_task() task_rq_lock() + * + * ACQUIRE (rq->lock) + * [S] ->on_rq = MIGRATING [L] rq = task_rq() + * WMB (__set_task_cpu()) ACQUIRE (rq->lock); + * [S] ->cpu = new_cpu [L] task_rq() + * [L] ->on_rq + * RELEASE (rq->lock) + * + * If we observe the old cpu in task_rq_lock, the acquire of + * the old rq->lock will fully serialize against the stores. + * + * If we observe the new cpu in task_rq_lock, the acquire will + * pair with the WMB to ensure we must then also see migrating. + */ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) return rq; raw_spin_unlock(&rq->lock); -- cgit v1.2.3 From 3960c8c0c7891dfc0f7be687cbdabb0d6916d614 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Feb 2015 13:22:25 +0100 Subject: sched: Make dl_task_time() use task_rq_lock() Kirill reported that a dl task can be throttled and dequeued at the same time. This happens, when it becomes throttled in schedule(), which is called to go to sleep: current->state = TASK_INTERRUPTIBLE; schedule() deactivate_task() dequeue_task_dl() update_curr_dl() start_dl_timer() __dequeue_task_dl() prev->on_rq = 0; This invalidates the assumption from commit 0f397f2c90ce ("sched/dl: Fix race in dl_task_timer()"): "The only reason we don't strictly need ->pi_lock now is because we're guaranteed to have p->state == TASK_RUNNING here and are thus free of ttwu races". And therefore we have to use the full task_rq_lock() here. This further amends the fact that we forgot to update the rq lock loop for TASK_ON_RQ_MIGRATE, from commit cca26e8009d1 ("sched: Teach scheduler to understand TASK_ON_RQ_MIGRATING state"). Reported-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Juri Lelli Link: http://lkml.kernel.org/r/20150217123139.GN5029@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 76 ------------------------------------------------- kernel/sched/deadline.c | 12 ++------ kernel/sched/sched.h | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 85 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fd0a6ed21849..f77acd98f50a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -306,82 +306,6 @@ __read_mostly int scheduler_running; */ int sysctl_sched_rt_runtime = 950000; -/* - * __task_rq_lock - lock the rq @p resides on. - */ -static inline struct rq *__task_rq_lock(struct task_struct *p) - __acquires(rq->lock) -{ - struct rq *rq; - - lockdep_assert_held(&p->pi_lock); - - for (;;) { - rq = task_rq(p); - raw_spin_lock(&rq->lock); - if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) - return rq; - raw_spin_unlock(&rq->lock); - - while (unlikely(task_on_rq_migrating(p))) - cpu_relax(); - } -} - -/* - * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. - */ -static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) - __acquires(p->pi_lock) - __acquires(rq->lock) -{ - struct rq *rq; - - for (;;) { - raw_spin_lock_irqsave(&p->pi_lock, *flags); - rq = task_rq(p); - raw_spin_lock(&rq->lock); - /* - * move_queued_task() task_rq_lock() - * - * ACQUIRE (rq->lock) - * [S] ->on_rq = MIGRATING [L] rq = task_rq() - * WMB (__set_task_cpu()) ACQUIRE (rq->lock); - * [S] ->cpu = new_cpu [L] task_rq() - * [L] ->on_rq - * RELEASE (rq->lock) - * - * If we observe the old cpu in task_rq_lock, the acquire of - * the old rq->lock will fully serialize against the stores. - * - * If we observe the new cpu in task_rq_lock, the acquire will - * pair with the WMB to ensure we must then also see migrating. - */ - if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) - return rq; - raw_spin_unlock(&rq->lock); - raw_spin_unlock_irqrestore(&p->pi_lock, *flags); - - while (unlikely(task_on_rq_migrating(p))) - cpu_relax(); - } -} - -static void __task_rq_unlock(struct rq *rq) - __releases(rq->lock) -{ - raw_spin_unlock(&rq->lock); -} - -static inline void -task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) - __releases(rq->lock) - __releases(p->pi_lock) -{ - raw_spin_unlock(&rq->lock); - raw_spin_unlock_irqrestore(&p->pi_lock, *flags); -} - /* * this_rq_lock - lock this runqueue and disable interrupts. */ diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index a027799ae130..e88847d9fc6a 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) struct sched_dl_entity, dl_timer); struct task_struct *p = dl_task_of(dl_se); + unsigned long flags; struct rq *rq; -again: - rq = task_rq(p); - raw_spin_lock(&rq->lock); - if (rq != task_rq(p)) { - /* Task was moved, retrying. */ - raw_spin_unlock(&rq->lock); - goto again; - } + rq = task_rq_lock(current, &flags); /* * We need to take care of several possible races here: @@ -555,7 +549,7 @@ again: push_dl_task(rq); #endif unlock: - raw_spin_unlock(&rq->lock); + task_rq_unlock(rq, current, &flags); return HRTIMER_NORESTART; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0870db23d79c..dc0f435a2779 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1380,6 +1380,82 @@ static inline void sched_avg_update(struct rq *rq) { } extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period); +/* + * __task_rq_lock - lock the rq @p resides on. + */ +static inline struct rq *__task_rq_lock(struct task_struct *p) + __acquires(rq->lock) +{ + struct rq *rq; + + lockdep_assert_held(&p->pi_lock); + + for (;;) { + rq = task_rq(p); + raw_spin_lock(&rq->lock); + if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) + return rq; + raw_spin_unlock(&rq->lock); + + while (unlikely(task_on_rq_migrating(p))) + cpu_relax(); + } +} + +/* + * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. + */ +static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) + __acquires(p->pi_lock) + __acquires(rq->lock) +{ + struct rq *rq; + + for (;;) { + raw_spin_lock_irqsave(&p->pi_lock, *flags); + rq = task_rq(p); + raw_spin_lock(&rq->lock); + /* + * move_queued_task() task_rq_lock() + * + * ACQUIRE (rq->lock) + * [S] ->on_rq = MIGRATING [L] rq = task_rq() + * WMB (__set_task_cpu()) ACQUIRE (rq->lock); + * [S] ->cpu = new_cpu [L] task_rq() + * [L] ->on_rq + * RELEASE (rq->lock) + * + * If we observe the old cpu in task_rq_lock, the acquire of + * the old rq->lock will fully serialize against the stores. + * + * If we observe the new cpu in task_rq_lock, the acquire will + * pair with the WMB to ensure we must then also see migrating. + */ + if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) + return rq; + raw_spin_unlock(&rq->lock); + raw_spin_unlock_irqrestore(&p->pi_lock, *flags); + + while (unlikely(task_on_rq_migrating(p))) + cpu_relax(); + } +} + +static inline void __task_rq_unlock(struct rq *rq) + __releases(rq->lock) +{ + raw_spin_unlock(&rq->lock); +} + +static inline void +task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) + __releases(rq->lock) + __releases(p->pi_lock) +{ + raw_spin_unlock(&rq->lock); + raw_spin_unlock_irqrestore(&p->pi_lock, *flags); +} + #ifdef CONFIG_SMP #ifdef CONFIG_PREEMPT -- cgit v1.2.3 From a79ec89fd8459f0de850898f432a2a57d60e64de Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 16 Feb 2015 15:38:34 +0300 Subject: sched/dl: Prevent enqueue of a sleeping task in dl_task_timer() A deadline task may be throttled and dequeued at the same time. This happens, when it becomes throttled in schedule(), which is called to go to sleep: current->state = TASK_INTERRUPTIBLE; schedule() deactivate_task() dequeue_task_dl() update_curr_dl() start_dl_timer() __dequeue_task_dl() prev->on_rq = 0; Later the timer fires, but the task is still dequeued: dl_task_timer() enqueue_task_dl() /* queues on dl_rq; on_rq remains 0 */ Someone wakes it up: try_to_wake_up() enqueue_dl_entity() BUG_ON(on_dl_rq()) Patch fixes this problem, it prevents queueing !on_rq tasks on dl_rq. Reported-by: Fengguang Wu Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) [ Wrote comment. ] Cc: Juri Lelli Fixes: 1019a359d3dc ("sched/deadline: Fix stale yield state") Link: http://lkml.kernel.org/r/1374601424090314@web4j.yandex.ru Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'kernel') diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index e88847d9fc6a..9908c950d776 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -535,6 +535,26 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) sched_clock_tick(); update_rq_clock(rq); + + /* + * If the throttle happened during sched-out; like: + * + * schedule() + * deactivate_task() + * dequeue_task_dl() + * update_curr_dl() + * start_dl_timer() + * __dequeue_task_dl() + * prev->on_rq = 0; + * + * We can be both throttled and !queued. Replenish the counter + * but do not enqueue -- wait for our wakeup to do that. + */ + if (!task_on_rq_queued(p)) { + replenish_dl_entity(dl_se, dl_se); + goto unlock; + } + enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); if (dl_task(rq->curr)) check_preempt_curr_dl(rq, p, 0); -- cgit v1.2.3 From 06b1f8083d6ed379ec1207a96339f23e8f7abfcf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 16 Feb 2015 19:20:07 +0100 Subject: sched: Fix preempt_schedule_common() triggering tracing recursion Since the function graph tracer needs to disable preemption, it might call preempt_schedule() after reenabling it if something triggered the need for rescheduling in between. Therefore we can't trace preempt_schedule() itself because we would face a function tracing recursion otherwise as the tracer is always called before PREEMPT_ACTIVE gets set to prevent that recursion. This is why preempt_schedule() is tagged as "notrace". But the same issue applies to every function called by preempt_schedule() before PREEMPT_ACTIVE is actually set. And preempt_schedule_common() is one such example. Unfortunately we forgot to tag it as notrace as well and as a result we are encountering tracing recursion since it got introduced by: a18b5d0181923 ("sched: Fix missing preemption opportunity") Let's fix that by applying the appropriate function tag to preempt_schedule_common(). Reported-by: Huang Ying Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1424110807-15057-1-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f77acd98f50a..c314000f5e52 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2839,7 +2839,7 @@ void __sched schedule_preempt_disabled(void) preempt_disable(); } -static void preempt_schedule_common(void) +static void __sched notrace preempt_schedule_common(void) { do { __preempt_count_add(PREEMPT_ACTIVE); -- cgit v1.2.3 From bc9560155f4063bbc9be71bd69d6726d41b47653 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 12 Feb 2015 20:59:13 +0100 Subject: sched/completion: Serialize completion_done() with complete() Commit de30ec47302c "Remove unnecessary ->wait.lock serialization when reading completion state" was not correct, without lock/unlock the code like stop_machine_from_inactive_cpu() while (!completion_done()) cpu_relax(); can return before complete() finishes its spin_unlock() which writes to this memory. And spin_unlock_wait(). While at it, change try_wait_for_completion() to use READ_ONCE(). Reported-by: Paul E. McKenney Reported-by: Davidlohr Bueso Tested-by: Paul E. McKenney Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) [ Added a comment with the barrier. ] Cc: Linus Torvalds Cc: Nicholas Mc Guire Cc: raghavendra.kt@linux.vnet.ibm.com Cc: waiman.long@hp.com Fixes: de30ec47302c ("sched/completion: Remove unnecessary ->wait.lock serialization when reading completion state") Link: http://lkml.kernel.org/r/20150212195913.GA30430@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/completion.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 7052d3fd4e7b..8d0f35debf35 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -274,7 +274,7 @@ bool try_wait_for_completion(struct completion *x) * first without taking the lock so we can * return early in the blocking case. */ - if (!ACCESS_ONCE(x->done)) + if (!READ_ONCE(x->done)) return 0; spin_lock_irqsave(&x->wait.lock, flags); @@ -297,6 +297,21 @@ EXPORT_SYMBOL(try_wait_for_completion); */ bool completion_done(struct completion *x) { - return !!ACCESS_ONCE(x->done); + if (!READ_ONCE(x->done)) + return false; + + /* + * If ->done, we need to wait for complete() to release ->wait.lock + * otherwise we can end up freeing the completion before complete() + * is done referencing it. + * + * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders + * the loads of ->done and ->wait.lock such that we cannot observe + * the lock before complete() acquires it while observing the ->done + * after it's acquired the lock. + */ + smp_rmb(); + spin_unlock_wait(&x->wait.lock); + return true; } EXPORT_SYMBOL(completion_done); -- cgit v1.2.3 From 9cff8adeaa34b5d2802f03f89803da57856b3b72 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 13 Feb 2015 15:49:17 +1100 Subject: sched: Prevent recursion in io_schedule() io_schedule() calls blk_flush_plug() which, depending on the contents of current->plug, can initiate arbitrary blk-io requests. Note that this contrasts with blk_schedule_flush_plug() which requires all non-trivial work to be handed off to a separate thread. This makes it possible for io_schedule() to recurse, and initiating block requests could possibly call mempool_alloc() which, in times of memory pressure, uses io_schedule(). Apart from any stack usage issues, io_schedule() will not behave correctly when called recursively as delayacct_blkio_start() does not allow for repeated calls. So: - use ->in_iowait to detect recursion. Set it earlier, and restore it to the old value. - move the call to "raw_rq" after the call to blk_flush_plug(). As this is some sort of per-cpu thing, we want some chance that we are on the right CPU - When io_schedule() is called recurively, use blk_schedule_flush_plug() which cannot further recurse. - as this makes io_schedule() a lot more complex and as io_schedule() must match io_schedule_timeout(), but all the changes in io_schedule_timeout() and make io_schedule a simple wrapper for that. Signed-off-by: NeilBrown Signed-off-by: Peter Zijlstra (Intel) [ Moved the now rudimentary io_schedule() into sched.h. ] Cc: Jens Axboe Cc: Linus Torvalds Cc: Tony Battersby Link: http://lkml.kernel.org/r/20150213162600.059fffb2@notabene.brown Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 +++++++--- kernel/sched/core.c | 31 ++++++++++++------------------- 2 files changed, 19 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8db31ef98d2f..cb5cdc777c8a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -363,9 +363,6 @@ extern void show_regs(struct pt_regs *); */ extern void show_stack(struct task_struct *task, unsigned long *sp); -void io_schedule(void); -long io_schedule_timeout(long timeout); - extern void cpu_init (void); extern void trap_init(void); extern void update_process_times(int user); @@ -422,6 +419,13 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); +extern long io_schedule_timeout(long timeout); + +static inline void io_schedule(void) +{ + io_schedule_timeout(MAX_SCHEDULE_TIMEOUT); +} + struct nsproxy; struct user_namespace; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c314000f5e52..daaea922f482 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4358,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to); * This task is about to go to sleep on IO. Increment rq->nr_iowait so * that process accounting knows that this is a task in IO wait state. */ -void __sched io_schedule(void) -{ - struct rq *rq = raw_rq(); - - delayacct_blkio_start(); - atomic_inc(&rq->nr_iowait); - blk_flush_plug(current); - current->in_iowait = 1; - schedule(); - current->in_iowait = 0; - atomic_dec(&rq->nr_iowait); - delayacct_blkio_end(); -} -EXPORT_SYMBOL(io_schedule); - long __sched io_schedule_timeout(long timeout) { - struct rq *rq = raw_rq(); + int old_iowait = current->in_iowait; + struct rq *rq; long ret; + current->in_iowait = 1; + if (old_iowait) + blk_schedule_flush_plug(current); + else + blk_flush_plug(current); + delayacct_blkio_start(); + rq = raw_rq(); atomic_inc(&rq->nr_iowait); - blk_flush_plug(current); - current->in_iowait = 1; ret = schedule_timeout(timeout); - current->in_iowait = 0; + current->in_iowait = old_iowait; atomic_dec(&rq->nr_iowait); delayacct_blkio_end(); + return ret; } +EXPORT_SYMBOL(io_schedule_timeout); /** * sys_sched_get_priority_max - return maximum RT priority. -- cgit v1.2.3 From 29183a70b0b828500816bd794b3fe192fce89f73 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 9 Feb 2015 23:30:36 -0800 Subject: ntp: Fixup adjtimex freq validation on 32-bit systems Additional validation of adjtimex freq values to avoid potential multiplication overflows were added in commit 5e5aeb4367b (time: adjtimex: Validate the ADJ_FREQUENCY values) Unfortunately the patch used LONG_MAX/MIN instead of LLONG_MAX/MIN, which was fine on 64-bit systems, but being much smaller on 32-bit systems caused false positives resulting in most direct frequency adjustments to fail w/ EINVAL. ntpd only does direct frequency adjustments at startup, so the issue was not as easily observed there, but other time sync applications like ptpd and chrony were more effected by the bug. See bugs: https://bugzilla.kernel.org/show_bug.cgi?id=92481 https://bugzilla.redhat.com/show_bug.cgi?id=1188074 This patch changes the checks to use LLONG_MAX for clarity, and additionally the checks are disabled on 32-bit systems since LLONG_MAX/PPM_SCALE is always larger then the 32-bit long freq value, so multiplication overflows aren't possible there. Reported-by: Josh Boyer Reported-by: George Joseph Tested-by: George Joseph Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Cc: # v3.19+ Cc: Linus Torvalds Cc: Sasha Levin Link: http://lkml.kernel.org/r/1423553436-29747-1-git-send-email-john.stultz@linaro.org [ Prettified the changelog and the comments a bit. ] Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 4b585e0fdd22..0f60b08a4f07 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -633,10 +633,14 @@ int ntp_validate_timex(struct timex *txc) if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) return -EPERM; - if (txc->modes & ADJ_FREQUENCY) { - if (LONG_MIN / PPM_SCALE > txc->freq) + /* + * Check for potential multiplication overflows that can + * only happen on 64-bit systems: + */ + if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { + if (LLONG_MIN / PPM_SCALE > txc->freq) return -EINVAL; - if (LONG_MAX / PPM_SCALE < txc->freq) + if (LLONG_MAX / PPM_SCALE < txc->freq) return -EINVAL; } -- cgit v1.2.3 From 6f1607f1bdb4f9991a8123675a03c1764b2932fe Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Wed, 4 Feb 2015 12:09:32 +0300 Subject: sched/dl: Do update_rq_clock() in yield_task_dl() update_curr_dl() needs actual rq clock. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1423040972.18770.10.camel@tkhai Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 9908c950d776..3fa8fa6d9403 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -912,6 +912,7 @@ static void yield_task_dl(struct rq *rq) rq->curr->dl.dl_yielded = 1; p->dl.runtime = 0; } + update_rq_clock(rq); update_curr_dl(rq); } -- cgit v1.2.3 From 1fe89e1b6d270aa0d3452c60d38461ea589594e3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 Feb 2015 11:53:18 +0100 Subject: sched/autogroup: Fix failure to set cpu.rt_runtime_us Because task_group() uses a cache of autogroup_task_group(), whose output depends on sched_class, switching classes can generate problems. In particular, when started as fair, the cache points to the autogroup, so when switching to RT the tg_rt_schedulable() test fails for every cpu.rt_{runtime,period}_us change because now the autogroup has tasks and no runtime. Furthermore, going back to the previous semantics of varying task_group() with sched_class has the down-side that the sched_debug output varies as well, even though the task really is in the autogroup. Therefore add an autogroup exception to tg_has_rt_tasks() -- such that both (all) task_group() usages in sched/core now have one. And remove all the remnants of the variable task_group() output. Reported-by: Zefan Li Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Stefan Bader Fixes: 8323f26ce342 ("sched: Fix race in task_group()") Link: http://lkml.kernel.org/r/20150209112237.GR5029@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/auto_group.c | 6 +----- kernel/sched/core.c | 6 ++++++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index 8a2e230fb86a..eae160dd669d 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c @@ -87,8 +87,7 @@ static inline struct autogroup *autogroup_create(void) * so we don't have to move tasks around upon policy change, * or flail around trying to allocate bandwidth on the fly. * A bandwidth exception in __sched_setscheduler() allows - * the policy change to proceed. Thereafter, task_group() - * returns &root_task_group, so zero bandwidth is required. + * the policy change to proceed. */ free_rt_sched_group(tg); tg->rt_se = root_task_group.rt_se; @@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) if (tg != &root_task_group) return false; - if (p->sched_class != &fair_sched_class) - return false; - /* * We can only assume the task group can't go away on us if * autogroup_move_group() can see us on ->thread_group list. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index daaea922f482..03a67f09404c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7577,6 +7577,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg) { struct task_struct *g, *p; + /* + * Autogroups do not have RT tasks; see autogroup_create(). + */ + if (task_group_is_autogroup(tg)) + return 0; + for_each_process_thread(g, p) { if (rt_task(p) && task_group(p) == tg) return 1; -- cgit v1.2.3 From 2636ed5f8d15ff9395731593537b4b3fdf2af24d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 Feb 2015 12:23:20 +0100 Subject: sched/rt: Avoid obvious configuration fail Setting the root group's cpu.rt_runtime_us to 0 is a bad thing; it would disallow the kernel creating RT tasks. One can of course still set it to 1, which will (likely) still wreck your kernel, but at least make it clear that setting it to 0 is not good. Collect both sanity checks into the one place while we're there. Suggested-by: Zefan Li Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20150209112715.GO24151@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 03a67f09404c..a4869bd426ca 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7675,6 +7675,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg, { int i, err = 0; + /* + * Disallowing the root group RT runtime is BAD, it would disallow the + * kernel creating (and or operating) RT threads. + */ + if (tg == &root_task_group && rt_runtime == 0) + return -EINVAL; + + /* No period doesn't make any sense. */ + if (rt_period == 0) + return -EINVAL; + mutex_lock(&rt_constraints_mutex); read_lock(&tasklist_lock); err = __rt_schedulable(tg, rt_period, rt_runtime); @@ -7731,9 +7742,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) rt_period = (u64)rt_period_us * NSEC_PER_USEC; rt_runtime = tg->rt_bandwidth.rt_runtime; - if (rt_period == 0) - return -EINVAL; - return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); } -- cgit v1.2.3 From 146755923262037fc4c54abc28c04b1103f3cc51 Mon Sep 17 00:00:00 2001 From: Jay Lan Date: Mon, 29 Sep 2014 15:36:57 -0700 Subject: kdb: fix incorrect counts in KDB summary command output The output of KDB 'summary' command should report MemTotal, MemFree and Buffers output in kB. Current codes report in unit of pages. A define of K(x) as is defined in the code, but not used. This patch would apply the define to convert the values to kB. Please include me on Cc on replies. I do not subscribe to linux-kernel. Signed-off-by: Jay Lan Cc: Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 7b40c5f07dce..c6d2c0b1565d 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2583,7 +2583,7 @@ static int kdb_summary(int argc, const char **argv) #define K(x) ((x) << (PAGE_SHIFT - 10)) kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n" "Buffers: %8lu kB\n", - val.totalram, val.freeram, val.bufferram); + K(val.totalram), K(val.freeram), K(val.bufferram)); return 0; } -- cgit v1.2.3 From df0036d117e6c9df36324e517728e33543065f9a Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 8 Jan 2015 15:46:55 -0600 Subject: kdb: Fix off by one error in kdb_cpu() There was a follow on replacement patch against the prior "kgdb: Timeout if secondary CPUs ignore the roundup". See: https://lkml.org/lkml/2015/1/7/442 This patch is the delta vs the patch that was committed upstream: * Fix an off-by-one error in kdb_cpu(). * Replace NR_CPUS with CONFIG_NR_CPUS to tell checkpatch that we really want a static limit. * Removed the "KGDB: " prefix from the pr_crit() in debug_core.c (kgdb-next contains a patch which introduced pr_fmt() to this file to the tag will now be applied automatically). Cc: Daniel Thompson Cc: Signed-off-by: Jason Wessel --- kernel/debug/debug_core.c | 2 +- kernel/debug/kdb/kdb_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 07ce18ca71e0..ac5c0f9c7a20 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -604,7 +604,7 @@ return_normal: online_cpus) cpu_relax(); if (!time_left) - pr_crit("KGDB: Timed out waiting for secondary CPUs.\n"); + pr_crit("Timed out waiting for secondary CPUs.\n"); /* * At this point the primary processor is completely diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index c6d2c0b1565d..60f6bb817f70 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2256,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv) /* * Validate cpunum */ - if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb) + if ((cpunum >= CONFIG_NR_CPUS) || !kgdb_info[cpunum].enter_kgdb) return KDB_BADCPUNUM; dbg_switch_cpu = cpunum; -- cgit v1.2.3 From f7d4ca8bbfda23b4f1eae9b6757ff64166b093d5 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Fri, 7 Nov 2014 18:37:57 +0000 Subject: kdb: Avoid printing KERN_ levels to consoles Currently when kdb traps printk messages then the raw log level prefix (consisting of '\001' followed by a numeral) does not get stripped off before the message is issued to the various I/O handlers supported by kdb. This causes annoying visual noise as well as causing problems grepping for ^. It is also a change of behaviour compared to normal usage of printk() usage. For example -h ends up with different output to that of kdb's "sr h". This patch addresses the problem by stripping log levels from messages before they are issued to the I/O handlers. printk() which can also act as an i/o handler in some cases is special cased; if the caller provided a log level then the prefix will be preserved when sent to printk(). The addition of non-printable characters to the output of kdb commands is a regression, albeit and extremely elderly one, introduced by commit 04d2c8c83d0e ("printk: convert the format for KERN_ to a 2 byte pattern"). Note also that this patch does *not* restore the original behaviour from v3.5. Instead it makes printk() from within a kdb command display the message without any prefix (i.e. like printk() normally does). Signed-off-by: Daniel Thompson Cc: Joe Perches Cc: stable@vger.kernel.org Signed-off-by: Jason Wessel --- include/linux/kdb.h | 8 +++++++- kernel/debug/kdb/kdb_io.c | 22 +++++++++++++--------- kernel/printk/printk.c | 2 +- 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 75ae2e2631fc..a19bcf9e762e 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -156,8 +156,14 @@ typedef enum { KDB_REASON_SYSTEM_NMI, /* In NMI due to SYSTEM cmd; regs valid */ } kdb_reason_t; +enum kdb_msgsrc { + KDB_MSGSRC_INTERNAL, /* direct call to kdb_printf() */ + KDB_MSGSRC_PRINTK, /* trapped from printk() */ +}; + extern int kdb_trap_printk; -extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args); +extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt, + va_list args); extern __printf(1, 2) int kdb_printf(const char *, ...); typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 7c70812caea5..a550afb99ebe 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -548,7 +548,7 @@ static int kdb_search_string(char *searched, char *searchfor) return 0; } -int vkdb_printf(const char *fmt, va_list ap) +int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) { int diag; int linecount; @@ -691,19 +691,20 @@ kdb_printit: * Write to all consoles. */ retlen = strlen(kdb_buffer); + cp = (char *) printk_skip_level(kdb_buffer); if (!dbg_kdb_mode && kgdb_connected) { - gdbstub_msg_write(kdb_buffer, retlen); + gdbstub_msg_write(cp, retlen - (cp - kdb_buffer)); } else { if (dbg_io_ops && !dbg_io_ops->is_console) { - len = retlen; - cp = kdb_buffer; + len = retlen - (cp - kdb_buffer); + cp2 = cp; while (len--) { - dbg_io_ops->write_char(*cp); - cp++; + dbg_io_ops->write_char(*cp2); + cp2++; } } while (c) { - c->write(c, kdb_buffer, retlen); + c->write(c, cp, retlen - (cp - kdb_buffer)); touch_nmi_watchdog(); c = c->next; } @@ -711,7 +712,10 @@ kdb_printit: if (logging) { saved_loglevel = console_loglevel; console_loglevel = CONSOLE_LOGLEVEL_SILENT; - printk(KERN_INFO "%s", kdb_buffer); + if (printk_get_level(kdb_buffer) || src == KDB_MSGSRC_PRINTK) + printk("%s", kdb_buffer); + else + pr_info("%s", kdb_buffer); } if (KDB_STATE(PAGER)) { @@ -844,7 +848,7 @@ int kdb_printf(const char *fmt, ...) int r; va_start(ap, fmt); - r = vkdb_printf(fmt, ap); + r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap); va_end(ap); return r; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 02d6b6d28796..fae29e3ffbf0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1811,7 +1811,7 @@ int vprintk_default(const char *fmt, va_list args) #ifdef CONFIG_KGDB_KDB if (unlikely(kdb_trap_printk)) { - r = vkdb_printf(fmt, args); + r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); return r; } #endif -- cgit v1.2.3 From 5454388113938d9592d6a6d5424469014da4ee86 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 6 Nov 2014 15:32:13 +0000 Subject: kdb: Remove stack dump when entering kgdb due to NMI Issuing a stack dump feels ergonomically wrong when entering due to NMI. Entering due to NMI is normally a reaction to a user request, either the NMI button on a server or a "magic knock" on a UART. Therefore the backtrace behaviour on entry due to NMI should be like SysRq-g (no stack dump) rather than like oops. Note also that the stack dump does not offer any information that cannot be trivial retrieved using the 'bt' command. Signed-off-by: Daniel Thompson Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 60f6bb817f70..0a22f455060a 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1247,7 +1247,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, kdb_printf("due to NonMaskable Interrupt @ " kdb_machreg_fmt "\n", instruction_pointer(regs)); - kdb_dumpregs(regs); break; case KDB_REASON_SSTEP: case KDB_REASON_BREAK: -- cgit v1.2.3 From ab08e464a2cd8242fdc6e4f87f3480808364a97a Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 11 Sep 2014 09:58:29 +0100 Subject: kdb: Fix a prompt management bug when using | grep Currently when the "| grep" feature is used to filter the output of a command then the prompt is not displayed for the subsequent command. Likewise any characters typed by the user are also not echoed to the display. This rather disconcerting problem eventually corrects itself when the user presses Enter and the kdb_grepping_flag is cleared as kdb_parse() tries to make sense of whatever they typed. This patch resolves the problem by moving the clearing of this flag from the middle of command processing to the beginning. Signed-off-by: Daniel Thompson Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 0a22f455060a..420418360b81 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -915,13 +915,12 @@ int kdb_parse(const char *cmdstr) char *cp; char *cpp, quoted; kdbtab_t *tp; - int i, escaped, ignore_errors = 0, check_grep; + int i, escaped, ignore_errors = 0, check_grep = 0; /* * First tokenize the command string. */ cp = (char *)cmdstr; - kdb_grepping_flag = check_grep = 0; if (KDB_FLAG(CMD_INTERRUPT)) { /* Previous command was interrupted, newline must not @@ -1280,6 +1279,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, */ kdb_nextline = 1; KDB_STATE_CLEAR(SUPPRESS); + kdb_grepping_flag = 0; cmdbuf = cmd_cur; *cmdbuf = '\0'; -- cgit v1.2.3 From fb6daa7520f9d17a97e84a3d5a947819e0313f28 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 11 Sep 2014 10:37:10 +0100 Subject: kdb: Provide forward search at more prompt Currently kdb allows the output of comamnds to be filtered using the | grep feature. This is useful but does not permit the output emitted shortly after a string match to be examined without wading through the entire unfiltered output of the command. Such a feature is particularly useful to navigate function traces because these traces often have a useful trigger string *before* the point of interest. This patch reuses the existing filtering logic to introduce a simple forward search to kdb that can be triggered from the more prompt. Signed-off-by: Daniel Thompson Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_io.c | 22 ++++++++++++++++++++-- kernel/debug/kdb/kdb_main.c | 7 ++++--- kernel/debug/kdb/kdb_private.h | 2 ++ 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index a550afb99ebe..ca13e6215537 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -680,6 +680,12 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) size_avail = sizeof(kdb_buffer) - len; goto kdb_print_out; } + if (kdb_grepping_flag >= KDB_GREPPING_FLAG_SEARCH) + /* + * This was a interactive search (using '/' at more + * prompt) and it has completed. Clear the flag. + */ + kdb_grepping_flag = 0; /* * at this point the string is a full line and * should be printed, up to the null. @@ -798,11 +804,23 @@ kdb_printit: kdb_nextline = linecount - 1; kdb_printf("\r"); suspend_grep = 1; /* for this recursion */ + } else if (buf1[0] == '/' && !kdb_grepping_flag) { + kdb_printf("\r"); + kdb_getstr(kdb_grep_string, KDB_GREP_STRLEN, + kdbgetenv("SEARCHPROMPT") ?: "search> "); + *strchrnul(kdb_grep_string, '\n') = '\0'; + kdb_grepping_flag += KDB_GREPPING_FLAG_SEARCH; + suspend_grep = 1; /* for this recursion */ } else if (buf1[0] && buf1[0] != '\n') { /* user hit something other than enter */ suspend_grep = 1; /* for this recursion */ - kdb_printf("\nOnly 'q' or 'Q' are processed at more " - "prompt, input ignored\n"); + if (buf1[0] != '/') + kdb_printf( + "\nOnly 'q', 'Q' or '/' are processed at " + "more prompt, input ignored\n"); + else + kdb_printf("\n'/' cannot be used during | " + "grep filtering, input ignored\n"); } else if (kdb_grepping_flag) { /* user hit enter */ suspend_grep = 1; /* for this recursion */ diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 420418360b81..4121345498e0 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -50,8 +50,7 @@ static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE; module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); -#define GREP_LEN 256 -char kdb_grep_string[GREP_LEN]; +char kdb_grep_string[KDB_GREP_STRLEN]; int kdb_grepping_flag; EXPORT_SYMBOL(kdb_grepping_flag); int kdb_grep_leading; @@ -870,7 +869,7 @@ static void parse_grep(const char *str) len = strlen(cp); if (!len) return; - if (len >= GREP_LEN) { + if (len >= KDB_GREP_STRLEN) { kdb_printf("search string too long\n"); return; } @@ -1280,6 +1279,8 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, kdb_nextline = 1; KDB_STATE_CLEAR(SUPPRESS); kdb_grepping_flag = 0; + /* ensure the old search does not leak into '/' commands */ + kdb_grep_string[0] = '\0'; cmdbuf = cmd_cur; *cmdbuf = '\0'; diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index eaacd1693954..da93894db7ba 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -196,7 +196,9 @@ extern int kdb_main_loop(kdb_reason_t, kdb_reason_t, /* Miscellaneous functions and data areas */ extern int kdb_grepping_flag; +#define KDB_GREPPING_FLAG_SEARCH 0x8000 extern char kdb_grep_string[]; +#define KDB_GREP_STRLEN 256 extern int kdb_grep_leading; extern int kdb_grep_trailing; extern char *kdb_cmds[]; -- cgit v1.2.3 From 32d375f6f24c3e4c9c235672695b4c314cf6b964 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 11 Sep 2014 10:41:12 +0100 Subject: kdb: Const qualifier for kdb_getstr's prompt argument All current callers of kdb_getstr() can pass constant pointers via the prompt argument. This patch adds a const qualification to make explicit the fact that this is safe. Signed-off-by: Daniel Thompson Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_io.c | 2 +- kernel/debug/kdb/kdb_private.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index ca13e6215537..fc1ef736253c 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -439,7 +439,7 @@ poll_again: * substituted for %d, %x or %o in the prompt. */ -char *kdb_getstr(char *buffer, size_t bufsize, char *prompt) +char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt) { if (prompt && kdb_prompt_str != prompt) strncpy(kdb_prompt_str, prompt, CMD_BUFLEN); diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index da93894db7ba..75014d7f4568 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -211,7 +211,7 @@ extern void kdb_ps1(const struct task_struct *p); extern void kdb_print_nameval(const char *name, unsigned long val); extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); extern void kdb_meminfo_proc_show(void); -extern char *kdb_getstr(char *, size_t, char *); +extern char *kdb_getstr(char *, size_t, const char *); extern void kdb_gdb_state_pass(char *buf); /* Defines for kdb_symbol_print */ -- cgit v1.2.3 From 5516fd7b92a7c16b9111a88aaa1b13dd937d8ac5 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 28 Jan 2015 17:02:14 +0530 Subject: debug: prevent entering debug mode on panic/exception. On non-developer devices, kgdb prevents the device from rebooting after a panic. Incase of panics and exceptions, to allow the device to reboot, prevent entering debug mode to avoid getting stuck waiting for the user to interact with debugger. To avoid entering the debugger on panic/exception without any extra configuration, panic_timeout is being used which can be set via /proc/sys/kernel/panic at run time and CONFIG_PANIC_TIMEOUT sets the default value. Setting panic_timeout indicates that the user requested machine to perform unattended reboot after panic. We dont want to get stuck waiting for the user input incase of panic. Cc: Andrew Morton Cc: kgdb-bugreport@lists.sourceforge.net Cc: linux-kernel@vger.kernel.org Cc: Android Kernel Team Cc: John Stultz Cc: Sumit Semwal Signed-off-by: Colin Cross [Kiran: Added context to commit message. panic_timeout is used instead of break_on_panic and break_on_exception to honor CONFIG_PANIC_TIMEOUT Modified the commit as per community feedback] Signed-off-by: Kiran Raparthy Signed-off-by: Daniel Thompson Signed-off-by: Jason Wessel --- kernel/debug/debug_core.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'kernel') diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index ac5c0f9c7a20..0874e2edd275 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -696,6 +696,14 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) if (arch_kgdb_ops.enable_nmi) arch_kgdb_ops.enable_nmi(0); + /* + * Avoid entering the debugger if we were triggered due to an oops + * but panic_timeout indicates the system should automatically + * reboot on panic. We don't want to get stuck waiting for input + * on such systems, especially if its "just" an oops. + */ + if (signo != SIGTRAP && panic_timeout) + return 1; memset(ks, 0, sizeof(struct kgdb_state)); ks->cpu = raw_smp_processor_id(); @@ -828,6 +836,15 @@ static int kgdb_panic_event(struct notifier_block *self, unsigned long val, void *data) { + /* + * Avoid entering the debugger if we were triggered due to a panic + * We don't want to get stuck waiting for input from user in such case. + * panic_timeout indicates the system should automatically + * reboot on panic. + */ + if (panic_timeout) + return NOTIFY_DONE; + if (dbg_kdb_mode) kdb_printf("PANIC: %s\n", (char *)data); kgdb_breakpoint(); -- cgit v1.2.3 From c4ce0da8ec62d83c96e29db7dadd6d3985344bb3 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 18 Feb 2015 18:02:13 +0100 Subject: livepatch: RCU protect struct klp_func all the time when used in klp_ftrace_handler() func->new_func has been accessed after rcu_read_unlock() in klp_ftrace_handler() and therefore the access was not protected. Signed-off-by: Petr Mladek Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- kernel/livepatch/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 69bf3aa3bde8..782172f073c5 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -314,12 +314,12 @@ static void notrace klp_ftrace_handler(unsigned long ip, rcu_read_lock(); func = list_first_or_null_rcu(&ops->func_stack, struct klp_func, stack_node); - rcu_read_unlock(); - if (WARN_ON_ONCE(!func)) - return; + goto unlock; klp_arch_set_pc(regs, (unsigned long)func->new_func); +unlock: + rcu_read_unlock(); } static int klp_disable_func(struct klp_func *func) -- cgit v1.2.3 From 39afb5ee4640b4ed2cdd9e12b2a67cf785cfced8 Mon Sep 17 00:00:00 2001 From: Jon DeVree Date: Fri, 27 Feb 2015 15:52:07 -0800 Subject: kernel/sys.c: fix UNAME26 for 4.0 There's a uname workaround for broken userspace which can't handle kernel versions of 3.x. Update it for 4.x. Signed-off-by: Jon DeVree Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 667b2e62fad2..a03d9cd23ed7 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1108,6 +1108,7 @@ DECLARE_RWSEM(uts_sem); /* * Work around broken programs that cannot handle "Linux 3.0". * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 + * And we map 4.x to 2.6.60+x, so 4.0 would be 2.6.60. */ static int override_release(char __user *release, size_t len) { @@ -1127,7 +1128,7 @@ static int override_release(char __user *release, size_t len) break; rest++; } - v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; + v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 60; copy = clamp_t(size_t, len, 1, sizeof(buf)); copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); ret = copy_to_user(release, buf, copy + 1); -- cgit v1.2.3 From 9d3e2d02f54160725d97f4ab1e1e8de493fbf33a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 27 Feb 2015 17:57:09 +0100 Subject: locking/rtmutex: Set state back to running on error The "usual" path is: - rt_mutex_slowlock() - set_current_state() - task_blocks_on_rt_mutex() (ret 0) - __rt_mutex_slowlock() - sleep or not but do return with __set_current_state(TASK_RUNNING) - back to caller. In the early error case where task_blocks_on_rt_mutex() return -EDEADLK we never change the task's state back to RUNNING. I assume this is intended. Without this change after ww_mutex using rt_mutex the selftest passes but later I get plenty of: | bad: scheduling from the idle thread! backtraces. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Mike Galbraith Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: afffc6c1805d ("locking/rtmutex: Optimize setting task running after being blocked") Link: http://lkml.kernel.org/r/1425056229-22326-4-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- kernel/locking/rtmutex.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index e16e5542bf13..6357265a31ad 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1193,6 +1193,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); if (unlikely(ret)) { + __set_current_state(TASK_RUNNING); if (rt_mutex_has_waiters(lock)) remove_waiter(lock, &waiter); rt_mutex_handle_deadlock(ret, chwalk, &waiter); -- cgit v1.2.3