From 665d92e38f65d70796aad2b8e49e42e80815d4a4 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.m@jp.panasonic.com>
Date: Thu, 25 Dec 2014 14:31:24 +0900
Subject: kbuild: do not add $(call ...) to invoke cc-version or cc-fullversion

The macros cc-version, cc-fullversion and ld-version take no argument.
It is not necessary to add $(call ...) to invoke them.

Signed-off-by: Masahiro Yamada <yamada.m@jp.panasonic.com>
Acked-by: Helge Deller <deller@gmx.de> [parisc]
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 kernel/gcov/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 52aa7e8de927..6f01fa3d9ca1 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -21,7 +21,7 @@ else
 # is not available. We can probably move if-lt to Kbuild.include, so it's also
 # not defined during clean or to include Kbuild.include in
 # scripts/Makefile.clean. But the following workaround seems least invasive.
-  cc-ver := $(if $(call cc-version),$(call cc-version),0)
+  cc-ver := $(if $(cc-version),$(cc-version),0)
 endif
 
 obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o
-- 
cgit v1.2.3


From 842857dedc40df7c86e990b7153a069d0040e552 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.m@jp.panasonic.com>
Date: Thu, 25 Dec 2014 14:31:25 +0900
Subject: kbuild,gcov: remove unnecessary workaround

Since commit 371fdc77af44 (kbuild: collect shorthands into
scripts/Kbuild.include), scripts/Makefile.clean includes
scripts/Kbuild.include.

The workaround and the comment block in kernel/gcov/Makefile
are no longer necessary.

Signed-off-by: Masahiro Yamada <yamada.m@jp.panasonic.com>
Cc: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 kernel/gcov/Makefile | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 6f01fa3d9ca1..42323c7ec106 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -10,18 +10,7 @@ ifeq ($(CONFIG_GCOV_FORMAT_3_4),y)
 else ifeq ($(CONFIG_GCOV_FORMAT_4_7),y)
   cc-ver := 0407
 else
-# Use cc-version if available, otherwise set 0
-#
-# scripts/Kbuild.include, which contains cc-version function, is not included
-# during make clean "make -f scripts/Makefile.clean obj=kernel/gcov"
-# Meaning cc-ver is empty causing if-lt test to fail with
-# "/bin/sh: line 0: [: -lt: unary operator expected" error mesage.
-# This has no affect on the clean phase, but the error message could be
-# confusing/annoying. So this dummy workaround sets cc-ver to zero if cc-version
-# is not available. We can probably move if-lt to Kbuild.include, so it's also
-# not defined during clean or to include Kbuild.include in
-# scripts/Makefile.clean. But the following workaround seems least invasive.
-  cc-ver := $(if $(cc-version),$(cc-version),0)
+  cc-ver := $(cc-version)
 endif
 
 obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o
-- 
cgit v1.2.3


From 3df8094727bd1972eb8e231e56ecdbd6e8fb2210 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.m@jp.panasonic.com>
Date: Thu, 25 Dec 2014 14:31:26 +0900
Subject: kbuild,gcov: simplify kernel/gcov/Makefile

Kbuild descends into kernel/gcov/ directory only when
CONFIG_GCOV_KERNEL is enabled. (See kernel/Makefile)

CONFIG_GCOV_KERNEL check can be omitted in kernel/gcov/Makefile.

Signed-off-by: Masahiro Yamada <yamada.m@jp.panasonic.com>
Cc: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 kernel/gcov/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 42323c7ec106..69b3515c1806 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -13,10 +13,10 @@ else
   cc-ver := $(cc-version)
 endif
 
-obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o
+obj-y := base.o fs.o
 
 ifeq ($(call if-lt, $(cc-ver), 0407),1)
-  obj-$(CONFIG_GCOV_KERNEL) += gcc_3_4.o
+  obj-y += gcc_3_4.o
 else
-  obj-$(CONFIG_GCOV_KERNEL) += gcc_4_7.o
+  obj-y += gcc_4_7.o
 endif
-- 
cgit v1.2.3


From a75f8b8dab0f73459fa47a1daa10c84c4e8400a8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.m@jp.panasonic.com>
Date: Thu, 25 Dec 2014 14:31:28 +0900
Subject: kbuild,gcov: simplify kernel/gcov/Makefile more

CONFIG_GCOV_FORMAT_3_4 / _4_7 / _AUTODETECT are exclusive.
Compare the CC version only when _AUTODETECT is enabled.

This change should have no impact.

Signed-off-by: Masahiro Yamada <yamada.m@jp.panasonic.com>
Cc: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 kernel/gcov/Makefile | 23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

(limited to 'kernel')

diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 69b3515c1806..752d6486b67e 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -1,22 +1,7 @@
 ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
 
-# if-lt
-# Usage VAR := $(call if-lt, $(a), $(b))
-# Returns 1 if (a < b)
-if-lt = $(shell [ $(1) -lt $(2) ] && echo 1)
-
-ifeq ($(CONFIG_GCOV_FORMAT_3_4),y)
-  cc-ver := 0304
-else ifeq ($(CONFIG_GCOV_FORMAT_4_7),y)
-  cc-ver := 0407
-else
-  cc-ver := $(cc-version)
-endif
-
 obj-y := base.o fs.o
-
-ifeq ($(call if-lt, $(cc-ver), 0407),1)
-  obj-y += gcc_3_4.o
-else
-  obj-y += gcc_4_7.o
-endif
+obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_3_4.o
+obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_4_7.o
+obj-$(CONFIG_GCOV_FORMAT_AUTODETECT) += $(call cc-ifversion, -lt, 0407, \
+							gcc_3_4.o, gcc_4_7.o)
-- 
cgit v1.2.3


From c0135d07b013fa8f7ba9ec91b4369c372e6a28cb Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 22 Jan 2015 22:47:14 -0800
Subject: rcu: Clear need_qs flag to prevent splat

If the scheduling-clock interrupt sets the current tasks need_qs flag,
but if the current CPU passes through a quiescent state in the meantime,
then rcu_preempt_qs() will fail to clear the need_qs flag, which can fool
RCU into thinking that additional rcu_read_unlock_special() processing
is needed.  This commit therefore clears the need_qs flag before checking
for additional processing.

For this problem to occur, we need rcu_preempt_data.passed_quiesce equal
to true and current->rcu_read_unlock_special.b.need_qs also equal to true.
This condition can occur as follows:

1.	CPU 0 is aware of the current preemptible RCU grace period,
	but has not yet passed through a quiescent state.  Among other
	things, this means that rcu_preempt_data.passed_quiesce is false.

2.	Task A running on CPU 0 enters a preemptible RCU read-side
	critical section.

3.	CPU 0 takes a scheduling-clock interrupt, which notices the
	RCU read-side critical section and the need for a quiescent state,
	and thus sets current->rcu_read_unlock_special.b.need_qs to true.

4.	Task A is preempted, enters the scheduler, eventually invoking
	rcu_preempt_note_context_switch() which in turn invokes
	rcu_preempt_qs().

	Because rcu_preempt_data.passed_quiesce is false,
	control enters the body of the "if" statement, which sets
	rcu_preempt_data.passed_quiesce to true.

5.	At this point, CPU 0 takes an interrupt.  The interrupt
	handler contains an RCU read-side critical section, and
	the rcu_read_unlock() notes that current->rcu_read_unlock_special
	is nonzero, and thus invokes rcu_read_unlock_special().

6.	Once in rcu_read_unlock_special(), the fact that
	current->rcu_read_unlock_special.b.need_qs is true becomes
	apparent, so rcu_read_unlock_special() invokes rcu_preempt_qs().
	Recursively, given that we interrupted out of that same
	function in the preceding step.

7.	Because rcu_preempt_data.passed_quiesce is now true,
	rcu_preempt_qs() does nothing, and simply returns.

8.	Upon return to rcu_read_unlock_special(), it is noted that
	current->rcu_read_unlock_special is still nonzero (because
	the interrupted rcu_preempt_qs() had not yet gotten around
	to clearing current->rcu_read_unlock_special.b.need_qs).

9.	Execution proceeds to the WARN_ON_ONCE(), which notes that
	we are in an interrupt handler and thus duly splats.

The solution, as noted above, is to make rcu_read_unlock_special()
clear out current->rcu_read_unlock_special.b.need_qs after calling
rcu_preempt_qs().  The interrupted rcu_preempt_qs() will clear it again,
but this is harmless.  The worst that happens is that we clobber another
attempt to set this field, but this is not a problem because we just
got done reporting a quiescent state.

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Fix embarrassing build bug noted by Sasha Levin. ]
Tested-by: Sasha Levin <sasha.levin@oracle.com>
---
 kernel/rcu/tree_plugin.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 2e850a51bb8f..bca28b00f7e6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -327,6 +327,7 @@ void rcu_read_unlock_special(struct task_struct *t)
 	special = t->rcu_read_unlock_special;
 	if (special.b.need_qs) {
 		rcu_preempt_qs();
+		t->rcu_read_unlock_special.b.need_qs = false;
 		if (!t->rcu_read_unlock_special.s) {
 			local_irq_restore(flags);
 			return;
-- 
cgit v1.2.3


From 9791554b45a2acc28247f66a5fd5bbc212a6b8c8 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 8 Jan 2015 12:17:37 +0000
Subject: MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS

Userland code may be built using an ABI which permits linking to objects
that have more restrictive floating point requirements. For example,
userland code may be built to target the O32 FPXX ABI. Such code may be
linked with other FPXX code, or code built for either one of the more
restrictive FP32 or FP64. When linking with more restrictive code, the
overall requirement of the process becomes that of the more restrictive
code. The kernel has no way to know in advance which mode the process
will need to be executed in, and indeed it may need to change during
execution. The dynamic loader is the only code which will know the
overall required mode, and so it needs to have a means to instruct the
kernel to switch the FP mode of the process.

This patch introduces 2 new options to the prctl syscall which provide
such a capability. The FP mode of the process is represented as a
simple bitmask combining a number of mode bits mirroring those present
in the hardware. Userland can either retrieve the current FP mode of
the process:

  mode = prctl(PR_GET_FP_MODE);

or modify the current FP mode of the process:

  err = prctl(PR_SET_FP_MODE, new_mode);

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Matthew Fortune <matthew.fortune@imgtec.com>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/8899/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mmu.h         |  3 ++
 arch/mips/include/asm/mmu_context.h |  2 +
 arch/mips/include/asm/processor.h   | 11 +++++
 arch/mips/kernel/process.c          | 92 +++++++++++++++++++++++++++++++++++++
 arch/mips/kernel/traps.c            | 19 ++++++++
 include/uapi/linux/prctl.h          |  5 ++
 kernel/sys.c                        | 12 +++++
 7 files changed, 144 insertions(+)

(limited to 'kernel')

diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h
index c436138945a8..1afa1f986df8 100644
--- a/arch/mips/include/asm/mmu.h
+++ b/arch/mips/include/asm/mmu.h
@@ -1,9 +1,12 @@
 #ifndef __ASM_MMU_H
 #define __ASM_MMU_H
 
+#include <linux/atomic.h>
+
 typedef struct {
 	unsigned long asid[NR_CPUS];
 	void *vdso;
+	atomic_t fp_mode_switching;
 } mm_context_t;
 
 #endif /* __ASM_MMU_H */
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index 2f82568a3ee4..87f11072f557 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -132,6 +132,8 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	for_each_possible_cpu(i)
 		cpu_context(i, mm) = 0;
 
+	atomic_set(&mm->context.fp_mode_switching, 0);
+
 	return 0;
 }
 
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index f1df4cb4a286..9daa38608cd8 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -399,4 +399,15 @@ unsigned long get_wchan(struct task_struct *p);
 
 #endif
 
+/*
+ * Functions & macros implementing the PR_GET_FP_MODE & PR_SET_FP_MODE options
+ * to the prctl syscall.
+ */
+extern int mips_get_process_fp_mode(struct task_struct *task);
+extern int mips_set_process_fp_mode(struct task_struct *task,
+				    unsigned int value);
+
+#define GET_FP_MODE(task)		mips_get_process_fp_mode(task)
+#define SET_FP_MODE(task,value)		mips_set_process_fp_mode(task, value)
+
 #endif /* _ASM_PROCESSOR_H */
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index eb76434828e8..4677b4c67da6 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -25,6 +25,7 @@
 #include <linux/completion.h>
 #include <linux/kallsyms.h>
 #include <linux/random.h>
+#include <linux/prctl.h>
 
 #include <asm/asm.h>
 #include <asm/bootinfo.h>
@@ -550,3 +551,94 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
 {
 	smp_call_function(arch_dump_stack, NULL, 1);
 }
+
+int mips_get_process_fp_mode(struct task_struct *task)
+{
+	int value = 0;
+
+	if (!test_tsk_thread_flag(task, TIF_32BIT_FPREGS))
+		value |= PR_FP_MODE_FR;
+	if (test_tsk_thread_flag(task, TIF_HYBRID_FPREGS))
+		value |= PR_FP_MODE_FRE;
+
+	return value;
+}
+
+int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
+{
+	const unsigned int known_bits = PR_FP_MODE_FR | PR_FP_MODE_FRE;
+	unsigned long switch_count;
+	struct task_struct *t;
+
+	/* Check the value is valid */
+	if (value & ~known_bits)
+		return -EOPNOTSUPP;
+
+	/* Avoid inadvertently triggering emulation */
+	if ((value & PR_FP_MODE_FR) && cpu_has_fpu &&
+	    !(current_cpu_data.fpu_id & MIPS_FPIR_F64))
+		return -EOPNOTSUPP;
+	if ((value & PR_FP_MODE_FRE) && cpu_has_fpu && !cpu_has_fre)
+		return -EOPNOTSUPP;
+
+	/* Save FP & vector context, then disable FPU & MSA */
+	if (task->signal == current->signal)
+		lose_fpu(1);
+
+	/* Prevent any threads from obtaining live FP context */
+	atomic_set(&task->mm->context.fp_mode_switching, 1);
+	smp_mb__after_atomic();
+
+	/*
+	 * If there are multiple online CPUs then wait until all threads whose
+	 * FP mode is about to change have been context switched. This approach
+	 * allows us to only worry about whether an FP mode switch is in
+	 * progress when FP is first used in a tasks time slice. Pretty much all
+	 * of the mode switch overhead can thus be confined to cases where mode
+	 * switches are actually occuring. That is, to here. However for the
+	 * thread performing the mode switch it may take a while...
+	 */
+	if (num_online_cpus() > 1) {
+		spin_lock_irq(&task->sighand->siglock);
+
+		for_each_thread(task, t) {
+			if (t == current)
+				continue;
+
+			switch_count = t->nvcsw + t->nivcsw;
+
+			do {
+				spin_unlock_irq(&task->sighand->siglock);
+				cond_resched();
+				spin_lock_irq(&task->sighand->siglock);
+			} while ((t->nvcsw + t->nivcsw) == switch_count);
+		}
+
+		spin_unlock_irq(&task->sighand->siglock);
+	}
+
+	/*
+	 * There are now no threads of the process with live FP context, so it
+	 * is safe to proceed with the FP mode switch.
+	 */
+	for_each_thread(task, t) {
+		/* Update desired FP register width */
+		if (value & PR_FP_MODE_FR) {
+			clear_tsk_thread_flag(t, TIF_32BIT_FPREGS);
+		} else {
+			set_tsk_thread_flag(t, TIF_32BIT_FPREGS);
+			clear_tsk_thread_flag(t, TIF_MSA_CTX_LIVE);
+		}
+
+		/* Update desired FP single layout */
+		if (value & PR_FP_MODE_FRE)
+			set_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
+		else
+			clear_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
+	}
+
+	/* Allow threads to use FP again */
+	atomic_set(&task->mm->context.fp_mode_switching, 0);
+
+	return 0;
+}
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index ad3d2031c327..d5fbfb51b9da 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1134,10 +1134,29 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action,
 	return NOTIFY_OK;
 }
 
+static int wait_on_fp_mode_switch(atomic_t *p)
+{
+	/*
+	 * The FP mode for this task is currently being switched. That may
+	 * involve modifications to the format of this tasks FP context which
+	 * make it unsafe to proceed with execution for the moment. Instead,
+	 * schedule some other task.
+	 */
+	schedule();
+	return 0;
+}
+
 static int enable_restore_fp_context(int msa)
 {
 	int err, was_fpu_owner, prior_msa;
 
+	/*
+	 * If an FP mode switch is currently underway, wait for it to
+	 * complete before proceeding.
+	 */
+	wait_on_atomic_t(&current->mm->context.fp_mode_switching,
+			 wait_on_fp_mode_switch, TASK_KILLABLE);
+
 	if (!used_math()) {
 		/* First time FP context user. */
 		preempt_disable();
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 89f63503f903..31891d9535e2 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -185,4 +185,9 @@ struct prctl_mm_map {
 #define PR_MPX_ENABLE_MANAGEMENT  43
 #define PR_MPX_DISABLE_MANAGEMENT 44
 
+#define PR_SET_FP_MODE		45
+#define PR_GET_FP_MODE		46
+# define PR_FP_MODE_FR		(1 << 0)	/* 64b FP registers */
+# define PR_FP_MODE_FRE		(1 << 1)	/* 32b compatibility */
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index a8c9f5a7dda6..08b16bbdcdf0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -97,6 +97,12 @@
 #ifndef MPX_DISABLE_MANAGEMENT
 # define MPX_DISABLE_MANAGEMENT(a)	(-EINVAL)
 #endif
+#ifndef GET_FP_MODE
+# define GET_FP_MODE(a)		(-EINVAL)
+#endif
+#ifndef SET_FP_MODE
+# define SET_FP_MODE(a,b)	(-EINVAL)
+#endif
 
 /*
  * this is where the system-wide overflow UID and GID are defined, for
@@ -2215,6 +2221,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_MPX_DISABLE_MANAGEMENT:
 		error = MPX_DISABLE_MANAGEMENT(me);
 		break;
+	case PR_SET_FP_MODE:
+		error = SET_FP_MODE(me, arg2);
+		break;
+	case PR_GET_FP_MODE:
+		error = GET_FP_MODE(me);
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
cgit v1.2.3


From e0b561ee78d82a4cc7792aa28fa4b1ea15325dcc Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Sun, 15 Feb 2015 10:03:20 +0100
Subject: livepatch: fix format string in kobject_init_and_add()

kobject_init_and_add() takes expects format string for a name, so we
better provide it in order to avoid infoleaks if modules craft their
mod->name in a special way.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Reported-by: Kees Cook <keescook@chromium.org>
Acked-by: Seth Jennings <sjenning@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 kernel/livepatch/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index ff7f47d026ac..69bf3aa3bde8 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -731,7 +731,7 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
 	func->state = KLP_DISABLED;
 
 	return kobject_init_and_add(&func->kobj, &klp_ktype_func,
-				    obj->kobj, func->old_name);
+				    obj->kobj, "%s", func->old_name);
 }
 
 /* parts of the initialization that is done only when the object is loaded */
@@ -807,7 +807,7 @@ static int klp_init_patch(struct klp_patch *patch)
 	patch->state = KLP_DISABLED;
 
 	ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch,
-				   klp_root_kobj, patch->mod->name);
+				   klp_root_kobj, "%s", patch->mod->name);
 	if (ret)
 		goto unlock;
 
-- 
cgit v1.2.3


From 8d1e5a1a1ccf5ae9d8a5a0ee7960202ccb0c5429 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 17 Feb 2015 16:43:43 +0100
Subject: locking/rtmutex: Avoid a NULL pointer dereference on deadlock

With task_blocks_on_rt_mutex() returning early -EDEADLK we never
add the waiter to the waitqueue. Later, we try to remove it via
remove_waiter() and go boom in rt_mutex_top_waiter() because
rb_entry() gives a NULL pointer.

( Tested on v3.18-RT where rtmutex is used for regular mutex and I
  tried to get one twice in a row. )

Not sure when this started but I guess 397335f004f4 ("rtmutex: Fix
deadlock detector for real") or commit 3d5c9340d194 ("rtmutex:
Handle deadlock detection smarter").

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@vger.kernel.org> # for v3.16 and later kernels
Link: http://lkml.kernel.org/r/1424187823-19600-1-git-send-email-bigeasy@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/rtmutex.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3059bc2f022d..e16e5542bf13 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1193,7 +1193,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
 
 	if (unlikely(ret)) {
-		remove_waiter(lock, &waiter);
+		if (rt_mutex_has_waiters(lock))
+			remove_waiter(lock, &waiter);
 		rt_mutex_handle_deadlock(ret, chwalk, &waiter);
 	}
 
-- 
cgit v1.2.3


From 74b8a4cb6ce3685049ee124243a52238c5cabe55 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Feb 2015 13:07:38 +0100
Subject: sched: Clarify ordering between task_rq_lock() and move_queued_task()

There was a wee bit of confusion around the exact ordering here;
clarify things.

Reported-by: Kirill Tkhai <ktkhai@parallels.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20150217121258.GM5029@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1f37fe7f77a4..fd0a6ed21849 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -341,6 +341,22 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 		raw_spin_lock_irqsave(&p->pi_lock, *flags);
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
+		/*
+		 *	move_queued_task()		task_rq_lock()
+		 *
+		 *	ACQUIRE (rq->lock)
+		 *	[S] ->on_rq = MIGRATING		[L] rq = task_rq()
+		 *	WMB (__set_task_cpu())		ACQUIRE (rq->lock);
+		 *	[S] ->cpu = new_cpu		[L] task_rq()
+		 *					[L] ->on_rq
+		 *	RELEASE (rq->lock)
+		 *
+		 * If we observe the old cpu in task_rq_lock, the acquire of
+		 * the old rq->lock will fully serialize against the stores.
+		 *
+		 * If we observe the new cpu in task_rq_lock, the acquire will
+		 * pair with the WMB to ensure we must then also see migrating.
+		 */
 		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
 			return rq;
 		raw_spin_unlock(&rq->lock);
-- 
cgit v1.2.3


From 3960c8c0c7891dfc0f7be687cbdabb0d6916d614 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Feb 2015 13:22:25 +0100
Subject: sched: Make dl_task_time() use task_rq_lock()

Kirill reported that a dl task can be throttled and dequeued at the
same time. This happens, when it becomes throttled in schedule(),
which is called to go to sleep:

current->state = TASK_INTERRUPTIBLE;
schedule()
    deactivate_task()
        dequeue_task_dl()
            update_curr_dl()
                start_dl_timer()
            __dequeue_task_dl()
    prev->on_rq = 0;

This invalidates the assumption from commit 0f397f2c90ce ("sched/dl:
Fix race in dl_task_timer()"):

  "The only reason we don't strictly need ->pi_lock now is because
   we're guaranteed to have p->state == TASK_RUNNING here and are
   thus free of ttwu races".

And therefore we have to use the full task_rq_lock() here.

This further amends the fact that we forgot to update the rq lock loop
for TASK_ON_RQ_MIGRATE, from commit cca26e8009d1 ("sched: Teach
scheduler to understand TASK_ON_RQ_MIGRATING state").

Reported-by: Kirill Tkhai <ktkhai@parallels.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Link: http://lkml.kernel.org/r/20150217123139.GN5029@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c     | 76 -------------------------------------------------
 kernel/sched/deadline.c | 12 ++------
 kernel/sched/sched.h    | 76 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 85 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fd0a6ed21849..f77acd98f50a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -306,82 +306,6 @@ __read_mostly int scheduler_running;
  */
 int sysctl_sched_rt_runtime = 950000;
 
-/*
- * __task_rq_lock - lock the rq @p resides on.
- */
-static inline struct rq *__task_rq_lock(struct task_struct *p)
-	__acquires(rq->lock)
-{
-	struct rq *rq;
-
-	lockdep_assert_held(&p->pi_lock);
-
-	for (;;) {
-		rq = task_rq(p);
-		raw_spin_lock(&rq->lock);
-		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
-			return rq;
-		raw_spin_unlock(&rq->lock);
-
-		while (unlikely(task_on_rq_migrating(p)))
-			cpu_relax();
-	}
-}
-
-/*
- * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
- */
-static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
-	__acquires(p->pi_lock)
-	__acquires(rq->lock)
-{
-	struct rq *rq;
-
-	for (;;) {
-		raw_spin_lock_irqsave(&p->pi_lock, *flags);
-		rq = task_rq(p);
-		raw_spin_lock(&rq->lock);
-		/*
-		 *	move_queued_task()		task_rq_lock()
-		 *
-		 *	ACQUIRE (rq->lock)
-		 *	[S] ->on_rq = MIGRATING		[L] rq = task_rq()
-		 *	WMB (__set_task_cpu())		ACQUIRE (rq->lock);
-		 *	[S] ->cpu = new_cpu		[L] task_rq()
-		 *					[L] ->on_rq
-		 *	RELEASE (rq->lock)
-		 *
-		 * If we observe the old cpu in task_rq_lock, the acquire of
-		 * the old rq->lock will fully serialize against the stores.
-		 *
-		 * If we observe the new cpu in task_rq_lock, the acquire will
-		 * pair with the WMB to ensure we must then also see migrating.
-		 */
-		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
-			return rq;
-		raw_spin_unlock(&rq->lock);
-		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-
-		while (unlikely(task_on_rq_migrating(p)))
-			cpu_relax();
-	}
-}
-
-static void __task_rq_unlock(struct rq *rq)
-	__releases(rq->lock)
-{
-	raw_spin_unlock(&rq->lock);
-}
-
-static inline void
-task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
-	__releases(rq->lock)
-	__releases(p->pi_lock)
-{
-	raw_spin_unlock(&rq->lock);
-	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-}
-
 /*
  * this_rq_lock - lock this runqueue and disable interrupts.
  */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a027799ae130..e88847d9fc6a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 						     struct sched_dl_entity,
 						     dl_timer);
 	struct task_struct *p = dl_task_of(dl_se);
+	unsigned long flags;
 	struct rq *rq;
-again:
-	rq = task_rq(p);
-	raw_spin_lock(&rq->lock);
 
-	if (rq != task_rq(p)) {
-		/* Task was moved, retrying. */
-		raw_spin_unlock(&rq->lock);
-		goto again;
-	}
+	rq = task_rq_lock(current, &flags);
 
 	/*
 	 * We need to take care of several possible races here:
@@ -555,7 +549,7 @@ again:
 		push_dl_task(rq);
 #endif
 unlock:
-	raw_spin_unlock(&rq->lock);
+	task_rq_unlock(rq, current, &flags);
 
 	return HRTIMER_NORESTART;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0870db23d79c..dc0f435a2779 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1380,6 +1380,82 @@ static inline void sched_avg_update(struct rq *rq) { }
 
 extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
 
+/*
+ * __task_rq_lock - lock the rq @p resides on.
+ */
+static inline struct rq *__task_rq_lock(struct task_struct *p)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	lockdep_assert_held(&p->pi_lock);
+
+	for (;;) {
+		rq = task_rq(p);
+		raw_spin_lock(&rq->lock);
+		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+			return rq;
+		raw_spin_unlock(&rq->lock);
+
+		while (unlikely(task_on_rq_migrating(p)))
+			cpu_relax();
+	}
+}
+
+/*
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
+ */
+static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+	__acquires(p->pi_lock)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	for (;;) {
+		raw_spin_lock_irqsave(&p->pi_lock, *flags);
+		rq = task_rq(p);
+		raw_spin_lock(&rq->lock);
+		/*
+		 *	move_queued_task()		task_rq_lock()
+		 *
+		 *	ACQUIRE (rq->lock)
+		 *	[S] ->on_rq = MIGRATING		[L] rq = task_rq()
+		 *	WMB (__set_task_cpu())		ACQUIRE (rq->lock);
+		 *	[S] ->cpu = new_cpu		[L] task_rq()
+		 *					[L] ->on_rq
+		 *	RELEASE (rq->lock)
+		 *
+		 * If we observe the old cpu in task_rq_lock, the acquire of
+		 * the old rq->lock will fully serialize against the stores.
+		 *
+		 * If we observe the new cpu in task_rq_lock, the acquire will
+		 * pair with the WMB to ensure we must then also see migrating.
+		 */
+		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+			return rq;
+		raw_spin_unlock(&rq->lock);
+		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+
+		while (unlikely(task_on_rq_migrating(p)))
+			cpu_relax();
+	}
+}
+
+static inline void __task_rq_unlock(struct rq *rq)
+	__releases(rq->lock)
+{
+	raw_spin_unlock(&rq->lock);
+}
+
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
+	__releases(rq->lock)
+	__releases(p->pi_lock)
+{
+	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+}
+
 #ifdef CONFIG_SMP
 #ifdef CONFIG_PREEMPT
 
-- 
cgit v1.2.3


From a79ec89fd8459f0de850898f432a2a57d60e64de Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <tkhai@yandex.ru>
Date: Mon, 16 Feb 2015 15:38:34 +0300
Subject: sched/dl: Prevent enqueue of a sleeping task in dl_task_timer()

A deadline task may be throttled and dequeued at the same time.
This happens, when it becomes throttled in schedule(), which
is called to go to sleep:

current->state = TASK_INTERRUPTIBLE;
schedule()
    deactivate_task()
        dequeue_task_dl()
            update_curr_dl()
                start_dl_timer()
            __dequeue_task_dl()
    prev->on_rq = 0;

Later the timer fires, but the task is still dequeued:

dl_task_timer()
    enqueue_task_dl() /* queues on dl_rq; on_rq remains 0 */

Someone wakes it up:

try_to_wake_up()

    enqueue_dl_entity()
        BUG_ON(on_dl_rq())

Patch fixes this problem, it prevents queueing !on_rq tasks
on dl_rq.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[ Wrote comment. ]
Cc: Juri Lelli <juri.lelli@arm.com>
Fixes: 1019a359d3dc ("sched/deadline: Fix stale yield state")
Link: http://lkml.kernel.org/r/1374601424090314@web4j.yandex.ru
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/deadline.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e88847d9fc6a..9908c950d776 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -535,6 +535,26 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 
 	sched_clock_tick();
 	update_rq_clock(rq);
+
+	/*
+	 * If the throttle happened during sched-out; like:
+	 *
+	 *   schedule()
+	 *     deactivate_task()
+	 *       dequeue_task_dl()
+	 *         update_curr_dl()
+	 *           start_dl_timer()
+	 *         __dequeue_task_dl()
+	 *     prev->on_rq = 0;
+	 *
+	 * We can be both throttled and !queued. Replenish the counter
+	 * but do not enqueue -- wait for our wakeup to do that.
+	 */
+	if (!task_on_rq_queued(p)) {
+		replenish_dl_entity(dl_se, dl_se);
+		goto unlock;
+	}
+
 	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
 	if (dl_task(rq->curr))
 		check_preempt_curr_dl(rq, p, 0);
-- 
cgit v1.2.3


From 06b1f8083d6ed379ec1207a96339f23e8f7abfcf Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 16 Feb 2015 19:20:07 +0100
Subject: sched: Fix preempt_schedule_common() triggering tracing recursion

Since the function graph tracer needs to disable preemption, it might
call preempt_schedule() after reenabling  it if something triggered the
need for rescheduling in between.

Therefore we can't trace preempt_schedule() itself because we would
face a function tracing recursion otherwise as the tracer is always
called before PREEMPT_ACTIVE gets set to prevent that recursion. This is
why preempt_schedule() is tagged as "notrace".

But the same issue applies to every function called by preempt_schedule()
before PREEMPT_ACTIVE is actually set. And preempt_schedule_common() is
one such example. Unfortunately we forgot to tag it as notrace as well
and as a result we are encountering tracing recursion since it got
introduced by:

   a18b5d0181923 ("sched: Fix missing preemption opportunity")

Let's fix that by applying the appropriate function tag to
preempt_schedule_common().

Reported-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1424110807-15057-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f77acd98f50a..c314000f5e52 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2839,7 +2839,7 @@ void __sched schedule_preempt_disabled(void)
 	preempt_disable();
 }
 
-static void preempt_schedule_common(void)
+static void __sched notrace preempt_schedule_common(void)
 {
 	do {
 		__preempt_count_add(PREEMPT_ACTIVE);
-- 
cgit v1.2.3


From bc9560155f4063bbc9be71bd69d6726d41b47653 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 12 Feb 2015 20:59:13 +0100
Subject: sched/completion: Serialize completion_done() with complete()

Commit de30ec47302c "Remove unnecessary ->wait.lock serialization when
reading completion state" was not correct, without lock/unlock the code
like stop_machine_from_inactive_cpu()

	while (!completion_done())
		cpu_relax();

can return before complete() finishes its spin_unlock() which writes to
this memory. And spin_unlock_wait().

While at it, change try_wait_for_completion() to use READ_ONCE().

Reported-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reported-by: Davidlohr Bueso <dave@stgolabs.net>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[ Added a comment with the barrier. ]
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Mc Guire <der.herr@hofr.at>
Cc: raghavendra.kt@linux.vnet.ibm.com
Cc: waiman.long@hp.com
Fixes: de30ec47302c ("sched/completion: Remove unnecessary ->wait.lock serialization when reading completion state")
Link: http://lkml.kernel.org/r/20150212195913.GA30430@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/completion.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 7052d3fd4e7b..8d0f35debf35 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -274,7 +274,7 @@ bool try_wait_for_completion(struct completion *x)
 	 * first without taking the lock so we can
 	 * return early in the blocking case.
 	 */
-	if (!ACCESS_ONCE(x->done))
+	if (!READ_ONCE(x->done))
 		return 0;
 
 	spin_lock_irqsave(&x->wait.lock, flags);
@@ -297,6 +297,21 @@ EXPORT_SYMBOL(try_wait_for_completion);
  */
 bool completion_done(struct completion *x)
 {
-	return !!ACCESS_ONCE(x->done);
+	if (!READ_ONCE(x->done))
+		return false;
+
+	/*
+	 * If ->done, we need to wait for complete() to release ->wait.lock
+	 * otherwise we can end up freeing the completion before complete()
+	 * is done referencing it.
+	 *
+	 * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
+	 * the loads of ->done and ->wait.lock such that we cannot observe
+	 * the lock before complete() acquires it while observing the ->done
+	 * after it's acquired the lock.
+	 */
+	smp_rmb();
+	spin_unlock_wait(&x->wait.lock);
+	return true;
 }
 EXPORT_SYMBOL(completion_done);
-- 
cgit v1.2.3


From 9cff8adeaa34b5d2802f03f89803da57856b3b72 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 13 Feb 2015 15:49:17 +1100
Subject: sched: Prevent recursion in io_schedule()

io_schedule() calls blk_flush_plug() which, depending on the
contents of current->plug, can initiate arbitrary blk-io requests.

Note that this contrasts with blk_schedule_flush_plug() which requires
all non-trivial work to be handed off to a separate thread.

This makes it possible for io_schedule() to recurse, and initiating
block requests could possibly call mempool_alloc() which, in times of
memory pressure, uses io_schedule().

Apart from any stack usage issues, io_schedule() will not behave
correctly when called recursively as delayacct_blkio_start() does
not allow for repeated calls.

So:
 - use ->in_iowait to detect recursion.  Set it earlier, and restore
   it to the old value.
 - move the call to "raw_rq" after the call to blk_flush_plug().
   As this is some sort of per-cpu thing, we want some chance that
   we are on the right CPU
 - When io_schedule() is called recurively, use blk_schedule_flush_plug()
   which cannot further recurse.
 - as this makes io_schedule() a lot more complex and as io_schedule()
   must match io_schedule_timeout(), but all the changes in io_schedule_timeout()
   and make io_schedule a simple wrapper for that.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[ Moved the now rudimentary io_schedule() into sched.h. ]
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Tony Battersby <tonyb@cybernetics.com>
Link: http://lkml.kernel.org/r/20150213162600.059fffb2@notabene.brown
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 10 +++++++---
 kernel/sched/core.c   | 31 ++++++++++++-------------------
 2 files changed, 19 insertions(+), 22 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8db31ef98d2f..cb5cdc777c8a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -363,9 +363,6 @@ extern void show_regs(struct pt_regs *);
  */
 extern void show_stack(struct task_struct *task, unsigned long *sp);
 
-void io_schedule(void);
-long io_schedule_timeout(long timeout);
-
 extern void cpu_init (void);
 extern void trap_init(void);
 extern void update_process_times(int user);
@@ -422,6 +419,13 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
 extern void schedule_preempt_disabled(void);
 
+extern long io_schedule_timeout(long timeout);
+
+static inline void io_schedule(void)
+{
+	io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+}
+
 struct nsproxy;
 struct user_namespace;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c314000f5e52..daaea922f482 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4358,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to);
  * This task is about to go to sleep on IO. Increment rq->nr_iowait so
  * that process accounting knows that this is a task in IO wait state.
  */
-void __sched io_schedule(void)
-{
-	struct rq *rq = raw_rq();
-
-	delayacct_blkio_start();
-	atomic_inc(&rq->nr_iowait);
-	blk_flush_plug(current);
-	current->in_iowait = 1;
-	schedule();
-	current->in_iowait = 0;
-	atomic_dec(&rq->nr_iowait);
-	delayacct_blkio_end();
-}
-EXPORT_SYMBOL(io_schedule);
-
 long __sched io_schedule_timeout(long timeout)
 {
-	struct rq *rq = raw_rq();
+	int old_iowait = current->in_iowait;
+	struct rq *rq;
 	long ret;
 
+	current->in_iowait = 1;
+	if (old_iowait)
+		blk_schedule_flush_plug(current);
+	else
+		blk_flush_plug(current);
+
 	delayacct_blkio_start();
+	rq = raw_rq();
 	atomic_inc(&rq->nr_iowait);
-	blk_flush_plug(current);
-	current->in_iowait = 1;
 	ret = schedule_timeout(timeout);
-	current->in_iowait = 0;
+	current->in_iowait = old_iowait;
 	atomic_dec(&rq->nr_iowait);
 	delayacct_blkio_end();
+
 	return ret;
 }
+EXPORT_SYMBOL(io_schedule_timeout);
 
 /**
  * sys_sched_get_priority_max - return maximum RT priority.
-- 
cgit v1.2.3


From 29183a70b0b828500816bd794b3fe192fce89f73 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 9 Feb 2015 23:30:36 -0800
Subject: ntp: Fixup adjtimex freq validation on 32-bit systems

Additional validation of adjtimex freq values to avoid
potential multiplication overflows were added in commit
5e5aeb4367b (time: adjtimex: Validate the ADJ_FREQUENCY values)

Unfortunately the patch used LONG_MAX/MIN instead of
LLONG_MAX/MIN, which was fine on 64-bit systems, but being
much smaller on 32-bit systems caused false positives
resulting in most direct frequency adjustments to fail w/
EINVAL.

ntpd only does direct frequency adjustments at startup, so
the issue was not as easily observed there, but other time
sync applications like ptpd and chrony were more effected by
the bug.

See bugs:

  https://bugzilla.kernel.org/show_bug.cgi?id=92481
  https://bugzilla.redhat.com/show_bug.cgi?id=1188074

This patch changes the checks to use LLONG_MAX for
clarity, and additionally the checks are disabled
on 32-bit systems since LLONG_MAX/PPM_SCALE is always
larger then the 32-bit long freq value, so multiplication
overflows aren't possible there.

Reported-by: Josh Boyer <jwboyer@fedoraproject.org>
Reported-by: George Joseph <george.joseph@fairview5.com>
Tested-by: George Joseph <george.joseph@fairview5.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org> # v3.19+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Link: http://lkml.kernel.org/r/1423553436-29747-1-git-send-email-john.stultz@linaro.org
[ Prettified the changelog and the comments a bit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/ntp.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 4b585e0fdd22..0f60b08a4f07 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -633,10 +633,14 @@ int ntp_validate_timex(struct timex *txc)
 	if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
 		return -EPERM;
 
-	if (txc->modes & ADJ_FREQUENCY) {
-		if (LONG_MIN / PPM_SCALE > txc->freq)
+	/*
+	 * Check for potential multiplication overflows that can
+	 * only happen on 64-bit systems:
+	 */
+	if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
+		if (LLONG_MIN / PPM_SCALE > txc->freq)
 			return -EINVAL;
-		if (LONG_MAX / PPM_SCALE < txc->freq)
+		if (LLONG_MAX / PPM_SCALE < txc->freq)
 			return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From 6f1607f1bdb4f9991a8123675a03c1764b2932fe Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@parallels.com>
Date: Wed, 4 Feb 2015 12:09:32 +0300
Subject: sched/dl: Do update_rq_clock() in yield_task_dl()

update_curr_dl() needs actual rq clock.

Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1423040972.18770.10.camel@tkhai
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/deadline.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9908c950d776..3fa8fa6d9403 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -912,6 +912,7 @@ static void yield_task_dl(struct rq *rq)
 		rq->curr->dl.dl_yielded = 1;
 		p->dl.runtime = 0;
 	}
+	update_rq_clock(rq);
 	update_curr_dl(rq);
 }
 
-- 
cgit v1.2.3


From 1fe89e1b6d270aa0d3452c60d38461ea589594e3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 9 Feb 2015 11:53:18 +0100
Subject: sched/autogroup: Fix failure to set cpu.rt_runtime_us

Because task_group() uses a cache of autogroup_task_group(), whose
output depends on sched_class, switching classes can generate
problems.

In particular, when started as fair, the cache points to the
autogroup, so when switching to RT the tg_rt_schedulable() test fails
for every cpu.rt_{runtime,period}_us change because now the autogroup
has tasks and no runtime.

Furthermore, going back to the previous semantics of varying
task_group() with sched_class has the down-side that the sched_debug
output varies as well, even though the task really is in the
autogroup.

Therefore add an autogroup exception to tg_has_rt_tasks() -- such that
both (all) task_group() usages in sched/core now have one. And remove
all the remnants of the variable task_group() output.

Reported-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Stefan Bader <stefan.bader@canonical.com>
Fixes: 8323f26ce342 ("sched: Fix race in task_group()")
Link: http://lkml.kernel.org/r/20150209112237.GR5029@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/auto_group.c | 6 +-----
 kernel/sched/core.c       | 6 ++++++
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 8a2e230fb86a..eae160dd669d 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -87,8 +87,7 @@ static inline struct autogroup *autogroup_create(void)
 	 * so we don't have to move tasks around upon policy change,
 	 * or flail around trying to allocate bandwidth on the fly.
 	 * A bandwidth exception in __sched_setscheduler() allows
-	 * the policy change to proceed.  Thereafter, task_group()
-	 * returns &root_task_group, so zero bandwidth is required.
+	 * the policy change to proceed.
 	 */
 	free_rt_sched_group(tg);
 	tg->rt_se = root_task_group.rt_se;
@@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 	if (tg != &root_task_group)
 		return false;
 
-	if (p->sched_class != &fair_sched_class)
-		return false;
-
 	/*
 	 * We can only assume the task group can't go away on us if
 	 * autogroup_move_group() can see us on ->thread_group list.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index daaea922f482..03a67f09404c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7577,6 +7577,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
 {
 	struct task_struct *g, *p;
 
+	/*
+	 * Autogroups do not have RT tasks; see autogroup_create().
+	 */
+	if (task_group_is_autogroup(tg))
+		return 0;
+
 	for_each_process_thread(g, p) {
 		if (rt_task(p) && task_group(p) == tg)
 			return 1;
-- 
cgit v1.2.3


From 2636ed5f8d15ff9395731593537b4b3fdf2af24d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 9 Feb 2015 12:23:20 +0100
Subject: sched/rt: Avoid obvious configuration fail

Setting the root group's cpu.rt_runtime_us to 0 is a bad thing; it
would disallow the kernel creating RT tasks.

One can of course still set it to 1, which will (likely) still wreck
your kernel, but at least make it clear that setting it to 0 is not
good.

Collect both sanity checks into the one place while we're there.

Suggested-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20150209112715.GO24151@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 03a67f09404c..a4869bd426ca 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7675,6 +7675,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
 {
 	int i, err = 0;
 
+	/*
+	 * Disallowing the root group RT runtime is BAD, it would disallow the
+	 * kernel creating (and or operating) RT threads.
+	 */
+	if (tg == &root_task_group && rt_runtime == 0)
+		return -EINVAL;
+
+	/* No period doesn't make any sense. */
+	if (rt_period == 0)
+		return -EINVAL;
+
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
 	err = __rt_schedulable(tg, rt_period, rt_runtime);
@@ -7731,9 +7742,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
 	rt_period = (u64)rt_period_us * NSEC_PER_USEC;
 	rt_runtime = tg->rt_bandwidth.rt_runtime;
 
-	if (rt_period == 0)
-		return -EINVAL;
-
 	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 
-- 
cgit v1.2.3


From 146755923262037fc4c54abc28c04b1103f3cc51 Mon Sep 17 00:00:00 2001
From: Jay Lan <jlan@sgi.com>
Date: Mon, 29 Sep 2014 15:36:57 -0700
Subject: kdb: fix incorrect counts in KDB summary command output

The output of KDB 'summary' command should report MemTotal, MemFree
and Buffers output in kB. Current codes report in unit of pages.

A define of K(x) as
is defined in the code, but not used.

This patch would apply the define to convert the values to kB.
Please include me on Cc on replies. I do not subscribe to linux-kernel.

Signed-off-by: Jay Lan <jlan@sgi.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 kernel/debug/kdb/kdb_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 7b40c5f07dce..c6d2c0b1565d 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2583,7 +2583,7 @@ static int kdb_summary(int argc, const char **argv)
 #define K(x) ((x) << (PAGE_SHIFT - 10))
 	kdb_printf("\nMemTotal:       %8lu kB\nMemFree:        %8lu kB\n"
 		   "Buffers:        %8lu kB\n",
-		   val.totalram, val.freeram, val.bufferram);
+		   K(val.totalram), K(val.freeram), K(val.bufferram));
 	return 0;
 }
 
-- 
cgit v1.2.3


From df0036d117e6c9df36324e517728e33543065f9a Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 8 Jan 2015 15:46:55 -0600
Subject: kdb: Fix off by one error in kdb_cpu()

There was a follow on replacement patch against the prior
"kgdb: Timeout if secondary CPUs ignore the roundup".

See: https://lkml.org/lkml/2015/1/7/442

This patch is the delta vs the patch that was committed upstream:
  * Fix an off-by-one error in kdb_cpu().
  * Replace NR_CPUS with CONFIG_NR_CPUS to tell checkpatch that we
    really want a static limit.
  * Removed the "KGDB: " prefix from the pr_crit() in debug_core.c
    (kgdb-next contains a patch which introduced pr_fmt() to this file
    to the tag will now be applied automatically).

Cc: Daniel Thompson <daniel.thompson@linaro.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 kernel/debug/debug_core.c   | 2 +-
 kernel/debug/kdb/kdb_main.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 07ce18ca71e0..ac5c0f9c7a20 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -604,7 +604,7 @@ return_normal:
 		   online_cpus)
 		cpu_relax();
 	if (!time_left)
-		pr_crit("KGDB: Timed out waiting for secondary CPUs.\n");
+		pr_crit("Timed out waiting for secondary CPUs.\n");
 
 	/*
 	 * At this point the primary processor is completely
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index c6d2c0b1565d..60f6bb817f70 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2256,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv)
 	/*
 	 * Validate cpunum
 	 */
-	if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb)
+	if ((cpunum >= CONFIG_NR_CPUS) || !kgdb_info[cpunum].enter_kgdb)
 		return KDB_BADCPUNUM;
 
 	dbg_switch_cpu = cpunum;
-- 
cgit v1.2.3


From f7d4ca8bbfda23b4f1eae9b6757ff64166b093d5 Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson@linaro.org>
Date: Fri, 7 Nov 2014 18:37:57 +0000
Subject: kdb: Avoid printing KERN_ levels to consoles

Currently when kdb traps printk messages then the raw log level prefix
(consisting of '\001' followed by a numeral) does not get stripped off
before the message is issued to the various I/O handlers supported by
kdb. This causes annoying visual noise as well as causing problems
grepping for ^. It is also a change of behaviour compared to normal usage
of printk() usage. For example <SysRq>-h ends up with different output to
that of kdb's "sr h".

This patch addresses the problem by stripping log levels from messages
before they are issued to the I/O handlers. printk() which can also
act as an i/o handler in some cases is special cased; if the caller
provided a log level then the prefix will be preserved when sent to
printk().

The addition of non-printable characters to the output of kdb commands is a
regression, albeit and extremely elderly one, introduced by commit
04d2c8c83d0e ("printk: convert the format for KERN_<LEVEL> to a 2 byte
pattern"). Note also that this patch does *not* restore the original
behaviour from v3.5. Instead it makes printk() from within a kdb command
display the message without any prefix (i.e. like printk() normally does).

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Joe Perches <joe@perches.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 include/linux/kdb.h       |  8 +++++++-
 kernel/debug/kdb/kdb_io.c | 22 +++++++++++++---------
 kernel/printk/printk.c    |  2 +-
 3 files changed, 21 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 75ae2e2631fc..a19bcf9e762e 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -156,8 +156,14 @@ typedef enum {
 	KDB_REASON_SYSTEM_NMI,	/* In NMI due to SYSTEM cmd; regs valid */
 } kdb_reason_t;
 
+enum kdb_msgsrc {
+	KDB_MSGSRC_INTERNAL, /* direct call to kdb_printf() */
+	KDB_MSGSRC_PRINTK, /* trapped from printk() */
+};
+
 extern int kdb_trap_printk;
-extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args);
+extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt,
+				      va_list args);
 extern __printf(1, 2) int kdb_printf(const char *, ...);
 typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
 
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 7c70812caea5..a550afb99ebe 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -548,7 +548,7 @@ static int kdb_search_string(char *searched, char *searchfor)
 	return 0;
 }
 
-int vkdb_printf(const char *fmt, va_list ap)
+int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
 {
 	int diag;
 	int linecount;
@@ -691,19 +691,20 @@ kdb_printit:
 	 * Write to all consoles.
 	 */
 	retlen = strlen(kdb_buffer);
+	cp = (char *) printk_skip_level(kdb_buffer);
 	if (!dbg_kdb_mode && kgdb_connected) {
-		gdbstub_msg_write(kdb_buffer, retlen);
+		gdbstub_msg_write(cp, retlen - (cp - kdb_buffer));
 	} else {
 		if (dbg_io_ops && !dbg_io_ops->is_console) {
-			len = retlen;
-			cp = kdb_buffer;
+			len = retlen - (cp - kdb_buffer);
+			cp2 = cp;
 			while (len--) {
-				dbg_io_ops->write_char(*cp);
-				cp++;
+				dbg_io_ops->write_char(*cp2);
+				cp2++;
 			}
 		}
 		while (c) {
-			c->write(c, kdb_buffer, retlen);
+			c->write(c, cp, retlen - (cp - kdb_buffer));
 			touch_nmi_watchdog();
 			c = c->next;
 		}
@@ -711,7 +712,10 @@ kdb_printit:
 	if (logging) {
 		saved_loglevel = console_loglevel;
 		console_loglevel = CONSOLE_LOGLEVEL_SILENT;
-		printk(KERN_INFO "%s", kdb_buffer);
+		if (printk_get_level(kdb_buffer) || src == KDB_MSGSRC_PRINTK)
+			printk("%s", kdb_buffer);
+		else
+			pr_info("%s", kdb_buffer);
 	}
 
 	if (KDB_STATE(PAGER)) {
@@ -844,7 +848,7 @@ int kdb_printf(const char *fmt, ...)
 	int r;
 
 	va_start(ap, fmt);
-	r = vkdb_printf(fmt, ap);
+	r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
 	va_end(ap);
 
 	return r;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 02d6b6d28796..fae29e3ffbf0 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1811,7 +1811,7 @@ int vprintk_default(const char *fmt, va_list args)
 
 #ifdef CONFIG_KGDB_KDB
 	if (unlikely(kdb_trap_printk)) {
-		r = vkdb_printf(fmt, args);
+		r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
 		return r;
 	}
 #endif
-- 
cgit v1.2.3


From 5454388113938d9592d6a6d5424469014da4ee86 Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson@linaro.org>
Date: Thu, 6 Nov 2014 15:32:13 +0000
Subject: kdb: Remove stack dump when entering kgdb due to NMI

Issuing a stack dump feels ergonomically wrong when entering due to NMI.

Entering due to NMI is normally a reaction to a user request, either the
NMI button on a server or a "magic knock" on a UART. Therefore the
backtrace behaviour on entry due to NMI should be like SysRq-g (no stack
dump) rather than like oops.

Note also that the stack dump does not offer any information that
cannot be trivial retrieved using the 'bt' command.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 kernel/debug/kdb/kdb_main.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 60f6bb817f70..0a22f455060a 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1247,7 +1247,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
 		kdb_printf("due to NonMaskable Interrupt @ "
 			   kdb_machreg_fmt "\n",
 			   instruction_pointer(regs));
-		kdb_dumpregs(regs);
 		break;
 	case KDB_REASON_SSTEP:
 	case KDB_REASON_BREAK:
-- 
cgit v1.2.3


From ab08e464a2cd8242fdc6e4f87f3480808364a97a Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson@linaro.org>
Date: Thu, 11 Sep 2014 09:58:29 +0100
Subject: kdb: Fix a prompt management bug when using | grep

Currently when the "| grep" feature is used to filter the output of a
command then the prompt is not displayed for the subsequent command.
Likewise any characters typed by the user are also not echoed to the
display. This rather disconcerting problem eventually corrects itself
when the user presses Enter and the kdb_grepping_flag is cleared as
kdb_parse() tries to make sense of whatever they typed.

This patch resolves the problem by moving the clearing of this flag
from the middle of command processing to the beginning.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 kernel/debug/kdb/kdb_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 0a22f455060a..420418360b81 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -915,13 +915,12 @@ int kdb_parse(const char *cmdstr)
 	char *cp;
 	char *cpp, quoted;
 	kdbtab_t *tp;
-	int i, escaped, ignore_errors = 0, check_grep;
+	int i, escaped, ignore_errors = 0, check_grep = 0;
 
 	/*
 	 * First tokenize the command string.
 	 */
 	cp = (char *)cmdstr;
-	kdb_grepping_flag = check_grep = 0;
 
 	if (KDB_FLAG(CMD_INTERRUPT)) {
 		/* Previous command was interrupted, newline must not
@@ -1280,6 +1279,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
 		 */
 		kdb_nextline = 1;
 		KDB_STATE_CLEAR(SUPPRESS);
+		kdb_grepping_flag = 0;
 
 		cmdbuf = cmd_cur;
 		*cmdbuf = '\0';
-- 
cgit v1.2.3


From fb6daa7520f9d17a97e84a3d5a947819e0313f28 Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson@linaro.org>
Date: Thu, 11 Sep 2014 10:37:10 +0100
Subject: kdb: Provide forward search at more prompt

Currently kdb allows the output of comamnds to be filtered using the
| grep feature. This is useful but does not permit the output emitted
shortly after a string match to be examined without wading through the
entire unfiltered output of the command. Such a feature is particularly
useful to navigate function traces because these traces often have a
useful trigger string *before* the point of interest.

This patch reuses the existing filtering logic to introduce a simple
forward search to kdb that can be triggered from the more prompt.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 kernel/debug/kdb/kdb_io.c      | 22 ++++++++++++++++++++--
 kernel/debug/kdb/kdb_main.c    |  7 ++++---
 kernel/debug/kdb/kdb_private.h |  2 ++
 3 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index a550afb99ebe..ca13e6215537 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -680,6 +680,12 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
 			size_avail = sizeof(kdb_buffer) - len;
 			goto kdb_print_out;
 		}
+		if (kdb_grepping_flag >= KDB_GREPPING_FLAG_SEARCH)
+			/*
+			 * This was a interactive search (using '/' at more
+			 * prompt) and it has completed. Clear the flag.
+			 */
+			kdb_grepping_flag = 0;
 		/*
 		 * at this point the string is a full line and
 		 * should be printed, up to the null.
@@ -798,11 +804,23 @@ kdb_printit:
 			kdb_nextline = linecount - 1;
 			kdb_printf("\r");
 			suspend_grep = 1; /* for this recursion */
+		} else if (buf1[0] == '/' && !kdb_grepping_flag) {
+			kdb_printf("\r");
+			kdb_getstr(kdb_grep_string, KDB_GREP_STRLEN,
+				   kdbgetenv("SEARCHPROMPT") ?: "search> ");
+			*strchrnul(kdb_grep_string, '\n') = '\0';
+			kdb_grepping_flag += KDB_GREPPING_FLAG_SEARCH;
+			suspend_grep = 1; /* for this recursion */
 		} else if (buf1[0] && buf1[0] != '\n') {
 			/* user hit something other than enter */
 			suspend_grep = 1; /* for this recursion */
-			kdb_printf("\nOnly 'q' or 'Q' are processed at more "
-				   "prompt, input ignored\n");
+			if (buf1[0] != '/')
+				kdb_printf(
+				    "\nOnly 'q', 'Q' or '/' are processed at "
+				    "more prompt, input ignored\n");
+			else
+				kdb_printf("\n'/' cannot be used during | "
+					   "grep filtering, input ignored\n");
 		} else if (kdb_grepping_flag) {
 			/* user hit enter */
 			suspend_grep = 1; /* for this recursion */
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 420418360b81..4121345498e0 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -50,8 +50,7 @@
 static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE;
 module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600);
 
-#define GREP_LEN 256
-char kdb_grep_string[GREP_LEN];
+char kdb_grep_string[KDB_GREP_STRLEN];
 int kdb_grepping_flag;
 EXPORT_SYMBOL(kdb_grepping_flag);
 int kdb_grep_leading;
@@ -870,7 +869,7 @@ static void parse_grep(const char *str)
 	len = strlen(cp);
 	if (!len)
 		return;
-	if (len >= GREP_LEN) {
+	if (len >= KDB_GREP_STRLEN) {
 		kdb_printf("search string too long\n");
 		return;
 	}
@@ -1280,6 +1279,8 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
 		kdb_nextline = 1;
 		KDB_STATE_CLEAR(SUPPRESS);
 		kdb_grepping_flag = 0;
+		/* ensure the old search does not leak into '/' commands */
+		kdb_grep_string[0] = '\0';
 
 		cmdbuf = cmd_cur;
 		*cmdbuf = '\0';
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index eaacd1693954..da93894db7ba 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -196,7 +196,9 @@ extern int kdb_main_loop(kdb_reason_t, kdb_reason_t,
 
 /* Miscellaneous functions and data areas */
 extern int kdb_grepping_flag;
+#define KDB_GREPPING_FLAG_SEARCH 0x8000
 extern char kdb_grep_string[];
+#define KDB_GREP_STRLEN 256
 extern int kdb_grep_leading;
 extern int kdb_grep_trailing;
 extern char *kdb_cmds[];
-- 
cgit v1.2.3


From 32d375f6f24c3e4c9c235672695b4c314cf6b964 Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson@linaro.org>
Date: Thu, 11 Sep 2014 10:41:12 +0100
Subject: kdb: Const qualifier for kdb_getstr's prompt argument

All current callers of kdb_getstr() can pass constant pointers via the
prompt argument. This patch adds a const qualification to make explicit
the fact that this is safe.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 kernel/debug/kdb/kdb_io.c      | 2 +-
 kernel/debug/kdb/kdb_private.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index ca13e6215537..fc1ef736253c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -439,7 +439,7 @@ poll_again:
  *	substituted for %d, %x or %o in the prompt.
  */
 
-char *kdb_getstr(char *buffer, size_t bufsize, char *prompt)
+char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt)
 {
 	if (prompt && kdb_prompt_str != prompt)
 		strncpy(kdb_prompt_str, prompt, CMD_BUFLEN);
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index da93894db7ba..75014d7f4568 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -211,7 +211,7 @@ extern void kdb_ps1(const struct task_struct *p);
 extern void kdb_print_nameval(const char *name, unsigned long val);
 extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
 extern void kdb_meminfo_proc_show(void);
-extern char *kdb_getstr(char *, size_t, char *);
+extern char *kdb_getstr(char *, size_t, const char *);
 extern void kdb_gdb_state_pass(char *buf);
 
 /* Defines for kdb_symbol_print */
-- 
cgit v1.2.3


From 5516fd7b92a7c16b9111a88aaa1b13dd937d8ac5 Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Wed, 28 Jan 2015 17:02:14 +0530
Subject: debug: prevent entering debug mode on panic/exception.

On non-developer devices, kgdb prevents the device from rebooting
after a panic.

Incase of panics and exceptions, to allow the device to reboot, prevent
entering debug mode to avoid getting stuck waiting for the user to
interact with debugger.

To avoid entering the debugger on panic/exception without any extra
configuration, panic_timeout is being used which can be set via
/proc/sys/kernel/panic at run time and CONFIG_PANIC_TIMEOUT sets the
default value.

Setting panic_timeout indicates that the user requested machine to
perform unattended reboot after panic. We dont want to get stuck waiting
for the user input incase of panic.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: kgdb-bugreport@lists.sourceforge.net
Cc: linux-kernel@vger.kernel.org
Cc: Android Kernel Team <kernel-team@android.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Colin Cross <ccross@android.com>
[Kiran: Added context to commit message.
panic_timeout is used instead of break_on_panic and
break_on_exception to honor CONFIG_PANIC_TIMEOUT
Modified the commit as per community feedback]
Signed-off-by: Kiran Raparthy <kiran.kumar@linaro.org>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 kernel/debug/debug_core.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'kernel')

diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index ac5c0f9c7a20..0874e2edd275 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -696,6 +696,14 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
 
 	if (arch_kgdb_ops.enable_nmi)
 		arch_kgdb_ops.enable_nmi(0);
+	/*
+	 * Avoid entering the debugger if we were triggered due to an oops
+	 * but panic_timeout indicates the system should automatically
+	 * reboot on panic. We don't want to get stuck waiting for input
+	 * on such systems, especially if its "just" an oops.
+	 */
+	if (signo != SIGTRAP && panic_timeout)
+		return 1;
 
 	memset(ks, 0, sizeof(struct kgdb_state));
 	ks->cpu			= raw_smp_processor_id();
@@ -828,6 +836,15 @@ static int kgdb_panic_event(struct notifier_block *self,
 			    unsigned long val,
 			    void *data)
 {
+	/*
+	 * Avoid entering the debugger if we were triggered due to a panic
+	 * We don't want to get stuck waiting for input from user in such case.
+	 * panic_timeout indicates the system should automatically
+	 * reboot on panic.
+	 */
+	if (panic_timeout)
+		return NOTIFY_DONE;
+
 	if (dbg_kdb_mode)
 		kdb_printf("PANIC: %s\n", (char *)data);
 	kgdb_breakpoint();
-- 
cgit v1.2.3


From c4ce0da8ec62d83c96e29db7dadd6d3985344bb3 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.cz>
Date: Wed, 18 Feb 2015 18:02:13 +0100
Subject: livepatch: RCU protect struct klp_func all the time when used in
 klp_ftrace_handler()

func->new_func has been accessed after rcu_read_unlock() in klp_ftrace_handler()
and therefore the access was not protected.

Signed-off-by: Petr Mladek <pmladek@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 kernel/livepatch/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 69bf3aa3bde8..782172f073c5 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -314,12 +314,12 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 	rcu_read_lock();
 	func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
 				      stack_node);
-	rcu_read_unlock();
-
 	if (WARN_ON_ONCE(!func))
-		return;
+		goto unlock;
 
 	klp_arch_set_pc(regs, (unsigned long)func->new_func);
+unlock:
+	rcu_read_unlock();
 }
 
 static int klp_disable_func(struct klp_func *func)
-- 
cgit v1.2.3


From 39afb5ee4640b4ed2cdd9e12b2a67cf785cfced8 Mon Sep 17 00:00:00 2001
From: Jon DeVree <nuxi@vault24.org>
Date: Fri, 27 Feb 2015 15:52:07 -0800
Subject: kernel/sys.c: fix UNAME26 for 4.0

There's a uname workaround for broken userspace which can't handle kernel
versions of 3.x.  Update it for 4.x.

Signed-off-by: Jon DeVree <nuxi@vault24.org>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sys.c b/kernel/sys.c
index 667b2e62fad2..a03d9cd23ed7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1108,6 +1108,7 @@ DECLARE_RWSEM(uts_sem);
 /*
  * Work around broken programs that cannot handle "Linux 3.0".
  * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
+ * And we map 4.x to 2.6.60+x, so 4.0 would be 2.6.60.
  */
 static int override_release(char __user *release, size_t len)
 {
@@ -1127,7 +1128,7 @@ static int override_release(char __user *release, size_t len)
 				break;
 			rest++;
 		}
-		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
+		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 60;
 		copy = clamp_t(size_t, len, 1, sizeof(buf));
 		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
 		ret = copy_to_user(release, buf, copy + 1);
-- 
cgit v1.2.3


From 9d3e2d02f54160725d97f4ab1e1e8de493fbf33a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 27 Feb 2015 17:57:09 +0100
Subject: locking/rtmutex: Set state back to running on error

The "usual" path is:

 - rt_mutex_slowlock()
 - set_current_state()
 - task_blocks_on_rt_mutex() (ret 0)
 - __rt_mutex_slowlock()
   - sleep or not but do return with __set_current_state(TASK_RUNNING)
 - back to caller.

In the early error case where task_blocks_on_rt_mutex() return
-EDEADLK we never change the task's state back to RUNNING. I
assume this is intended. Without this change after ww_mutex
using rt_mutex the selftest passes but later I get plenty of:

  | bad: scheduling from the idle thread!

backtraces.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: afffc6c1805d ("locking/rtmutex: Optimize setting task running after being blocked")
Link: http://lkml.kernel.org/r/1425056229-22326-4-git-send-email-bigeasy@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/rtmutex.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index e16e5542bf13..6357265a31ad 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1193,6 +1193,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
 
 	if (unlikely(ret)) {
+		__set_current_state(TASK_RUNNING);
 		if (rt_mutex_has_waiters(lock))
 			remove_waiter(lock, &waiter);
 		rt_mutex_handle_deadlock(ret, chwalk, &waiter);
-- 
cgit v1.2.3