From f960181d5d88ab6c84be8fb5e7f75080c78dfdd1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Aug 2017 17:23:19 +0100 Subject: arm64: neon: replace generic definition of may_use_simd() In preparation of modifying the logic that decides whether kernel mode NEON is allowable, which is required for SVE support, introduce an implementation of may_use_simd() that reflects the current reality, i.e., that SIMD is allowed in any context. Signed-off-by: Ard Biesheuvel Reviewed-by: Dave Martin Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/Kbuild | 1 - arch/arm64/include/asm/simd.h | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/simd.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index f81c7b685fc6..2326e39d5892 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += rwsem.h generic-y += segment.h generic-y += serial.h generic-y += set_memory.h -generic-y += simd.h generic-y += sizes.h generic-y += switch_to.h generic-y += trace_clock.h diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h new file mode 100644 index 000000000000..96959b52afae --- /dev/null +++ b/arch/arm64/include/asm/simd.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2017 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#ifndef __ASM_SIMD_H +#define __ASM_SIMD_H + +#include + +/* + * may_use_simd - whether it is allowable at this time to issue SIMD + * instructions or access the SIMD register file + */ +static __must_check inline bool may_use_simd(void) +{ + return true; +} + +#endif -- cgit v1.2.3 From 0fc9179ad0bf2f97790c0568442299679ca346cf Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:20 +0100 Subject: arm64: neon: Add missing header guard in asm/neon.h doesn't have a header inclusion guard, but it should have one for consistency with other headers. This patch adds a suitable guard. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/neon.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h index ad4cdc966c0f..5368bd04fe7b 100644 --- a/arch/arm64/include/asm/neon.h +++ b/arch/arm64/include/asm/neon.h @@ -8,6 +8,9 @@ * published by the Free Software Foundation. */ +#ifndef __ASM_NEON_H +#define __ASM_NEON_H + #include #include @@ -17,3 +20,5 @@ void kernel_neon_begin_partial(u32 num_regs); void kernel_neon_end(void); + +#endif /* ! __ASM_NEON_H */ -- cgit v1.2.3 From 504641859e5c616210c0894149e09fb6928e398f Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:21 +0100 Subject: arm64: fpsimd: Consistently use __this_cpu_ ops where appropriate __this_cpu_ ops are not used consistently with regard to this_cpu_ ops in a couple of places in fpsimd.c. Since preemption is explicitly disabled in fpsimd_restore_current_state() and fpsimd_update_current_state(), this patch converts this_cpu_ ops in those functions to __this_cpu_ ops. This doesn't save cost on arm64, but benefits from additional assertions in the core code. 
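(Illustrative only, not part of the patch: a minimal kernel-context sketch of the pattern the conversion relies on; the per-CPU variable and function names below are made up.)

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(unsigned long, example_count);	/* hypothetical */

static void example_bump(void)
{
	preempt_disable();
	/* No migration is possible here, so the per-CPU slot is stable. */
	/* With CONFIG_DEBUG_PREEMPT, __this_cpu_*() asserts preemption is off. */
	__this_cpu_write(example_count, __this_cpu_read(example_count) + 1);
	preempt_enable();
}
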
Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 06da8ea16bbe..d7e5f8a2d4f5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -194,7 +194,7 @@ void fpsimd_restore_current_state(void) struct fpsimd_state *st = ¤t->thread.fpsimd_state; fpsimd_load_state(st); - this_cpu_write(fpsimd_last_state, st); + __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } preempt_enable(); @@ -214,7 +214,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state) if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; - this_cpu_write(fpsimd_last_state, st); + __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } preempt_enable(); -- cgit v1.2.3 From 4328825d4fdc185d365d8e858cace8b324198a70 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:22 +0100 Subject: arm64: neon: Allow EFI runtime services to use FPSIMD in irq context In order to be able to cope with kernel-mode NEON being unavailable in hardirq/nmi context and non-nestable, we need special handling for EFI runtime service calls that may be made during an interrupt that interrupted a kernel_neon_begin()..._end() block. This will occur if the kernel tries to write diagnostic data to EFI persistent storage during a panic triggered by an NMI for example. EFI runtime services specify an ABI that clobbers the FPSIMD state, rather than being able to use it optionally as an accelerator. This means that EFI is really a special case and can be handled specially. To enable EFI calls from interrupts, this patch creates dedicated __efi_fpsimd_{begin,end}() helpers solely for this purpose, which save/restore to a separate percpu buffer if called in a context where kernel_neon_begin() is not usable. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/efi.h | 5 ++-- arch/arm64/include/asm/fpsimd.h | 4 ++++ arch/arm64/kernel/fpsimd.c | 52 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 8f3043aba873..835822242a1a 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -20,8 +21,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_setup() \ ({ \ - kernel_neon_begin(); \ efi_virtmap_load(); \ + __efi_fpsimd_begin(); \ }) #define arch_efi_call_virt(p, f, args...) 
\ @@ -33,8 +34,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_teardown() \ ({ \ + __efi_fpsimd_end(); \ efi_virtmap_unload(); \ - kernel_neon_end(); \ }) #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 50f559f574fe..5155f21e15e3 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -81,6 +81,10 @@ extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state, u32 num_regs); extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state); +/* For use by EFI runtime services calls only */ +extern void __efi_fpsimd_begin(void); +extern void __efi_fpsimd_end(void); + #endif #endif diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index d7e5f8a2d4f5..bcde88e2d981 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -21,12 +21,15 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #define FPEXC_IOF (1 << 0) #define FPEXC_DZF (1 << 1) @@ -276,6 +279,55 @@ void kernel_neon_end(void) } EXPORT_SYMBOL(kernel_neon_end); +DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state); +DEFINE_PER_CPU(bool, efi_fpsimd_state_used); + +/* + * EFI runtime services support functions + * + * The ABI for EFI runtime services allows EFI to use FPSIMD during the call. + * This means that for EFI (and only for EFI), we have to assume that FPSIMD + * is always used rather than being an optional accelerator. + * + * These functions provide the necessary support for ensuring FPSIMD + * save/restore in the contexts from which EFI is used. + * + * Do not use them for any other purpose -- if tempted to do so, you are + * either doing something wrong or you need to propose some refactoring. + */ + +/* + * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call + */ +void __efi_fpsimd_begin(void) +{ + if (!system_supports_fpsimd()) + return; + + WARN_ON(preemptible()); + + if (may_use_simd()) + kernel_neon_begin(); + else { + fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); + __this_cpu_write(efi_fpsimd_state_used, true); + } +} + +/* + * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call + */ +void __efi_fpsimd_end(void) +{ + if (!system_supports_fpsimd()) + return; + + if (__this_cpu_xchg(efi_fpsimd_state_used, false)) + fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state)); + else + kernel_neon_end(); +} + #endif /* CONFIG_KERNEL_MODE_NEON */ #ifdef CONFIG_CPU_PM -- cgit v1.2.3 From cb84d11e1625aa3a081d898ca2640bf3a9ca0e96 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:23 +0100 Subject: arm64: neon: Remove support for nested or hardirq kernel-mode NEON Support for kernel-mode NEON to be nested and/or used in hardirq context adds significant complexity, and the benefits may be marginal. In practice, kernel-mode NEON is not used in hardirq context, and is rarely used in softirq context (by certain mac80211 drivers). This patch implements an arm64 may_use_simd() function to allow clients to check whether kernel-mode NEON is usable in the current context, and simplifies kernel_neon_{begin,end}() to handle only saving of the task FPSIMD state (if any). Without nesting, there is no other state to save. The partial fpsimd save/restore functions become redundant as a result of these changes, so they are removed too. 
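(Illustrative only: a hypothetical client showing the intended may_use_simd() usage pattern; the function below is made up and not taken from this series.)

#include <linux/types.h>
#include <asm/neon.h>
#include <asm/simd.h>

static void example_xor_blocks(u8 *dst, const u8 *src, int len)
{
	if (may_use_simd()) {
		kernel_neon_begin();
		/* ... NEON-accelerated loop would go here ... */
		kernel_neon_end();
	} else {
		/* Scalar fallback for contexts where kernel-mode NEON is unusable. */
		while (len--)
			*dst++ ^= *src++;
	}
}
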
The save/restore model is changed to operate directly on task_struct without additional percpu storage. This simplifies the code and saves a bit of memory, but means that softirqs must now be disabled when manipulating the task fpsimd state from task context: correspondingly, preempt_{en,dis}sable() calls are upgraded to local_bh_{en,dis}able() as appropriate. fpsimd_thread_switch() already runs with hardirqs disabled and so is already protected from softirqs. These changes should make it easier to support kernel-mode NEON in the presence of the Scalable Vector extension in the future. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 14 ----- arch/arm64/include/asm/fpsimdmacros.h | 56 ----------------- arch/arm64/include/asm/neon.h | 4 +- arch/arm64/include/asm/simd.h | 33 +++++++++- arch/arm64/kernel/entry-fpsimd.S | 24 ------- arch/arm64/kernel/fpsimd.c | 115 +++++++++++++++++++++++----------- 6 files changed, 111 insertions(+), 135 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 5155f21e15e3..410c48163c6a 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -41,16 +41,6 @@ struct fpsimd_state { unsigned int cpu; }; -/* - * Struct for stacking the bottom 'n' FP/SIMD registers. - */ -struct fpsimd_partial_state { - u32 fpsr; - u32 fpcr; - u32 num_regs; - __uint128_t vregs[32]; -}; - #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* Masks for extracting the FPSR and FPCR from the FPSCR */ @@ -77,10 +67,6 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state); extern void fpsimd_flush_task_state(struct task_struct *target); -extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state, - u32 num_regs); -extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state); - /* For use by EFI runtime services calls only */ extern void __efi_fpsimd_begin(void); extern void __efi_fpsimd_end(void); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index a2daf1293028..0f5fdd388b0d 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -75,59 +75,3 @@ ldr w\tmpnr, [\state, #16 * 2 + 4] fpsimd_restore_fpcr x\tmpnr, \state .endm - -.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2 - mrs x\tmpnr1, fpsr - str w\numnr, [\state, #8] - mrs x\tmpnr2, fpcr - stp w\tmpnr1, w\tmpnr2, [\state] - adr x\tmpnr1, 0f - add \state, \state, x\numnr, lsl #4 - sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1 - br x\tmpnr1 - stp q30, q31, [\state, #-16 * 30 - 16] - stp q28, q29, [\state, #-16 * 28 - 16] - stp q26, q27, [\state, #-16 * 26 - 16] - stp q24, q25, [\state, #-16 * 24 - 16] - stp q22, q23, [\state, #-16 * 22 - 16] - stp q20, q21, [\state, #-16 * 20 - 16] - stp q18, q19, [\state, #-16 * 18 - 16] - stp q16, q17, [\state, #-16 * 16 - 16] - stp q14, q15, [\state, #-16 * 14 - 16] - stp q12, q13, [\state, #-16 * 12 - 16] - stp q10, q11, [\state, #-16 * 10 - 16] - stp q8, q9, [\state, #-16 * 8 - 16] - stp q6, q7, [\state, #-16 * 6 - 16] - stp q4, q5, [\state, #-16 * 4 - 16] - stp q2, q3, [\state, #-16 * 2 - 16] - stp q0, q1, [\state, #-16 * 0 - 16] -0: -.endm - -.macro fpsimd_restore_partial state, tmpnr1, tmpnr2 - ldp w\tmpnr1, w\tmpnr2, [\state] - msr fpsr, x\tmpnr1 - fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1 - adr x\tmpnr1, 0f - ldr w\tmpnr2, [\state, #8] - add \state, \state, x\tmpnr2, lsl #4 - sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1 - br 
x\tmpnr1 - ldp q30, q31, [\state, #-16 * 30 - 16] - ldp q28, q29, [\state, #-16 * 28 - 16] - ldp q26, q27, [\state, #-16 * 26 - 16] - ldp q24, q25, [\state, #-16 * 24 - 16] - ldp q22, q23, [\state, #-16 * 22 - 16] - ldp q20, q21, [\state, #-16 * 20 - 16] - ldp q18, q19, [\state, #-16 * 18 - 16] - ldp q16, q17, [\state, #-16 * 16 - 16] - ldp q14, q15, [\state, #-16 * 14 - 16] - ldp q12, q13, [\state, #-16 * 12 - 16] - ldp q10, q11, [\state, #-16 * 10 - 16] - ldp q8, q9, [\state, #-16 * 8 - 16] - ldp q6, q7, [\state, #-16 * 6 - 16] - ldp q4, q5, [\state, #-16 * 4 - 16] - ldp q2, q3, [\state, #-16 * 2 - 16] - ldp q0, q1, [\state, #-16 * 0 - 16] -0: -.endm diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h index 5368bd04fe7b..fb9d137256a6 100644 --- a/arch/arm64/include/asm/neon.h +++ b/arch/arm64/include/asm/neon.h @@ -16,9 +16,7 @@ #define cpu_has_neon() system_supports_fpsimd() -#define kernel_neon_begin() kernel_neon_begin_partial(32) - -void kernel_neon_begin_partial(u32 num_regs); +void kernel_neon_begin(void); void kernel_neon_end(void); #endif /* ! __ASM_NEON_H */ diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 96959b52afae..5a1a927b74a2 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -9,15 +9,46 @@ #ifndef __ASM_SIMD_H #define __ASM_SIMD_H +#include +#include +#include #include +#ifdef CONFIG_KERNEL_MODE_NEON + +DECLARE_PER_CPU(bool, kernel_neon_busy); + /* * may_use_simd - whether it is allowable at this time to issue SIMD * instructions or access the SIMD register file + * + * Callers must not assume that the result remains true beyond the next + * preempt_enable() or return from softirq context. */ static __must_check inline bool may_use_simd(void) { - return true; + /* + * The raw_cpu_read() is racy if called with preemption enabled. + * This is not a bug: kernel_neon_busy is only set when + * preemption is disabled, so we cannot migrate to another CPU + * while it is set, nor can we migrate to a CPU where it is set. + * So, if we find it clear on some CPU then we're guaranteed to + * find it clear on any CPU we could migrate to. + * + * If we are in between kernel_neon_begin()...kernel_neon_end(), + * the flag will be set, but preemption is also disabled, so we + * can't migrate to another CPU and spuriously see it become + * false. + */ + return !in_irq() && !in_nmi() && !raw_cpu_read(kernel_neon_busy); } +#else /* ! CONFIG_KERNEL_MODE_NEON */ + +static __must_check inline bool may_use_simd(void) { + return false; +} + +#endif /* ! CONFIG_KERNEL_MODE_NEON */ + #endif diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index c44a82f146b1..6a27cd6dbfa6 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -41,27 +41,3 @@ ENTRY(fpsimd_load_state) fpsimd_restore x0, 8 ret ENDPROC(fpsimd_load_state) - -#ifdef CONFIG_KERNEL_MODE_NEON - -/* - * Save the bottom n FP registers. - * - * x0 - pointer to struct fpsimd_partial_state - */ -ENTRY(fpsimd_save_partial_state) - fpsimd_save_partial x0, 1, 8, 9 - ret -ENDPROC(fpsimd_save_partial_state) - -/* - * Load the bottom n FP registers. 
- * - * x0 - pointer to struct fpsimd_partial_state - */ -ENTRY(fpsimd_load_partial_state) - fpsimd_restore_partial x0, 8, 9 - ret -ENDPROC(fpsimd_load_partial_state) - -#endif diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index bcde88e2d981..138fcfaeadc1 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -17,18 +17,18 @@ * along with this program. If not, see . */ +#include #include #include #include #include +#include #include #include #include -#include #include #include -#include #include #define FPEXC_IOF (1 << 0) @@ -65,6 +65,13 @@ * CPU currently contain the most recent userland FPSIMD state of the current * task. * + * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may + * save the task's FPSIMD context back to task_struct from softirq context. + * To prevent this from racing with the manipulation of the task's FPSIMD state + * from task context and thereby corrupting the state, it is necessary to + * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE + * flag with local_bh_disable() unless softirqs are already masked. + * * For a certain task, the sequence may look something like this: * - the task gets scheduled in; if both the task's fpsimd_state.cpu field * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu @@ -164,9 +171,14 @@ void fpsimd_flush_thread(void) { if (!system_supports_fpsimd()) return; + + local_bh_disable(); + memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); + + local_bh_enable(); } /* @@ -177,10 +189,13 @@ void fpsimd_preserve_current_state(void) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); - preempt_enable(); + + local_bh_enable(); } /* @@ -192,7 +207,9 @@ void fpsimd_restore_current_state(void) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; @@ -200,7 +217,8 @@ void fpsimd_restore_current_state(void) __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } - preempt_enable(); + + local_bh_enable(); } /* @@ -212,7 +230,9 @@ void fpsimd_update_current_state(struct fpsimd_state *state) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + fpsimd_load_state(state); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; @@ -220,7 +240,8 @@ void fpsimd_update_current_state(struct fpsimd_state *state) __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } - preempt_enable(); + + local_bh_enable(); } /* @@ -233,49 +254,69 @@ void fpsimd_flush_task_state(struct task_struct *t) #ifdef CONFIG_KERNEL_MODE_NEON -static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); -static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); +DEFINE_PER_CPU(bool, kernel_neon_busy); /* * Kernel-side NEON support functions */ -void kernel_neon_begin_partial(u32 num_regs) + +/* + * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling + * context + * + * Must not be called unless may_use_simd() returns true. + * Task context in the FPSIMD registers is saved back to memory as necessary. 
+ * + * A matching call to kernel_neon_end() must be made before returning from the + * calling context. + * + * The caller may freely use the FPSIMD registers until kernel_neon_end() is + * called. + */ +void kernel_neon_begin(void) { if (WARN_ON(!system_supports_fpsimd())) return; - if (in_interrupt()) { - struct fpsimd_partial_state *s = this_cpu_ptr( - in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); - BUG_ON(num_regs > 32); - fpsimd_save_partial_state(s, roundup(num_regs, 2)); - } else { - /* - * Save the userland FPSIMD state if we have one and if we - * haven't done so already. Clear fpsimd_last_state to indicate - * that there is no longer userland FPSIMD state in the - * registers. - */ - preempt_disable(); - if (current->mm && - !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) - fpsimd_save_state(¤t->thread.fpsimd_state); - this_cpu_write(fpsimd_last_state, NULL); - } + BUG_ON(!may_use_simd()); + + local_bh_disable(); + + __this_cpu_write(kernel_neon_busy, true); + + /* Save unsaved task fpsimd state, if any: */ + if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + + /* Invalidate any task state remaining in the fpsimd regs: */ + __this_cpu_write(fpsimd_last_state, NULL); + + preempt_disable(); + + local_bh_enable(); } -EXPORT_SYMBOL(kernel_neon_begin_partial); +EXPORT_SYMBOL(kernel_neon_begin); +/* + * kernel_neon_end(): give the CPU FPSIMD registers back to the current task + * + * Must be called from a context in which kernel_neon_begin() was previously + * called, with no call to kernel_neon_end() in the meantime. + * + * The caller must not use the FPSIMD registers after this function is called, + * unless kernel_neon_begin() is called again in the meantime. + */ void kernel_neon_end(void) { + bool busy; + if (!system_supports_fpsimd()) return; - if (in_interrupt()) { - struct fpsimd_partial_state *s = this_cpu_ptr( - in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); - fpsimd_load_partial_state(s); - } else { - preempt_enable(); - } + + busy = __this_cpu_xchg(kernel_neon_busy, false); + WARN_ON(!busy); /* No matching kernel_neon_begin()? */ + + preempt_enable(); } EXPORT_SYMBOL(kernel_neon_end); -- cgit v1.2.3 From 174dfb12860eac361f3ced9fefb51393fec5bd32 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 4 Aug 2017 15:10:12 +0100 Subject: arm64: neon: Temporarily add a kernel_mode_begin_partial() definition The crypto code currently relies on kernel_mode_begin_partial() being available. Until the corresponding crypto patches are merged, define this macro temporarily, though with different semantics as it cannot be called in interrupt context. Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/neon.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h index fb9d137256a6..f922eaf780f9 100644 --- a/arch/arm64/include/asm/neon.h +++ b/arch/arm64/include/asm/neon.h @@ -19,4 +19,11 @@ void kernel_neon_begin(void); void kernel_neon_end(void); +/* + * Temporary macro to allow the crypto code to compile. Note that the + * semantics of kernel_neon_begin_partial() are now different from the + * original as it does not allow being called in an interrupt context. + */ +#define kernel_neon_begin_partial(num_regs) kernel_neon_begin() + #endif /* ! 
__ASM_NEON_H */ -- cgit v1.2.3 From 35d0e6fb4d219d64ab3b7cffef7a11a0662140f5 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 1 Aug 2017 15:35:53 +0100 Subject: arm64: syscallno is secretly an int, make it official The upper 32 bits of the syscallno field in thread_struct are handled inconsistently, being sometimes zero extended and sometimes sign-extended. In fact, only the lower 32 bits seem to have any real significance for the behaviour of the code: it's been OK to handle the upper bits inconsistently because they don't matter. Currently, the only place I can find where those bits are significant is in calling trace_sys_enter(), which may be unintentional: for example, if a compat tracer attempts to cancel a syscall by passing -1 to (COMPAT_)PTRACE_SET_SYSCALL at the syscall-enter-stop, it will be traced as syscall 4294967295 rather than -1 as might be expected (and as occurs for a native tracer doing the same thing). Elsewhere, reads of syscallno cast it to an int or truncate it. There's also a conspicuous amount of code and casting to bodge around the fact that although semantically an int, syscallno is stored as a u64. Let's not pretend any more. In order to preserve the stp x instruction that stores the syscall number in entry.S, this patch special-cases the layout of struct pt_regs for big endian so that the newly 32-bit syscallno field maps onto the low bits of the stored value. This is not beautiful, but benchmarking of the getpid syscall on Juno suggests indicates a minor slowdown if the stp is split into an stp x and stp w. Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/include/asm/ptrace.h | 9 ++++++++- arch/arm64/kernel/entry.S | 34 +++++++++++++++++----------------- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 6 +++--- arch/arm64/kernel/signal32.c | 2 +- arch/arm64/kernel/traps.c | 2 +- 7 files changed, 32 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 64c9e78f9882..379def1d6b67 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -112,7 +112,7 @@ void tls_preserve_current_state(void); static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) { memset(regs, 0, sizeof(*regs)); - regs->syscallno = ~0UL; + regs->syscallno = ~0; regs->pc = pc; } diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 11403fdd0a50..21c87dc240e1 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -116,7 +116,14 @@ struct pt_regs { }; }; u64 orig_x0; - u64 syscallno; +#ifdef __AARCH64EB__ + u32 unused2; + s32 syscallno; +#else + s32 syscallno; + u32 unused2; +#endif + u64 orig_addr_limit; u64 unused; // maintain 16 byte alignment }; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b738880350f9..3bf0bd7a2f29 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -142,8 +142,8 @@ alternative_else_nop_endif * Set syscallno to -1 by default (overridden later if real syscall). */ .if \el == 0 - mvn x21, xzr - str x21, [sp, #S_SYSCALLNO] + mvn w21, wzr + str w21, [sp, #S_SYSCALLNO] .endif /* @@ -290,8 +290,9 @@ alternative_else_nop_endif * * x7 is reserved for the system call number in 32-bit mode. 
*/ -sc_nr .req x25 // number of system calls -scno .req x26 // syscall number +wsc_nr .req w25 // number of system calls +wscno .req w26 // syscall number +xscno .req x26 // syscall number (zero-extended) stbl .req x27 // syscall table pointer tsk .req x28 // current thread_info @@ -577,8 +578,8 @@ el0_svc_compat: * AArch32 syscall handling */ adrp stbl, compat_sys_call_table // load compat syscall table pointer - uxtw scno, w7 // syscall number in w7 (r7) - mov sc_nr, #__NR_compat_syscalls + mov wscno, w7 // syscall number in w7 (r7) + mov wsc_nr, #__NR_compat_syscalls b el0_svc_naked .align 6 @@ -798,19 +799,19 @@ ENDPROC(ret_from_fork) .align 6 el0_svc: adrp stbl, sys_call_table // load syscall table pointer - uxtw scno, w8 // syscall number in w8 - mov sc_nr, #__NR_syscalls + mov wscno, w8 // syscall number in w8 + mov wsc_nr, #__NR_syscalls el0_svc_naked: // compat entry point - stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number + stp x0, xscno, [sp, #S_ORIG_X0] // save the original x0 and syscall number enable_dbg_and_irq ct_user_exit 1 ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks tst x16, #_TIF_SYSCALL_WORK b.ne __sys_trace - cmp scno, sc_nr // check upper syscall limit + cmp wscno, wsc_nr // check upper syscall limit b.hs ni_sys - ldr x16, [stbl, scno, lsl #3] // address in the syscall table + ldr x16, [stbl, xscno, lsl #3] // address in the syscall table blr x16 // call sys_* routine b ret_fast_syscall ni_sys: @@ -824,24 +825,23 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov w0, #-1 // set default errno for - cmp scno, x0 // user-issued syscall(-1) + cmp wscno, #-1 // user-issued syscall(-1)? b.ne 1f - mov x0, #-ENOSYS + mov x0, #-ENOSYS // set default errno if so str x0, [sp, #S_X0] 1: mov x0, sp bl syscall_trace_enter cmp w0, #-1 // skip the syscall? b.eq __sys_trace_return_skipped - uxtw scno, w0 // syscall number (possibly new) + mov wscno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs - cmp scno, sc_nr // check upper syscall limit + cmp wscno, wsc_nr // check upper syscall limit b.hs __ni_sys_trace ldp x0, x1, [sp] // restore the syscall args ldp x2, x3, [sp, #S_X2] ldp x4, x5, [sp, #S_X4] ldp x6, x7, [sp, #S_X6] - ldr x16, [stbl, scno, lsl #3] // address in the syscall table + ldr x16, [stbl, xscno, lsl #3] // address in the syscall table blr x16 // call sys_* routine __sys_trace_return: diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1b38c0150aec..de774805f672 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1363,7 +1363,7 @@ static void tracehook_report_syscall(struct pt_regs *regs, if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) - regs->syscallno = ~0UL; + regs->syscallno = ~0; regs->regs[regno] = saved_reg; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 089c3747995d..4d04b891c00d 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -387,7 +387,7 @@ static int restore_sigframe(struct pt_regs *regs, /* * Avoid sys_rt_sigreturn() restarting. 
*/ - regs->syscallno = ~0UL; + regs->syscallno = ~0; err |= !valid_user_regs(®s->user_regs, current); if (err == 0) @@ -673,7 +673,7 @@ static void do_signal(struct pt_regs *regs) { unsigned long continue_addr = 0, restart_addr = 0; int retval = 0; - int syscall = (int)regs->syscallno; + int syscall = regs->syscallno; struct ksignal ksig; /* @@ -687,7 +687,7 @@ static void do_signal(struct pt_regs *regs) /* * Avoid additional syscall restarting via ret_to_user. */ - regs->syscallno = ~0UL; + regs->syscallno = ~0; /* * Prepare for system call restart. We do this here so that a diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index c747a0fc5d7d..d98ca76cbd39 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -354,7 +354,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, /* * Avoid compat_sys_sigreturn() restarting. */ - regs->syscallno = ~0UL; + regs->syscallno = ~0; err |= !valid_user_regs(®s->user_regs, current); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8a62648848e5..0f047e916cee 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -593,7 +593,7 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) if (show_unhandled_signals_ratelimited()) { pr_info("%s[%d]: syscall %d\n", current->comm, - task_pid_nr(current), (int)regs->syscallno); + task_pid_nr(current), regs->syscallno); dump_instr("", regs); if (user_mode(regs)) __show_regs(regs); -- cgit v1.2.3 From 17c28958600928109049a3bcc814b0d5bfb1ff3a Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 1 Aug 2017 15:35:54 +0100 Subject: arm64: Abstract syscallno manipulation The -1 "no syscall" value is written in various ways, shared with the user ABI in some places, and generally obscure. This patch attempts to make things a little more consistent and readable by replacing all these uses with a single #define. A couple of symbolic helpers are provided to clarify the intent further. Because the in-syscall check in do_signal() is changed from >= 0 to != NO_SYSCALL by this patch, different behaviour may be observable if syscallno is set to values less than -1 by a tracer. However, this is not different from the behaviour that is already observable if a tracer sets syscallno to a value >= __NR_(compat_)syscalls. It appears that this can cause spurious syscall restarting, but that is not a new behaviour either, and does not appear harmful. 
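(Illustrative only: a stand-alone userspace demo, not kernel code, of why the signedness of the -1 sentinel matters.)

#include <stdio.h>
#include <stdint.h>

#define NO_SYSCALL	(-1)

int main(void)
{
	int32_t scno = -1;			/* tracer cancelled the syscall */
	uint64_t old_field = (uint32_t)scno;	/* old u64 storage, zero-extended */

	/* Prints 4294967295: the huge-syscall-number symptom mentioned earlier. */
	printf("zero-extended: %llu\n", (unsigned long long)old_field);
	/* With a signed 32-bit field, the check is simply scno != NO_SYSCALL. */
	printf("in_syscall: %d\n", scno != NO_SYSCALL);
	return 0;
}
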
Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/include/asm/ptrace.h | 21 +++++++++++++++++++++ arch/arm64/kernel/entry.S | 10 ++++------ arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 10 +++++----- arch/arm64/kernel/signal32.c | 2 +- 6 files changed, 33 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 379def1d6b67..b7334f11c4cf 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -112,7 +112,7 @@ void tls_preserve_current_state(void); static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) { memset(regs, 0, sizeof(*regs)); - regs->syscallno = ~0; + forget_syscall(regs); regs->pc = pc; } diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 21c87dc240e1..4f64373b84fd 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -72,8 +72,19 @@ #define COMPAT_PT_TEXT_ADDR 0x10000 #define COMPAT_PT_DATA_ADDR 0x10004 #define COMPAT_PT_TEXT_END_ADDR 0x10008 + +/* + * If pt_regs.syscallno == NO_SYSCALL, then the thread is not executing + * a syscall -- i.e., its most recent entry into the kernel from + * userspace was not via SVC, or otherwise a tracer cancelled the syscall. + * + * This must have the value -1, for ABI compatibility with ptrace etc. + */ +#define NO_SYSCALL (-1) + #ifndef __ASSEMBLY__ #include +#include /* sizeof(struct user) for AArch32 */ #define COMPAT_USER_SZ 296 @@ -128,6 +139,16 @@ struct pt_regs { u64 unused; // maintain 16 byte alignment }; +static inline bool in_syscall(struct pt_regs const *regs) +{ + return regs->syscallno != NO_SYSCALL; +} + +static inline void forget_syscall(struct pt_regs *regs) +{ + regs->syscallno = NO_SYSCALL; +} + #define MAX_REG_OFFSET offsetof(struct pt_regs, pstate) #define arch_has_single_step() (1) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3bf0bd7a2f29..cace76d17535 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -138,11 +138,9 @@ alternative_else_nop_endif stp x22, x23, [sp, #S_PC] - /* - * Set syscallno to -1 by default (overridden later if real syscall). - */ + /* Not in a syscall by default (el0_svc overwrites for real syscall) */ .if \el == 0 - mvn w21, wzr + mov w21, #NO_SYSCALL str w21, [sp, #S_SYSCALLNO] .endif @@ -825,13 +823,13 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - cmp wscno, #-1 // user-issued syscall(-1)? + cmp wscno, #NO_SYSCALL // user-issued syscall(-1)? b.ne 1f mov x0, #-ENOSYS // set default errno if so str x0, [sp, #S_X0] 1: mov x0, sp bl syscall_trace_enter - cmp w0, #-1 // skip the syscall? + cmp w0, #NO_SYSCALL // skip the syscall? 
b.eq __sys_trace_return_skipped mov wscno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index de774805f672..28619b5b6746 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1363,7 +1363,7 @@ static void tracehook_report_syscall(struct pt_regs *regs, if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) - regs->syscallno = ~0; + forget_syscall(regs); regs->regs[regno] = saved_reg; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 4d04b891c00d..4991e87f80cc 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -387,7 +388,7 @@ static int restore_sigframe(struct pt_regs *regs, /* * Avoid sys_rt_sigreturn() restarting. */ - regs->syscallno = ~0; + forget_syscall(regs); err |= !valid_user_regs(®s->user_regs, current); if (err == 0) @@ -673,13 +674,12 @@ static void do_signal(struct pt_regs *regs) { unsigned long continue_addr = 0, restart_addr = 0; int retval = 0; - int syscall = regs->syscallno; struct ksignal ksig; /* * If we were from a system call, check for system call restarting... */ - if (syscall >= 0) { + if (in_syscall(regs)) { continue_addr = regs->pc; restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4); retval = regs->regs[0]; @@ -687,7 +687,7 @@ static void do_signal(struct pt_regs *regs) /* * Avoid additional syscall restarting via ret_to_user. */ - regs->syscallno = ~0; + forget_syscall(regs); /* * Prepare for system call restart. We do this here so that a @@ -731,7 +731,7 @@ static void do_signal(struct pt_regs *regs) * Handle restarting a different system call. As above, if a debugger * has chosen to restart at a different PC, ignore the restart. */ - if (syscall >= 0 && regs->pc == restart_addr) { + if (in_syscall(regs) && regs->pc == restart_addr) { if (retval == -ERESTART_RESTARTBLOCK) setup_restart_syscall(regs); user_rewind_single_step(current); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index d98ca76cbd39..4e5a664be04b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -354,7 +354,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, /* * Avoid compat_sys_sigreturn() restarting. */ - regs->syscallno = ~0; + forget_syscall(regs); err |= !valid_user_regs(®s->user_regs, current); -- cgit v1.2.3 From 1f9b8936f36f4a8e1d9923f5d03295d668cdf098 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 4 Aug 2017 09:31:42 +0100 Subject: arm64: Decode information from ESR upon mem faults When receiving unhandled faults from the CPU, description is very sparse. Adding information about faults decoded from ESR. Added defines to esr.h corresponding ESR fields. Values are based on ARM Archtecture Reference Manual (DDI 0487B.a), section D7.2.28 ESR_ELx, Exception Syndrome Register (ELx) (pages D7-2275 to D7-2280). 
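(Illustrative only: a stand-alone demo of the shift/mask extraction that the new *_SHIFT defines enable; the ESR value below is an example chosen for illustration, not taken from the patch.)

#include <stdio.h>

#define ESR_ELx_WNR_SHIFT	(6)
#define ESR_ELx_WNR		(1UL << ESR_ELx_WNR_SHIFT)
#define ESR_ELx_CM_SHIFT	(8)
#define ESR_ELx_CM		(1UL << ESR_ELx_CM_SHIFT)

int main(void)
{
	/* Example ESR for a data abort on a write. */
	unsigned long esr = 0x96000045;

	/* Prints " CM = 0, WnR = 1", matching the sample output quoted below. */
	printf(" CM = %lu, WnR = %lu\n",
	       (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT,
	       (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
	return 0;
}
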
New output is of the form: [ 77.818059] Mem abort info: [ 77.820826] Exception class = DABT (current EL), IL = 32 bits [ 77.826706] SET = 0, FnV = 0 [ 77.829742] EA = 0, S1PTW = 0 [ 77.832849] Data abort info: [ 77.835713] ISV = 0, ISS = 0x00000070 [ 77.839522] CM = 0, WnR = 1 Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland [catalin.marinas@arm.com: fix "%lu" in a pr_alert() call] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 39 ++++++++++++++++++++++++++---------- arch/arm64/mm/fault.c | 47 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 8cabd57b6348..130b5343ba6d 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -77,16 +77,23 @@ #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) #define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) -#define ESR_ELx_IL (UL(1) << 25) +#define ESR_ELx_IL_SHIFT (25) +#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) /* ISS field definitions shared by different classes */ -#define ESR_ELx_WNR (UL(1) << 6) +#define ESR_ELx_WNR_SHIFT (6) +#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT) /* Shared ISS field definitions for Data/Instruction aborts */ -#define ESR_ELx_FnV (UL(1) << 10) -#define ESR_ELx_EA (UL(1) << 9) -#define ESR_ELx_S1PTW (UL(1) << 7) +#define ESR_ELx_SET_SHIFT (11) +#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT) +#define ESR_ELx_FnV_SHIFT (10) +#define ESR_ELx_FnV (UL(1) << ESR_ELx_FnV_SHIFT) +#define ESR_ELx_EA_SHIFT (9) +#define ESR_ELx_EA (UL(1) << ESR_ELx_EA_SHIFT) +#define ESR_ELx_S1PTW_SHIFT (7) +#define ESR_ELx_S1PTW (UL(1) << ESR_ELx_S1PTW_SHIFT) /* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */ #define ESR_ELx_FSC (0x3F) @@ -97,15 +104,20 @@ #define ESR_ELx_FSC_PERM (0x0C) /* ISS field definitions for Data Aborts */ -#define ESR_ELx_ISV (UL(1) << 24) +#define ESR_ELx_ISV_SHIFT (24) +#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT) #define ESR_ELx_SAS_SHIFT (22) #define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT) -#define ESR_ELx_SSE (UL(1) << 21) +#define ESR_ELx_SSE_SHIFT (21) +#define ESR_ELx_SSE (UL(1) << ESR_ELx_SSE_SHIFT) #define ESR_ELx_SRT_SHIFT (16) #define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT) -#define ESR_ELx_SF (UL(1) << 15) -#define ESR_ELx_AR (UL(1) << 14) -#define ESR_ELx_CM (UL(1) << 8) +#define ESR_ELx_SF_SHIFT (15) +#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT) +#define ESR_ELx_AR_SHIFT (14) +#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT) +#define ESR_ELx_CM_SHIFT (8) +#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT) /* ISS field definitions for exceptions taken in to Hyp */ #define ESR_ELx_CV (UL(1) << 24) @@ -209,6 +221,13 @@ #ifndef __ASSEMBLY__ #include +static inline bool esr_is_data_abort(u32 esr) +{ + const u32 ec = ESR_ELx_EC(esr); + + return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; +} + const char *esr_get_class_string(u32 esr); #endif /* __ASSEMBLY */ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2509e4fe6992..52ee273afeec 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -82,6 +82,49 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) } #endif +static void data_abort_decode(unsigned int esr) +{ + pr_alert("Data abort info:\n"); + + if (esr & ESR_ELx_ISV) { + pr_alert(" Access size = %u byte(s)\n", + 1U << ((esr & ESR_ELx_SAS) >> 
ESR_ELx_SAS_SHIFT)); + pr_alert(" SSE = %lu, SRT = %lu\n", + (esr & ESR_ELx_SSE) >> ESR_ELx_SSE_SHIFT, + (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT); + pr_alert(" SF = %lu, AR = %lu\n", + (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT, + (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT); + } else { + pr_alert(" ISV = 0, ISS = 0x%08lu\n", esr & ESR_ELx_ISS_MASK); + } + + pr_alert(" CM = %lu, WnR = %lu\n", + (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT, + (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT); +} + +/* + * Decode mem abort information + */ +static void mem_abort_decode(unsigned int esr) +{ + pr_alert("Mem abort info:\n"); + + pr_alert(" Exception class = %s, IL = %u bits\n", + esr_get_class_string(esr), + (esr & ESR_ELx_IL) ? 32 : 16); + pr_alert(" SET = %lu, FnV = %lu\n", + (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT, + (esr & ESR_ELx_FnV) >> ESR_ELx_FnV_SHIFT); + pr_alert(" EA = %lu, S1PTW = %lu\n", + (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT, + (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT); + + if (esr_is_data_abort(esr)) + data_abort_decode(esr); +} + /* * Dump out the page tables associated with 'addr' in the currently active mm. */ @@ -248,6 +291,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg, addr); + mem_abort_decode(esr); + show_pte(addr); die("Oops", regs, esr); bust_spinlocks(0); @@ -702,6 +747,8 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n", inf->name, esr, addr); + mem_abort_decode(esr); + info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; -- cgit v1.2.3 From 11cefd5ac25f242349994140a3bce3a20db0c751 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 7 Aug 2017 12:36:35 +0100 Subject: arm64: neon: Export kernel_neon_busy to loadable modules may_use_simd() can be invoked from loadable modules and it accesses kernel_neon_busy. Make sure that the latter is exported. Fixes: cb84d11e1625 ("arm64: neon: Remove support for nested or hardirq kernel-mode NEON") Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 138fcfaeadc1..9da4e636b328 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -255,6 +255,7 @@ void fpsimd_flush_task_state(struct task_struct *t) #ifdef CONFIG_KERNEL_MODE_NEON DEFINE_PER_CPU(bool, kernel_neon_busy); +EXPORT_PER_CPU_SYMBOL(kernel_neon_busy); /* * Kernel-side NEON support functions -- cgit v1.2.3 From 1031a1592908ccd3240f4a5731c96c382c932310 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Sat, 1 Jul 2017 12:03:35 +0530 Subject: arm64: perf: Allow more than one cycle counter to be used Currently: $ perf stat -e cycles:u -e cycles:k true Performance counter stats for 'true': 2,24,699 cycles:u cycles:k (0.00%) 0.000788087 seconds time elapsed We can not count more than one cycle counter in one instance,because we allow to map cycle counter into PMCCNTR_EL0 only. However, if I did not miss anything then specification do not prohibit to use PMEVCNTR_EL0 for cycle count as well. Modify the code so that it still prefers to use PMCCNTR_EL0 for cycle counter, however allow to use PMEVCNTR_EL0 if PMCCNTR_EL0 is already in use. 
After this patch: $ perf stat -e cycles:u -e cycles:k true Performance counter stats for 'true': 2,17,310 cycles:u 7,40,009 cycles:k 0.000764149 seconds time elapsed Signed-off-by: Pratyush Anand Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b5798ba21189..372317667773 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -846,17 +846,14 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; - /* Always place a cycle counter into the cycle counter. */ + /* Always prefer to place a cycle counter into the cycle counter. */ if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV8_IDX_CYCLE_COUNTER; + if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return ARMV8_IDX_CYCLE_COUNTER; } /* - * For anything other than a cycle counter, try and use - * the events counters + * Otherwise use events counters */ for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { if (!test_and_set_bit(idx, cpuc->used_mask)) -- cgit v1.2.3 From c1be2ddb1e25ecf425d93a81dc64a650b9ff3107 Mon Sep 17 00:00:00 2001 From: Tai Nguyen Date: Thu, 13 Jul 2017 11:19:08 -0700 Subject: perf: xgene: Remove unnecessary managed resources cleanup Managed resources in the driver should be automatically cleaned up on driver detach. It's unnecessary to manually free/unmmap these resources. One of the manual cleanup causes static checkers to complain. The bug is reported by Dan Carpenter in [1] [1] https://www.spinics.net/lists/arm-kernel/msg593012.html This patch gets rid of all the unnecessary manual cleanup and properly unregister all the registered PMU devices by the driver on driver detach. 
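(Illustrative only: a hypothetical probe routine, not from this driver, showing why the manual cleanup is unnecessary: devres releases devm_* allocations automatically when probe fails or the device is unbound.)

#include <linux/device.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/io.h>
#include <linux/platform_device.h>

static int example_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;
	void *ctx;

	ctx = devm_kzalloc(&pdev->dev, 64, GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(base))
		return PTR_ERR(base);	/* ctx freed by devres, no devm_kfree() */

	/* ctx/base would be stashed in driver state here; on any later
	 * error a plain "return rc;" is enough. */
	return 0;
}
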
Signed-off-by: Tai Nguyen Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 74 ++++++++++++++---------------------------------- 1 file changed, 22 insertions(+), 52 deletions(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index e841282d690c..eb23311bc70c 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1147,7 +1147,6 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) { struct device *dev = xgene_pmu->dev; struct xgene_pmu_dev *pmu; - int rc; pmu = devm_kzalloc(dev, sizeof(*pmu), GFP_KERNEL); if (!pmu) @@ -1159,7 +1158,7 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) switch (pmu->inf->type) { case PMU_TYPE_L3C: if (!(xgene_pmu->l3c_active_mask & pmu->inf->enable_mask)) - goto dev_err; + return -ENODEV; if (xgene_pmu->version == PCP_PMU_V3) pmu->attr_groups = l3c_pmu_v3_attr_groups; else @@ -1177,7 +1176,7 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) break; case PMU_TYPE_MCB: if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask)) - goto dev_err; + return -ENODEV; if (xgene_pmu->version == PCP_PMU_V3) pmu->attr_groups = mcb_pmu_v3_attr_groups; else @@ -1185,7 +1184,7 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) break; case PMU_TYPE_MC: if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask)) - goto dev_err; + return -ENODEV; if (xgene_pmu->version == PCP_PMU_V3) pmu->attr_groups = mc_pmu_v3_attr_groups; else @@ -1195,19 +1194,14 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) return -EINVAL; } - rc = xgene_init_perf(pmu, ctx->name); - if (rc) { + if (xgene_init_perf(pmu, ctx->name)) { dev_err(dev, "%s PMU: Failed to init perf driver\n", ctx->name); - goto dev_err; + return -ENODEV; } dev_info(dev, "%s PMU registered\n", ctx->name); - return rc; - -dev_err: - devm_kfree(dev, pmu); - return -ENODEV; + return 0; } static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev) @@ -1515,13 +1509,13 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, acpi_dev_free_resource_list(&resource_list); if (rc < 0) { dev_err(dev, "PMU type %d: No resource address found\n", type); - goto err; + return NULL; } dev_csr = devm_ioremap_resource(dev, &res); if (IS_ERR(dev_csr)) { dev_err(dev, "PMU type %d: Fail to map resource\n", type); - goto err; + return NULL; } /* A PMU device node without enable-bit-index is always enabled */ @@ -1535,7 +1529,7 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, ctx->name = xgene_pmu_dev_name(dev, type, enable_bit); if (!ctx->name) { dev_err(dev, "PMU type %d: Fail to get device name\n", type); - goto err; + return NULL; } inf = &ctx->inf; inf->type = type; @@ -1543,9 +1537,6 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, inf->enable_mask = 1 << enable_bit; return ctx; -err: - devm_kfree(dev, ctx); - return NULL; } static const struct acpi_device_id xgene_pmu_acpi_type_match[] = { @@ -1663,20 +1654,20 @@ xgene_pmu_dev_ctx *fdt_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, void __iomem *dev_csr; struct resource res; int enable_bit; - int rc; ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); if (!ctx) return NULL; - rc = of_address_to_resource(np, 0, &res); - if (rc < 0) { + + if (of_address_to_resource(np, 0, &res) < 0) { dev_err(dev, "PMU type %d: No resource address found\n", type); - goto err; + return NULL; } + dev_csr = devm_ioremap_resource(dev, &res); if (IS_ERR(dev_csr)) { 
dev_err(dev, "PMU type %d: Fail to map resource\n", type); - goto err; + return NULL; } /* A PMU device node without enable-bit-index is always enabled */ @@ -1686,17 +1677,15 @@ xgene_pmu_dev_ctx *fdt_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, ctx->name = xgene_pmu_dev_name(dev, type, enable_bit); if (!ctx->name) { dev_err(dev, "PMU type %d: Fail to get device name\n", type); - goto err; + return NULL; } + inf = &ctx->inf; inf->type = type; inf->csr = dev_csr; inf->enable_mask = 1 << enable_bit; return ctx; -err: - devm_kfree(dev, ctx); - return NULL; } static int fdt_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu, @@ -1868,22 +1857,20 @@ static int xgene_pmu_probe(struct platform_device *pdev) xgene_pmu->pcppmu_csr = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(xgene_pmu->pcppmu_csr)) { dev_err(&pdev->dev, "ioremap failed for PCP PMU resource\n"); - rc = PTR_ERR(xgene_pmu->pcppmu_csr); - goto err; + return PTR_ERR(xgene_pmu->pcppmu_csr); } irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(&pdev->dev, "No IRQ resource\n"); - rc = -EINVAL; - goto err; + return -EINVAL; } rc = devm_request_irq(&pdev->dev, irq, xgene_pmu_isr, IRQF_NOBALANCING | IRQF_NO_THREAD, dev_name(&pdev->dev), xgene_pmu); if (rc) { dev_err(&pdev->dev, "Could not request IRQ %d\n", irq); - goto err; + return rc; } raw_spin_lock_init(&xgene_pmu->lock); @@ -1903,42 +1890,29 @@ static int xgene_pmu_probe(struct platform_device *pdev) rc = irq_set_affinity(irq, &xgene_pmu->cpu); if (rc) { dev_err(&pdev->dev, "Failed to set interrupt affinity!\n"); - goto err; + return rc; } /* Walk through the tree for all PMU perf devices */ rc = xgene_pmu_probe_pmu_dev(xgene_pmu, pdev); if (rc) { dev_err(&pdev->dev, "No PMU perf devices found!\n"); - goto err; + return rc; } /* Enable interrupt */ xgene_pmu->ops->unmask_int(xgene_pmu); return 0; - -err: - if (xgene_pmu->pcppmu_csr) - devm_iounmap(&pdev->dev, xgene_pmu->pcppmu_csr); - devm_kfree(&pdev->dev, xgene_pmu); - - return rc; } static void xgene_pmu_dev_cleanup(struct xgene_pmu *xgene_pmu, struct list_head *pmus) { struct xgene_pmu_dev_ctx *ctx; - struct device *dev = xgene_pmu->dev; - struct xgene_pmu_dev *pmu_dev; list_for_each_entry(ctx, pmus, next) { - pmu_dev = ctx->pmu_dev; - if (pmu_dev->inf->csr) - devm_iounmap(dev, pmu_dev->inf->csr); - devm_kfree(dev, ctx); - devm_kfree(dev, pmu_dev); + perf_pmu_unregister(&ctx->pmu_dev->pmu); } } @@ -1951,10 +1925,6 @@ static int xgene_pmu_remove(struct platform_device *pdev) xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcbpmus); xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcpmus); - if (xgene_pmu->pcppmu_csr) - devm_iounmap(&pdev->dev, xgene_pmu->pcppmu_csr); - devm_kfree(&pdev->dev, xgene_pmu); - return 0; } -- cgit v1.2.3 From bc8648d49a958148bb84444252b96d19659a42e1 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 4 Aug 2017 17:42:06 +0100 Subject: ACPI/IORT: Handle PCI aliases properly for IOMMUs When a PCI device has DMA quirks, we need to ensure that an upstream IOMMU knows about all possible aliases, since the presence of a DMA quirk does not preclude the device still also emitting transactions (e.g. MSIs) on its 'real' RID. Similarly, the rules for bridge aliasing are relatively complex, and some bridges may only take ownership of transactions under particular transient circumstances, leading again to multiple RIDs potentially being seen at the IOMMU for the given device. Take all this into account in iort_iommu_configure() by mapping every RID produced by the alias walk, not just whichever one comes out last. 
Since adding any more internal PTR_ERR() juggling would have confused me no end, a bit of refactoring happens in the process - we know where to find the ops if everything succeeded, so we're free to just pass regular error codes around up until then. CC: Lorenzo Pieralisi CC: Hanjun Guo CC: Sudeep Holla Signed-off-by: Robin Murphy [lorenzo.pieralisi@arm.com: tagged __get_pci_rid __maybe_unused] Signed-off-by: Lorenzo Pieralisi --- drivers/acpi/arm64/iort.c | 111 ++++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 52 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index a3215ee671c1..919f8d84b0ef 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -588,7 +588,8 @@ void acpi_configure_pmsi_domain(struct device *dev) dev_set_msi_domain(dev, msi_domain); } -static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data) +static int __maybe_unused __get_pci_rid(struct pci_dev *pdev, u16 alias, + void *data) { u32 *rid = data; @@ -633,8 +634,7 @@ int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev) { int err = 0; - if (!IS_ERR_OR_NULL(ops) && ops->add_device && dev->bus && - !dev->iommu_group) + if (ops->add_device && dev->bus && !dev->iommu_group) err = ops->add_device(dev); return err; @@ -648,36 +648,49 @@ int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev) { return 0; } #endif -static const struct iommu_ops *iort_iommu_xlate(struct device *dev, - struct acpi_iort_node *node, - u32 streamid) +static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, + u32 streamid) { - const struct iommu_ops *ops = NULL; - int ret = -ENODEV; + const struct iommu_ops *ops; struct fwnode_handle *iort_fwnode; - if (node) { - iort_fwnode = iort_get_fwnode(node); - if (!iort_fwnode) - return NULL; + if (!node) + return -ENODEV; - ops = iommu_ops_from_fwnode(iort_fwnode); - /* - * If the ops look-up fails, this means that either - * the SMMU drivers have not been probed yet or that - * the SMMU drivers are not built in the kernel; - * Depending on whether the SMMU drivers are built-in - * in the kernel or not, defer the IOMMU configuration - * or just abort it. - */ - if (!ops) - return iort_iommu_driver_enabled(node->type) ? - ERR_PTR(-EPROBE_DEFER) : NULL; + iort_fwnode = iort_get_fwnode(node); + if (!iort_fwnode) + return -ENODEV; - ret = arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops); - } + /* + * If the ops look-up fails, this means that either + * the SMMU drivers have not been probed yet or that + * the SMMU drivers are not built in the kernel; + * Depending on whether the SMMU drivers are built-in + * in the kernel or not, defer the IOMMU configuration + * or just abort it. + */ + ops = iommu_ops_from_fwnode(iort_fwnode); + if (!ops) + return iort_iommu_driver_enabled(node->type) ? + -EPROBE_DEFER : -ENODEV; - return ret ? 
NULL : ops; + return arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops); +} + +struct iort_pci_alias_info { + struct device *dev; + struct acpi_iort_node *node; +}; + +static int iort_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) +{ + struct iort_pci_alias_info *info = data; + struct acpi_iort_node *parent; + u32 streamid; + + parent = iort_node_map_id(info->node, alias, &streamid, + IORT_IOMMU_TYPE); + return iort_iommu_xlate(info->dev, parent, streamid); } /** @@ -713,9 +726,9 @@ void iort_set_dma_mask(struct device *dev) const struct iommu_ops *iort_iommu_configure(struct device *dev) { struct acpi_iort_node *node, *parent; - const struct iommu_ops *ops = NULL; + const struct iommu_ops *ops; u32 streamid = 0; - int err; + int err = -ENODEV; /* * If we already translated the fwspec there @@ -727,21 +740,16 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) if (dev_is_pci(dev)) { struct pci_bus *bus = to_pci_dev(dev)->bus; - u32 rid; - - pci_for_each_dma_alias(to_pci_dev(dev), __get_pci_rid, - &rid); + struct iort_pci_alias_info info = { .dev = dev }; node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX, iort_match_node_callback, &bus->dev); if (!node) return NULL; - parent = iort_node_map_id(node, rid, &streamid, - IORT_IOMMU_TYPE); - - ops = iort_iommu_xlate(dev, parent, streamid); - + info.node = node; + err = pci_for_each_dma_alias(to_pci_dev(dev), + iort_pci_iommu_init, &info); } else { int i = 0; @@ -750,31 +758,30 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) if (!node) return NULL; - parent = iort_node_map_platform_id(node, &streamid, - IORT_IOMMU_TYPE, i++); - - while (parent) { - ops = iort_iommu_xlate(dev, parent, streamid); - if (IS_ERR_OR_NULL(ops)) - return ops; - + do { parent = iort_node_map_platform_id(node, &streamid, IORT_IOMMU_TYPE, i++); - } + + if (parent) + err = iort_iommu_xlate(dev, parent, streamid); + } while (parent && !err); } /* * If we have reason to believe the IOMMU driver missed the initial * add_device callback for dev, replay it to get things in order. */ - err = iort_add_device_replay(ops, dev); - if (err) - ops = ERR_PTR(err); + if (!err) { + ops = dev->iommu_fwspec->ops; + err = iort_add_device_replay(ops, dev); + } /* Ignore all other errors apart from EPROBE_DEFER */ - if (IS_ERR(ops) && (PTR_ERR(ops) != -EPROBE_DEFER)) { - dev_dbg(dev, "Adding to IOMMU failed: %ld\n", PTR_ERR(ops)); + if (err == -EPROBE_DEFER) { + ops = ERR_PTR(err); + } else if (err) { + dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); ops = NULL; } -- cgit v1.2.3 From db44e9c5ecf1b60336ccfc176b07ba7d81d855e0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Jul 2017 14:41:40 +0100 Subject: arm64: Add ASM_BUG() Currently. we can only use BUG() from C code, though there are situations where we would like an equivalent mechanism in assembly code. This patch refactors our BUG() definition such that it can be used in either C or assembly, in the form of a new ASM_BUG(). The refactoring requires the removal of escape sequences, such as '\n' and '\t', but these aren't strictly necessary as we can use ';' to terminate assembler statements. The low-level assembly is factored out into , with retained as the C wrapper. 
Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Dave Martin Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/asm-bug.h | 54 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/include/asm/bug.h | 35 +++----------------------- 2 files changed, 57 insertions(+), 32 deletions(-) create mode 100644 arch/arm64/include/asm/asm-bug.h diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h new file mode 100644 index 000000000000..636e755bcdca --- /dev/null +++ b/arch/arm64/include/asm/asm-bug.h @@ -0,0 +1,54 @@ +#ifndef __ASM_ASM_BUG_H +/* + * Copyright (C) 2017 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#define __ASM_ASM_BUG_H + +#include + +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) +#define __BUGVERBOSE_LOCATION(file, line) \ + .pushsection .rodata.str,"aMS",@progbits,1; \ + 2: .string file; \ + .popsection; \ + \ + .long 2b - 0b; \ + .short line; +#else +#define _BUGVERBOSE_LOCATION(file, line) +#endif + +#ifdef CONFIG_GENERIC_BUG + +#define __BUG_ENTRY(flags) \ + .pushsection __bug_table,"aw"; \ + .align 2; \ + 0: .long 1f - 0b; \ +_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ + .short flags; \ + .popsection; \ + 1: +#else +#define __BUG_ENTRY(flags) +#endif + +#define ASM_BUG_FLAGS(flags) \ + __BUG_ENTRY(flags) \ + brk BUG_BRK_IMM + +#define ASM_BUG() ASM_BUG_FLAGS(0) + +#endif /* __ASM_ASM_BUG_H */ diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h index a02a57186f56..d7dc43752705 100644 --- a/arch/arm64/include/asm/bug.h +++ b/arch/arm64/include/asm/bug.h @@ -18,41 +18,12 @@ #ifndef _ARCH_ARM64_ASM_BUG_H #define _ARCH_ARM64_ASM_BUG_H -#include +#include -#ifdef CONFIG_DEBUG_BUGVERBOSE -#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) -#define __BUGVERBOSE_LOCATION(file, line) \ - ".pushsection .rodata.str,\"aMS\",@progbits,1\n" \ - "2: .string \"" file "\"\n\t" \ - ".popsection\n\t" \ - \ - ".long 2b - 0b\n\t" \ - ".short " #line "\n\t" -#else -#define _BUGVERBOSE_LOCATION(file, line) -#endif - -#ifdef CONFIG_GENERIC_BUG - -#define __BUG_ENTRY(flags) \ - ".pushsection __bug_table,\"aw\"\n\t" \ - ".align 2\n\t" \ - "0: .long 1f - 0b\n\t" \ -_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ - ".short " #flags "\n\t" \ - ".popsection\n" \ - "1: " -#else -#define __BUG_ENTRY(flags) "" -#endif +#include #define __BUG_FLAGS(flags) \ - asm volatile ( \ - __BUG_ENTRY(flags) \ - "brk %[imm]" :: [imm] "i" (BUG_BRK_IMM) \ - ); - + asm volatile (__stringify(ASM_BUG_FLAGS(flags))); #define BUG() do { \ __BUG_FLAGS(0); \ -- cgit v1.2.3 From 2d0e751a4789fc5ab4a5c9de5d6407b41fdfbbf0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Jul 2017 11:14:53 +0100 Subject: arm64: consistently use bl for C exception entry In most cases, our exception entry assembly branches to C handlers with a BL instruction, but in cases where we do not expect to return, we use B instead. 
While this is correct today, it means that backtraces for fatal exceptions miss the entry assembly (as the LR is stale at the point we call C code), while non-fatal exceptions have the entry assembly in the LR. In subsequent patches, we will need the LR to be set in these cases in order to backtrace reliably. This patch updates these sites to use a BL, ensuring consistency, and preparing for backtrace rework. An ASM_BUG() is added after each of these new BLs, which both catches unexpected returns, and ensures that the LR value doesn't point to another function label. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/entry.S | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b738880350f9..660612a07ec5 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -351,7 +351,8 @@ END(vectors) mov x0, sp mov x1, #\reason mrs x2, esr_el1 - b bad_mode + bl bad_mode + ASM_BUG() .endm el0_sync_invalid: @@ -448,14 +449,16 @@ el1_sp_pc: mrs x0, far_el1 enable_dbg mov x2, sp - b do_sp_pc_abort + bl do_sp_pc_abort + ASM_BUG() el1_undef: /* * Undefined instruction */ enable_dbg mov x0, sp - b do_undefinstr + bl do_undefinstr + ASM_BUG() el1_dbg: /* * Debug exception handling @@ -473,7 +476,8 @@ el1_inv: mov x0, sp mov x2, x1 mov x1, #BAD_SYNC - b bad_mode + bl bad_mode + ASM_BUG() ENDPROC(el1_sync) .align 6 -- cgit v1.2.3 From ed84b4e9582bdfeffc617589fe17dddfc5fe6672 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Jul 2017 16:05:20 +0100 Subject: arm64: move non-entry code out of .entry.text Currently, cpu_switch_to and ret_from_fork both live in .entry.text, though neither form the critical path for an exception entry. In subsequent patches, we will require that code in .entry.text is part of the critical path for exception entry, for which we can assume certain properties (e.g. the presence of exception regs on the stack). Neither cpu_switch_to nor ret_from_fork will meet these requirements, so we must move them out of .entry.text. To ensure that neither are kprobed after being moved out of .entry.text, we must explicitly blacklist them, requiring a new NOKPROBE() asm helper. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/assembler.h | 11 +++++ arch/arm64/kernel/entry.S | 90 +++++++++++++++++++------------------- 2 files changed, 57 insertions(+), 44 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 1b67c3782d00..610a42018241 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -403,6 +403,17 @@ alternative_endif .size __pi_##x, . - x; \ ENDPROC(x) +/* + * Annotate a function as being unsuitable for kprobes. + */ +#ifdef CONFIG_KPROBES +#define NOKPROBE(x) \ + .pushsection "_kprobe_blacklist", "aw"; \ + .quad x; \ + .popsection; +#else +#define NOKPROBE(x) +#endif /* * Emit a 64-bit absolute little endian symbol reference in a way that * ensures that it will be resolved at build time, even when building a diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 660612a07ec5..9e126d3d8b53 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -710,38 +710,6 @@ el0_irq_naked: b ret_to_user ENDPROC(el0_irq) -/* - * Register switch for AArch64. The callee-saved registers need to be saved - * and restored. 
On entry: - * x0 = previous task_struct (must be preserved across the switch) - * x1 = next task_struct - * Previous and next are guaranteed not to be the same. - * - */ -ENTRY(cpu_switch_to) - mov x10, #THREAD_CPU_CONTEXT - add x8, x0, x10 - mov x9, sp - stp x19, x20, [x8], #16 // store callee-saved registers - stp x21, x22, [x8], #16 - stp x23, x24, [x8], #16 - stp x25, x26, [x8], #16 - stp x27, x28, [x8], #16 - stp x29, x9, [x8], #16 - str lr, [x8] - add x8, x1, x10 - ldp x19, x20, [x8], #16 // restore callee-saved registers - ldp x21, x22, [x8], #16 - ldp x23, x24, [x8], #16 - ldp x25, x26, [x8], #16 - ldp x27, x28, [x8], #16 - ldp x29, x9, [x8], #16 - ldr lr, [x8] - mov sp, x9 - msr sp_el0, x1 - ret -ENDPROC(cpu_switch_to) - /* * This is the fast syscall return path. We do as little as possible here, * and this includes saving x0 back into the kernel stack. @@ -784,18 +752,6 @@ finish_ret_to_user: kernel_exit 0 ENDPROC(ret_to_user) -/* - * This is how we return from a fork. - */ -ENTRY(ret_from_fork) - bl schedule_tail - cbz x19, 1f // not a kernel thread - mov x0, x20 - blr x19 -1: get_thread_info tsk - b ret_to_user -ENDPROC(ret_from_fork) - /* * SVC handler. */ @@ -869,3 +825,49 @@ ENTRY(sys_rt_sigreturn_wrapper) mov x0, sp b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) + +/* + * Register switch for AArch64. The callee-saved registers need to be saved + * and restored. On entry: + * x0 = previous task_struct (must be preserved across the switch) + * x1 = next task_struct + * Previous and next are guaranteed not to be the same. + * + */ +ENTRY(cpu_switch_to) + mov x10, #THREAD_CPU_CONTEXT + add x8, x0, x10 + mov x9, sp + stp x19, x20, [x8], #16 // store callee-saved registers + stp x21, x22, [x8], #16 + stp x23, x24, [x8], #16 + stp x25, x26, [x8], #16 + stp x27, x28, [x8], #16 + stp x29, x9, [x8], #16 + str lr, [x8] + add x8, x1, x10 + ldp x19, x20, [x8], #16 // restore callee-saved registers + ldp x21, x22, [x8], #16 + ldp x23, x24, [x8], #16 + ldp x25, x26, [x8], #16 + ldp x27, x28, [x8], #16 + ldp x29, x9, [x8], #16 + ldr lr, [x8] + mov sp, x9 + msr sp_el0, x1 + ret +ENDPROC(cpu_switch_to) +NOKPROBE(cpu_switch_to) + +/* + * This is how we return from a fork. + */ +ENTRY(ret_from_fork) + bl schedule_tail + cbz x19, 1f // not a kernel thread + mov x0, x20 + blr x19 +1: get_thread_info tsk + b ret_to_user +ENDPROC(ret_from_fork) +NOKPROBE(ret_from_fork) -- cgit v1.2.3 From 096683724cb2eb95fea759a2580996df1039fdd0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 20 Jul 2017 14:01:01 +0100 Subject: arm64: unwind: avoid percpu indirection for irq stack Our IRQ_STACK_PTR() and on_irq_stack() helpers both take a cpu argument, used to generate a percpu address. In all cases, they are passed {raw_,}smp_processor_id(), so this parameter is redundant. Since {raw_,}smp_processor_id() use a percpu variable internally, this approach means we generate a percpu offset to find the current cpu, then use this to index an array of percpu offsets, which we then use to find the current CPU's IRQ stack pointer. Thus, most of the work is redundant. Instead, we can consistently use raw_cpu_ptr() to generate the CPU's irq_stack pointer by simply adding the percpu offset to the irq_stack address, which is simpler in both respects. 
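For illustration, the difference boils down to the following (a sketch, not the literal patch):

    /* Before: resolve the CPU number, then index the percpu offset table again */
    ptr = (unsigned long)per_cpu(irq_stack, smp_processor_id()) + IRQ_STACK_START_SP;

    /* After: apply this CPU's percpu offset to the variable's address directly */
    ptr = (unsigned long)raw_cpu_ptr(irq_stack) + IRQ_STACK_START_SP;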
Signed-off-by: Mark Rutland Signed-off-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/irq.h | 6 +++--- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/stacktrace.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index b77197d941fc..6d6f85e4923e 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -32,7 +32,7 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); * from kernel_entry can be found. * */ -#define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP) +#define IRQ_STACK_PTR() ((unsigned long)raw_cpu_ptr(irq_stack) + IRQ_STACK_START_SP) /* * The offset from irq_stack_ptr where entry.S will store the original @@ -47,10 +47,10 @@ static inline int nr_legacy_irqs(void) return 0; } -static inline bool on_irq_stack(unsigned long sp, int cpu) +static inline bool on_irq_stack(unsigned long sp) { /* variable names the same as kernel/stacktrace.c */ - unsigned long low = (unsigned long)per_cpu(irq_stack, cpu); + unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); unsigned long high = low + IRQ_STACK_START_SP; return (low <= sp && sp <= high); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1b38c0150aec..baf0838205c7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -127,7 +127,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || - on_irq_stack(addr, raw_smp_processor_id()); + on_irq_stack(addr); } /** diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 09d37d66b630..6ffb965be641 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -54,13 +54,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * non-preemptible context. */ if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + irq_stack_ptr = IRQ_STACK_PTR(); else irq_stack_ptr = 0; low = frame->sp; /* irq stacks are not THREAD_SIZE aligned */ - if (on_irq_stack(frame->sp, raw_smp_processor_id())) + if (on_irq_stack(frame->sp)) high = irq_stack_ptr; else high = ALIGN(low, THREAD_SIZE) - 0x20; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d48f47080213..5797f5037ec9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -159,7 +159,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) * non-preemptible context. */ if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + irq_stack_ptr = IRQ_STACK_PTR(); else irq_stack_ptr = 0; -- cgit v1.2.3 From c7365330753c55a061db0a1837a27fd5e44b1408 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 22 Jul 2017 12:48:34 +0100 Subject: arm64: unwind: disregard frame.sp when validating frame pointer Currently, when unwinding the call stack, we validate the frame pointer of each frame against frame.sp, whose value is not clearly defined, and which makes it more difficult to link stack frames together across different stacks. It is far better to simply check whether the frame pointer itself points into a valid stack. 
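Expressed as a sketch of the resulting check, a candidate frame pointer is accepted only if it lies within the task's own stack, or within this CPU's IRQ stack when unwinding current in a non-preemptible context:

    if (!(tsk == current && !preemptible() && on_irq_stack(fp)) &&
        !on_task_stack(tsk, fp))
            return -EINVAL;     /* fp does not point into any valid stack */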
Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/irq.h | 10 +++++++++- arch/arm64/kernel/stacktrace.c | 24 +++++++----------------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 6d6f85e4923e..8155e486ce48 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLER__ #include +#include #include #include @@ -49,12 +50,19 @@ static inline int nr_legacy_irqs(void) static inline bool on_irq_stack(unsigned long sp) { - /* variable names the same as kernel/stacktrace.c */ unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); unsigned long high = low + IRQ_STACK_START_SP; return (low <= sp && sp <= high); } +static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) +{ + unsigned long low = (unsigned long)task_stack_page(tsk); + unsigned long high = low + THREAD_SIZE; + + return (low <= sp && sp < high); +} + #endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 6ffb965be641..beaf51fb3088 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -42,9 +42,10 @@ */ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) { - unsigned long high, low; unsigned long fp = frame->fp; - unsigned long irq_stack_ptr; + + if (fp & 0xf) + return -EINVAL; if (!tsk) tsk = current; @@ -53,19 +54,8 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * Switching between stacks is valid when tracing current and in * non-preemptible context. */ - if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(); - else - irq_stack_ptr = 0; - - low = frame->sp; - /* irq stacks are not THREAD_SIZE aligned */ - if (on_irq_stack(frame->sp)) - high = irq_stack_ptr; - else - high = ALIGN(low, THREAD_SIZE) - 0x20; - - if (fp < low || fp > high || fp & 0xf) + if (!(tsk == current && !preemptible() && on_irq_stack(fp)) && + !on_task_stack(tsk, fp)) return -EINVAL; frame->sp = fp + 0x10; @@ -94,9 +84,9 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * Check the frame->fp we read from the bottom of the irq_stack, * and the original task stack pointer are both in current->stack. */ - if (frame->sp == irq_stack_ptr) { + if (frame->sp == IRQ_STACK_PTR()) { struct pt_regs *irq_args; - unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(frame->sp); if (object_is_on_stack((void *)orig_sp) && object_is_on_stack((void *)frame->fp)) { -- cgit v1.2.3 From 5fe0ce3b9e514168427463d0c7a86dfa635da3d2 Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Wed, 2 Aug 2017 10:58:25 -0700 Subject: ACPI/IORT: numa: Add numa node mapping for smmuv3 devices ARM IORT specification(rev. C) has added provision to define proximity domain in SMMUv3 IORT table. Adding required code to parse Proximity domain and set numa_node of smmv3 platform devices. 
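Illustrative usage only (not part of this patch): once set_dev_node() has recorded the mapped node, code acting on behalf of the SMMU can make node-local allocations, e.g. (pdev and size are placeholders):

    int node = dev_to_node(&pdev->dev);     /* node derived from the IORT PXM */
    void *buf = kzalloc_node(size, GFP_KERNEL, node);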
Signed-off-by: Ganapatrao Kulkarni [lorenzo.pieralisi@arm.com: update pr_info()/commit log] Signed-off-by: Lorenzo Pieralisi --- drivers/acpi/arm64/iort.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 919f8d84b0ef..34972d7f4d29 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -915,6 +915,27 @@ static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node) return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE; } +#if defined(CONFIG_ACPI_NUMA) && defined(ACPI_IORT_SMMU_V3_PXM_VALID) +/* + * set numa proximity domain for smmuv3 device + */ +static void __init arm_smmu_v3_set_proximity(struct device *dev, + struct acpi_iort_node *node) +{ + struct acpi_iort_smmu_v3 *smmu; + + smmu = (struct acpi_iort_smmu_v3 *)node->node_data; + if (smmu->flags & ACPI_IORT_SMMU_V3_PXM_VALID) { + set_dev_node(dev, acpi_map_pxm_to_node(smmu->pxm)); + pr_info("SMMU-v3[%llx] Mapped to Proximity domain %d\n", + smmu->base_address, + smmu->pxm); + } +} +#else +#define arm_smmu_v3_set_proximity NULL +#endif + static int __init arm_smmu_count_resources(struct acpi_iort_node *node) { struct acpi_iort_smmu *smmu; @@ -984,13 +1005,16 @@ struct iort_iommu_config { int (*iommu_count_resources)(struct acpi_iort_node *node); void (*iommu_init_resources)(struct resource *res, struct acpi_iort_node *node); + void (*iommu_set_proximity)(struct device *dev, + struct acpi_iort_node *node); }; static const struct iort_iommu_config iort_arm_smmu_v3_cfg __initconst = { .name = "arm-smmu-v3", .iommu_is_coherent = arm_smmu_v3_is_coherent, .iommu_count_resources = arm_smmu_v3_count_resources, - .iommu_init_resources = arm_smmu_v3_init_resources + .iommu_init_resources = arm_smmu_v3_init_resources, + .iommu_set_proximity = arm_smmu_v3_set_proximity, }; static const struct iort_iommu_config iort_arm_smmu_cfg __initconst = { @@ -1035,6 +1059,9 @@ static int __init iort_add_smmu_platform_device(struct acpi_iort_node *node) if (!pdev) return -ENOMEM; + if (ops->iommu_set_proximity) + ops->iommu_set_proximity(&pdev->dev, node); + count = ops->iommu_count_resources(node); r = kcalloc(count, sizeof(*r), GFP_KERNEL); -- cgit v1.2.3 From 6c833bb9247ed51028279ef7b82ebbbe60d789e3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Aug 2017 16:58:33 +0100 Subject: arm64: perf: Allow standard PMUv3 events to be extended by the CPU type Rather than continue adding CPU-specific event maps, instead look up by default in the PMUv3 event map and only fallback to the CPU-specific maps if either the event isn't described by PMUv3, or it is described but the PMCEID registers say that it is unsupported by the current CPU. 
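The resulting lookup order, as a simplified sketch of the common helper:

    /* 1. Try the architected PMUv3 event map first */
    hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map,
                                   &armv8_pmuv3_perf_cache_map,
                                   ARMV8_PMU_EVTYPE_EVENT);

    /* 2. Use it only if the PMCEID registers say this CPU implements it */
    if (hw_event_id > 0 && hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
        test_bit(hw_event_id, armpmu->pmceid_bitmap))
            return hw_event_id;

    /* 3. Otherwise fall back to the CPU-specific maps (which may be NULL) */
    return armpmu_map_event(event, extra_event_map, extra_cache_map,
                            ARMV8_PMU_EVTYPE_EVENT);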
Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 46 ++++++++++++++++++++++++------------------ drivers/perf/arm_pmu.c | 6 ++++++ 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 372317667773..b83f986e7fbf 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -921,7 +921,13 @@ static void armv8pmu_reset(void *info) ARMV8_PMU_PMCR_LC); } -static int armv8_pmuv3_map_event(struct perf_event *event) +static int __armv8_pmuv3_map_event(struct perf_event *event, + const unsigned (*extra_event_map) + [PERF_COUNT_HW_MAX], + const unsigned (*extra_cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]) { int hw_event_id; struct arm_pmu *armpmu = to_arm_pmu(event->pmu); @@ -929,44 +935,44 @@ static int armv8_pmuv3_map_event(struct perf_event *event) hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); - if (hw_event_id < 0) - return hw_event_id; - /* disable micro/arch events not supported by this PMU */ - if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && - !test_bit(hw_event_id, armpmu->pmceid_bitmap)) { - return -EOPNOTSUPP; + /* Onl expose micro/arch events supported by this PMU */ + if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) + && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { + return hw_event_id; } - return hw_event_id; + return armpmu_map_event(event, extra_event_map, extra_cache_map, + ARMV8_PMU_EVTYPE_EVENT); +} + +static int armv8_pmuv3_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, NULL); } static int armv8_a53_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_a53_perf_map, - &armv8_a53_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_a53_perf_map, + &armv8_a53_perf_cache_map); } static int armv8_a57_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_a57_perf_map, - &armv8_a57_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_a57_perf_map, + &armv8_a57_perf_cache_map); } static int armv8_thunder_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_thunder_perf_map, - &armv8_thunder_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_thunder_perf_map, + &armv8_thunder_perf_cache_map); } static int armv8_vulcan_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_vulcan_perf_map, - &armv8_vulcan_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_vulcan_perf_map, + &armv8_vulcan_perf_cache_map); } struct armv8pmu_probe_info { diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 1c5e0f333779..d14fc2e67f93 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -47,6 +47,9 @@ armpmu_map_cache_event(const unsigned (*cache_map) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; + if (!cache_map) + return -ENOENT; + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) @@ -63,6 +66,9 @@ armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) if (config >= PERF_COUNT_HW_MAX) return -EINVAL; + if (!event_map) + return -ENOENT; + mapping = (*event_map)[config]; return mapping == HW_OP_UNSUPPORTED ? 
-ENOENT : mapping; } -- cgit v1.2.3 From 09c2a7dc4ca2755892b74858fe3bf62b652ff9d0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:38 +0100 Subject: arm64: mm: Fix set_memory_valid() declaration Clearly, set_memory_valid() has never been seen in the same room as its declaration... Whilst the type mismatch is such that kexec probably wasn't broken in practice, fix it to match the definition as it should. Fixes: 9b0aa14e3155 ("arm64: mm: add set_memory_valid()") Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cacheflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index d74a284abdc2..4d4f650c290e 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -150,6 +150,6 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end) { } -int set_memory_valid(unsigned long addr, unsigned long size, int enable); +int set_memory_valid(unsigned long addr, int numpages, int enable); #endif -- cgit v1.2.3 From d46befef4c03fb61a62b3319ff5265aaac7bc465 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:39 +0100 Subject: arm64: Convert __inval_cache_range() to area-based __inval_cache_range() is already the odd one out among our data cache maintenance routines as the only remaining range-based one; as we're going to want an invalidation routine to call from C code for the pmem API, let's tweak the prototype and name to bring it in line with the clean operations, and to make its relationship with __dma_inv_area() neatly mirror that of __clean_dcache_area_poc() and __dma_clean_area(). The loop clearing the early page tables gets mildly massaged in the process for the sake of consistency. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cacheflush.h | 1 + arch/arm64/kernel/head.S | 18 +++++++++--------- arch/arm64/mm/cache.S | 23 ++++++++++++++--------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 4d4f650c290e..b4b43a94dffd 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -67,6 +67,7 @@ */ extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); +extern void __inval_dcache_area(void *addr, size_t len); extern void __clean_dcache_area_poc(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 973df7de7bf8..73a0531e0187 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -143,8 +143,8 @@ preserve_boot_args: dmb sy // needed before dc ivac with // MMU off - add x1, x0, #0x20 // 4 x 8 bytes - b __inval_cache_range // tail call + mov x1, #0x20 // 4 x 8 bytes + b __inval_dcache_area // tail call ENDPROC(preserve_boot_args) /* @@ -221,20 +221,20 @@ __create_page_tables: * dirty cache lines being evicted. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE - bl __inval_cache_range + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + bl __inval_dcache_area /* * Clear the idmap and swapper page tables. 
*/ adrp x0, idmap_pg_dir - adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 - cmp x0, x6 - b.lo 1b + subs x1, x1, #64 + b.ne 1b mov x7, SWAPPER_MM_MMUFLAGS @@ -307,9 +307,9 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) dmb sy - bl __inval_cache_range + bl __inval_dcache_area ret x28 ENDPROC(__create_page_tables) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 83c27b6e6dca..ed47fbbb4b05 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -109,20 +109,25 @@ ENTRY(__clean_dcache_area_pou) ENDPROC(__clean_dcache_area_pou) /* - * __dma_inv_area(start, size) - * - start - virtual start address of region + * __inval_dcache_area(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are invalidated. Any partial lines at the ends of the interval are + * also cleaned to PoC to prevent data loss. + * + * - kaddr - kernel address * - size - size in question */ -__dma_inv_area: - add x1, x1, x0 +ENTRY(__inval_dcache_area) /* FALLTHROUGH */ /* - * __inval_cache_range(start, end) - * - start - start address of region - * - end - end address of region + * __dma_inv_area(start, size) + * - start - virtual start address of region + * - size - size in question */ -ENTRY(__inval_cache_range) +__dma_inv_area: + add x1, x1, x0 dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -140,7 +145,7 @@ ENTRY(__inval_cache_range) b.lo 2b dsb sy ret -ENDPIPROC(__inval_cache_range) +ENDPIPROC(__inval_dcache_area) ENDPROC(__dma_inv_area) /* -- cgit v1.2.3 From 7aac405ebb3224037efd56b73d82d181111cdac3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:40 +0100 Subject: arm64: Expose DC CVAP to userspace The ARMv8.2-DCPoP feature introduces persistent memory support to the architecture, by defining a point of persistence in the memory hierarchy, and a corresponding cache maintenance operation, DC CVAP. Expose the support via HWCAP and MRS emulation. 
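For illustration only, a userspace consumer would typically test the new hwcap before issuing DC CVAP:

    #include <sys/auxv.h>
    #include <asm/hwcap.h>              /* HWCAP_DCPOP */

    static int have_dc_cvap(void)
    {
            return (getauxval(AT_HWCAP) & HWCAP_DCPOP) != 0;
    }

The same information is also visible through the ID_AA64ISAR1_EL1.DPB field via the MRS emulation noted above.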
Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.txt | 2 ++ arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 2 ++ arch/arm64/kernel/cpuinfo.c | 1 + 5 files changed, 7 insertions(+) diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index d1c97f9f51cc..dad411d635d8 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -179,6 +179,8 @@ infrastructure: | FCMA | [19-16] | y | |--------------------------------------------------| | JSCVT | [15-12] | y | + |--------------------------------------------------| + | DPB | [3-0] | y | x--------------------------------------------------x Appendix I: Example diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 248339e4aaf5..f707fed5886f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -329,6 +329,7 @@ #define ID_AA64ISAR1_LRCPC_SHIFT 20 #define ID_AA64ISAR1_FCMA_SHIFT 16 #define ID_AA64ISAR1_JSCVT_SHIFT 12 +#define ID_AA64ISAR1_DPB_SHIFT 0 /* id_aa64pfr0 */ #define ID_AA64PFR0_GIC_SHIFT 24 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 4e187ce2a811..4b9344cba83a 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -35,5 +35,6 @@ #define HWCAP_JSCVT (1 << 13) #define HWCAP_FCMA (1 << 14) #define HWCAP_LRCPC (1 << 15) +#define HWCAP_DCPOP (1 << 16) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9f9e0064c8c1..a2542ef3ff25 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -120,6 +120,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -916,6 +917,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index f495ee5049fd..311885962830 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -68,6 +68,7 @@ static const char *const hwcap_str[] = { "jscvt", "fcma", "lrcpc", + "dcpop", NULL }; -- cgit v1.2.3 From e1bc5d1b8e0547c258e65dd97a03560f4d69e635 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:41 +0100 Subject: arm64: Handle trapped DC CVAP Cache clean to PoP is subject to the same 
access controls as to PoC, so if we are trapping userspace cache maintenance with SCTLR_EL1.UCI, we need to be prepared to handle it. To avoid getting into complicated fights with binutils about ARMv8.2 options, we'll just cheat and use the raw SYS instruction rather than the 'proper' DC alias. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 3 ++- arch/arm64/kernel/traps.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 130b5343ba6d..66ed8b6b9976 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -169,9 +169,10 @@ /* * User space cache operations have the following sysreg encoding * in System instructions. - * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0) + * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 14 }, WRITE (L=0) */ #define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAP 12 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10 #define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5 diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0f047e916cee..ccb9727d67b2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -484,6 +484,9 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */ __user_cache_maint("dc civac", address, ret); break; + case ESR_ELx_SYS64_ISS_CRM_DC_CVAP: /* DC CVAP */ + __user_cache_maint("sys 3, c7, c12, 1", address, ret); + break; case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC: /* DC CIVAC */ __user_cache_maint("dc civac", address, ret); break; -- cgit v1.2.3 From d50e071fdaa33c1b399c764c44fa1ce879881185 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:42 +0100 Subject: arm64: Implement pmem API support Add a clean-to-point-of-persistence cache maintenance helper, and wire up the basic architectural support for the pmem driver based on it. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy [catalin.marinas@arm.com: move arch_*_pmem() functions to arch/arm64/mm/flush.c] [catalin.marinas@arm.com: change dmb(sy) to dmb(osh)] Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 11 +++++++++++ arch/arm64/include/asm/assembler.h | 6 ++++++ arch/arm64/include/asm/cacheflush.h | 1 + arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpufeature.c | 11 +++++++++++ arch/arm64/mm/cache.S | 14 ++++++++++++++ arch/arm64/mm/flush.c | 16 ++++++++++++++++ 7 files changed, 61 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index dfd908630631..0b0576a54724 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -960,6 +960,17 @@ config ARM64_UAO regular load/store instructions if the cpu does not implement the feature. +config ARM64_PMEM + bool "Enable support for persistent memory" + select ARCH_HAS_PMEM_API + help + Say Y to enable support for the persistent memory API based on the + ARMv8.2 DCPoP feature. + + The feature is detected at runtime, and the kernel will use DC CVAC + operations if DC CVAP is not supported (following the behaviour of + DC CVAP itself if the system does not define a point of persistence). 
+ endmenu config ARM64_MODULE_CMODEL_LARGE diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 1b67c3782d00..5d8903c45031 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -352,6 +352,12 @@ alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE dc \op, \kaddr alternative_else dc civac, \kaddr +alternative_endif + .elseif (\op == cvap) +alternative_if ARM64_HAS_DCPOP + sys 3, c7, c12, 1, \kaddr // dc cvap +alternative_else + dc cvac, \kaddr alternative_endif .else dc \op, \kaddr diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index b4b43a94dffd..76d1cc85d5b1 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -69,6 +69,7 @@ extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); extern void __inval_dcache_area(void *addr, size_t len); extern void __clean_dcache_area_poc(void *addr, size_t len); +extern void __clean_dcache_area_pop(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); extern void sync_icache_aliases(void *kaddr, unsigned long len); diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 8d2272c6822c..8da621627d7c 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -39,7 +39,8 @@ #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 #define ARM64_WORKAROUND_858921 19 #define ARM64_WORKAROUND_CAVIUM_30115 20 +#define ARM64_HAS_DCPOP 21 -#define ARM64_NCAPS 21 +#define ARM64_NCAPS 22 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a2542ef3ff25..cd52d365d1f0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -889,6 +889,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 0, .matches = has_no_fpsimd, }, +#ifdef CONFIG_ARM64_PMEM + { + .desc = "Data cache clean to Point of Persistence", + .capability = ARM64_HAS_DCPOP, + .def_scope = SCOPE_SYSTEM, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR1_EL1, + .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .min_field_value = 1, + }, +#endif {}, }; diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index ed47fbbb4b05..7f1dbe962cf5 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -171,6 +171,20 @@ __dma_clean_area: ENDPIPROC(__clean_dcache_area_poc) ENDPROC(__dma_clean_area) +/* + * __clean_dcache_area_pop(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned to the PoP. + * + * - kaddr - kernel address + * - size - size in question + */ +ENTRY(__clean_dcache_area_pop) + dcache_by_line_op cvap, sy, x0, x1, x2, x3 + ret +ENDPIPROC(__clean_dcache_area_pop) + /* * __dma_flush_area(start, size) * diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 21a8d828cbf4..280f90ff33a2 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -83,3 +83,19 @@ EXPORT_SYMBOL(flush_dcache_page); * Additional functions defined in assembly. 
*/ EXPORT_SYMBOL(flush_icache_range); + +#ifdef CONFIG_ARCH_HAS_PMEM_API +static inline void arch_wb_cache_pmem(void *addr, size_t size) +{ + /* Ensure order against any prior non-cacheable writes */ + dmb(osh); + __clean_dcache_area_pop(addr, size); +} +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); + +static inline void arch_invalidate_pmem(void *addr, size_t size) +{ + __inval_dcache_area(addr, size); +} +EXPORT_SYMBOL_GPL(arch_invalidate_pmem); +#endif -- cgit v1.2.3 From 5d7bdeb1eeb250222304cb7b8126892cc47980a8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:43 +0100 Subject: arm64: uaccess: Implement *_flushcache variants Implement the set of copy functions with guarantees of a clean cache upon completion necessary to support the pmem driver. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/string.h | 4 ++++ arch/arm64/include/asm/uaccess.h | 12 ++++++++++++ arch/arm64/lib/Makefile | 2 ++ 4 files changed, 19 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0b0576a54724..e43a63b3d14b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -963,6 +963,7 @@ config ARM64_UAO config ARM64_PMEM bool "Enable support for persistent memory" select ARCH_HAS_PMEM_API + select ARCH_HAS_UACCESS_FLUSHCACHE help Say Y to enable support for the persistent memory API based on the ARMv8.2 DCPoP feature. diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index d0aa42907569..dd95d33a5bd5 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -52,6 +52,10 @@ extern void *__memset(void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMCMP extern int memcmp(const void *, const void *, size_t); +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +#define __HAVE_ARCH_MEMCPY_FLUSHCACHE +void memcpy_flushcache(void *dst, const void *src, size_t cnt); +#endif #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index fab46a0ea223..bf8435deb8a1 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -347,4 +347,16 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +struct page; +void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len); +extern unsigned long __must_check __copy_user_flushcache(void *to, const void __user *from, unsigned long n); + +static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) +{ + kasan_check_write(dst, size); + return __copy_user_flushcache(dst, src, size); +} +#endif + #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index c86b7909ef31..a0abc142c92b 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -17,3 +17,5 @@ CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ -fcall-saved-x18 + +lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o -- cgit v1.2.3 From 739586951b8abe381a98797a5e27a0a9336333d6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 26 Jul 2017 20:07:37 +0300 Subject: arm64/vdso: Support mremap() for vDSO vDSO VMA address is saved in mm_context for the purpose of using 
restorer from vDSO page to return to userspace after signal handling. In Checkpoint Restore in Userspace (CRIU) project we place vDSO VMA on restore back to the place where it was on the dump. With the exception for x86 (where there is API to map vDSO with arch_prctl()), we move vDSO inherited from CRIU task to restoree position by mremap(). CRIU does support arm64 architecture, but kernel doesn't update context.vdso pointer after mremap(). Which results in translation fault after signal handling on restored application: https://github.com/xemul/criu/issues/288 Make vDSO code track the VMA address by supplying .mremap() fops the same way it's done for x86 and arm32 by: commit b059a453b1cf ("x86/vdso: Add mremap hook to vm_special_mapping") commit 280e87e98c09 ("ARM: 8683/1: ARM32: Support mremap() for sigpage/vDSO"). Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Christopher Covington Reviewed-by: Will Deacon Signed-off-by: Dmitry Safonov Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index e8f759f764f2..2d419006ad43 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -110,12 +110,27 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) } #endif /* CONFIG_COMPAT */ +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + unsigned long vdso_size = vdso_end - vdso_start; + + if (vdso_size != new_size) + return -EINVAL; + + current->mm->context.vdso = (void *)new_vma->vm_start; + + return 0; +} + static struct vm_special_mapping vdso_spec[2] __ro_after_init = { { .name = "[vvar]", }, { .name = "[vdso]", + .mremap = vdso_mremap, }, }; -- cgit v1.2.3 From 7326749801396105aef0ed9229df746ac9e24300 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 22 Jul 2017 18:45:33 +0100 Subject: arm64: unwind: reference pt_regs via embedded stack frame As it turns out, the unwind code is slightly broken, and probably has been for a while. The problem is in the dumping of the exception stack, which is intended to dump the contents of the pt_regs struct at each level in the call stack where an exception was taken and routed to a routine marked as __exception (which means its stack frame is right below the pt_regs struct on the stack). 'Right below the pt_regs struct' is ill defined, though: the unwind code assigns 'frame pointer + 0x10' to the .sp member of the stackframe struct at each level, and dump_backtrace() happily dereferences that as the pt_regs pointer when encountering an __exception routine. However, the actual size of the stack frame created by this routine (which could be one of many __exception routines we have in the kernel) is not known, and so frame.sp is pretty useless to figure out where struct pt_regs really is. So it seems the only way to ensure that we can find our struct pt_regs when walking the stack frames is to put it at a known fixed offset of the stack frame pointer that is passed to such __exception routines. The simplest way to do that is to put it inside pt_regs itself, which is the main change implemented by this patch. 
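Concretely (a sketch), an unwinder that recognises a frame created by entry code can then recover the saved registers with nothing more than:

    regs = (struct pt_regs *)(frame.fp - offsetof(struct pt_regs, stackframe));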
As a bonus, doing this allows us to get rid of a fair amount of cruft related to walking from one stack to the other, which is especially nice since we intend to introduce yet another stack for overflow handling once we add support for vmapped stacks. It also fixes an inconsistency where we only add a stack frame pointing to ELR_EL1 if we are executing from the IRQ stack but not when we are executing from the task stack. To consistly identify exceptions regs even in the presence of exceptions taken from entry code, we must check whether the next frame was created by entry text, rather than whether the current frame was crated by exception text. To avoid backtracing using PCs that fall in the idmap, or are controlled by userspace, we must explcitly zero the FP and LR in startup paths, and must ensure that the frame embedded in pt_regs is zeroed upon entry from EL0. To avoid these NULL entries showin in the backtrace, unwind_frame() is updated to avoid them. Signed-off-by: Ard Biesheuvel [Mark: compare current frame against .entry.text, avoid bogus PCs] Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/irq.h | 25 ------------------------- arch/arm64/include/asm/ptrace.h | 1 + arch/arm64/include/asm/traps.h | 5 +++++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 20 ++++++++++++-------- arch/arm64/kernel/head.S | 4 ++++ arch/arm64/kernel/stacktrace.c | 33 ++++++--------------------------- arch/arm64/kernel/traps.c | 32 +++++++------------------------- 8 files changed, 36 insertions(+), 85 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 8155e486ce48..8ba89c4ca183 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -16,31 +16,6 @@ struct pt_regs; DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); -/* - * The highest address on the stack, and the first to be used. Used to - * find the dummy-stack frame put down by el?_irq() in entry.S, which - * is structured as follows: - * - * ------------ - * | | <- irq_stack_ptr - * top ------------ - * | x19 | <- irq_stack_ptr - 0x08 - * ------------ - * | x29 | <- irq_stack_ptr - 0x10 - * ------------ - * - * where x19 holds a copy of the task stack pointer where the struct pt_regs - * from kernel_entry can be found. - * - */ -#define IRQ_STACK_PTR() ((unsigned long)raw_cpu_ptr(irq_stack) + IRQ_STACK_START_SP) - -/* - * The offset from irq_stack_ptr where entry.S will store the original - * stack pointer. Used by unwind_frame() and dump_backtrace(). - */ -#define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08))) - extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); static inline int nr_legacy_irqs(void) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 11403fdd0a50..ee72aa979078 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -119,6 +119,7 @@ struct pt_regs { u64 syscallno; u64 orig_addr_limit; u64 unused; // maintain 16 byte alignment + u64 stackframe[2]; }; #define MAX_REG_OFFSET offsetof(struct pt_regs, pstate) diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 02e9035b0685..41361684580d 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -60,4 +60,9 @@ static inline int in_exception_text(unsigned long ptr) return in ? 
: __in_irqentry_text(ptr); } +static inline int in_entry_text(unsigned long ptr) +{ + return ptr >= (unsigned long)&__entry_text_start && + ptr < (unsigned long)&__entry_text_end; +} #endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index b3bb7ef97bc8..71bf088f1e4b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -75,6 +75,7 @@ int main(void) DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); + DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9e126d3d8b53..612a077ba109 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -111,6 +111,18 @@ mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] + /* + * In order to be able to dump the contents of struct pt_regs at the + * time the exception was taken (in case we attempt to walk the call + * stack later), chain it together with the stack frames. + */ + .if \el == 0 + stp xzr, xzr, [sp, #S_STACKFRAME] + .else + stp x29, x22, [sp, #S_STACKFRAME] + .endif + add x29, sp, #S_STACKFRAME + #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Set the TTBR0 PAN bit in SPSR. When the exception is taken from @@ -265,14 +277,6 @@ alternative_else_nop_endif /* switch to the irq stack */ mov sp, x26 - - /* - * Add a dummy stack frame, this non-standard format is fixed up - * by unwind_frame() - */ - stp x29, x19, [sp, #-16]! - mov x29, sp - 9998: .endm diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 973df7de7bf8..f9e4aacf4f42 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -362,6 +362,9 @@ __primary_switched: ret // to __primary_switch() 0: #endif + add sp, sp, #16 + mov x29, #0 + mov x30, #0 b start_kernel ENDPROC(__primary_switched) @@ -617,6 +620,7 @@ __secondary_switched: ldr x2, [x0, #CPU_BOOT_TASK] msr sp_el0, x2 mov x29, #0 + mov x30, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index beaf51fb3088..81d9262acaf0 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -76,34 +76,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ /* - * Check whether we are going to walk through from interrupt stack - * to task stack. - * If we reach the end of the stack - and its an interrupt stack, - * unpack the dummy frame to find the original elr. - * - * Check the frame->fp we read from the bottom of the irq_stack, - * and the original task stack pointer are both in current->stack. + * Frames created upon entry from EL0 have NULL FP and PC values, so + * don't bother reporting these. Frames created by __noreturn functions + * might have a valid FP even if PC is bogus, so only terminate where + * both are NULL. 
*/ - if (frame->sp == IRQ_STACK_PTR()) { - struct pt_regs *irq_args; - unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(frame->sp); - - if (object_is_on_stack((void *)orig_sp) && - object_is_on_stack((void *)frame->fp)) { - frame->sp = orig_sp; - - /* orig_sp is the saved pt_regs, find the elr */ - irq_args = (struct pt_regs *)orig_sp; - frame->pc = irq_args->pc; - } else { - /* - * This frame has a non-standard format, and we - * didn't fix it, because the data looked wrong. - * Refuse to output this frame. - */ - return -EINVAL; - } - } + if (!frame->fp && !frame->pc) + return -EINVAL; return 0; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5797f5037ec9..075c29a24345 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -143,7 +143,6 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; - unsigned long irq_stack_ptr; int skip; pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); @@ -154,15 +153,6 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) if (!try_get_task_stack(tsk)) return; - /* - * Switching between stacks is valid when tracing current and in - * non-preemptible context. - */ - if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(); - else - irq_stack_ptr = 0; - if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; @@ -182,13 +172,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) skip = !!regs; printk("Call trace:\n"); while (1) { - unsigned long where = frame.pc; unsigned long stack; int ret; /* skip until specified stack frame */ if (!skip) { - dump_backtrace_entry(where); + dump_backtrace_entry(frame.pc); } else if (frame.fp == regs->regs[29]) { skip = 0; /* @@ -203,20 +192,13 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) ret = unwind_frame(tsk, &frame); if (ret < 0) break; - stack = frame.sp; - if (in_exception_text(where)) { - /* - * If we switched to the irq_stack before calling this - * exception handler, then the pt_regs will be on the - * task stack. The easiest way to tell is if the large - * pt_regs would overlap with the end of the irq_stack. - */ - if (stack < irq_stack_ptr && - (stack + sizeof(struct pt_regs)) > irq_stack_ptr) - stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + if (in_entry_text(frame.pc)) { + stack = frame.fp - offsetof(struct pt_regs, stackframe); - dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs)); + if (on_task_stack(tsk, stack) || + (tsk == current && !preemptible() && on_irq_stack(stack))) + dump_mem("", "Exception stack", stack, + stack + sizeof(struct pt_regs)); } } -- cgit v1.2.3 From 31e43ad3b74a5d7b282023b72f25fc677c14c727 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 23 Jul 2017 09:05:38 +0100 Subject: arm64: unwind: remove sp from struct stackframe The unwind code sets the sp member of struct stackframe to 'frame pointer + 0x10' unconditionally, without regard for whether doing so produces a legal value. So let's simply remove it now that we have stopped using it anyway. 
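A stack walk then needs nothing beyond the two remaining fields, e.g. (illustrative sketch; FUNCTION_GRAPH_TRACER setup omitted, and the starting PC is arbitrary):

    struct stackframe frame = {
            .fp = (unsigned long)__builtin_frame_address(0),
            .pc = (unsigned long)dump_backtrace,    /* any in-function PC */
    };

    do {
            dump_backtrace_entry(frame.pc);
    } while (unwind_frame(tsk, &frame) >= 0);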
Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/include/asm/stacktrace.h | 1 - arch/arm64/kernel/perf_callchain.c | 1 - arch/arm64/kernel/process.c | 5 +---- arch/arm64/kernel/return_address.c | 1 - arch/arm64/kernel/stacktrace.c | 4 ---- arch/arm64/kernel/time.c | 1 - arch/arm64/kernel/traps.c | 2 -- 7 files changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 5b6eafccc5d8..3bebab378c72 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -20,7 +20,6 @@ struct task_struct; struct stackframe { unsigned long fp; - unsigned long sp; unsigned long pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER unsigned int graph; diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 713ca824f266..bcafd7dcfe8b 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -162,7 +162,6 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, } frame.fp = regs->regs[29]; - frame.sp = regs->sp; frame.pc = regs->pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = current->curr_ret_stack; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 659ae8094ed5..85b953dd023a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -382,15 +382,12 @@ unsigned long get_wchan(struct task_struct *p) return 0; frame.fp = thread_saved_fp(p); - frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = p->curr_ret_stack; #endif do { - if (frame.sp < stack_page || - frame.sp >= stack_page + THREAD_SIZE || - unwind_frame(p, &frame)) + if (unwind_frame(p, &frame)) goto out; if (!in_sched_functions(frame.pc)) { ret = frame.pc; diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index 12a87f2600f2..933adbc0f654 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -42,7 +42,6 @@ void *return_address(unsigned int level) data.addr = NULL; frame.fp = (unsigned long)__builtin_frame_address(0); - frame.sp = current_stack_pointer; frame.pc = (unsigned long)return_address; /* dummy */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = current->curr_ret_stack; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 81d9262acaf0..35588caad9d0 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -58,7 +58,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) !on_task_stack(tsk, fp)) return -EINVAL; - frame->sp = fp + 0x10; frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); @@ -136,7 +135,6 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) data.no_sched_functions = 0; frame.fp = regs->regs[29]; - frame.sp = regs->sp; frame.pc = regs->pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = current->curr_ret_stack; @@ -161,12 +159,10 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) if (tsk != current) { data.no_sched_functions = 1; frame.fp = thread_saved_fp(tsk); - frame.sp = thread_saved_sp(tsk); frame.pc = thread_saved_pc(tsk); } else { data.no_sched_functions = 0; frame.fp = (unsigned long)__builtin_frame_address(0); - frame.sp = current_stack_pointer; frame.pc = (unsigned long)save_stack_trace_tsk; } 
#ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index da33c90248e9..a4391280fba9 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -50,7 +50,6 @@ unsigned long profile_pc(struct pt_regs *regs) return regs->pc; frame.fp = regs->regs[29]; - frame.sp = regs->sp; frame.pc = regs->pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = -1; /* no task info */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 075c29a24345..c2a81bf8827e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -155,14 +155,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); - frame.sp = current_stack_pointer; frame.pc = (unsigned long)dump_backtrace; } else { /* * task blocked in __switch_to */ frame.fp = thread_saved_fp(tsk); - frame.sp = thread_saved_sp(tsk); frame.pc = thread_saved_pc(tsk); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -- cgit v1.2.3 From 66c3ec5a712005625437474cf5a04148d7890350 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 9 Aug 2017 11:43:28 +0100 Subject: arm64: neon: Forbid when irqs are disabled Currently, may_use_simd() can return true if IRQs are disabled. If the caller goes ahead and calls kernel_neon_begin(), this can result in use of local_bh_enable() in an unsafe context. In particular, __efi_fpsimd_begin() may do this when calling EFI as part of system shutdown. This patch ensures that callers don't think they can use kernel_neon_begin() in such a context. Acked-by: Ard Biesheuvel Signed-off-by: Dave Martin Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/simd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 5a1a927b74a2..fa8b3fe932e6 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -10,6 +10,7 @@ #define __ASM_SIMD_H #include +#include #include #include #include @@ -40,7 +41,8 @@ static __must_check inline bool may_use_simd(void) * can't migrate to another CPU and spuriously see it become * false. */ - return !in_irq() && !in_nmi() && !raw_cpu_read(kernel_neon_busy); + return !in_irq() && !irqs_disabled() && !in_nmi() && + !raw_cpu_read(kernel_neon_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ -- cgit v1.2.3 From 35129dde88afad07f54b332d4f9eda2d254b80f2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 13 Jul 2017 18:16:00 +0100 Subject: md/raid6: use faster multiplication for ARM NEON delta syndrome The P/Q left side optimization in the delta syndrome simply involves repeatedly multiplying a value by polynomial 'x' in GF(2^8). Given that 'x * x * x * x' equals 'x^4' even in the polynomial world, we can accelerate this substantially by performing up to 4 such operations at once, using the NEON instructions for polynomial multiplication. Results on a Cortex-A57 running in 64-bit mode: Before: ------- raid6: neonx1 xor() 1680 MB/s raid6: neonx2 xor() 2286 MB/s raid6: neonx4 xor() 3162 MB/s raid6: neonx8 xor() 3389 MB/s After: ------ raid6: neonx1 xor() 2281 MB/s raid6: neonx2 xor() 3362 MB/s raid6: neonx4 xor() 3787 MB/s raid6: neonx8 xor() 4239 MB/s While we're at it, simplify MASK() by using a signed shift rather than a vector compare involving a temp register. 
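To make the arithmetic concrete, here is a scalar C sketch of the same GF(2^8) operations (illustrative only, not part of the patch; 0x11d is the RAID-6 field polynomial). Multiplying by x once is a shift plus a conditional reduction; multiplying by x^4 in one go shifts the low nibble up and folds the overflowing high nibble back in with a single carry-less multiply by 0x1d, which is what the vmulq_p8()-based PMUL() below does for each byte lane:

        #include <stdint.h>

        /* carry-less (polynomial) multiply of two bytes, one lane of vmulq_p8 */
        static uint16_t clmul8(uint8_t a, uint8_t b)
        {
                uint16_t r = 0;
                int i;

                for (i = 0; i < 8; i++)
                        if (b & (1u << i))
                                r ^= (uint16_t)a << i;
                return r;
        }

        /* multiply by x in GF(2^8) modulo x^8 + x^4 + x^3 + x^2 + 1 (0x11d) */
        static uint8_t gf_mul_x(uint8_t v)
        {
                return (uint8_t)(v << 1) ^ ((v & 0x80) ? 0x1d : 0);
        }

        /*
         * multiply by x^4: write v = h*x^4 + l, then
         * v*x^4 = l*x^4 + h*(x^8 mod 0x11d) = (v << 4) ^ clmul(h, 0x1d)
         */
        static uint8_t gf_mul_x4(uint8_t v)
        {
                return (uint8_t)(v << 4) ^ (uint8_t)clmul8(v >> 4, 0x1d);
        }

One call to gf_mul_x4() gives the same result as four calls to gf_mul_x(), which is exactly the 4-at-a-time step in the new inner loop. The MASK() change is a separate micro-optimization: the signed NEON shift vshrq_n_s8(v, 7) replicates bit 7 across each byte, producing the same 0x00/0xff mask as the old vcltq_s8() compare without needing a zeroed temporary register.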
Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- lib/raid6/neon.uc | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc index 4fa51b761dd0..d5242f544551 100644 --- a/lib/raid6/neon.uc +++ b/lib/raid6/neon.uc @@ -46,8 +46,12 @@ static inline unative_t SHLBYTE(unative_t v) */ static inline unative_t MASK(unative_t v) { - const uint8x16_t temp = NBYTES(0); - return (unative_t)vcltq_s8((int8x16_t)v, (int8x16_t)temp); + return (unative_t)vshrq_n_s8((int8x16_t)v, 7); +} + +static inline unative_t PMUL(unative_t v, unative_t u) +{ + return (unative_t)vmulq_p8((poly8x16_t)v, (poly8x16_t)u); } void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) @@ -110,7 +114,30 @@ void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop, wq$$ = veorq_u8(w1$$, wd$$); } /* P/Q left side optimization */ - for ( z = start-1 ; z >= 0 ; z-- ) { + for ( z = start-1 ; z >= 3 ; z -= 4 ) { + w2$$ = vshrq_n_u8(wq$$, 4); + w1$$ = vshlq_n_u8(wq$$, 4); + + w2$$ = PMUL(w2$$, x1d); + wq$$ = veorq_u8(w1$$, w2$$); + } + + switch (z) { + case 2: + w2$$ = vshrq_n_u8(wq$$, 5); + w1$$ = vshlq_n_u8(wq$$, 3); + + w2$$ = PMUL(w2$$, x1d); + wq$$ = veorq_u8(w1$$, w2$$); + break; + case 1: + w2$$ = vshrq_n_u8(wq$$, 6); + w1$$ = vshlq_n_u8(wq$$, 2); + + w2$$ = PMUL(w2$$, x1d); + wq$$ = veorq_u8(w1$$, w2$$); + break; + case 0: w2$$ = MASK(wq$$); w1$$ = SHLBYTE(wq$$); -- cgit v1.2.3 From 6ec4e2514decd6fb4782a9364fa71d6244d05af4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 13 Jul 2017 18:16:01 +0100 Subject: md/raid6: implement recovery using ARM NEON intrinsics Provide a NEON accelerated implementation of the recovery algorithm, which supersedes the default byte-by-byte one. 
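A note on how the inner loop below multiplies by the recovery constants: rather than a 256-entry table lookup per byte, it exploits the fact that multiplication by a constant in GF(2^8) is linear over XOR, so a 32-byte table (16 low-nibble products followed by 16 high-nibble products, the layout the generic raid6 code provides in raid6_vgfmul) and one table lookup per nibble are enough. A scalar sketch of the idea (illustrative only; it assumes that 16+16 table layout):

        #include <stdint.h>

        /*
         * Multiply v by a fixed GF(2^8) constant c using two 16-entry tables:
         *   tbl[0..15]  = c * n        for every low nibble n
         *   tbl[16..31] = c * (n << 4) for every high nibble n
         * Since c*(hi ^ lo) = c*hi ^ c*lo, XORing the two partial products
         * reconstructs the full product.
         */
        static uint8_t gf_mul_const(const uint8_t tbl[32], uint8_t v)
        {
                return tbl[v & 0x0f] ^ tbl[16 + (v >> 4)];
        }

The NEON code does the same thing sixteen bytes at a time: the AND with 0x0f and the 4-bit shift split out the nibbles, and vqtbl1q_u8() performs sixteen such table lookups in a single instruction against the qm0/qm1 and pm0/pm1 vectors loaded from qmul and pbmul.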
Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- include/linux/raid/pq.h | 1 + lib/raid6/Makefile | 4 +- lib/raid6/algos.c | 3 ++ lib/raid6/recov_neon.c | 110 ++++++++++++++++++++++++++++++++++++++++ lib/raid6/recov_neon_inner.c | 117 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 lib/raid6/recov_neon.c create mode 100644 lib/raid6/recov_neon_inner.c diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 30f945329818..583cdd3d49ca 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -121,6 +121,7 @@ extern const struct raid6_recov_calls raid6_recov_ssse3; extern const struct raid6_recov_calls raid6_recov_avx2; extern const struct raid6_recov_calls raid6_recov_avx512; extern const struct raid6_recov_calls raid6_recov_s390xc; +extern const struct raid6_recov_calls raid6_recov_neon; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 3057011f5599..a93adf6dcfb2 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -5,7 +5,7 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o -raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o +raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o raid6_pq-$(CONFIG_TILEGX) += tilegx8.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o @@ -26,7 +26,9 @@ NEON_FLAGS := -ffreestanding ifeq ($(ARCH),arm) NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon endif +CFLAGS_recov_neon_inner.o += $(NEON_FLAGS) ifeq ($(ARCH),arm64) +CFLAGS_REMOVE_recov_neon_inner.o += -mgeneral-regs-only CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 7857049fd7d3..476994723258 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -112,6 +112,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = { #endif #ifdef CONFIG_S390 &raid6_recov_s390xc, +#endif +#if defined(CONFIG_KERNEL_MODE_NEON) + &raid6_recov_neon, #endif &raid6_recov_intx1, NULL diff --git a/lib/raid6/recov_neon.c b/lib/raid6/recov_neon.c new file mode 100644 index 000000000000..eeb5c4065b92 --- /dev/null +++ b/lib/raid6/recov_neon.c @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2012 Intel Corporation + * Copyright (C) 2017 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ + +#include + +#ifdef __KERNEL__ +#include +#else +#define kernel_neon_begin() +#define kernel_neon_end() +#define cpu_has_neon() (1) +#endif + +static int raid6_has_neon(void) +{ + return cpu_has_neon(); +} + +void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, + uint8_t *dq, const uint8_t *pbmul, + const uint8_t *qmul); + +void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, + const uint8_t *qmul); + +static void raid6_2data_recov_neon(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks - 2] = p; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ + raid6_gfexp[failb]]]; + + kernel_neon_begin(); + __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul); + kernel_neon_end(); +} + +static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_neon_begin(); + __raid6_datap_recov_neon(bytes, p, q, dq, qmul); + kernel_neon_end(); +} + +const struct raid6_recov_calls raid6_recov_neon = { + .data2 = raid6_2data_recov_neon, + .datap = raid6_datap_recov_neon, + .valid = raid6_has_neon, + .name = "neon", + .priority = 10, +}; diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c new file mode 100644 index 000000000000..8cd20c9f834a --- /dev/null +++ b/lib/raid6/recov_neon_inner.c @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2012 Intel Corporation + * Copyright (C) 2017 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include + +static const uint8x16_t x0f = { + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, +}; + +#ifdef CONFIG_ARM +/* + * AArch32 does not provide this intrinsic natively because it does not + * implement the underlying instruction. AArch32 only provides a 64-bit + * wide vtbl.8 instruction, so use that instead. 
+ */ +static uint8x16_t vqtbl1q_u8(uint8x16_t a, uint8x16_t b) +{ + union { + uint8x16_t val; + uint8x8x2_t pair; + } __a = { a }; + + return vcombine_u8(vtbl2_u8(__a.pair, vget_low_u8(b)), + vtbl2_u8(__a.pair, vget_high_u8(b))); +} +#endif + +void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, + uint8_t *dq, const uint8_t *pbmul, + const uint8_t *qmul) +{ + uint8x16_t pm0 = vld1q_u8(pbmul); + uint8x16_t pm1 = vld1q_u8(pbmul + 16); + uint8x16_t qm0 = vld1q_u8(qmul); + uint8x16_t qm1 = vld1q_u8(qmul + 16); + + /* + * while ( bytes-- ) { + * uint8_t px, qx, db; + * + * px = *p ^ *dp; + * qx = qmul[*q ^ *dq]; + * *dq++ = db = pbmul[px] ^ qx; + * *dp++ = db ^ px; + * p++; q++; + * } + */ + + while (bytes) { + uint8x16_t vx, vy, px, qx, db; + + px = veorq_u8(vld1q_u8(p), vld1q_u8(dp)); + vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); + + vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); + vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); + vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); + qx = veorq_u8(vx, vy); + + vy = (uint8x16_t)vshrq_n_s16((int16x8_t)px, 4); + vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f)); + vy = vqtbl1q_u8(pm1, vandq_u8(vy, x0f)); + vx = veorq_u8(vx, vy); + db = veorq_u8(vx, qx); + + vst1q_u8(dq, db); + vst1q_u8(dp, veorq_u8(db, px)); + + bytes -= 16; + p += 16; + q += 16; + dp += 16; + dq += 16; + } +} + +void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, + const uint8_t *qmul) +{ + uint8x16_t qm0 = vld1q_u8(qmul); + uint8x16_t qm1 = vld1q_u8(qmul + 16); + + /* + * while (bytes--) { + * *p++ ^= *dq = qmul[*q ^ *dq]; + * q++; dq++; + * } + */ + + while (bytes) { + uint8x16_t vx, vy; + + vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); + + vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); + vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); + vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); + vx = veorq_u8(vx, vy); + vy = veorq_u8(vx, vld1q_u8(p)); + + vst1q_u8(dq, vx); + vst1q_u8(p, vy); + + bytes -= 16; + p += 16; + q += 16; + dq += 16; + } +} -- cgit v1.2.3 From 21cfa0e96d1d521bec4e2f22a19437080e1357e7 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 10 Aug 2017 10:49:21 +0100 Subject: arm64: uaccess: Add the uaccess_flushcache.c file The uaccess_flushcache.c file was inadvertently dropped by the maintainer in a previous commit. Add it back. Fixes: 5d7bdeb1eeb2 ("arm64: uaccess: Implement *_flushcache variants") Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/lib/uaccess_flushcache.c | 47 +++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 arch/arm64/lib/uaccess_flushcache.c diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c new file mode 100644 index 000000000000..b6ceafdb8b72 --- /dev/null +++ b/arch/arm64/lib/uaccess_flushcache.c @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2017 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +void memcpy_flushcache(void *dst, const void *src, size_t cnt) +{ + /* + * We assume this should not be called with @dst pointing to + * non-cacheable memory, such that we don't need an explicit + * barrier to order the cache maintenance against the memcpy. + */ + memcpy(dst, src, cnt); + __clean_dcache_area_pop(dst, cnt); +} +EXPORT_SYMBOL_GPL(memcpy_flushcache); + +void memcpy_page_flushcache(char *to, struct page *page, size_t offset, + size_t len) +{ + memcpy_flushcache(to, page_address(page) + offset, len); +} + +unsigned long __copy_user_flushcache(void *to, const void __user *from, + unsigned long n) +{ + unsigned long rc = __arch_copy_from_user(to, from, n); + + /* See above */ + __clean_dcache_area_pop(to, n - rc); + return rc; +} -- cgit v1.2.3 From 5cf7fb26ea841e31488723a32b1613e6b5b876fe Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Tue, 25 Jul 2017 17:27:36 +0100 Subject: arm64: perf: Connect additional events to pmu counters Last level caches and node events were almost never connected in current supported cores. We connect last level caches to the actual last level within the core and node events are connected to bus accesses. Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b83f986e7fbf..f336753d5baa 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -302,6 +302,9 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] @@ -317,6 +320,11 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, + [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, + [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, @@ -326,6 +334,9 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] -- cgit v1.2.3 From d0d09d4d99e08767050bc30f2b19d6146abe01e2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Aug 2017 17:11:27 +0100 Subject: arm64: 
perf: Remove redundant entries from CPU-specific event maps Now that the event mapping code always looks into the PMUv3 events before any extended mappings, the extended mappings can be reduced to only those events that are not discoverable through the PMCEID registers. Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 114 ++--------------------------------------- 1 file changed, 4 insertions(+), 110 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f336753d5baa..f7737f6dcc36 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -202,55 +202,6 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, }; -/* ARM Cortex-A53 HW events mapping. */ -static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, -}; - -/* ARM Cortex-A57 and Cortex-A72 events mapping. */ -static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, -}; - -static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, -}; - -/* Broadcom Vulcan events mapping */ -static const unsigned armv8_vulcan_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_BR_RETIRED, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND, -}; - static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -281,28 
+232,8 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { PERF_CACHE_MAP_ALL_UNSUPPORTED, - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL, [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL, - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; @@ -317,24 +248,9 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - - [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE, - [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL, - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; @@ -351,8 +267,6 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS, [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS, 
[C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS, @@ -360,13 +274,6 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] @@ -379,22 +286,11 @@ static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, - [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, - [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; @@ -964,25 +860,23 @@ static int armv8_pmuv3_map_event(struct perf_event *event) static int armv8_a53_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_a53_perf_map, - &armv8_a53_perf_cache_map); + return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map); } static int armv8_a57_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_a57_perf_map, - &armv8_a57_perf_cache_map); + return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map); } static int armv8_thunder_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_thunder_perf_map, + return __armv8_pmuv3_map_event(event, NULL, &armv8_thunder_perf_cache_map); } static int armv8_vulcan_map_event(struct perf_event *event) { - return __armv8_pmuv3_map_event(event, &armv8_vulcan_perf_map, + return __armv8_pmuv3_map_event(event, NULL, &armv8_vulcan_perf_cache_map); } -- cgit v1.2.3 From 5561b6c5e9813df16d7453f6ce1a0546221fca97 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 9 Aug 2017 17:46:38 +0100 Subject: arm64: perf: add support for Cortex-A73 The Cortex-A73 uses some implementation defined perf events. This patch sets up the necessary mapping for Cortex-A73. Mappings are based on Cortex-A73 TRM r0p2, section 11.9 Events (pages 11-457 to 11-460). 
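Once a map like this is in place, the generic perf cache events become meaningful on the part: the core translates a (cache, op, result) triple into the IMPDEF counter listed in the table. A user-space sketch of requesting one of the newly wired-up events, here NODE read accesses, which the maps above direct to ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD (illustrative only, error handling trimmed):

        #define _GNU_SOURCE
        #include <linux/perf_event.h>
        #include <stdint.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        int main(void)
        {
                struct perf_event_attr attr;
                uint64_t count;
                int fd;

                memset(&attr, 0, sizeof(attr));
                attr.size = sizeof(attr);
                attr.type = PERF_TYPE_HW_CACHE;
                /* node read accesses: (cache id) | (op << 8) | (result << 16) */
                attr.config = PERF_COUNT_HW_CACHE_NODE |
                              (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                              (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16);

                fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
                if (fd < 0)
                        return 1;

                /* ... run the workload to be measured ... */

                if (read(fd, &count, sizeof(count)) == sizeof(count))
                        printf("node (bus) read accesses: %llu\n",
                               (unsigned long long)count);
                close(fd);
                return 0;
        }

On cores whose cache map has no NODE row, the same request simply fails at perf_event_open() time.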
Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.txt | 1 + arch/arm64/kernel/perf_event.c | 34 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 61c8b4620415..54c9727c70d8 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -9,6 +9,7 @@ Required properties: - compatible : should be one of "apm,potenza-pmu" "arm,armv8-pmuv3" + "arm,cortex-a73-pmu" "arm,cortex-a72-pmu" "arm,cortex-a57-pmu" "arm,cortex-a53-pmu" diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f7737f6dcc36..3fc00f61f729 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -255,6 +255,18 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; +static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, + + [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, + [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, +}; + static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -868,6 +880,11 @@ static int armv8_a57_map_event(struct perf_event *event) return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map); } +static int armv8_a73_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map); +} + static int armv8_thunder_map_event(struct perf_event *event) { return __armv8_pmuv3_map_event(event, NULL, @@ -1018,6 +1035,22 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) return 0; } +static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) +{ + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + + cpu_pmu->name = "armv8_cortex_a73"; + cpu_pmu->map_event = armv8_a73_map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; + + return 0; +} + static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { int ret = armv8_pmu_init(cpu_pmu); @@ -1055,6 +1088,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, + {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init}, {}, -- cgit v1.2.3 From e884f80cf2a76a86547e2316982e1f200f556ddf Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 9 Aug 2017 17:46:39 +0100 Subject: arm64: perf: add support for Cortex-A35 The
Cortex-A35 uses some implementation defined perf events. The Cortex-A35 derives from the Cortex-A53 core, using the same event mappings based on Cortex-A35 TRM r0p2, section C2.3 - Performance monitoring events (pages C2-562 to C2-565). Signed-off-by: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.txt | 1 + arch/arm64/kernel/perf_event.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 54c9727c70d8..13611a8199bb 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -13,6 +13,7 @@ Required properties: "arm,cortex-a72-pmu" "arm,cortex-a57-pmu" "arm,cortex-a53-pmu" + "arm,cortex-a35-pmu" "arm,cortex-a17-pmu" "arm,cortex-a15-pmu" "arm,cortex-a12-pmu" diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 3fc00f61f729..9eaef51f83ff 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -990,6 +990,22 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) return 0; } +static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) +{ + int ret = armv8_pmu_init(cpu_pmu); + if (ret) + return ret; + + cpu_pmu->name = "armv8_cortex_a35"; + cpu_pmu->map_event = armv8_a53_map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = + &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = + &armv8_pmuv3_format_attr_group; + + return 0; +} + static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { int ret = armv8_pmu_init(cpu_pmu); @@ -1088,6 +1104,7 @@ static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, + {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, -- cgit v1.2.3 From caf5ef7d15c511bbef691d0931adad56c2967435 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Aug 2017 16:52:31 +0200 Subject: arm64: fix pmem interface definition Defining the two functions as 'static inline' and exporting them leads to the interesting case where we can use the interface from loadable modules, but not from built-in drivers, as shown in this link failure: drivers/nvdimm/claim.o: In function `nsio_rw_bytes': claim.c:(.text+0x1b8): undefined reference to `arch_invalidate_pmem' drivers/nvdimm/pmem.o: In function `pmem_dax_flush': pmem.c:(.text+0x11c): undefined reference to `arch_wb_cache_pmem' drivers/nvdimm/pmem.o: In function `pmem_make_request': pmem.c:(.text+0x5a4): undefined reference to `arch_invalidate_pmem' pmem.c:(.text+0x650): undefined reference to `arch_invalidate_pmem' pmem.c:(.text+0x6d4): undefined reference to `arch_invalidate_pmem' This removes the bogus 'static inline'.
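The failure mode is worth spelling out: a function defined 'static inline' in a .c file never produces a global symbol, so EXPORT_SYMBOL_GPL() can still satisfy modules (the ksymtab entry points at the file-local copy and modules are resolved by name at load time), while a built-in caller in another object has nothing to link against. A compressed sketch of the situation before the fix (file boundaries shown as comments, bodies elided):

        /* arch/arm64/mm/flush.c -- before the fix */
        #include <linux/export.h>
        #include <linux/types.h>

        static inline void arch_wb_cache_pmem(void *addr, size_t size)
        {
                /* cache maintenance elided */
        }
        /* good enough for modules: records the address of the local copy ... */
        EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);

        /* drivers/nvdimm/pmem.c -- built into vmlinux */
        extern void arch_wb_cache_pmem(void *addr, size_t size); /* from the pmem API header */

        void pmem_flush_example(void *addr, size_t size)        /* made-up caller */
        {
                /*
                 * ... but this reference needs a global definition at
                 * vmlinux link time, and 'static inline' never emits one:
                 * "undefined reference to `arch_wb_cache_pmem'"
                 */
                arch_wb_cache_pmem(addr, size);
        }

Dropping 'static inline', as the hunk below does, turns arch_wb_cache_pmem() and arch_invalidate_pmem() into ordinary global definitions, so both built-in and modular users link.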
Fixes: d50e071fdaa3 ("arm64: Implement pmem API support") Acked-by: Robin Murphy Signed-off-by: Arnd Bergmann Signed-off-by: Catalin Marinas --- arch/arm64/mm/flush.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 280f90ff33a2..e36ed5087b5c 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(flush_dcache_page); EXPORT_SYMBOL(flush_icache_range); #ifdef CONFIG_ARCH_HAS_PMEM_API -static inline void arch_wb_cache_pmem(void *addr, size_t size) +void arch_wb_cache_pmem(void *addr, size_t size) { /* Ensure order against any prior non-cacheable writes */ dmb(osh); @@ -93,7 +93,7 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size) } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); -static inline void arch_invalidate_pmem(void *addr, size_t size) +void arch_invalidate_pmem(void *addr, size_t size) { __inval_dcache_area(addr, size); } -- cgit v1.2.3 From 4d36037a9a07f88c2dc7eea5d1991f282e0a9901 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Aug 2017 17:45:22 +0100 Subject: ACPI/IORT: Fix build regression without IOMMU A recent change reintroduced a bug that had previously been fixed by commit d49f2dedf33b ("ACPI/IORT: Fix CONFIG_IOMMU_API dependency"): drivers/acpi/arm64/iort.c: In function 'iort_iommu_configure': drivers/acpi/arm64/iort.c:829:26: error: 'struct iommu_fwspec' has no member named 'ops' Replace a direct reference to iommu_fwspec->ops with a helper function call to fix the issue. Signed-off-by: Arnd Bergmann Signed-off-by: Lorenzo Pieralisi Cc: Will Deacon Cc: Robin Murphy Signed-off-by: Catalin Marinas --- drivers/acpi/arm64/iort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 34972d7f4d29..736783c67ea0 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -773,7 +773,7 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) * add_device callback for dev, replay it to get things in order. */ if (!err) { - ops = dev->iommu_fwspec->ops; + ops = iort_fwspec_iommu_ops(dev->iommu_fwspec); err = iort_add_device_replay(ops, dev); } -- cgit v1.2.3 From 82d24d114f249d919b918ff8eefde4117db8f088 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Fri, 4 Aug 2017 10:17:00 -0700 Subject: arm64: compat: Remove leftover variable declaration Commit a1d5ebaf8ccd ("arm64: big-endian: don't treat code as data when copying sigret code") moved the 32-bit sigreturn trampoline code from the aarch32_sigret_code array to kuser32.S. The commit removed the array definition from signal32.c, but not its declaration in signal32.h. Remove the leftover declaration. 
Signed-off-by: Kevin Brodsky Signed-off-by: Mark Salyzyn Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/signal32.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h index eeaa97559bab..81abea0b7650 100644 --- a/arch/arm64/include/asm/signal32.h +++ b/arch/arm64/include/asm/signal32.h @@ -22,8 +22,6 @@ #define AARCH32_KERN_SIGRET_CODE_OFFSET 0x500 -extern const compat_ulong_t aarch32_sigret_code[6]; - int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs); int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, -- cgit v1.2.3 From 969ff73e72fe903a2354c51e01c1a1f937c544ca Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 26 Jul 2017 21:34:26 +0800 Subject: arm64: numa: Remove the unused parent_node() macro Commit a7be6e5a7f8d ("mm: drop useless local parameters of __register_one_node()") removes the last user of parent_node(). The parent_node() macro in ARM64 platform is unnecessary. Remove it for cleanup. Reported-by: Michael Ellerman Acked-by: Will Deacon Signed-off-by: Dou Liyang Cc: Michael Ellerman Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/numa.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index bf466d1876e3..ef7b23863a7c 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -7,9 +7,6 @@ #define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) -/* currently, arm64 implements flat NUMA topology */ -#define parent_node(node) (node) - int __node_distance(int from, int to); #define node_distance(a, b) __node_distance(a, b) -- cgit v1.2.3 From c5bc503cbeee8586395aa541d2b53c69c3dd6930 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 7 Aug 2017 12:10:51 +0100 Subject: arm64: remove __die()'s stack dump Our __die() implementation tries to dump the stack memory, in addition to a backtrace, which is problematic. For contemporary 16K stacks, this can be a lot of data, which can take a long time to dump, and can push other useful context out of the kernel's printk ringbuffer (and/or a user's scrollback buffer on an attached console). Additionally, the code implicitly assumes that the SP is on the task's stack, and tries to dump everything between the SP and the highest task stack address. When the SP points at an IRQ stack (or is corrupted), this makes the kernel attempt to dump vast amounts of VA space. With vmap'd stacks, this may result in erroneous accesses to peripherals. This patch removes the memory dump, leaving us to rely on the backtrace, and other means of dumping stack memory such as kdump. 
Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/kernel/traps.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c2a81bf8827e..9633773ca42c 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -237,8 +237,6 @@ static int __die(const char *str, int err, struct pt_regs *regs) end_of_stack(tsk)); if (!user_mode(regs)) { - dump_mem(KERN_EMERG, "Stack: ", regs->sp, - THREAD_SIZE + (unsigned long)task_stack_page(tsk)); dump_backtrace(regs, tsk); dump_instr(KERN_EMERG, regs); } -- cgit v1.2.3 From 48ac3c18cc62d4a23d5dc5c59f8720589d0de14b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 12:23:09 +0100 Subject: fork: allow arch-override of VMAP stack alignment In some cases, an architecture might wish its stacks to be aligned to a boundary larger than THREAD_SIZE. For example, using an alignment of double THREAD_SIZE can allow for stack overflows smaller than THREAD_SIZE to be detected by checking a single bit of the stack pointer. This patch allows architectures to override the alignment of VMAP'd stacks, by defining THREAD_ALIGN. Where not defined, this defaults to THREAD_SIZE, as is the case today. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: linux-kernel@vger.kernel.org --- include/linux/thread_info.h | 4 ++++ kernel/fork.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 250a27614328..905d769d8ddc 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -38,6 +38,10 @@ enum { #ifdef __KERNEL__ +#ifndef THREAD_ALIGN +#define THREAD_ALIGN THREAD_SIZE +#endif + #ifdef CONFIG_DEBUG_STACK_USAGE # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \ __GFP_ZERO) diff --git a/kernel/fork.c b/kernel/fork.c index 17921b0390b4..f12882a2323b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -88,6 +88,7 @@ #include #include #include +#include #include #include @@ -217,7 +218,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) return s->addr; } - stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, + stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START, VMALLOC_END, THREADINFO_GFP, PAGE_KERNEL, -- cgit v1.2.3 From 34be98f4944f99076f049a6806fc5f5207a755d3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Jul 2017 17:15:45 +0100 Subject: arm64: kernel: remove {THREAD,IRQ_STACK}_START_SP For historical reasons, we leave the top 16 bytes of our task and IRQ stacks unused, a practice used to ensure that the SP can always be masked to find the base of the current stack (historically, where thread_info could be found). However, this is not necessary, as: * When an exception is taken from a task stack, we decrement the SP by S_FRAME_SIZE and stash the exception registers before we compare the SP against the task stack. In such cases, the SP must be at least S_FRAME_SIZE below the limit, and can be safely masked to determine whether the task stack is in use. * When transitioning to an IRQ stack, we'll place a dummy frame onto the IRQ stack before enabling asynchronous exceptions, or executing code we expect to trigger faults. Thus, if an exception is taken from the IRQ stack, the SP must be at least 16 bytes below the limit. 
* We no longer mask the SP to find the thread_info, which is now found via sp_el0. Note that historically, the offset was critical to ensure that cpu_switch_to() found the correct stack for new threads that hadn't yet executed ret_from_fork(). Given that, this initial offset serves no purpose, and can be removed. This brings us in-line with other architectures (e.g. x86) which do not rely on this masking. Signed-off-by: Ard Biesheuvel [Mark: rebase, kill THREAD_START_SP, commit msg additions] Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/irq.h | 5 ++--- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/include/asm/thread_info.h | 1 - arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/smp.c | 2 +- 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 8ba89c4ca183..1ebe202b1a24 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -2,7 +2,6 @@ #define __ASM_IRQ_H #define IRQ_STACK_SIZE THREAD_SIZE -#define IRQ_STACK_START_SP THREAD_START_SP #ifndef __ASSEMBLER__ @@ -26,9 +25,9 @@ static inline int nr_legacy_irqs(void) static inline bool on_irq_stack(unsigned long sp) { unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); - unsigned long high = low + IRQ_STACK_START_SP; + unsigned long high = low + IRQ_STACK_SIZE; - return (low <= sp && sp <= high); + return (low <= sp && sp < high); } static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 64c9e78f9882..6687dd29f7e0 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -159,7 +159,7 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev, struct task_struct *next); #define task_pt_regs(p) \ - ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) + ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1) #define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc) #define KSTK_ESP(tsk) user_stack_pointer(task_pt_regs(tsk)) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 46c3b93cf865..b29ab0e12e60 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -30,7 +30,6 @@ #endif #define THREAD_SIZE 16384 -#define THREAD_START_SP (THREAD_SIZE - 16) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 612a077ba109..f31c7b26a686 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -272,7 +272,7 @@ alternative_else_nop_endif cbnz x25, 9998f adr_this_cpu x25, irq_stack, x26 - mov x26, #IRQ_STACK_START_SP + mov x26, #IRQ_STACK_SIZE add x26, x25, x26 /* switch to the irq stack */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dc66e6ec3a99..f13ddb2404f9 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -154,7 +154,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * page tables. 
*/ secondary_data.task = idle; - secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; + secondary_data.stack = task_stack_page(idle) + THREAD_SIZE; update_cpu_boot_status(CPU_MMU_OFF); __flush_dcache_area(&secondary_data, sizeof(secondary_data)); -- cgit v1.2.3 From b6531456ba279bb7cea5dd2e7b3ec6a3ed0f4668 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 19:43:56 +0100 Subject: arm64: factor out PAGE_* and CONT_* definitions Some headers rely on PAGE_* definitions from , but cannot include this due to potential circular includes. For example, a number of definitions in rely on PAGE_SHIFT, and includes . This requires users of these definitions to include both headers, which is fragile and error-prone. This patch ameliorates matters by moving the basic definitions out to a new header, . Both and are updated to include this, avoiding this fragility, and avoiding the possibility of circular include dependencies. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/memory.h | 1 + arch/arm64/include/asm/page-def.h | 34 ++++++++++++++++++++++++++++++++++ arch/arm64/include/asm/page.h | 12 +----------- 3 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 arch/arm64/include/asm/page-def.h diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 32f82723338a..77d55dcfb86c 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -25,6 +25,7 @@ #include #include #include +#include #include /* diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h new file mode 100644 index 000000000000..01591a29dc2e --- /dev/null +++ b/arch/arm64/include/asm/page-def.h @@ -0,0 +1,34 @@ +/* + * Based on arch/arm/include/asm/page.h + * + * Copyright (C) 1995-2003 Russell King + * Copyright (C) 2017 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#ifndef __ASM_PAGE_DEF_H +#define __ASM_PAGE_DEF_H + +#include + +/* PAGE_SHIFT determines the page size */ +/* CONT_SHIFT determines the number of pages which can be tracked together */ +#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT +#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) +#define CONT_MASK (~(CONT_SIZE-1)) + +#endif /* __ASM_PAGE_DEF_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 8472c6def5ef..60d02c81a3a2 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -19,17 +19,7 @@ #ifndef __ASM_PAGE_H #define __ASM_PAGE_H -#include - -/* PAGE_SHIFT determines the page size */ -/* CONT_SHIFT determines the number of pages which can be tracked together */ -#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT -#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT -#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) -#define CONT_MASK (~(CONT_SIZE-1)) +#include #ifndef __ASSEMBLY__ -- cgit v1.2.3 From dbc9344a68e506f19f80a9affc8fe7023a9cdc4c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 16:39:21 +0100 Subject: arm64: clean up THREAD_* definitions Currently we define THREAD_SIZE and THREAD_SIZE_ORDER separately, with the latter dependent on particular CONFIG_ARM64_*K_PAGES definitions. This is somewhat opaque, and will get in the way of future modifications to THREAD_SIZE. This patch cleans this up, defining both in terms of a common THREAD_SHIFT, and using PAGE_SHIFT to calculate THREAD_SIZE_ORDER, rather than using a number of definitions dependent on config symbols. Subsequent patches will make use of this to alter the stack size used in some configurations. At the same time, these are moved into , which will avoid circular include issues in subsequent patches. To ensure that existing code isn't adversely affected, is updated to transitively include these definitions. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/memory.h | 8 ++++++++ arch/arm64/include/asm/thread_info.h | 9 +-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 77d55dcfb86c..8ab4774e2616 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -102,6 +102,14 @@ #define KASAN_SHADOW_SIZE (0) #endif +#define THREAD_SHIFT 14 + +#if THREAD_SHIFT >= PAGE_SHIFT +#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) +#endif + +#define THREAD_SIZE (UL(1) << THREAD_SHIFT) + /* * Memory types available. 
*/ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index b29ab0e12e60..aa04b733b349 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -23,18 +23,11 @@ #include -#ifdef CONFIG_ARM64_4K_PAGES -#define THREAD_SIZE_ORDER 2 -#elif defined(CONFIG_ARM64_16K_PAGES) -#define THREAD_SIZE_ORDER 0 -#endif - -#define THREAD_SIZE 16384 - #ifndef __ASSEMBLY__ struct task_struct; +#include #include #include -- cgit v1.2.3 From f60ad4edcf07238a3d2646d65d8d217032452550 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 20 Jul 2017 12:26:48 +0100 Subject: arm64: clean up irq stack definitions Before we add yet another stack to the kernel, it would be nice to ensure that we consistently organise stack definitions and related helper functions. This patch moves the basic IRQ stack defintions to to live with their task stack counterparts. Helpers used for unwinding are moved into , where subsequent patches will add helpers for other stacks. Includes are fixed up accordingly. This patch is a pure refactoring -- there should be no functional changes as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/irq.h | 24 ------------------------ arch/arm64/include/asm/memory.h | 2 ++ arch/arm64/include/asm/stacktrace.h | 25 ++++++++++++++++++++++++- arch/arm64/kernel/ptrace.c | 1 + 4 files changed, 27 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 1ebe202b1a24..5e6f77239064 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -1,20 +1,12 @@ #ifndef __ASM_IRQ_H #define __ASM_IRQ_H -#define IRQ_STACK_SIZE THREAD_SIZE - #ifndef __ASSEMBLER__ -#include -#include - #include -#include struct pt_regs; -DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); - extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); static inline int nr_legacy_irqs(void) @@ -22,21 +14,5 @@ static inline int nr_legacy_irqs(void) return 0; } -static inline bool on_irq_stack(unsigned long sp) -{ - unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); - unsigned long high = low + IRQ_STACK_SIZE; - - return (low <= sp && sp < high); -} - -static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) -{ - unsigned long low = (unsigned long)task_stack_page(tsk); - unsigned long high = low + THREAD_SIZE; - - return (low <= sp && sp < high); -} - #endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 8ab4774e2616..1fc24532987e 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -110,6 +110,8 @@ #define THREAD_SIZE (UL(1) << THREAD_SHIFT) +#define IRQ_STACK_SIZE THREAD_SIZE + /* * Memory types available. 
*/ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 3bebab378c72..000e24182a5c 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -16,7 +16,12 @@ #ifndef __ASM_STACKTRACE_H #define __ASM_STACKTRACE_H -struct task_struct; +#include +#include +#include + +#include +#include struct stackframe { unsigned long fp; @@ -31,4 +36,22 @@ extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); +DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); + +static inline bool on_irq_stack(unsigned long sp) +{ + unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); + unsigned long high = low + IRQ_STACK_SIZE; + + return (low <= sp && sp < high); +} + +static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) +{ + unsigned long low = (unsigned long)task_stack_page(tsk); + unsigned long high = low + THREAD_SIZE; + + return (low <= sp && sp < high); +} + #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index baf0838205c7..a9f87157c371 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 8018ba4edfd3a8b46f876c65988bd0d8e35c32a6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 15:38:43 +0100 Subject: arm64: move SEGMENT_ALIGN to Currently we define SEGMENT_ALIGN directly in our vmlinux.lds.S. This is unfortunate, as the EFI stub currently open-codes the same number, and in future we'll want to fiddle with this. This patch moves the definition to our , where it can be used by both vmlinux.lds.S and the EFI stub code. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/memory.h | 19 +++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 16 ---------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 1fc24532987e..7fa6ad48d574 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -112,6 +112,25 @@ #define IRQ_STACK_SIZE THREAD_SIZE +/* + * Alignment of kernel segments (e.g. .text, .data). + */ +#if defined(CONFIG_DEBUG_ALIGN_RODATA) +/* + * 4 KB granule: 1 level 2 entry + * 16 KB granule: 128 level 3 entries, with contiguous bit + * 64 KB granule: 32 level 3 entries, with contiguous bit + */ +#define SEGMENT_ALIGN SZ_2M +#else +/* + * 4 KB granule: 16 level 3 entries, with contiguous bit + * 16 KB granule: 4 level 3 entries, without contiguous bit + * 64 KB granule: 1 level 3 entry + */ +#define SEGMENT_ALIGN SZ_64K +#endif + /* * Memory types available. 
*/ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 987a00ee446c..71565386d063 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -72,22 +72,6 @@ PECOFF_FILE_ALIGNMENT = 0x200; #define PECOFF_EDATA_PADDING #endif -#if defined(CONFIG_DEBUG_ALIGN_RODATA) -/* - * 4 KB granule: 1 level 2 entry - * 16 KB granule: 128 level 3 entries, with contiguous bit - * 64 KB granule: 32 level 3 entries, with contiguous bit - */ -#define SEGMENT_ALIGN SZ_2M -#else -/* - * 4 KB granule: 16 level 3 entries, with contiguous bit - * 16 KB granule: 4 level 3 entries, without contiguous bit - * 64 KB granule: 1 level 3 entry - */ -#define SEGMENT_ALIGN SZ_64K -#endif - SECTIONS { /* -- cgit v1.2.3 From 170976bcab073870af059b5e848c80689bd5e931 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 15:54:36 +0100 Subject: efi/arm64: add EFI_KIMG_ALIGN The EFI stub is intimately coupled with the kernel, and takes advantage of this by relocating the kernel at a weaker alignment than the documented boot protocol mandates. However, it does so by assuming it can align the kernel to the segment alignment, and assumes that this is 64K. In subsequent patches, we'll have to consider other details to determine this de-facto alignment constraint. This patch adds a new EFI_KIMG_ALIGN definition that will track the kernel's de-facto alignment requirements. Subsequent patches will modify this as required. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Matt Fleming --- arch/arm64/include/asm/efi.h | 3 +++ drivers/firmware/efi/libstub/arm64-stub.c | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 8f3043aba873..0e8cc3b85bb8 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); */ #define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ +#define EFI_KIMG_ALIGN SEGMENT_ALIGN + /* on arm64, the FDT may be located anywhere in system RAM */ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) { diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index b4c2589d7c91..af6ae95a5e34 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include @@ -81,9 +82,10 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, /* * If CONFIG_DEBUG_ALIGN_RODATA is not set, produce a * displacement in the interval [0, MIN_KIMG_ALIGN) that - * is a multiple of the minimal segment alignment (SZ_64K) + * doesn't violate this kernel's de-facto alignment + * constraints. */ - u32 mask = (MIN_KIMG_ALIGN - 1) & ~(SZ_64K - 1); + u32 mask = (MIN_KIMG_ALIGN - 1) & ~(EFI_KIMG_ALIGN - 1); u32 offset = !IS_ENABLED(CONFIG_DEBUG_ALIGN_RODATA) ? 
(phys_seed >> 32) & mask : TEXT_OFFSET; -- cgit v1.2.3 From b11e5759bfac0c474d95ec4780b1566350e64cad Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 19 Jul 2017 17:24:49 +0100 Subject: arm64: factor out entry stack manipulation In subsequent patches, we will detect stack overflow in our exception entry code, by verifying the SP after it has been decremented to make space for the exception regs. This verification code is small, and we can minimize its impact by placing it directly in the vectors. To avoid redundant modification of the SP, we also need to move the initial decrement of the SP into the vectors. As a preparatory step, this patch introduces kernel_ventry, which performs this decrement, and updates the entry code accordingly. Subsequent patches will fold SP verification into kernel_ventry. There should be no functional change as a result of this patch. Signed-off-by: Ard Biesheuvel [Mark: turn into prep patch, expand commit msg] Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Catalin Marinas Cc: James Morse --- arch/arm64/kernel/entry.S | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f31c7b26a686..58eba94279c5 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -69,8 +69,13 @@ #define BAD_FIQ 2 #define BAD_ERROR 3 - .macro kernel_entry, el, regsize = 64 + .macro kernel_ventry label + .align 7 sub sp, sp, #S_FRAME_SIZE + b \label + .endm + + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 .endif @@ -319,31 +324,31 @@ tsk .req x28 // current thread_info .align 11 ENTRY(vectors) - ventry el1_sync_invalid // Synchronous EL1t - ventry el1_irq_invalid // IRQ EL1t - ventry el1_fiq_invalid // FIQ EL1t - ventry el1_error_invalid // Error EL1t + kernel_ventry el1_sync_invalid // Synchronous EL1t + kernel_ventry el1_irq_invalid // IRQ EL1t + kernel_ventry el1_fiq_invalid // FIQ EL1t + kernel_ventry el1_error_invalid // Error EL1t - ventry el1_sync // Synchronous EL1h - ventry el1_irq // IRQ EL1h - ventry el1_fiq_invalid // FIQ EL1h - ventry el1_error_invalid // Error EL1h + kernel_ventry el1_sync // Synchronous EL1h + kernel_ventry el1_irq // IRQ EL1h + kernel_ventry el1_fiq_invalid // FIQ EL1h + kernel_ventry el1_error_invalid // Error EL1h - ventry el0_sync // Synchronous 64-bit EL0 - ventry el0_irq // IRQ 64-bit EL0 - ventry el0_fiq_invalid // FIQ 64-bit EL0 - ventry el0_error_invalid // Error 64-bit EL0 + kernel_ventry el0_sync // Synchronous 64-bit EL0 + kernel_ventry el0_irq // IRQ 64-bit EL0 + kernel_ventry el0_fiq_invalid // FIQ 64-bit EL0 + kernel_ventry el0_error_invalid // Error 64-bit EL0 #ifdef CONFIG_COMPAT - ventry el0_sync_compat // Synchronous 32-bit EL0 - ventry el0_irq_compat // IRQ 32-bit EL0 - ventry el0_fiq_invalid_compat // FIQ 32-bit EL0 - ventry el0_error_invalid_compat // Error 32-bit EL0 + kernel_ventry el0_sync_compat // Synchronous 32-bit EL0 + kernel_ventry el0_irq_compat // IRQ 32-bit EL0 + kernel_ventry el0_fiq_invalid_compat // FIQ 32-bit EL0 + kernel_ventry el0_error_invalid_compat // Error 32-bit EL0 #else - ventry el0_sync_invalid // Synchronous 32-bit EL0 - ventry el0_irq_invalid // IRQ 32-bit EL0 - ventry el0_fiq_invalid // FIQ 32-bit EL0 - ventry el0_error_invalid // Error 32-bit EL0 + kernel_ventry el0_sync_invalid // Synchronous 32-bit EL0 + kernel_ventry el0_irq_invalid // IRQ 32-bit EL0 + kernel_ventry 
el0_fiq_invalid // FIQ 32-bit EL0 + kernel_ventry el0_error_invalid // Error 32-bit EL0 #endif END(vectors) -- cgit v1.2.3 From 8ea41b11ef746e1ac97f8c90911e5c61f8bd5cc0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 15 Jul 2017 17:23:13 +0100 Subject: arm64: assembler: allow adr_this_cpu to use the stack pointer Given that adr_this_cpu already requires a temp register in addition to the destination register, tweak the instruction sequence so that sp may be used as well. This will simplify switching to per-cpu stacks in subsequent patches. While this limits the range of adr_this_cpu, to +/-4GiB, we don't currently use adr_this_cpu in modules, and this is not problematic for the main kernel image. Signed-off-by: Ard Biesheuvel [Mark: add more commit text] Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/assembler.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 610a42018241..2f2bd5192b5e 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -230,12 +230,18 @@ lr .req x30 // link register .endm /* - * @dst: Result of per_cpu(sym, smp_processor_id()) + * @dst: Result of per_cpu(sym, smp_processor_id()), can be SP for + * non-module code * @sym: The name of the per-cpu variable * @tmp: scratch register */ .macro adr_this_cpu, dst, sym, tmp +#ifndef MODULE + adrp \tmp, \sym + add \dst, \tmp, #:lo12:\sym +#else adr_l \dst, \sym +#endif mrs \tmp, tpidr_el1 add \dst, \dst, \tmp .endm -- cgit v1.2.3 From f60fe78f133243e6de0f05fdefc3ed2f3c5085ca Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 31 Jul 2017 21:17:03 +0100 Subject: arm64: use an irq stack pointer We allocate our IRQ stacks using a percpu array. This allows us to generate our IRQ stack pointers with adr_this_cpu, but bloats the kernel Image with the boot CPU's IRQ stack. Additionally, these are packed with other percpu variables, and aren't guaranteed to have guard pages. When we enable VMAP_STACK we'll want to vmap our IRQ stacks also, in order to provide guard pages and to permit more stringent alignment requirements. Doing so will require that we use a percpu pointer to each IRQ stack, rather than allocating a percpu IRQ stack in the kernel image. This patch updates our IRQ stack code to use a percpu pointer to the base of each IRQ stack. This will allow us to change the way the stack is allocated with minimal changes elsewhere. In some cases we may try to backtrace before the IRQ stack pointers are initialised, so on_irq_stack() is updated to account for this. In testing with cyclictest, there was no measureable difference between using adr_this_cpu (for irq_stack) and ldr_this_cpu (for irq_stack_ptr) in the IRQ entry path. 
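In essence, the change replaces the per-cpu stack array with a per-cpu pointer to the stack base. A minimal sketch of the resulting accessors, condensed from the diff below for readability (illustrative only, not an additional hunk):

    DEFINE_PER_CPU(unsigned long *, irq_stack_ptr);   /* pointer only, no embedded stack */

    static inline bool on_irq_stack(unsigned long sp)
    {
            unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);

            /* A backtrace taken before init_irq_stacks() runs sees a NULL base. */
            if (!low)
                    return false;

            return low <= sp && sp < low + IRQ_STACK_SIZE;
    }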
Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/stacktrace.h | 7 +++++-- arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/irq.c | 10 ++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 000e24182a5c..4c68d8a81988 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -36,13 +36,16 @@ extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); -DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); +DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); static inline bool on_irq_stack(unsigned long sp) { - unsigned long low = (unsigned long)raw_cpu_ptr(irq_stack); + unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); unsigned long high = low + IRQ_STACK_SIZE; + if (!low) + return false; + return (low <= sp && sp < high); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 58eba94279c5..52348869f82f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -276,7 +276,7 @@ alternative_else_nop_endif and x25, x25, #~(THREAD_SIZE - 1) cbnz x25, 9998f - adr_this_cpu x25, irq_stack, x26 + ldr_this_cpu x25, irq_stack_ptr, x26 mov x26, #IRQ_STACK_SIZE add x26, x25, x26 diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 2386b26c0712..5141282e47d5 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -32,6 +32,7 @@ unsigned long irq_err_count; /* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */ DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16); +DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); int arch_show_interrupts(struct seq_file *p, int prec) { @@ -50,8 +51,17 @@ void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) handle_arch_irq = handle_irq; } +static void init_irq_stacks(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu); +} + void __init init_IRQ(void) { + init_irq_stacks(); irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); -- cgit v1.2.3 From e3067861ba6650a566a6273738c23c956ad55c02 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 21 Jul 2017 14:25:33 +0100 Subject: arm64: add basic VMAP_STACK support This patch enables arm64 to be built with vmap'd task and IRQ stacks. As vmap'd stacks are mapped at page granularity, stacks must be a multiple of PAGE_SIZE. This means that a 64K page kernel must use stacks of at least 64K in size. To minimize the increase in Image size, IRQ stacks are dynamically allocated at boot time, rather than embedding the boot CPU's IRQ stack in the kernel image. This patch was co-authored by Ard Biesheuvel and Mark Rutland. 
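To make the sizing rules concrete, here is a worked example using the MIN_THREAD_SHIFT/THREAD_SHIFT definitions added to memory.h below (numbers only, not an additional hunk):

    4K pages : PAGE_SHIFT = 12, THREAD_SHIFT stays 14 -> 16K stacks, THREAD_SIZE_ORDER = 2
    16K pages: PAGE_SHIFT = 14, THREAD_SHIFT stays 14 -> 16K stacks, THREAD_SIZE_ORDER = 0
    64K pages: PAGE_SHIFT = 16, so with VMAP_STACK THREAD_SHIFT is raised to 16
               -> 64K stacks, i.e. one whole page per stack

The first two rows match the THREAD_SIZE_ORDER values that were hard-coded in thread_info.h before the earlier cleanup patch, and THREAD_ALIGN doubles the stack alignment under VMAP_STACK so that the later overflow-detection patch can test a single bit of the stack pointer.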
Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Catalin Marinas Cc: James Morse --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/efi.h | 7 ++++++- arch/arm64/include/asm/memory.h | 23 ++++++++++++++++++++++- arch/arm64/kernel/irq.c | 30 ++++++++++++++++++++++++++++-- arch/arm64/kernel/vmlinux.lds.S | 2 +- 5 files changed, 58 insertions(+), 5 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index dfd908630631..d66f9db3e6db 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -75,6 +75,7 @@ config ARM64 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_VMAP_STACK select HAVE_ARM_SMCCC select HAVE_EBPF_JIT select HAVE_C_RECORDMCOUNT diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 0e8cc3b85bb8..2b1e5def2e49 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -49,7 +49,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); */ #define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ -#define EFI_KIMG_ALIGN SEGMENT_ALIGN +/* + * In some configurations (e.g. VMAP_STACK && 64K pages), stacks built into the + * kernel need greater alignment than we require the segments to be padded to. + */ +#define EFI_KIMG_ALIGN \ + (SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN) /* on arm64, the FDT may be located anywhere in system RAM */ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 7fa6ad48d574..c5cd2c599b24 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -102,7 +102,17 @@ #define KASAN_SHADOW_SIZE (0) #endif -#define THREAD_SHIFT 14 +#define MIN_THREAD_SHIFT 14 + +/* + * VMAP'd stacks are allocated at page granularity, so we must ensure that such + * stacks are a multiple of page size. + */ +#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT) +#define THREAD_SHIFT PAGE_SHIFT +#else +#define THREAD_SHIFT MIN_THREAD_SHIFT +#endif #if THREAD_SHIFT >= PAGE_SHIFT #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) @@ -110,6 +120,17 @@ #define THREAD_SIZE (UL(1) << THREAD_SHIFT) +/* + * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by + * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry + * assembly. + */ +#ifdef CONFIG_VMAP_STACK +#define THREAD_ALIGN (2 * THREAD_SIZE) +#else +#define THREAD_ALIGN THREAD_SIZE +#endif + #define IRQ_STACK_SIZE THREAD_SIZE /* diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 5141282e47d5..713561e5bcab 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -23,15 +23,15 @@ #include #include +#include #include #include #include #include +#include unsigned long irq_err_count; -/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. 
*/ -DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16); DEFINE_PER_CPU(unsigned long *, irq_stack_ptr); int arch_show_interrupts(struct seq_file *p, int prec) @@ -51,6 +51,31 @@ void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) handle_arch_irq = handle_irq; } +#ifdef CONFIG_VMAP_STACK +static void init_irq_stacks(void) +{ + int cpu; + unsigned long *p; + + for_each_possible_cpu(cpu) { + /* + * To ensure that VMAP'd stack overflow detection works + * correctly, the IRQ stacks need to have the same + * alignment as other stacks. + */ + p = __vmalloc_node_range(IRQ_STACK_SIZE, THREAD_ALIGN, + VMALLOC_START, VMALLOC_END, + THREADINFO_GFP, PAGE_KERNEL, + 0, cpu_to_node(cpu), + __builtin_return_address(0)); + + per_cpu(irq_stack_ptr, cpu) = p; + } +} +#else +/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */ +DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); + static void init_irq_stacks(void) { int cpu; @@ -58,6 +83,7 @@ static void init_irq_stacks(void) for_each_possible_cpu(cpu) per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu); } +#endif void __init init_IRQ(void) { diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 71565386d063..fe56c268a7d9 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -176,7 +176,7 @@ SECTIONS _data = .; _sdata = .; - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) /* * Data written with the MMU off but read with the MMU on requires -- cgit v1.2.3 From 12964443e8d1914010f9269f9f9abc4e122bc6ca Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 1 Aug 2017 18:51:15 +0100 Subject: arm64: add on_accessible_stack() Both unwind_frame() and dump_backtrace() try to check whether a stack address is sane to access, with very similar logic. Both will need updating in order to handle overflow stacks. Factor out this logic into a helper, so that we can avoid further duplication when we add overflow stacks. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/stacktrace.h | 16 ++++++++++++++++ arch/arm64/kernel/stacktrace.c | 7 +------ arch/arm64/kernel/traps.c | 3 +-- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 4c68d8a81988..92ddb6d25cf3 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -57,4 +57,20 @@ static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) return (low <= sp && sp < high); } +/* + * We can only safely access per-cpu stacks from current in a non-preemptible + * context. + */ +static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp) +{ + if (on_task_stack(tsk, sp)) + return true; + if (tsk != current || preemptible()) + return false; + if (on_irq_stack(sp)) + return true; + + return false; +} + #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 35588caad9d0..3144584617e7 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -50,12 +50,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) if (!tsk) tsk = current; - /* - * Switching between stacks is valid when tracing current and in - * non-preemptible context. 
- */ - if (!(tsk == current && !preemptible() && on_irq_stack(fp)) && - !on_task_stack(tsk, fp)) + if (!on_accessible_stack(tsk, fp)) return -EINVAL; frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 9633773ca42c..d01c5988354b 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -193,8 +193,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) if (in_entry_text(frame.pc)) { stack = frame.fp - offsetof(struct pt_regs, stackframe); - if (on_task_stack(tsk, stack) || - (tsk == current && !preemptible() && on_irq_stack(stack))) + if (on_accessible_stack(tsk, stack)) dump_mem("", "Exception stack", stack, stack + sizeof(struct pt_regs)); } -- cgit v1.2.3 From 872d8327ce8982883b8237b2c320c8666f14e561 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 20:30:35 +0100 Subject: arm64: add VMAP_STACK overflow detection This patch adds stack overflow detection to arm64, usable when vmap'd stacks are in use. Overflow is detected in a small preamble executed for each exception entry, which checks whether there is enough space on the current stack for the general purpose registers to be saved. If there is not enough space, the overflow handler is invoked on a per-cpu overflow stack. This approach preserves the original exception information in ESR_EL1 (and where appropriate, FAR_EL1). Task and IRQ stacks are aligned to double their size, enabling overflow to be detected with a single bit test. For example, a 16K stack is aligned to 32K, ensuring that bit 14 of the SP must be zero. On an overflow (or underflow), this bit is flipped. Thus, overflow (of less than the size of the stack) can be detected by testing whether this bit is set. The overflow check is performed before any attempt is made to access the stack, avoiding recursive faults (and the loss of exception information these would entail). As logical operations cannot be performed on the SP directly, the SP is temporarily swapped with a general purpose register using arithmetic operations to enable the test to be performed. This gives us a useful error message on stack overflow, as can be trigger with the LKDTM overflow test: [ 305.388749] lkdtm: Performing direct entry OVERFLOW [ 305.395444] Insufficient stack space to handle exception! 
[ 305.395482] ESR: 0x96000047 -- DABT (current EL) [ 305.399890] FAR: 0xffff00000a5e7f30 [ 305.401315] Task stack: [0xffff00000a5e8000..0xffff00000a5ec000] [ 305.403815] IRQ stack: [0xffff000008000000..0xffff000008004000] [ 305.407035] Overflow stack: [0xffff80003efce4e0..0xffff80003efcf4e0] [ 305.409622] CPU: 0 PID: 1219 Comm: sh Not tainted 4.13.0-rc3-00021-g9636aea #5 [ 305.412785] Hardware name: linux,dummy-virt (DT) [ 305.415756] task: ffff80003d051c00 task.stack: ffff00000a5e8000 [ 305.419221] PC is at recursive_loop+0x10/0x48 [ 305.421637] LR is at recursive_loop+0x38/0x48 [ 305.423768] pc : [] lr : [] pstate: 40000145 [ 305.428020] sp : ffff00000a5e7f50 [ 305.430469] x29: ffff00000a5e8350 x28: ffff80003d051c00 [ 305.433191] x27: ffff000008981000 x26: ffff000008f80400 [ 305.439012] x25: ffff00000a5ebeb8 x24: ffff00000a5ebeb8 [ 305.440369] x23: ffff000008f80138 x22: 0000000000000009 [ 305.442241] x21: ffff80003ce65000 x20: ffff000008f80188 [ 305.444552] x19: 0000000000000013 x18: 0000000000000006 [ 305.446032] x17: 0000ffffa2601280 x16: ffff0000081fe0b8 [ 305.448252] x15: ffff000008ff546d x14: 000000000047a4c8 [ 305.450246] x13: ffff000008ff7872 x12: 0000000005f5e0ff [ 305.452953] x11: ffff000008ed2548 x10: 000000000005ee8d [ 305.454824] x9 : ffff000008545380 x8 : ffff00000a5e8770 [ 305.457105] x7 : 1313131313131313 x6 : 00000000000000e1 [ 305.459285] x5 : 0000000000000000 x4 : 0000000000000000 [ 305.461781] x3 : 0000000000000000 x2 : 0000000000000400 [ 305.465119] x1 : 0000000000000013 x0 : 0000000000000012 [ 305.467724] Kernel panic - not syncing: kernel stack overflow [ 305.470561] CPU: 0 PID: 1219 Comm: sh Not tainted 4.13.0-rc3-00021-g9636aea #5 [ 305.473325] Hardware name: linux,dummy-virt (DT) [ 305.475070] Call trace: [ 305.476116] [] dump_backtrace+0x0/0x378 [ 305.478991] [] show_stack+0x14/0x20 [ 305.481237] [] dump_stack+0x98/0xb8 [ 305.483294] [] panic+0x118/0x280 [ 305.485673] [] nmi_panic+0x6c/0x70 [ 305.486216] [] handle_bad_stack+0x118/0x128 [ 305.486612] Exception stack(0xffff80003efcf3a0 to 0xffff80003efcf4e0) [ 305.487334] f3a0: 0000000000000012 0000000000000013 0000000000000400 0000000000000000 [ 305.488025] f3c0: 0000000000000000 0000000000000000 00000000000000e1 1313131313131313 [ 305.488908] f3e0: ffff00000a5e8770 ffff000008545380 000000000005ee8d ffff000008ed2548 [ 305.489403] f400: 0000000005f5e0ff ffff000008ff7872 000000000047a4c8 ffff000008ff546d [ 305.489759] f420: ffff0000081fe0b8 0000ffffa2601280 0000000000000006 0000000000000013 [ 305.490256] f440: ffff000008f80188 ffff80003ce65000 0000000000000009 ffff000008f80138 [ 305.490683] f460: ffff00000a5ebeb8 ffff00000a5ebeb8 ffff000008f80400 ffff000008981000 [ 305.491051] f480: ffff80003d051c00 ffff00000a5e8350 ffff00000859f358 ffff00000a5e7f50 [ 305.491444] f4a0: ffff00000859f330 0000000040000145 0000000000000000 0000000000000000 [ 305.492008] f4c0: 0001000000000000 0000000000000000 ffff00000a5e8350 ffff00000859f330 [ 305.493063] [] __bad_stack+0x88/0x8c [ 305.493396] [] recursive_loop+0x10/0x48 [ 305.493731] [] recursive_loop+0x38/0x48 [ 305.494088] [] recursive_loop+0x38/0x48 [ 305.494425] [] recursive_loop+0x38/0x48 [ 305.494649] [] recursive_loop+0x38/0x48 [ 305.494898] [] recursive_loop+0x38/0x48 [ 305.495205] [] recursive_loop+0x38/0x48 [ 305.495453] [] recursive_loop+0x38/0x48 [ 305.495708] [] recursive_loop+0x38/0x48 [ 305.496000] [] recursive_loop+0x38/0x48 [ 305.496302] [] recursive_loop+0x38/0x48 [ 305.496644] [] recursive_loop+0x38/0x48 [ 305.496894] [] recursive_loop+0x38/0x48 [ 305.497138] [] 
recursive_loop+0x38/0x48 [ 305.497325] [] lkdtm_OVERFLOW+0x14/0x20 [ 305.497506] [] lkdtm_do_action+0x1c/0x28 [ 305.497786] [] direct_entry+0xe0/0x170 [ 305.498095] [] full_proxy_write+0x60/0xa8 [ 305.498387] [] __vfs_write+0x1c/0x128 [ 305.498679] [] vfs_write+0xa0/0x1b0 [ 305.498926] [] SyS_write+0x44/0xa0 [ 305.499182] Exception stack(0xffff00000a5ebec0 to 0xffff00000a5ec000) [ 305.499429] bec0: 0000000000000001 000000001c4cf5e0 0000000000000009 000000001c4cf5e0 [ 305.499674] bee0: 574f4c465245564f 0000000000000000 0000000000000000 8000000080808080 [ 305.499904] bf00: 0000000000000040 0000000000000038 fefefeff1b4bc2ff 7f7f7f7f7f7fff7f [ 305.500189] bf20: 0101010101010101 0000000000000000 000000000047a4c8 0000000000000038 [ 305.500712] bf40: 0000000000000000 0000ffffa2601280 0000ffffc63f6068 00000000004b5000 [ 305.501241] bf60: 0000000000000001 000000001c4cf5e0 0000000000000009 000000001c4cf5e0 [ 305.501791] bf80: 0000000000000020 0000000000000000 00000000004b5000 000000001c4cc458 [ 305.502314] bfa0: 0000000000000000 0000ffffc63f7950 000000000040a3c4 0000ffffc63f70e0 [ 305.502762] bfc0: 0000ffffa2601268 0000000080000000 0000000000000001 0000000000000040 [ 305.503207] bfe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 305.503680] [] el0_svc_naked+0x24/0x28 [ 305.504720] Kernel Offset: disabled [ 305.505189] CPU features: 0x002082 [ 305.505473] Memory Limit: none [ 305.506181] ---[ end Kernel panic - not syncing: kernel stack overflow This patch was co-authored by Ard Biesheuvel and Mark Rutland. Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Catalin Marinas Cc: James Morse --- arch/arm64/include/asm/memory.h | 2 ++ arch/arm64/include/asm/stacktrace.h | 16 +++++++++ arch/arm64/kernel/entry.S | 70 +++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/traps.c | 39 +++++++++++++++++++++ 4 files changed, 127 insertions(+) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c5cd2c599b24..1a025b744107 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -133,6 +133,8 @@ #define IRQ_STACK_SIZE THREAD_SIZE +#define OVERFLOW_STACK_SIZE SZ_4K + /* * Alignment of kernel segments (e.g. .text, .data). */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 92ddb6d25cf3..6ad30776e984 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -57,6 +57,20 @@ static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp) return (low <= sp && sp < high); } +#ifdef CONFIG_VMAP_STACK +DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); + +static inline bool on_overflow_stack(unsigned long sp) +{ + unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack); + unsigned long high = low + OVERFLOW_STACK_SIZE; + + return (low <= sp && sp < high); +} +#else +static inline bool on_overflow_stack(unsigned long sp) { return false; } +#endif + /* * We can only safely access per-cpu stacks from current in a non-preemptible * context. 
@@ -69,6 +83,8 @@ static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp return false; if (on_irq_stack(sp)) return true; + if (on_overflow_stack(sp)) + return true; return false; } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 52348869f82f..3ef6e2297fb4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -72,6 +72,48 @@ .macro kernel_ventry label .align 7 sub sp, sp, #S_FRAME_SIZE +#ifdef CONFIG_VMAP_STACK + /* + * Test whether the SP has overflowed, without corrupting a GPR. + * Task and IRQ stacks are aligned to (1 << THREAD_SHIFT). + */ + add sp, sp, x0 // sp' = sp + x0 + sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp + tbnz x0, #THREAD_SHIFT, 0f + sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 + sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp + b \label + +0: + /* + * Either we've just detected an overflow, or we've taken an exception + * while on the overflow stack. Either way, we won't return to + * userspace, and can clobber EL0 registers to free up GPRs. + */ + + /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */ + msr tpidr_el0, x0 + + /* Recover the original x0 value and stash it in tpidrro_el0 */ + sub x0, sp, x0 + msr tpidrro_el0, x0 + + /* Switch to the overflow stack */ + adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0 + + /* + * Check whether we were already on the overflow stack. This may happen + * after panic() re-enables interrupts. + */ + mrs x0, tpidr_el0 // sp of interrupted context + sub x0, sp, x0 // delta with top of overflow stack + tst x0, #~(OVERFLOW_STACK_SIZE - 1) // within range? + b.ne __bad_stack // no? -> bad stack pointer + + /* We were already on the overflow stack. Restore sp/x0 and carry on. */ + sub sp, sp, x0 + mrs x0, tpidrro_el0 +#endif b \label .endm @@ -352,6 +394,34 @@ ENTRY(vectors) #endif END(vectors) +#ifdef CONFIG_VMAP_STACK + /* + * We detected an overflow in kernel_ventry, which switched to the + * overflow stack. Stash the exception regs, and head to our overflow + * handler. + */ +__bad_stack: + /* Restore the original x0 value */ + mrs x0, tpidrro_el0 + + /* + * Store the original GPRs to the new stack. The orginal SP (minus + * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry. 
+ */ + sub sp, sp, #S_FRAME_SIZE + kernel_entry 1 + mrs x0, tpidr_el0 + add x0, x0, #S_FRAME_SIZE + str x0, [sp, #S_SP] + + /* Stash the regs for handle_bad_stack */ + mov x0, sp + + /* Time to die */ + bl handle_bad_stack + ASM_BUG() +#endif /* CONFIG_VMAP_STACK */ + /* * Invalid mode handlers */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d01c5988354b..2d591804e46f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -41,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -666,6 +668,43 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) force_sig_info(info.si_signo, &info, current); } +#ifdef CONFIG_VMAP_STACK + +DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) + __aligned(16); + +asmlinkage void handle_bad_stack(struct pt_regs *regs) +{ + unsigned long tsk_stk = (unsigned long)current->stack; + unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); + unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); + unsigned int esr = read_sysreg(esr_el1); + unsigned long far = read_sysreg(far_el1); + + console_verbose(); + pr_emerg("Insufficient stack space to handle exception!"); + + pr_emerg("ESR: 0x%08x -- %s\n", esr, esr_get_class_string(esr)); + pr_emerg("FAR: 0x%016lx\n", far); + + pr_emerg("Task stack: [0x%016lx..0x%016lx]\n", + tsk_stk, tsk_stk + THREAD_SIZE); + pr_emerg("IRQ stack: [0x%016lx..0x%016lx]\n", + irq_stk, irq_stk + THREAD_SIZE); + pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n", + ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE); + + __show_regs(regs); + + /* + * We use nmi_panic to limit the potential for recusive overflows, and + * to get a better stack trace. + */ + nmi_panic(NULL, "kernel stack overflow"); + cpu_park_loop(); +} +#endif + void __pte_error(const char *file, int line, unsigned long val) { pr_err("%s:%d: bad pte %016lx.\n", file, line, val); -- cgit v1.2.3 From 3b66023d574fee8a481f8e4e1b5bd15583a3b5bf Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 18 Aug 2017 14:53:47 +0100 Subject: arm64: neon/efi: Make EFI fpsimd save/restore variables static The percpu variables efi_fpsimd_state and efi_fpsimd_state_used, used by the FPSIMD save/restore routines for EFI calls, are unintentionally global. There's no reason for anything outside fpsimd.c to touch these, so this patch makes them static (as they should have been in the first place). 
Signed-off-by: Dave Martin Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 9da4e636b328..3a68cf38a6b3 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -321,8 +321,8 @@ void kernel_neon_end(void) } EXPORT_SYMBOL(kernel_neon_end); -DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state); -DEFINE_PER_CPU(bool, efi_fpsimd_state_used); +static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state); +static DEFINE_PER_CPU(bool, efi_fpsimd_state_used); /* * EFI runtime services support functions -- cgit v1.2.3 From 3bbf7157ac66a88d94b291d4d5e2b2a9319a0f90 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 26 Jun 2017 14:27:36 +0100 Subject: arm64: Convert pte handling from inline asm to using (cmp)xchg With the support for hardware updates of the access and dirty states, the following pte handling functions had to be implemented using exclusives: __ptep_test_and_clear_young(), ptep_get_and_clear(), ptep_set_wrprotect() and ptep_set_access_flags(). To take advantage of the LSE atomic instructions and also make the code cleaner, convert these pte functions to use the more generic cmpxchg()/xchg(). Reviewed-by: Will Deacon Acked-by: Mark Rutland Acked-by: Steve Capper Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 71 +++++++++++++++++++--------------------- arch/arm64/mm/fault.c | 24 +++++++------- 2 files changed, 44 insertions(+), 51 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6eae342ced6b..9127688ae775 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -39,6 +39,7 @@ #ifndef __ASSEMBLY__ +#include #include #include @@ -173,6 +174,11 @@ static inline pte_t pte_clear_rdonly(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_RDONLY)); } +static inline pte_t pte_set_rdonly(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_RDONLY)); +} + static inline pte_t pte_mkpresent(pte_t pte) { return set_pte_bit(pte, __pgprot(PTE_VALID)); @@ -593,20 +599,17 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma, #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int __ptep_test_and_clear_young(pte_t *ptep) { - pteval_t pteval; - unsigned int tmp, res; + pte_t old_pte, pte; - asm volatile("// __ptep_test_and_clear_young\n" - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n" - " and %0, %0, %4 // clear PTE_AF\n" - " stxr %w1, %0, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res) - : "L" (~PTE_AF), "I" (ilog2(PTE_AF))); + pte = READ_ONCE(*ptep); + do { + old_pte = pte; + pte = pte_mkold(pte); + pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), + pte_val(old_pte), pte_val(pte)); + } while (pte_val(pte) != pte_val(old_pte)); - return res; + return pte_young(pte); } static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, @@ -630,17 +633,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - pteval_t old_pteval; - unsigned int tmp; - - asm volatile("// ptep_get_and_clear\n" - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " stxr %w1, xzr, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))); - - return __pte(old_pteval); + return 
__pte(xchg_relaxed(&pte_val(*ptep), 0)); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -659,21 +652,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - pteval_t pteval; - unsigned long tmp; - - asm volatile("// ptep_set_wrprotect\n" - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " tst %0, %4 // check for hw dirty (!PTE_RDONLY)\n" - " csel %1, %3, xzr, eq // set PTE_DIRTY|PTE_RDONLY if dirty\n" - " orr %0, %0, %1 // if !dirty, PTE_RDONLY is already set\n" - " and %0, %0, %5 // clear PTE_WRITE/PTE_DBM\n" - " stxr %w1, %0, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) - : "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE) - : "cc"); + pte_t old_pte, pte; + + pte = READ_ONCE(*ptep); + do { + old_pte = pte; + /* + * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY + * clear), set the PTE_DIRTY and PTE_RDONLY bits. + */ + if (pte_hw_dirty(pte)) { + pte = pte_mkdirty(pte); + pte = pte_set_rdonly(pte); + } + pte = pte_wrprotect(pte); + pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), + pte_val(old_pte), pte_val(pte)); + } while (pte_val(pte) != pte_val(old_pte)); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 52ee273afeec..430eaf82da49 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -197,8 +198,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty) { - pteval_t old_pteval; - unsigned int tmp; + pteval_t old_pteval, pteval; if (pte_same(*ptep, entry)) return 0; @@ -208,7 +208,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, /* set PTE_RDONLY if actual read-only or clean PTE */ if (!pte_write(entry) || !pte_sw_dirty(entry)) - pte_val(entry) |= PTE_RDONLY; + entry = pte_set_rdonly(entry); /* * Setting the flags must be done atomically to avoid racing with the @@ -217,16 +217,14 @@ int ptep_set_access_flags(struct vm_area_struct *vma, * (calculated as: a & b == ~(~a | ~b)). */ pte_val(entry) ^= PTE_RDONLY; - asm volatile("// ptep_set_access_flags\n" - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n" - " orr %0, %0, %4 // set flags\n" - " eor %0, %0, %3 // negate final PTE_RDONLY\n" - " stxr %w1, %0, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) - : "L" (PTE_RDONLY), "r" (pte_val(entry))); + pteval = READ_ONCE(pte_val(*ptep)); + do { + old_pteval = pteval; + pteval ^= PTE_RDONLY; + pteval |= pte_val(entry); + pteval ^= PTE_RDONLY; + pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); + } while (pteval != old_pteval); flush_tlb_fix_spurious_fault(vma, address); return 1; -- cgit v1.2.3 From 0966253d7ccddc42a5211b3488bb4f202c04de1b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Jul 2017 11:46:39 +0100 Subject: kvm: arm64: Convert kvm_set_s2pte_readonly() from inline asm to cmpxchg() To take advantage of the LSE atomic instructions and also make the code cleaner, convert the kvm_set_s2pte_readonly() function to use the more generic cmpxchg(). 
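Both conversions in this pair of patches follow the same read-modify-write shape. A minimal sketch of that pattern (the helper name is invented for illustration; it is not part of either diff):

    static inline void pte_clear_set_sketch(pte_t *ptep, pteval_t clr, pteval_t set)
    {
            pteval_t old_pteval, pteval;

            pteval = READ_ONCE(pte_val(*ptep));
            do {
                    old_pteval = pteval;
                    pteval = (old_pteval & ~clr) | set;
                    /*
                     * cmpxchg_relaxed() returns the value it actually found; if
                     * that differs from what we read, the pte changed underneath
                     * us (e.g. a hardware AF/DBM update), so retry.
                     */
                    pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
            } while (pteval != old_pteval);
    }

With LSE atomics available, each iteration becomes a single compare-and-swap instead of an LDXR/STXR exclusive sequence, which is the clean-up both commit messages describe.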
Cc: Marc Zyngier Reviewed-by: Will Deacon Reviewed-by: Christoffer Dall Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_mmu.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index a89cc22abadc..672c8684d5c2 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -175,18 +175,15 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) static inline void kvm_set_s2pte_readonly(pte_t *pte) { - pteval_t pteval; - unsigned long tmp; - - asm volatile("// kvm_set_s2pte_readonly\n" - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " and %0, %0, %3 // clear PTE_S2_RDWR\n" - " orr %0, %0, %4 // set PTE_S2_RDONLY\n" - " stxr %w1, %0, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte)) - : "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY)); + pteval_t old_pteval, pteval; + + pteval = READ_ONCE(pte_val(*pte)); + do { + old_pteval = pteval; + pteval &= ~PTE_S2_RDWR; + pteval |= PTE_S2_RDONLY; + pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval); + } while (pteval != old_pteval); } static inline bool kvm_s2pte_readonly(pte_t *pte) -- cgit v1.2.3 From 73e86cb03cf2ec0aa3789dc8621c6d53619cac5e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 4 Jul 2017 19:04:18 +0100 Subject: arm64: Move PTE_RDONLY bit handling out of set_pte_at() Currently PTE_RDONLY is treated as a hardware only bit and not handled by the pte_mkwrite(), pte_wrprotect() or the user PAGE_* definitions. The set_pte_at() function is responsible for setting this bit based on the write permission or dirty state. This patch moves the PTE_RDONLY handling out of set_pte_at into the pte_mkwrite()/pte_wrprotect() functions. The PAGE_* definitions to need to be updated to explicitly include PTE_RDONLY when !PTE_WRITE. The patch also removes the redundant PAGE_COPY(_EXEC) definitions as they are identical to the corresponding PAGE_READONLY(_EXEC). 
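The rework relies on the hardware DBM convention used throughout this series: PTE_WRITE doubles as the DBM bit, and hardware marks a page dirty by clearing PTE_RDONLY. Restating that encoding alongside the existing pte_hw_dirty() definition, for orientation only (nothing here is new code):

    /*
     * writable + clean : PTE_WRITE set,   PTE_RDONLY set
     * writable + dirty : PTE_WRITE set,   PTE_RDONLY clear (cleared by hardware)
     * read-only        : PTE_WRITE clear, PTE_RDONLY set
     */
    #define pte_hw_dirty(pte)   (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))

This is why pte_mkwrite() must now clear PTE_RDONLY and pte_wrprotect() must set it, as the pgtable.h hunks below do.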
Reviewed-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable-prot.h | 18 ++++++++---------- arch/arm64/include/asm/pgtable.h | 34 ++++++++++------------------------ arch/arm64/kernel/hibernate.c | 4 ++-- arch/arm64/mm/fault.c | 6 +----- 4 files changed, 21 insertions(+), 41 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 2142c7726e76..0a5635fb0ef9 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -63,23 +63,21 @@ #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) -#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN) +#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY +#define __P010 PAGE_READONLY +#define __P011 PAGE_READONLY #define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC -#define __P110 PAGE_COPY_EXEC -#define __P111 PAGE_COPY_EXEC +#define __P110 PAGE_READONLY_EXEC +#define __P111 PAGE_READONLY_EXEC #define __S000 PAGE_NONE #define __S001 PAGE_READONLY diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 9127688ae775..a04bfb869a80 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -125,12 +125,16 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) static inline pte_t pte_wrprotect(pte_t pte) { - return clear_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + return pte; } static inline pte_t pte_mkclean(pte_t pte) @@ -169,16 +173,6 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } -static inline pte_t pte_clear_rdonly(pte_t pte) -{ - return clear_pte_bit(pte, __pgprot(PTE_RDONLY)); -} - -static inline pte_t pte_set_rdonly(pte_t pte) -{ - return set_pte_bit(pte, __pgprot(PTE_RDONLY)); -} - static inline pte_t pte_mkpresent(pte_t pte) { return set_pte_bit(pte, __pgprot(PTE_VALID)); @@ -226,14 +220,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t 
*ptep, pte_t pte) { - if (pte_present(pte)) { - if (pte_sw_dirty(pte) && pte_write(pte)) - pte_val(pte) &= ~PTE_RDONLY; - else - pte_val(pte) |= PTE_RDONLY; - if (pte_user_exec(pte) && !pte_special(pte)) - __sync_icache_dcache(pte, addr); - } + if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) + __sync_icache_dcache(pte, addr); /* * If the existing pte is valid, check for potential race with @@ -659,12 +647,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres old_pte = pte; /* * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY - * clear), set the PTE_DIRTY and PTE_RDONLY bits. + * clear), set the PTE_DIRTY bit. */ - if (pte_hw_dirty(pte)) { + if (pte_hw_dirty(pte)) pte = pte_mkdirty(pte); - pte = pte_set_rdonly(pte); - } pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index a44e13942d30..095d3c170f5d 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) * read only (code, rodata). Clear the RDONLY bit from * the temporary mappings we use during restore. */ - set_pte(dst_pte, pte_clear_rdonly(pte)); + set_pte(dst_pte, pte_mkwrite(pte)); } else if (debug_pagealloc_enabled() && !pte_none(pte)) { /* * debug_pagealloc will removed the PTE_VALID bit if @@ -343,7 +343,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) */ BUG_ON(!pfn_valid(pte_pfn(pte))); - set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte))); + set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte))); } } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 430eaf82da49..f75ed5c4b994 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -204,11 +204,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, return 0; /* only preserve the access flags and write permission */ - pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY; - - /* set PTE_RDONLY if actual read-only or clean PTE */ - if (!pte_write(entry) || !pte_sw_dirty(entry)) - entry = pte_set_rdonly(entry); + pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY; /* * Setting the flags must be done atomically to avoid racing with the -- cgit v1.2.3 From 64c26841b34957ef8f33f7a9e8663aeee59c3ded Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 5 Jul 2017 10:59:42 +0100 Subject: arm64: Ignore hardware dirty bit updates in ptep_set_wrprotect() ptep_set_wrprotect() is only called on CoW mappings which are private (!VM_SHARED) with the pte either read-only (!PTE_WRITE && PTE_RDONLY) or writable and software-dirty (PTE_WRITE && !PTE_RDONLY && PTE_DIRTY). There is no race with the hardware update of the dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM) is set. This patch removes the code setting the software PTE_DIRTY bit in ptep_set_wrprotect() as superfluous. A VM_WARN_ONCE is introduced in case the above logic is wrong or the core mm code changes its use of ptep_set_wrprotect(). 
Reviewed-by: Will Deacon Acked-by: Steve Capper Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index a04bfb869a80..0117cbcd62d4 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -634,23 +634,28 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * ptep_set_wrprotect - mark read-only while trasferring potential hardware - * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. + * ptep_set_wrprotect - mark read-only while preserving the hardware update of + * the Access Flag. */ #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) { pte_t old_pte, pte; + /* + * ptep_set_wrprotect() is only called on CoW mappings which are + * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE && + * PTE_RDONLY) or writable and software-dirty (PTE_WRITE && + * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and + * protection_map[]. There is no race with the hardware update of the + * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM) + * is set. + */ + VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep), + "%s: potential race with hardware DBM", __func__); pte = READ_ONCE(*ptep); do { old_pte = pte; - /* - * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY - * clear), set the PTE_DIRTY bit. - */ - if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); -- cgit v1.2.3 From af29678fe785ad79e7386e97b57093482f0dd7c4 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Jul 2017 11:53:08 +0100 Subject: arm64: Remove the !CONFIG_ARM64_HW_AFDBM alternative code paths Since the pte handling for hardware AF/DBM works even when the hardware feature is not present, make the pte accessors implementation permanent and remove the corresponding #ifdefs. The Kconfig option is kept as it can still be used to disable the feature at the hardware level. Reviewed-by: Will Deacon Cc: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 9 +-------- arch/arm64/kvm/hyp/s2-setup.c | 2 +- arch/arm64/mm/fault.c | 2 -- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0117cbcd62d4..bc4e92337d16 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -85,11 +85,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; (__boundary - 1 < (end) - 1) ? __boundary : (end); \ }) -#ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) -#else -#define pte_hw_dirty(pte) (0) -#endif #define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) @@ -228,8 +224,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * hardware updates of the pte (ptep_set_access_flags safely changes * valid ptes without going through an invalid entry). 
*/ - if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && - pte_valid(*ptep) && pte_valid(pte)) { + if (pte_valid(*ptep) && pte_valid(pte)) { VM_WARN_ONCE(!pte_young(pte), "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", __func__, pte_val(*ptep), pte_val(pte)); @@ -565,7 +560,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); } -#ifdef CONFIG_ARM64_HW_AFDBM #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -670,7 +664,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, ptep_set_wrprotect(mm, address, (pte_t *)pmdp); } #endif -#endif /* CONFIG_ARM64_HW_AFDBM */ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index b81f4091c909..a81f5e10fc8c 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -70,7 +70,7 @@ u32 __hyp_text __init_stage2_translation(void) * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2. */ tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf; - if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp) + if (tmp) val |= VTCR_EL2_HA; /* diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f75ed5c4b994..778d0bb89551 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -183,7 +183,6 @@ void show_pte(unsigned long addr) pr_cont("\n"); } -#ifdef CONFIG_ARM64_HW_AFDBM /* * This function sets the access flags (dirty, accessed), as well as write * permission, and only to a more permissive setting. @@ -225,7 +224,6 @@ int ptep_set_access_flags(struct vm_area_struct *vma, flush_tlb_fix_spurious_fault(vma, address); return 1; } -#endif static bool is_el1_instruction_abort(unsigned int esr) { -- cgit v1.2.3 From 2fa59ec8ccf38bbc2193ae61aed9afa0687974e0 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 14 Aug 2017 09:55:46 +0100 Subject: arm64: dma-mapping: Do not pass data to gen_pool_set_algo() gen_pool_first_fit_order_align() does not make use of additional data, so pass plain NULL there. 
Signed-off-by: Vladimir Murzin Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index f27d4dd04384..7038e1a61397 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -425,7 +425,7 @@ static int __init atomic_pool_init(void) gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, - (void *)PAGE_SHIFT); + NULL); pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n", atomic_pool_size / 1024); -- cgit v1.2.3 From 8165f70648da0a4a51e5871693781b2cc29b29d6 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 14 Aug 2017 09:55:47 +0100 Subject: arm64: dma-mapping: Mark atomic_pool as __ro_after_init atomic_pool is setup once while init stage and never changed after that, so it is good candidate for __ro_after_init Signed-off-by: Vladimir Murzin Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 7038e1a61397..614af886b7ef 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -42,7 +42,7 @@ static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, return prot; } -static struct gen_pool *atomic_pool; +static struct gen_pool *atomic_pool __ro_after_init; #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; -- cgit v1.2.3 From a88ce63b642cf8cd82cbc278429ccd9de4455a07 Mon Sep 17 00:00:00 2001 From: Hoeun Ryu Date: Thu, 17 Aug 2017 11:24:27 +0900 Subject: arm64: kexec: have own crash_smp_send_stop() for crash dump for nonpanic cores Commit 0ee5941 : (x86/panic: replace smp_send_stop() with kdump friendly version in panic path) introduced crash_smp_send_stop() which is a weak function and can be overridden by architecture codes to fix the side effect caused by commit f06e515 : (kernel/panic.c: add "crash_kexec_post_ notifiers" option). ARM64 architecture uses the weak version function and the problem is that the weak function simply calls smp_send_stop() which makes other CPUs offline and takes away the chance to save crash information for nonpanic CPUs in machine_crash_shutdown() when crash_kexec_post_notifiers kernel option is enabled. Calling smp_send_crash_stop() in machine_crash_shutdown() is useless because all nonpanic CPUs are already offline by smp_send_stop() in this case and smp_send_crash_stop() only works against online CPUs. The result is that secondary CPUs registers are not saved by crash_save_cpu() and the vmcore file misreports these CPUs as being offline. crash_smp_send_stop() is implemented to fix this problem by replacing the existing smp_send_crash_stop() and adding a check for multiple calling to the function. The function (strong symbol version) saves crash information for nonpanic CPUs and machine_crash_shutdown() tries to save crash information for nonpanic CPUs only when crash_kexec_post_notifiers kernel option is disabled. * crash_kexec_post_notifiers : false panic() __crash_kexec() machine_crash_shutdown() crash_smp_send_stop() <= save crash dump for nonpanic cores * crash_kexec_post_notifiers : true panic() crash_smp_send_stop() <= save crash dump for nonpanic cores __crash_kexec() machine_crash_shutdown() crash_smp_send_stop() <= just return. 
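For reference, the mechanism this commit relies on is a weak default in core code that an architecture may replace with a strong definition at link time. The generic fallback looks roughly like the sketch below (the exact body in kernel/panic.c may differ in detail); the arm64 diff that follows supplies the strong crash_smp_send_stop(), which IPIs the secondaries so each runs crash_save_cpu() before parking.

#include <linux/compiler.h>
#include <linux/smp.h>

/*
 * Weak default: used only by architectures that do not provide their
 * own crash_smp_send_stop().  It merely parks the other CPUs and saves
 * no crash state for them.
 */
void __weak crash_smp_send_stop(void)
{
	smp_send_stop();
}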
Signed-off-by: Hoeun Ryu Reviewed-by: James Morse Tested-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/smp.h | 2 +- arch/arm64/kernel/machine_kexec.c | 2 +- arch/arm64/kernel/smp.c | 12 +++++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 55f08c5acfad..f82b447bd34f 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -148,7 +148,7 @@ static inline void cpu_panic_kernel(void) */ bool cpus_are_stuck_in_kernel(void); -extern void smp_send_crash_stop(void); +extern void crash_smp_send_stop(void); extern bool smp_crash_stop_failed(void); #endif /* ifndef __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 481f54a866c5..11121f608eb5 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -252,7 +252,7 @@ void machine_crash_shutdown(struct pt_regs *regs) local_irq_disable(); /* shutdown non-crashing cpus */ - smp_send_crash_stop(); + crash_smp_send_stop(); /* for crashing cpu */ crash_save_cpu(regs, smp_processor_id()); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f13ddb2404f9..ffe089942ac4 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -977,11 +977,21 @@ void smp_send_stop(void) } #ifdef CONFIG_KEXEC_CORE -void smp_send_crash_stop(void) +void crash_smp_send_stop(void) { + static int cpus_stopped; cpumask_t mask; unsigned long timeout; + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + cpus_stopped = 1; + if (num_online_cpus() == 1) return; -- cgit v1.2.3 From d3ea79527757ba65b3ee08e10c59a3c84f34e4bf Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 22 Aug 2017 11:42:41 +0100 Subject: arm64: hugetlb: set_huge_pte_at Add WARN_ON on !pte_present This patch adds a WARN_ON to set_huge_pte_at as the accessor assumes that entries to be written down are all present. (There are separate accessors to clear huge ptes). We will need to handle the !pte_present case where memory offlining is used on hugetlb pages. swap and migration entries will be supplied to set_huge_pte_at in this case. Cc: David Woods Signed-off-by: Steve Capper Signed-off-by: Punit Agrawal Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 656e0ece2289..7b61e4833432 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -67,6 +67,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, unsigned long pfn; pgprot_t hugeprot; + /* + * Code needs to be expanded to handle huge swap and migration + * entries. Needed for HUGETLB and MEMORY_FAILURE. + */ + WARN_ON(!pte_present(pte)); + if (!pte_cont(pte)) { set_pte_at(mm, addr, ptep, pte); return; -- cgit v1.2.3 From b5b0be86d7181ed82cee9d8ac5073a48f038a305 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 22 Aug 2017 11:42:42 +0100 Subject: arm64: hugetlb: Introduce pte_pgprot helper Rather than xor pte bits in various places, use this helper function. 
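The trick behind the helper is worth spelling out: pfn_pte(pfn, __pgprot(0)) yields a pte carrying only the output-address bits for that pfn, so XOR-ing it with the full pte value cancels the address bits and leaves exactly the attribute (pgprot) bits. A hedged sketch of how the resulting pgprot is typically reused, rebuilding the next entry of a contiguous range with identical attributes; next_contig_pte() is a made-up name for illustration:

#include <asm/pgtable.h>

/* Illustration only: same attributes, next page frame. */
static pte_t next_contig_pte(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);
	/* address bits XOR full pte == attribute bits only */
	pgprot_t prot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^
				 pte_val(pte));

	return pfn_pte(pfn + 1, prot);
}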
Cc: David Woods Signed-off-by: Steve Capper Signed-off-by: Punit Agrawal Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 7b61e4833432..cb84ca33bc6b 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -41,6 +41,16 @@ int pud_huge(pud_t pud) #endif } +/* + * Select all bits except the pfn + */ +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + + return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +} + static int find_num_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, size_t *pgsize) { @@ -80,7 +90,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, ncontig = find_num_contig(mm, addr, ptep, &pgsize); pfn = pte_pfn(pte); - hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); + hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++) { pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, pte_val(pfn_pte(pfn, hugeprot))); @@ -223,9 +233,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, size_t pgsize = 0; unsigned long pfn = pte_pfn(pte); /* Select all bits except the pfn */ - pgprot_t hugeprot = - __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ - pte_val(pte)); + pgprot_t hugeprot = pte_pgprot(pte); pfn = pte_pfn(pte); ncontig = find_num_contig(vma->vm_mm, addr, ptep, -- cgit v1.2.3 From 29a7287dceb76729960a15095fbbfcffa2179b07 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 22 Aug 2017 11:42:43 +0100 Subject: arm64: hugetlb: Spring clean huge pte accessors This patch aims to re-structure the huge pte accessors without affecting their functionality. Control flow is changed to reduce indentation and expanded use is made of post for loop variable modification. It is then much easier to add break-before-make semantics in a subsequent patch. 
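In miniature, the restructuring pattern is: return early for the simple case instead of nesting the common path under an if, and move all per-iteration stepping into the for clause so a later patch can add work inside the loop without re-threading it. A toy example, nothing arm64-specific; touch() is a stand-in:

static void touch(int *p)
{
	*p += 1;
}

/* Before: common path nested, stepping scattered through the body */
static void update_range_nested(int *p, int n)
{
	if (n > 1) {
		int i;

		for (i = 0; i < n; ++i) {
			touch(p);
			++p;
		}
	} else {
		touch(p);
	}
}

/* After: early return, all stepping in the for clause */
static void update_range_flat(int *p, int n)
{
	int i;

	if (n <= 1) {
		touch(p);
		return;
	}

	for (i = 0; i < n; i++, p++)
		touch(p);
}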
Cc: David Woods Signed-off-by: Steve Capper Signed-off-by: Punit Agrawal Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 119 ++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 65 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index cb84ca33bc6b..08deed7c71f0 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -74,7 +74,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, size_t pgsize; int i; int ncontig; - unsigned long pfn; + unsigned long pfn, dpfn; pgprot_t hugeprot; /* @@ -90,14 +90,13 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, ncontig = find_num_contig(mm, addr, ptep, &pgsize); pfn = pte_pfn(pte); + dpfn = pgsize >> PAGE_SHIFT; hugeprot = pte_pgprot(pte); - for (i = 0; i < ncontig; i++) { + + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) { pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, pte_val(pfn_pte(pfn, hugeprot))); set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); - ptep++; - pfn += pgsize >> PAGE_SHIFT; - addr += pgsize; } } @@ -195,91 +194,81 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t pte; - - if (pte_cont(*ptep)) { - int ncontig, i; - size_t pgsize; - bool is_dirty = false; - - ncontig = find_num_contig(mm, addr, ptep, &pgsize); - /* save the 1st pte to return */ - pte = ptep_get_and_clear(mm, addr, ptep); - for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) { - /* - * If HW_AFDBM is enabled, then the HW could - * turn on the dirty bit for any of the page - * in the set, so check them all. - */ - ++ptep; - if (pte_dirty(ptep_get_and_clear(mm, addr, ptep))) - is_dirty = true; - } - if (is_dirty) - return pte_mkdirty(pte); - else - return pte; - } else { + int ncontig, i; + size_t pgsize; + pte_t orig_pte = huge_ptep_get(ptep); + + if (!pte_cont(orig_pte)) return ptep_get_and_clear(mm, addr, ptep); + + ncontig = find_num_contig(mm, addr, ptep, &pgsize); + for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { + /* + * If HW_AFDBM is enabled, then the HW could + * turn on the dirty bit for any of the page + * in the set, so check them all. 
+ */ + if (pte_dirty(ptep_get_and_clear(mm, addr, ptep))) + orig_pte = pte_mkdirty(orig_pte); } + + return orig_pte; } int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - if (pte_cont(pte)) { - int ncontig, i, changed = 0; - size_t pgsize = 0; - unsigned long pfn = pte_pfn(pte); - /* Select all bits except the pfn */ - pgprot_t hugeprot = pte_pgprot(pte); - - pfn = pte_pfn(pte); - ncontig = find_num_contig(vma->vm_mm, addr, ptep, - &pgsize); - for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) { - changed |= ptep_set_access_flags(vma, addr, ptep, - pfn_pte(pfn, - hugeprot), - dirty); - pfn += pgsize >> PAGE_SHIFT; - } - return changed; - } else { + int ncontig, i, changed = 0; + size_t pgsize = 0; + unsigned long pfn = pte_pfn(pte), dpfn; + pgprot_t hugeprot; + + if (!pte_cont(pte)) return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + + ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); + dpfn = pgsize >> PAGE_SHIFT; + hugeprot = pte_pgprot(pte); + + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) { + changed |= ptep_set_access_flags(vma, addr, ptep, + pfn_pte(pfn, hugeprot), dirty); } + + return changed; } void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if (pte_cont(*ptep)) { - int ncontig, i; - size_t pgsize = 0; + int ncontig, i; + size_t pgsize; - ncontig = find_num_contig(mm, addr, ptep, &pgsize); - for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) - ptep_set_wrprotect(mm, addr, ptep); - } else { + if (!pte_cont(*ptep)) { ptep_set_wrprotect(mm, addr, ptep); + return; } + + ncontig = find_num_contig(mm, addr, ptep, &pgsize); + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) + ptep_set_wrprotect(mm, addr, ptep); } void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - if (pte_cont(*ptep)) { - int ncontig, i; - size_t pgsize = 0; - - ncontig = find_num_contig(vma->vm_mm, addr, ptep, - &pgsize); - for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) - ptep_clear_flush(vma, addr, ptep); - } else { + int ncontig, i; + size_t pgsize; + + if (!pte_cont(*ptep)) { ptep_clear_flush(vma, addr, ptep); + return; } + + ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) + ptep_clear_flush(vma, addr, ptep); } static __init int setup_hugepagesz(char *opt) -- cgit v1.2.3 From d8bdcff2876424d44d08a4d16a54fee518f9d5b8 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 22 Aug 2017 11:42:44 +0100 Subject: arm64: hugetlb: Add break-before-make logic for contiguous entries It has become apparent that one has to take special care when modifying attributes of memory mappings that employ the contiguous bit. Both the requirement and the architecturally correct "Break-Before-Make" technique of updating contiguous entries can be found described in: ARM DDI 0487A.k_iss10775, "Misprogramming of the Contiguous bit", page D4-1762. The huge pte accessors currently replace the attributes of contiguous pte entries in place thus can, on certain platforms, lead to TLB conflict aborts or even erroneous results returned from TLB lookups. This patch adds two helper functions - * get_clear_flush(.) - clears a contiguous entry and returns the head pte (whilst taking care to retain dirty bit information that could have been modified by DBM). * clear_flush(.) 
that clears a contiguous entry A tlb invalidate is performed to then ensure that there is no possibility of multiple tlb entries being present for the same region. Cc: David Woods Signed-off-by: Steve Capper (Added helper clear_flush(), updated commit log, and some cleanup) Signed-off-by: Punit Agrawal [catalin.marinas@arm.com: remove CONFIG_ARM64_HW_AFDBM check] Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 112 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 91 insertions(+), 21 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 08deed7c71f0..b82df85fe920 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -68,6 +68,66 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr, return CONT_PTES; } +/* + * Changing some bits of contiguous entries requires us to follow a + * Break-Before-Make approach, breaking the whole contiguous set + * before we can change any entries. See ARM DDI 0487A.k_iss10775, + * "Misprogramming of the Contiguous bit", page D4-1762. + * + * This helper performs the break step. + */ +static pte_t get_clear_flush(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pgsize, + unsigned long ncontig) +{ + struct vm_area_struct vma = { .vm_mm = mm }; + pte_t orig_pte = huge_ptep_get(ptep); + bool valid = pte_valid(orig_pte); + unsigned long i, saddr = addr; + + for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { + pte_t pte = ptep_get_and_clear(mm, addr, ptep); + + /* + * If HW_AFDBM is enabled, then the HW could turn on + * the dirty bit for any page in the set, so check + * them all. All hugetlb entries are already young. + */ + if (pte_dirty(pte)) + orig_pte = pte_mkdirty(orig_pte); + } + + if (valid) + flush_tlb_range(&vma, saddr, addr); + return orig_pte; +} + +/* + * Changing some bits of contiguous entries requires us to follow a + * Break-Before-Make approach, breaking the whole contiguous set + * before we can change any entries. See ARM DDI 0487A.k_iss10775, + * "Misprogramming of the Contiguous bit", page D4-1762. + * + * This helper performs the break step for use cases where the + * original pte is not needed. 
+ */ +static void clear_flush(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pgsize, + unsigned long ncontig) +{ + struct vm_area_struct vma = { .vm_mm = mm }; + unsigned long i, saddr = addr; + + for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) + pte_clear(mm, addr, ptep); + + flush_tlb_range(&vma, saddr, addr); +} + void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { @@ -93,6 +153,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, dpfn = pgsize >> PAGE_SHIFT; hugeprot = pte_pgprot(pte); + clear_flush(mm, addr, ptep, pgsize, ncontig); + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) { pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, pte_val(pfn_pte(pfn, hugeprot))); @@ -194,7 +256,7 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int ncontig, i; + int ncontig; size_t pgsize; pte_t orig_pte = huge_ptep_get(ptep); @@ -202,17 +264,8 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return ptep_get_and_clear(mm, addr, ptep); ncontig = find_num_contig(mm, addr, ptep, &pgsize); - for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { - /* - * If HW_AFDBM is enabled, then the HW could - * turn on the dirty bit for any of the page - * in the set, so check them all. - */ - if (pte_dirty(ptep_get_and_clear(mm, addr, ptep))) - orig_pte = pte_mkdirty(orig_pte); - } - return orig_pte; + return get_clear_flush(mm, addr, ptep, pgsize, ncontig); } int huge_ptep_set_access_flags(struct vm_area_struct *vma, @@ -223,18 +276,25 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; pgprot_t hugeprot; + pte_t orig_pte; if (!pte_cont(pte)) return ptep_set_access_flags(vma, addr, ptep, pte, dirty); ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; - hugeprot = pte_pgprot(pte); - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) { - changed |= ptep_set_access_flags(vma, addr, ptep, - pfn_pte(pfn, hugeprot), dirty); - } + orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); + if (!pte_same(orig_pte, pte)) + changed = 1; + + /* Make sure we don't lose the dirty state */ + if (pte_dirty(orig_pte)) + pte = pte_mkdirty(pte); + + hugeprot = pte_pgprot(pte); + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) + set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); return changed; } @@ -242,8 +302,11 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { + unsigned long pfn, dpfn; + pgprot_t hugeprot; int ncontig, i; size_t pgsize; + pte_t pte; if (!pte_cont(*ptep)) { ptep_set_wrprotect(mm, addr, ptep); @@ -251,15 +314,23 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, } ncontig = find_num_contig(mm, addr, ptep, &pgsize); - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) - ptep_set_wrprotect(mm, addr, ptep); + dpfn = pgsize >> PAGE_SHIFT; + + pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig); + pte = pte_wrprotect(pte); + + hugeprot = pte_pgprot(pte); + pfn = pte_pfn(pte); + + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) + set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); } void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - int ncontig, i; size_t pgsize; + int 
ncontig; if (!pte_cont(*ptep)) { ptep_clear_flush(vma, addr, ptep); @@ -267,8 +338,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma, } ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); - for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) - ptep_clear_flush(vma, addr, ptep); + clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); } static __init int setup_hugepagesz(char *opt) -- cgit v1.2.3 From 30f3ac00ad2f822937839c95cbb22ce483190c4c Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 22 Aug 2017 11:42:45 +0100 Subject: arm64: hugetlb: Handle swap entries in huge_pte_offset() for contiguous hugepages huge_pte_offset() was updated to correctly handle swap entries for hugepages. With the addition of the size parameter, it is now possible to disambiguate whether the request is for a regular hugepage or a contiguous hugepage. Fix huge_pte_offset() for contiguous hugepages by using the size to find the correct page table entry. Signed-off-by: Punit Agrawal Cc: David Woods Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index b82df85fe920..b91ec151e62c 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -221,19 +221,28 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return NULL; pud = pud_offset(pgd, addr); - if (pud_none(*pud)) + if (sz != PUD_SIZE && pud_none(*pud)) return NULL; - /* swap or huge page */ - if (!pud_present(*pud) || pud_huge(*pud)) + /* hugepage or swap? */ + if (pud_huge(*pud) || !pud_present(*pud)) return (pte_t *)pud; /* table; check the next level */ + if (sz == CONT_PMD_SIZE) + addr &= CONT_PMD_MASK; + pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) && + pmd_none(*pmd)) return NULL; - if (!pmd_present(*pmd) || pmd_huge(*pmd)) + if (pmd_huge(*pmd) || !pmd_present(*pmd)) return (pte_t *)pmd; + if (sz == CONT_PTE_SIZE) { + pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK)); + return pte; + } + return NULL; } -- cgit v1.2.3 From c3e4ed5c3d5d79af940eb24c810dddcec6d2b536 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 22 Aug 2017 11:42:46 +0100 Subject: arm64: hugetlb: Override huge_pte_clear() to support contiguous hugepages The default huge_pte_clear() implementation does not clear contiguous page table entries when it encounters contiguous hugepages that are supported on arm64. Fix this by overriding the default implementation to clear all the entries associated with contiguous hugepages. 
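For context, the default this override replaces is, roughly, a single pte_clear() on the entry it is given (sketch below; treat it as an approximation of the generic helper). That is sufficient for a hugepage mapped by one block entry, but a contiguous hugepage is backed by CONT_PTES (or CONT_PMDS) adjacent entries, all of which must be cleared -- hence the loop added in the diff that follows.

/* asm-generic flavour, roughly: clears only the head entry */
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
				  pte_t *ptep, unsigned long sz)
{
	pte_clear(mm, addr, ptep);
}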
Signed-off-by: Punit Agrawal Cc: David Woods Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hugetlb.h | 6 +++++- arch/arm64/mm/hugetlbpage.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 793bd73b0d07..df8c0aea0917 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -18,7 +18,6 @@ #ifndef __ASM_HUGETLB_H #define __ASM_HUGETLB_H -#include #include static inline pte_t huge_ptep_get(pte_t *ptep) @@ -82,6 +81,11 @@ extern void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep); extern void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); +extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz); +#define huge_pte_clear huge_pte_clear + +#include #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE static inline bool gigantic_page_supported(void) { return true; } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index b91ec151e62c..035c121c675b 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -68,6 +68,32 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr, return CONT_PTES; } +static inline int num_contig_ptes(unsigned long size, size_t *pgsize) +{ + int contig_ptes = 0; + + *pgsize = size; + + switch (size) { +#ifdef CONFIG_ARM64_4K_PAGES + case PUD_SIZE: +#endif + case PMD_SIZE: + contig_ptes = 1; + break; + case CONT_PMD_SIZE: + *pgsize = PMD_SIZE; + contig_ptes = CONT_PMDS; + break; + case CONT_PTE_SIZE: + *pgsize = PAGE_SIZE; + contig_ptes = CONT_PTES; + break; + } + + return contig_ptes; +} + /* * Changing some bits of contiguous entries requires us to follow a * Break-Before-Make approach, breaking the whole contiguous set @@ -262,6 +288,18 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, return entry; } +void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) +{ + int i, ncontig; + size_t pgsize; + + ncontig = num_contig_ptes(sz, &pgsize); + + for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) + pte_clear(mm, addr, ptep); +} + pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { -- cgit v1.2.3 From a8d623eefd780288c0299f517da0845da687fbfc Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 22 Aug 2017 11:42:47 +0100 Subject: arm64: hugetlb: Override set_huge_swap_pte_at() to support contiguous hugepages The default implementation of set_huge_swap_pte_at() does not support hugepages consisting of contiguous ptes. Override it to add support for contiguous hugepages. 
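The core-code default being overridden is, roughly, a one-entry wrapper (sketch below; an approximation of the generic helper, not a quote of it). Because a swap or migration entry is not a valid mapping and carries no pfn, the arm64 override in the diff that follows simply writes the same swap pte into each of the ncontig slots with set_pte(), with no pfn stepping and no break-before-make required.

/* Generic flavour, roughly: ignores sz and writes a single entry */
static inline void set_huge_swap_pte_at(struct mm_struct *mm,
					unsigned long addr, pte_t *ptep,
					pte_t pte, unsigned long sz)
{
	set_huge_pte_at(mm, addr, ptep, pte);
}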
Signed-off-by: Punit Agrawal Cc: David Woods Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hugetlb.h | 3 +++ arch/arm64/mm/hugetlbpage.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index df8c0aea0917..1dca41bea16a 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -84,6 +84,9 @@ extern void huge_ptep_clear_flush(struct vm_area_struct *vma, extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz); #define huge_pte_clear huge_pte_clear +extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz); +#define set_huge_swap_pte_at set_huge_swap_pte_at #include diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 035c121c675b..76915b736b17 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -188,6 +188,18 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, } } +void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz) +{ + int i, ncontig; + size_t pgsize; + + ncontig = num_contig_ptes(sz, &pgsize); + + for (i = 0; i < ncontig; i++, ptep++) + set_pte(ptep, pte); +} + pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { -- cgit v1.2.3 From 5cd028b9d90403bf24c8bf7915ed61c7a9bfce6c Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 22 Aug 2017 11:42:48 +0100 Subject: arm64: Re-enable support for contiguous hugepages also known as - Revert "Revert "Revert "commit 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit")""" Now that our hugetlb implementation is compliant with the break-before-make requirements of the architecture and we have addressed some of the issues in core code required for properly dealing with hardware poisoning of contiguous hugepages let's re-enable support for contiguous hugepages. This reverts commit 6ae979ab39a368c18ceb0424bf824d172d6ab56f. Signed-off-by: Punit Agrawal Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 76915b736b17..3ceb4f275a1a 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -408,6 +408,10 @@ static __init int setup_hugepagesz(char *opt) hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else if (ps == (PAGE_SIZE * CONT_PTES)) { + hugetlb_add_hstate(CONT_PTE_SHIFT); + } else if (ps == (PMD_SIZE * CONT_PMDS)) { + hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); } else { hugetlb_bad_size(); pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); @@ -416,3 +420,13 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); + +#ifdef CONFIG_ARM64_64K_PAGES +static __init int add_default_hugepagesz(void) +{ + if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) + hugetlb_add_hstate(CONT_PTE_SHIFT); + return 0; +} +arch_initcall(add_default_hugepagesz); +#endif -- cgit v1.2.3 From 828f193dd62a40ade5ea8b24cb8b0a22c30df673 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 22 Aug 2017 11:42:49 +0100 Subject: arm64: hugetlb: Cleanup setup_hugepagesz Replace a lot of if statements with switch and case labels to make it much clearer which huge page sizes are supported. 
Also, we prevent PUD_SIZE from being used on systems not running with 4KB PAGE_SIZE. Before if one supplied PUD_SIZE in these circumstances, then unusuable huge page sizes would be in use. Fixes: 084bd29810a5 ("ARM64: mm: HugeTLB support.") Cc: David Woods Signed-off-by: Steve Capper Signed-off-by: Punit Agrawal Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 3ceb4f275a1a..6cb0fa92a651 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -404,20 +404,20 @@ static __init int setup_hugepagesz(char *opt) { unsigned long ps = memparse(opt, &opt); - if (ps == PMD_SIZE) { - hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); - } else if (ps == PUD_SIZE) { - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); - } else if (ps == (PAGE_SIZE * CONT_PTES)) { - hugetlb_add_hstate(CONT_PTE_SHIFT); - } else if (ps == (PMD_SIZE * CONT_PMDS)) { - hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); - } else { - hugetlb_bad_size(); - pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); - return 0; + switch (ps) { +#ifdef CONFIG_ARM64_4K_PAGES + case PUD_SIZE: +#endif + case PMD_SIZE * CONT_PMDS: + case PMD_SIZE: + case PAGE_SIZE * CONT_PTES: + hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT); + return 1; } - return 1; + + hugetlb_bad_size(); + pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); + return 0; } __setup("hugepagesz=", setup_hugepagesz); -- cgit v1.2.3 From 5ce93ab624cee4ed68086c946bd6d18b9b3f64aa Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sun, 20 Aug 2017 13:20:47 +0300 Subject: arm64: introduce separated bits for mm_context_t flags Currently mm->context.flags field uses thread_info flags which is not the best idea for many reasons. For example, mm_context_t doesn't need most of thread_info flags. And it would be difficult to add new mm-related flag if needed because it may easily interfere with TIF ones. To deal with it, the new MMCF_AARCH32 flag is introduced for mm_context_t->flags, where MMCF prefix stands for mm_context_t flags. Also, mm_context_t flag doesn't require atomicity and ordering of the access, so using set/clear_bit() is replaced with simple masks. 
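Side by side, the change amounts to replacing bit-number based atomic ops with plain masks on a field the task owns. A minimal sketch follows; DEMO_TIF_32BIT is a placeholder bit number for illustration, not the real TIF_32BIT value:

#include <linux/bitops.h>
#include <linux/types.h>

#define DEMO_TIF_32BIT	22		/* placeholder bit number */
#define MMCF_AARCH32	0x1		/* mm_context_t flag from this patch */

/* Old style: atomic bitop keyed by a thread_info bit number */
static bool old_is_aarch32(unsigned long *flags)
{
	return test_bit(DEMO_TIF_32BIT, flags);
}

/* New style: plain mask, no atomicity or ordering implied */
static bool new_is_aarch32(unsigned long flags)
{
	return flags & MMCF_AARCH32;
}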
Signed-off-by: Yury Norov Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/elf.h | 4 ++-- arch/arm64/include/asm/mmu.h | 2 ++ arch/arm64/kernel/probes/uprobes.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index acae781f7359..f4e33f8356ca 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -139,7 +139,7 @@ typedef struct user_fpsimd_state elf_fpregset_t; #define SET_PERSONALITY(ex) \ ({ \ - clear_bit(TIF_32BIT, ¤t->mm->context.flags); \ + current->mm->context.flags = 0; \ clear_thread_flag(TIF_32BIT); \ current->personality &= ~READ_IMPLIES_EXEC; \ }) @@ -195,7 +195,7 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; */ #define COMPAT_SET_PERSONALITY(ex) \ ({ \ - set_bit(TIF_32BIT, ¤t->mm->context.flags); \ + current->mm->context.flags = MMCF_AARCH32; \ set_thread_flag(TIF_32BIT); \ }) #define COMPAT_ARCH_DLINFO diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 5468c834b072..0d34bf0a89c7 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -16,6 +16,8 @@ #ifndef __ASM_MMU_H #define __ASM_MMU_H +#define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */ + typedef struct { atomic64_t id; void *vdso; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 26c998534dca..636ca0119c0e 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -40,7 +40,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, probe_opcode_t insn; /* TODO: Currently we do not support AARCH32 instruction probing */ - if (test_bit(TIF_32BIT, &mm->context.flags)) + if (mm->context.flags & MMCF_AARCH32) return -ENOTSUPP; else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; -- cgit v1.2.3 From d1be5c99a0341249bf6f74eb1cbc3d5fc4ef2be7 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sun, 20 Aug 2017 13:20:48 +0300 Subject: arm64: cleanup {COMPAT_,}SET_PERSONALITY() macro There is some work that should be done after setting the personality. Currently it's done in the macro, which is not the best idea. 
In this patch new arch_setup_new_exec() routine is introduced, and all setup code is moved there, as suggested by Catalin: https://lkml.org/lkml/2017/8/4/494 Cc: Pratyush Anand Signed-off-by: Yury Norov [catalin.marinas@arm.com: comments changed or removed] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/elf.h | 2 -- arch/arm64/include/asm/thread_info.h | 3 +++ arch/arm64/kernel/process.c | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index f4e33f8356ca..c55043709c69 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -139,7 +139,6 @@ typedef struct user_fpsimd_state elf_fpregset_t; #define SET_PERSONALITY(ex) \ ({ \ - current->mm->context.flags = 0; \ clear_thread_flag(TIF_32BIT); \ current->personality &= ~READ_IMPLIES_EXEC; \ }) @@ -195,7 +194,6 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; */ #define COMPAT_SET_PERSONALITY(ex) \ ({ \ - current->mm->context.flags = MMCF_AARCH32; \ set_thread_flag(TIF_32BIT); \ }) #define COMPAT_ARCH_DLINFO diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index aa04b733b349..2eca178bc943 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -60,6 +60,9 @@ struct thread_info { #define thread_saved_fp(tsk) \ ((unsigned long)(tsk->thread.cpu_context.fp)) +void arch_setup_new_exec(void); +#define arch_setup_new_exec arch_setup_new_exec + #endif /* diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 85b953dd023a..e6bf19c1dddb 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -414,3 +414,11 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) else return randomize_page(mm->brk, SZ_1G); } + +/* + * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY. + */ +void arch_setup_new_exec(void) +{ + current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0; +} -- cgit v1.2.3
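The hook follows the usual opt-in pattern: the arm64 header declares arch_setup_new_exec() and defines a same-named macro, while core code falls back to a no-op when that macro is absent. The core-side counterpart is approximately the sketch below (the exact header placement is an assumption); setup_new_exec() then invokes the hook after (COMPAT_)SET_PERSONALITY, which is why the arm64 implementation can derive context.flags from is_compat_task() at that point.

/* Core fallback, roughly (e.g. in a shared header such as linux/thread_info.h) */
#ifndef arch_setup_new_exec
static inline void arch_setup_new_exec(void) { }
#endif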