From 55bdd694116597d2f16510b121463cd579ba78da Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 21 May 2010 18:06:41 +0100 Subject: ARM: Add base support for ARMv7-M MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the base support for the ARMv7-M architecture. It consists of the corresponding arch/arm/mm/ files and various #ifdef's around the kernel. Exception handling is implemented by a subsequent patch. [ukleinek: squash in some changes originating from commit b5717ba (Cortex-M3: Add support for the Microcontroller Prototyping System) from the v2.6.33-arm1 patch stack, port to post 3.6, drop zImage support, drop reorganisation of pt_regs, assert CONFIG_CPU_V7M doesn't leak into installed headers and a few cosmetic changes] Signed-off-by: Catalin Marinas Reviewed-by: Jonathan Austin Tested-by: Jonathan Austin Signed-off-by: Uwe Kleine-König --- arch/arm/include/asm/assembler.h | 17 +++- arch/arm/include/asm/cputype.h | 12 ++- arch/arm/include/asm/glue-cache.h | 27 +++++++ arch/arm/include/asm/glue-df.h | 8 ++ arch/arm/include/asm/glue-proc.h | 9 +++ arch/arm/include/asm/irqflags.h | 22 ++++-- arch/arm/include/asm/ptrace.h | 4 + arch/arm/include/asm/system_info.h | 1 + arch/arm/include/asm/v7m.h | 44 +++++++++++ arch/arm/include/uapi/asm/ptrace.h | 35 +++++++-- arch/arm/kernel/head-nommu.S | 10 ++- arch/arm/kernel/setup.c | 17 +++- arch/arm/kernel/traps.c | 8 ++ arch/arm/mm/cache-nop.S | 50 ++++++++++++ arch/arm/mm/nommu.c | 7 ++ arch/arm/mm/proc-v7m.S | 157 +++++++++++++++++++++++++++++++++++++ 16 files changed, 407 insertions(+), 21 deletions(-) create mode 100644 arch/arm/include/asm/v7m.h create mode 100644 arch/arm/mm/cache-nop.S create mode 100644 arch/arm/mm/proc-v7m.S (limited to 'arch/arm') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 05ee9eebad6b..a5fef710af32 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -136,7 +136,11 @@ * assumes FIQs are enabled, and that the processor is in SVC mode. */ .macro save_and_disable_irqs, oldcpsr +#ifdef CONFIG_CPU_V7M + mrs \oldcpsr, primask +#else mrs \oldcpsr, cpsr +#endif disable_irq .endm @@ -150,7 +154,11 @@ * guarantee that this will preserve the flags. */ .macro restore_irqs_notrace, oldcpsr +#ifdef CONFIG_CPU_V7M + msr primask, \oldcpsr +#else msr cpsr_c, \oldcpsr +#endif .endm .macro restore_irqs, oldcpsr @@ -229,7 +237,14 @@ #endif .endm -#ifdef CONFIG_THUMB2_KERNEL +#if defined(CONFIG_CPU_V7M) + /* + * setmode is used to assert to be in svc mode during boot. For v7-M + * this is done in __v7m_setup, so setmode can be empty here. 
+ */ + .macro setmode, mode, reg + .endm +#elif defined(CONFIG_THUMB2_KERNEL) .macro setmode, mode, reg mov \reg, #\mode msr cpsr_c, \reg diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 7652712d1d14..4eb94a3add3c 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -106,7 +106,17 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void) return read_cpuid(CPUID_ID); } -#else /* ifdef CONFIG_CPU_CP15 */ +#elif defined(CONFIG_CPU_V7M) + +#include +#include + +static inline unsigned int __attribute_const__ read_cpuid_id(void) +{ + return readl(BASEADDR_V7M_SCB + V7M_SCB_CPUID); +} + +#else /* ifdef CONFIG_CPU_CP15 / elif defined(CONFIG_CPU_V7M) */ static inline unsigned int __attribute_const__ read_cpuid_id(void) { diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index cca9f15704ed..65c9faf7a34a 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -125,10 +125,37 @@ # endif #endif +#if defined(CONFIG_CPU_V7M) +# ifdef _CACHE +# define MULTI_CACHE 1 +# else +# define _CACHE nop +# endif +#endif + #if !defined(_CACHE) && !defined(MULTI_CACHE) #error Unknown cache maintenance model #endif +#ifndef __ASSEMBLER__ +extern inline void nop_flush_icache_all(void) { } +extern inline void nop_flush_kern_cache_all(void) { } +extern inline void nop_flush_kern_cache_louis(void) { } +extern inline void nop_flush_user_cache_all(void) { } +extern inline void nop_flush_user_cache_range(unsigned long a, + unsigned long b, unsigned int c) { } + +extern inline void nop_coherent_kern_range(unsigned long a, unsigned long b) { } +extern inline int nop_coherent_user_range(unsigned long a, + unsigned long b) { return 0; } +extern inline void nop_flush_kern_dcache_area(void *a, size_t s) { } + +extern inline void nop_dma_flush_range(const void *a, const void *b) { } + +extern inline void nop_dma_map_area(const void *s, size_t l, int f) { } +extern inline void nop_dma_unmap_area(const void *s, size_t l, int f) { } +#endif + #ifndef MULTI_CACHE #define __cpuc_flush_icache_all __glue(_CACHE,_flush_icache_all) #define __cpuc_flush_kern_all __glue(_CACHE,_flush_kern_cache_all) diff --git a/arch/arm/include/asm/glue-df.h b/arch/arm/include/asm/glue-df.h index b6e9f2c108b5..6b70f1b46a6e 100644 --- a/arch/arm/include/asm/glue-df.h +++ b/arch/arm/include/asm/glue-df.h @@ -95,6 +95,14 @@ # endif #endif +#ifdef CONFIG_CPU_ABRT_NOMMU +# ifdef CPU_DABORT_HANDLER +# define MULTI_DABORT 1 +# else +# define CPU_DABORT_HANDLER nommu_early_abort +# endif +#endif + #ifndef CPU_DABORT_HANDLER #error Unknown data abort handler type #endif diff --git a/arch/arm/include/asm/glue-proc.h b/arch/arm/include/asm/glue-proc.h index ac1dd54724b6..f2f39bcf7945 100644 --- a/arch/arm/include/asm/glue-proc.h +++ b/arch/arm/include/asm/glue-proc.h @@ -230,6 +230,15 @@ # endif #endif +#ifdef CONFIG_CPU_V7M +# ifdef CPU_NAME +# undef MULTI_CPU +# define MULTI_CPU +# else +# define CPU_NAME cpu_v7m +# endif +#endif + #ifndef MULTI_CPU #define cpu_proc_init __glue(CPU_NAME,_proc_init) #define cpu_proc_fin __glue(CPU_NAME,_proc_fin) diff --git a/arch/arm/include/asm/irqflags.h b/arch/arm/include/asm/irqflags.h index 1e6cca55c750..3b763d6652a0 100644 --- a/arch/arm/include/asm/irqflags.h +++ b/arch/arm/include/asm/irqflags.h @@ -8,6 +8,16 @@ /* * CPU interrupt mask handling. 
*/ +#ifdef CONFIG_CPU_V7M +#define IRQMASK_REG_NAME_R "primask" +#define IRQMASK_REG_NAME_W "primask" +#define IRQMASK_I_BIT 1 +#else +#define IRQMASK_REG_NAME_R "cpsr" +#define IRQMASK_REG_NAME_W "cpsr_c" +#define IRQMASK_I_BIT PSR_I_BIT +#endif + #if __LINUX_ARM_ARCH__ >= 6 static inline unsigned long arch_local_irq_save(void) @@ -15,7 +25,7 @@ static inline unsigned long arch_local_irq_save(void) unsigned long flags; asm volatile( - " mrs %0, cpsr @ arch_local_irq_save\n" + " mrs %0, " IRQMASK_REG_NAME_R " @ arch_local_irq_save\n" " cpsid i" : "=r" (flags) : : "memory", "cc"); return flags; @@ -129,7 +139,7 @@ static inline unsigned long arch_local_save_flags(void) { unsigned long flags; asm volatile( - " mrs %0, cpsr @ local_save_flags" + " mrs %0, " IRQMASK_REG_NAME_R " @ local_save_flags" : "=r" (flags) : : "memory", "cc"); return flags; } @@ -140,7 +150,7 @@ static inline unsigned long arch_local_save_flags(void) static inline void arch_local_irq_restore(unsigned long flags) { asm volatile( - " msr cpsr_c, %0 @ local_irq_restore" + " msr " IRQMASK_REG_NAME_W ", %0 @ local_irq_restore" : : "r" (flags) : "memory", "cc"); @@ -148,8 +158,8 @@ static inline void arch_local_irq_restore(unsigned long flags) static inline int arch_irqs_disabled_flags(unsigned long flags) { - return flags & PSR_I_BIT; + return flags & IRQMASK_I_BIT; } -#endif -#endif +#endif /* ifdef __KERNEL__ */ +#endif /* ifndef __ASM_ARM_IRQFLAGS_H */ diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 3d52ee1bfb31..04c99f36ff7f 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -45,6 +45,7 @@ struct pt_regs { */ static inline int valid_user_regs(struct pt_regs *regs) { +#ifndef CONFIG_CPU_V7M unsigned long mode = regs->ARM_cpsr & MODE_MASK; /* @@ -67,6 +68,9 @@ static inline int valid_user_regs(struct pt_regs *regs) regs->ARM_cpsr |= USR_MODE; return 0; +#else /* ifndef CONFIG_CPU_V7M */ + return 1; +#endif } static inline long regs_return_value(struct pt_regs *regs) diff --git a/arch/arm/include/asm/system_info.h b/arch/arm/include/asm/system_info.h index dfd386d0c022..720ea0320a6d 100644 --- a/arch/arm/include/asm/system_info.h +++ b/arch/arm/include/asm/system_info.h @@ -11,6 +11,7 @@ #define CPU_ARCH_ARMv5TEJ 7 #define CPU_ARCH_ARMv6 8 #define CPU_ARCH_ARMv7 9 +#define CPU_ARCH_ARMv7M 10 #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/v7m.h b/arch/arm/include/asm/v7m.h new file mode 100644 index 000000000000..fa88d09fa3d9 --- /dev/null +++ b/arch/arm/include/asm/v7m.h @@ -0,0 +1,44 @@ +/* + * Common defines for v7m cpus + */ +#define V7M_SCS_ICTR IOMEM(0xe000e004) +#define V7M_SCS_ICTR_INTLINESNUM_MASK 0x0000000f + +#define BASEADDR_V7M_SCB IOMEM(0xe000ed00) + +#define V7M_SCB_CPUID 0x00 + +#define V7M_SCB_ICSR 0x04 +#define V7M_SCB_ICSR_PENDSVSET (1 << 28) +#define V7M_SCB_ICSR_PENDSVCLR (1 << 27) +#define V7M_SCB_ICSR_RETTOBASE (1 << 11) + +#define V7M_SCB_VTOR 0x08 + +#define V7M_SCB_SCR 0x10 +#define V7M_SCB_SCR_SLEEPDEEP (1 << 2) + +#define V7M_SCB_CCR 0x14 +#define V7M_SCB_CCR_STKALIGN (1 << 9) + +#define V7M_SCB_SHPR2 0x1c +#define V7M_SCB_SHPR3 0x20 + +#define V7M_SCB_SHCSR 0x24 +#define V7M_SCB_SHCSR_USGFAULTENA (1 << 18) +#define V7M_SCB_SHCSR_BUSFAULTENA (1 << 17) +#define V7M_SCB_SHCSR_MEMFAULTENA (1 << 16) + +#define V7M_xPSR_FRAMEPTRALIGN 0x00000200 +#define V7M_xPSR_EXCEPTIONNO 0x000001ff + +/* + * When branching to an address that has bits [31:28] == 0xf an exception return + * occurs. Bits [27:5] are reserved (SBOP). 
If the processor implements the FP + * extension Bit [4] defines if the exception frame has space allocated for FP + * state information, SBOP otherwise. Bit [3] defines the mode that is returned + * to (0 -> handler mode; 1 -> thread mode). Bit [2] defines which sp is used + * (0 -> msp; 1 -> psp). Bits [1:0] are fixed to 0b01. + */ +#define EXC_RET_STACK_MASK 0x00000004 +#define EXC_RET_THREADMODE_PROCESSSTACK 0xfffffffd diff --git a/arch/arm/include/uapi/asm/ptrace.h b/arch/arm/include/uapi/asm/ptrace.h index 96ee0929790f..5af0ed1b825a 100644 --- a/arch/arm/include/uapi/asm/ptrace.h +++ b/arch/arm/include/uapi/asm/ptrace.h @@ -34,28 +34,47 @@ /* * PSR bits + * Note on V7M there is no mode contained in the PSR */ #define USR26_MODE 0x00000000 #define FIQ26_MODE 0x00000001 #define IRQ26_MODE 0x00000002 #define SVC26_MODE 0x00000003 +#if defined(__KERNEL__) && defined(CONFIG_CPU_V7M) +/* + * Use 0 here to get code right that creates a userspace + * or kernel space thread. + */ +#define USR_MODE 0x00000000 +#define SVC_MODE 0x00000000 +#else #define USR_MODE 0x00000010 +#define SVC_MODE 0x00000013 +#endif #define FIQ_MODE 0x00000011 #define IRQ_MODE 0x00000012 -#define SVC_MODE 0x00000013 #define ABT_MODE 0x00000017 #define HYP_MODE 0x0000001a #define UND_MODE 0x0000001b #define SYSTEM_MODE 0x0000001f #define MODE32_BIT 0x00000010 #define MODE_MASK 0x0000001f -#define PSR_T_BIT 0x00000020 -#define PSR_F_BIT 0x00000040 -#define PSR_I_BIT 0x00000080 -#define PSR_A_BIT 0x00000100 -#define PSR_E_BIT 0x00000200 -#define PSR_J_BIT 0x01000000 -#define PSR_Q_BIT 0x08000000 + +#define V4_PSR_T_BIT 0x00000020 /* >= V4T, but not V7M */ +#define V7M_PSR_T_BIT 0x01000000 +#if defined(__KERNEL__) && defined(CONFIG_CPU_V7M) +#define PSR_T_BIT V7M_PSR_T_BIT +#else +/* for compatibility */ +#define PSR_T_BIT V4_PSR_T_BIT +#endif + +#define PSR_F_BIT 0x00000040 /* >= V4, but not V7M */ +#define PSR_I_BIT 0x00000080 /* >= V4, but not V7M */ +#define PSR_A_BIT 0x00000100 /* >= V6, but not V7M */ +#define PSR_E_BIT 0x00000200 /* >= V6, but not V7M */ +#define PSR_J_BIT 0x01000000 /* >= V5J, but not V7M */ +#define PSR_Q_BIT 0x08000000 /* >= V5E, including V7M */ #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 #define PSR_Z_BIT 0x40000000 diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 6a2e09c952c7..8812ce88f7a1 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -19,6 +19,7 @@ #include #include #include +#include /* * Kernel startup entry point. @@ -50,10 +51,13 @@ ENTRY(stext) setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode @ and irqs disabled -#ifndef CONFIG_CPU_CP15 - ldr r9, =CONFIG_PROCESSOR_ID -#else +#if defined(CONFIG_CPU_CP15) mrc p15, 0, r9, c0, c0 @ get processor id +#elif defined(CONFIG_CPU_V7M) + ldr r9, =BASEADDR_V7M_SCB + ldr r9, [r9, V7M_SCB_CPUID] +#else + ldr r9, =CONFIG_PROCESSOR_ID #endif bl __lookup_processor_type @ r5=procinfo r9=cpuid movs r10, r5 @ invalid processor (r5=0)? 
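As an aside (illustration only, not part of the patch; the helper name and sample values are made up), the EXC_RETURN encoding documented in asm/v7m.h above can be decoded like this:

#include <stdio.h>

/* Decode an EXC_RETURN value per the bit layout described in asm/v7m.h. */
static void decode_exc_ret(unsigned long lr)
{
	const char *mode  = (lr & 0x8) ? "thread" : "handler";	/* bit 3 */
	const char *stack = (lr & 0x4) ? "PSP" : "MSP";		/* bit 2 = EXC_RET_STACK_MASK */

	printf("EXC_RETURN %#010lx -> return to %s mode on %s\n", lr, mode, stack);
}

int main(void)
{
	decode_exc_ret(0xfffffffdUL);	/* EXC_RET_THREADMODE_PROCESSSTACK */
	decode_exc_ret(0xfffffff1UL);	/* return to handler mode on the main stack */
	return 0;
}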
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1cc9e1796415..829124590e4c 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -128,7 +128,9 @@ struct stack { u32 und[3]; } ____cacheline_aligned; +#ifndef CONFIG_CPU_V7M static struct stack stacks[NR_CPUS]; +#endif char elf_platform[ELF_PLATFORM_SIZE]; EXPORT_SYMBOL(elf_platform); @@ -207,7 +209,7 @@ static const char *proc_arch[] = { "5TEJ", "6TEJ", "7", - "?(11)", + "7M", "?(12)", "?(13)", "?(14)", @@ -216,6 +218,12 @@ static const char *proc_arch[] = { "?(17)", }; +#ifdef CONFIG_CPU_V7M +static int __get_cpu_architecture(void) +{ + return CPU_ARCH_ARMv7M; +} +#else static int __get_cpu_architecture(void) { int cpu_arch; @@ -248,6 +256,7 @@ static int __get_cpu_architecture(void) return cpu_arch; } +#endif int __pure cpu_architecture(void) { @@ -293,7 +302,9 @@ static void __init cacheid_init(void) { unsigned int arch = cpu_architecture(); - if (arch >= CPU_ARCH_ARMv6) { + if (arch == CPU_ARCH_ARMv7M) { + cacheid = 0; + } else if (arch >= CPU_ARCH_ARMv6) { unsigned int cachetype = read_cpuid_cachetype(); if ((cachetype & (7 << 29)) == 4 << 29) { /* ARMv7 register format */ @@ -375,6 +386,7 @@ static void __init feat_v6_fixup(void) */ void cpu_init(void) { +#ifndef CONFIG_CPU_V7M unsigned int cpu = smp_processor_id(); struct stack *stk = &stacks[cpu]; @@ -425,6 +437,7 @@ void cpu_init(void) "I" (offsetof(struct stack, und[0])), PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE) : "r14"); +#endif } int __cpu_logical_map[NR_CPUS]; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 1c089119b2d7..ec64571462d2 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -819,6 +819,7 @@ static void __init kuser_get_tls_init(unsigned long vectors) void __init early_trap_init(void *vectors_base) { +#ifndef CONFIG_CPU_V7M unsigned long vectors = (unsigned long)vectors_base; extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; @@ -850,4 +851,11 @@ void __init early_trap_init(void *vectors_base) flush_icache_range(vectors, vectors + PAGE_SIZE); modify_domain(DOMAIN_USER, DOMAIN_CLIENT); +#else /* ifndef CONFIG_CPU_V7M */ + /* + * on V7-M there is no need to copy the vector table to a dedicated + * memory area. The address is configurable and so a table in the kernel + * image can be used. + */ +#endif } diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S new file mode 100644 index 000000000000..8e12ddca0031 --- /dev/null +++ b/arch/arm/mm/cache-nop.S @@ -0,0 +1,50 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include + +#include "proc-macros.S" + +ENTRY(nop_flush_icache_all) + mov pc, lr +ENDPROC(nop_flush_icache_all) + + .globl nop_flush_kern_cache_all + .equ nop_flush_kern_cache_all, nop_flush_icache_all + + .globl nop_flush_kern_cache_louis + .equ nop_flush_kern_cache_louis, nop_flush_icache_all + + .globl nop_flush_user_cache_all + .equ nop_flush_user_cache_all, nop_flush_icache_all + + .globl nop_flush_user_cache_range + .equ nop_flush_user_cache_range, nop_flush_icache_all + + .globl nop_coherent_kern_range + .equ nop_coherent_kern_range, nop_flush_icache_all + +ENTRY(nop_coherent_user_range) + mov r0, 0 + mov pc, lr +ENDPROC(nop_coherent_user_range) + + .globl nop_flush_kern_dcache_area + .equ nop_flush_kern_dcache_area, nop_flush_icache_all + + .globl nop_dma_flush_range + .equ nop_dma_flush_range, nop_flush_icache_all + + .globl nop_dma_map_area + .equ nop_dma_map_area, nop_flush_icache_all + + .globl nop_dma_unmap_area + .equ nop_dma_unmap_area, nop_flush_icache_all + + __INITDATA + + @ define struct cpu_cache_fns (see and proc-macros.S) + define_cache_functions nop diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index d51225f90ae2..dd3a6c670f08 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -20,12 +20,19 @@ void __init arm_mm_memblock_reserve(void) { +#ifndef CONFIG_CPU_V7M /* * Register the exception vector page. * some architectures which the DRAM is the exception vector to trap, * alloc_page breaks with error, although it is not NULL, but "0." */ memblock_reserve(CONFIG_VECTORS_BASE, PAGE_SIZE); +#else /* ifndef CONFIG_CPU_V7M */ + /* + * There is no dedicated vector page on V7-M. So nothing needs to be + * reserved here. + */ +#endif } void __init sanity_check_meminfo(void) diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S new file mode 100644 index 000000000000..000499cfceb3 --- /dev/null +++ b/arch/arm/mm/proc-v7m.S @@ -0,0 +1,157 @@ +/* + * linux/arch/arm/mm/proc-v7m.S + * + * Copyright (C) 2008 ARM Ltd. + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv7-M processor support. + */ +#include +#include +#include +#include "proc-macros.S" + +ENTRY(cpu_v7m_proc_init) + mov pc, lr +ENDPROC(cpu_v7m_proc_init) + +ENTRY(cpu_v7m_proc_fin) + mov pc, lr +ENDPROC(cpu_v7m_proc_fin) + +/* + * cpu_v7m_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * - loc - location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_v7m_reset) + mov pc, r0 +ENDPROC(cpu_v7m_reset) + +/* + * cpu_v7m_do_idle() + * + * Idle the processor (eg, wait for interrupt). + * + * IRQs are already disabled. + */ +ENTRY(cpu_v7m_do_idle) + wfi + mov pc, lr +ENDPROC(cpu_v7m_do_idle) + +ENTRY(cpu_v7m_dcache_clean_area) + mov pc, lr +ENDPROC(cpu_v7m_dcache_clean_area) + +/* + * There is no MMU, so here is nothing to do. 
+ */ +ENTRY(cpu_v7m_switch_mm) + mov pc, lr +ENDPROC(cpu_v7m_switch_mm) + +.globl cpu_v7m_suspend_size +.equ cpu_v7m_suspend_size, 0 + +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v7m_do_suspend) + mov pc, lr +ENDPROC(cpu_v7m_do_suspend) + +ENTRY(cpu_v7m_do_resume) + mov pc, lr +ENDPROC(cpu_v7m_do_resume) +#endif + + .section ".text.init", #alloc, #execinstr + +/* + * __v7m_setup + * + * This should be able to cover all ARMv7-M cores. + */ +__v7m_setup: + @ Configure the vector table base address + ldr r0, =BASEADDR_V7M_SCB + ldr r12, =vector_table + str r12, [r0, V7M_SCB_VTOR] + + @ enable UsageFault, BusFault and MemManage fault. + ldr r5, [r0, #V7M_SCB_SHCSR] + orr r5, #(V7M_SCB_SHCSR_USGFAULTENA | V7M_SCB_SHCSR_BUSFAULTENA | V7M_SCB_SHCSR_MEMFAULTENA) + str r5, [r0, #V7M_SCB_SHCSR] + + @ Lower the priority of the SVC and PendSV exceptions + mov r5, #0x80000000 + str r5, [r0, V7M_SCB_SHPR2] @ set SVC priority + mov r5, #0x00800000 + str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority + + @ SVC to run the kernel in this mode + adr r1, BSYM(1f) + ldr r5, [r12, #11 * 4] @ read the SVC vector entry + str r1, [r12, #11 * 4] @ write the temporary SVC vector entry + mov r6, lr @ save LR + mov r7, sp @ save SP + ldr sp, =__v7m_setup_stack_top + cpsie i + svc #0 +1: cpsid i + str r5, [r12, #11 * 4] @ restore the original SVC vector entry + mov lr, r6 @ restore LR + mov sp, r7 @ restore SP + + @ Special-purpose control register + mov r1, #1 + msr control, r1 @ Thread mode has unpriviledged access + + @ Configure the System Control Register to ensure 8-byte stack alignment + @ Note the STKALIGN bit is either RW or RAO. + ldr r12, [r0, V7M_SCB_CCR] @ system control register + orr r12, #V7M_SCB_CCR_STKALIGN + str r12, [r0, V7M_SCB_CCR] + mov pc, lr +ENDPROC(__v7m_setup) + + define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + string cpu_arch_name, "armv7m" + string cpu_elf_name "v7m" + string cpu_v7m_name "ARMv7-M" + + .section ".proc.info.init", #alloc, #execinstr + + /* + * Match any ARMv7-M processor core. + */ + .type __v7m_proc_info, #object +__v7m_proc_info: + .long 0x000f0000 @ Required ID value + .long 0x000f0000 @ Mask for ID + .long 0 @ proc_info_list.__cpu_mm_mmu_flags + .long 0 @ proc_info_list.__cpu_io_mmu_flags + b __v7m_setup @ proc_info_list.__cpu_flush + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_IDIVT + .long cpu_v7m_name + .long v7m_processor_functions @ proc_info_list.proc + .long 0 @ proc_info_list.tlb + .long 0 @ proc_info_list.user + .long nop_cache_fns @ proc_info_list.cache + .size __v7m_proc_info, . - __v7m_proc_info + +__v7m_setup_stack: + .space 4 * 8 @ 8 registers +__v7m_setup_stack_top: -- cgit v1.2.3 From 19c4d593f0b4bd46f6d923a3e514719982a22058 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 21 May 2010 18:06:42 +0100 Subject: ARM: ARMv7-M: Add support for exception handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements the exception handling for the ARMv7-M architecture (pretty different from the A or R profiles). It bases on work done earlier by Catalin for 2.6.33 but was nearly completely rewritten to use a pt_regs layout compatible to the A profile. 
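For orientation (an illustrative definition, not part of the patch), the basic frame that a v7-M core pushes automatically on exception entry looks roughly like this, lowest address first; the entry macros added below rebuild a full A-profile-compatible struct pt_regs around it:

#include <stdint.h>

struct v7m_basic_exception_frame {
	uint32_t r0;
	uint32_t r1;
	uint32_t r2;
	uint32_t r3;
	uint32_t r12;
	uint32_t lr;			/* r14 at the point of the exception */
	uint32_t return_address;	/* execution resumes here on exception return */
	uint32_t xpsr;			/* bit 9 set if the core inserted an aligner word */
};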
Signed-off-by: Catalin Marinas Reviewed-by: Jonathan Austin Tested-by: Jonathan Austin Signed-off-by: Uwe Kleine-König --- arch/arm/kernel/entry-common.S | 4 ++ arch/arm/kernel/entry-header.S | 124 +++++++++++++++++++++++++++++++++++ arch/arm/kernel/entry-v7m.S | 143 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 271 insertions(+) create mode 100644 arch/arm/kernel/entry-v7m.S (limited to 'arch/arm') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 3248cde504ed..c45e0027613b 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -339,6 +339,9 @@ ENDPROC(ftrace_stub) .align 5 ENTRY(vector_swi) +#ifdef CONFIG_CPU_V7M + v7m_exception_entry +#else sub sp, sp, #S_FRAME_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 ARM( add r8, sp, #S_PC ) @@ -349,6 +352,7 @@ ENTRY(vector_swi) str lr, [sp, #S_PC] @ Save calling PC str r8, [sp, #S_PSR] @ Save CPSR str r0, [sp, #S_OLD_R0] @ Save OLD_R0 +#endif zero_fp /* diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 9a8531eadd3d..b02e1394dc54 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -5,6 +5,7 @@ #include #include #include +#include @ Bad Abort numbers @ ----------------- @@ -44,6 +45,116 @@ #endif .endm +#ifdef CONFIG_CPU_V7M +/* + * ARMv7-M exception entry/exit macros. + * + * xPSR, ReturnAddress(), LR (R14), R12, R3, R2, R1, and R0 are + * automatically saved on the current stack (32 words) before + * switching to the exception stack (SP_main). + * + * If exception is taken while in user mode, SP_main is + * empty. Otherwise, SP_main is aligned to 64 bit automatically + * (CCR.STKALIGN set). + * + * Linux assumes that the interrupts are disabled when entering an + * exception handler and it may BUG if this is not the case. Interrupts + * are disabled during entry and reenabled in the exit macro. + * + * v7m_exception_slow_exit is used when returning from SVC or PendSV. + * When returning to kernel mode, we don't return from exception. + */ + .macro v7m_exception_entry + @ determine the location of the registers saved by the core during + @ exception entry. Depending on the mode the cpu was in when the + @ exception happend that is either on the main or the process stack. + @ Bit 2 of EXC_RETURN stored in the lr register specifies which stack + @ was used. + tst lr, #EXC_RET_STACK_MASK + mrsne r12, psp + moveq r12, sp + + @ we cannot rely on r0-r3 and r12 matching the value saved in the + @ exception frame because of tail-chaining. So these have to be + @ reloaded. + ldmia r12!, {r0-r3} + + @ Linux expects to have irqs off. Do it here before taking stack space + cpsid i + + sub sp, #S_FRAME_SIZE-S_IP + stmdb sp!, {r0-r11} + + @ load saved r12, lr, return address and xPSR. + @ r0-r7 are used for signals and never touched from now on. Clobbering + @ r8-r12 is OK. + mov r9, r12 + ldmia r9!, {r8, r10-r12} + + @ calculate the original stack pointer value. + @ r9 currently points to the memory location just above the auto saved + @ xPSR. + @ The cpu might automatically 8-byte align the stack. Bit 9 + @ of the saved xPSR specifies if stack aligning took place. In this case + @ another 32-bit value is included in the stack. 
+ + tst r12, V7M_xPSR_FRAMEPTRALIGN + addne r9, r9, #4 + + @ store saved r12 using str to have a register to hold the base for stm + str r8, [sp, #S_IP] + add r8, sp, #S_SP + @ store r13-r15, xPSR + stmia r8!, {r9-r12} + @ store old_r0 + str r0, [r8] + .endm + + /* + * PENDSV and SVCALL are configured to have the same exception + * priorities. As a kernel thread runs at SVCALL execution priority it + * can never be preempted and so we will never have to return to a + * kernel thread here. + */ + .macro v7m_exception_slow_exit ret_r0 + cpsid i + ldr lr, =EXC_RET_THREADMODE_PROCESSSTACK + + @ read original r12, sp, lr, pc and xPSR + add r12, sp, #S_IP + ldmia r12, {r1-r5} + + @ an exception frame is always 8-byte aligned. To tell the hardware if + @ the sp to be restored is aligned or not set bit 9 of the saved xPSR + @ accordingly. + tst r2, #4 + subne r2, r2, #4 + orrne r5, V7M_xPSR_FRAMEPTRALIGN + biceq r5, V7M_xPSR_FRAMEPTRALIGN + + @ write basic exception frame + stmdb r2!, {r1, r3-r5} + ldmia sp, {r1, r3-r5} + .if \ret_r0 + stmdb r2!, {r0, r3-r5} + .else + stmdb r2!, {r1, r3-r5} + .endif + + @ restore process sp + msr psp, r2 + + @ restore original r4-r11 + ldmia sp!, {r0-r11} + + @ restore main sp + add sp, sp, #S_FRAME_SIZE-S_IP + + cpsie i + bx lr + .endm +#endif /* CONFIG_CPU_V7M */ + @ @ Store/load the USER SP and LR registers by switching to the SYS @ mode. Useful in Thumb-2 mode where "stm/ldm rd, {sp, lr}^" is not @@ -131,6 +242,18 @@ rfeia sp! .endm +#ifdef CONFIG_CPU_V7M + /* + * Note we don't need to do clrex here as clearing the local monitor is + * part of each exception entry and exit sequence. + */ + .macro restore_user_regs, fast = 0, offset = 0 + .if \offset + add sp, #\offset + .endif + v7m_exception_slow_exit ret_r0 = \fast + .endm +#else /* ifdef CONFIG_CPU_V7M */ .macro restore_user_regs, fast = 0, offset = 0 clrex @ clear the exclusive monitor mov r2, sp @@ -147,6 +270,7 @@ add sp, sp, #S_FRAME_SIZE - S_SP movs pc, lr @ return & move spsr_svc into cpsr .endm +#endif /* ifdef CONFIG_CPU_V7M / else */ .macro get_thread_info, rd mov \rd, sp diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S new file mode 100644 index 000000000000..e00621f1403f --- /dev/null +++ b/arch/arm/kernel/entry-v7m.S @@ -0,0 +1,143 @@ +/* + * linux/arch/arm/kernel/entry-v7m.S + * + * Copyright (C) 2008 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Low-level vector interface routines for the ARMv7-M architecture + */ +#include +#include +#include +#include + +#include + +#include "entry-header.S" + +#ifdef CONFIG_TRACE_IRQFLAGS +#error "CONFIG_TRACE_IRQFLAGS not supported on the current ARMv7M implementation" +#endif + +__invalid_entry: + v7m_exception_entry + adr r0, strerr + mrs r1, ipsr + mov r2, lr + bl printk + mov r0, sp + bl show_regs +1: b 1b +ENDPROC(__invalid_entry) + +strerr: .asciz "\nUnhandled exception: IPSR = %08lx LR = %08lx\n" + + .align 2 +__irq_entry: + v7m_exception_entry + + @ + @ Invoke the IRQ handler + @ + mrs r0, ipsr + ldr r1, =V7M_xPSR_EXCEPTIONNO + and r0, r1 + sub r0, #16 + mov r1, sp + stmdb sp!, {lr} + @ routine called with r0 = irq number, r1 = struct pt_regs * + bl nvic_do_IRQ + + pop {lr} + @ + @ Check for any pending work if returning to user + @ + ldr r1, =BASEADDR_V7M_SCB + ldr r0, [r1, V7M_SCB_ICSR] + tst r0, V7M_SCB_ICSR_RETTOBASE + beq 2f + + get_thread_info tsk + ldr r2, [tsk, #TI_FLAGS] + tst r2, #_TIF_WORK_MASK + beq 2f @ no work pending + mov r0, #V7M_SCB_ICSR_PENDSVSET + str r0, [r1, V7M_SCB_ICSR] @ raise PendSV + +2: + @ registers r0-r3 and r12 are automatically restored on exception + @ return. r4-r7 were not clobbered in v7m_exception_entry so for + @ correctness they don't need to be restored. So only r8-r11 must be + @ restored here. The easiest way to do so is to restore r0-r7, too. + ldmia sp!, {r0-r11} + add sp, #S_FRAME_SIZE-S_IP + cpsie i + bx lr +ENDPROC(__irq_entry) + +__pendsv_entry: + v7m_exception_entry + + ldr r1, =BASEADDR_V7M_SCB + mov r0, #V7M_SCB_ICSR_PENDSVCLR + str r0, [r1, V7M_SCB_ICSR] @ clear PendSV + + @ execute the pending work, including reschedule + get_thread_info tsk + mov why, #0 + b ret_to_user +ENDPROC(__pendsv_entry) + +/* + * Register switch for ARMv7-M processors. + * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info + * previous and next are guaranteed not to be the same. + */ +ENTRY(__switch_to) + .fnstart + .cantunwind + add ip, r1, #TI_CPU_SAVE + stmia ip!, {r4 - r11} @ Store most regs on stack + str sp, [ip], #4 + str lr, [ip], #4 + mov r5, r0 + add r4, r2, #TI_CPU_SAVE + ldr r0, =thread_notify_head + mov r1, #THREAD_NOTIFY_SWITCH + bl atomic_notifier_call_chain + mov ip, r4 + mov r0, r5 + ldmia ip!, {r4 - r11} @ Load all regs saved previously + ldr sp, [ip] + ldr pc, [ip, #4]! 
+ .fnend +ENDPROC(__switch_to) + + .data + .align 8 +/* + * Vector table (64 words => 256 bytes natural alignment) + */ +ENTRY(vector_table) + .long 0 @ 0 - Reset stack pointer + .long __invalid_entry @ 1 - Reset + .long __invalid_entry @ 2 - NMI + .long __invalid_entry @ 3 - HardFault + .long __invalid_entry @ 4 - MemManage + .long __invalid_entry @ 5 - BusFault + .long __invalid_entry @ 6 - UsageFault + .long __invalid_entry @ 7 - Reserved + .long __invalid_entry @ 8 - Reserved + .long __invalid_entry @ 9 - Reserved + .long __invalid_entry @ 10 - Reserved + .long vector_swi @ 11 - SVCall + .long __invalid_entry @ 12 - Debug Monitor + .long __invalid_entry @ 13 - Reserved + .long __pendsv_entry @ 14 - PendSV + .long __invalid_entry @ 15 - SysTick + .rept 64 - 16 + .long __irq_entry @ 16..64 - External Interrupts + .endr -- cgit v1.2.3 From 4477ca45fb368880bf77b10ed3b24b03f0cc82da Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 21 Mar 2013 21:02:37 +0100 Subject: ARM: ARMv7-M: Allow the building of new kernel port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch modifies the required Kconfig and Makefile files to allow the building of kernel for Cortex-M3. Signed-off-by: Catalin Marinas Reviewed-by: Jonathan Austin Tested-by: Jonathan Austin Signed-off-by: Uwe Kleine-König --- arch/arm/Kconfig | 4 ++-- arch/arm/Kconfig-nommu | 2 +- arch/arm/Makefile | 1 + arch/arm/kernel/Makefile | 8 +++++++- arch/arm/mm/Kconfig | 21 ++++++++++++++++++++- arch/arm/mm/Makefile | 2 ++ 6 files changed, 33 insertions(+), 5 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index dedf02b6f322..7c8c6079f948 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -9,7 +9,7 @@ config ARM select BUILDTIME_EXTABLE_SORT if MMU select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN && MMU - select GENERIC_ATOMIC64 if (CPU_V6 || !CPU_32v6K || !AEABI) + select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI) select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW @@ -1685,7 +1685,7 @@ config SCHED_HRTICK config THUMB2_KERNEL bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY - depends on CPU_V7 && !CPU_V6 && !CPU_V6K + depends on (CPU_V7 || CPU_V7M) && !CPU_V6 && !CPU_V6K default y if CPU_THUMBONLY select AEABI select ARM_ASM_UNIFIED diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index 2cef8e13f9f8..c859495da480 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -28,7 +28,7 @@ config FLASH_SIZE config PROCESSOR_ID hex 'Hard wire the processor ID' default 0x00007700 - depends on !CPU_CP15 + depends on !(CPU_CP15 || CPU_V7M) help If processor has no CP15 register, this processor ID is used instead of the auto-probing which utilizes the register. diff --git a/arch/arm/Makefile b/arch/arm/Makefile index ee4605f400b0..f11b8da17672 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -59,6 +59,7 @@ comma = , # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. 
+arch-$(CONFIG_CPU_32v7M) :=-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) # Only override the compiler option if ARMv6. The ARMv6K extensions are diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5f3338eacad2..00d703c49f82 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -15,7 +15,7 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. -obj-y := elf.o entry-armv.o entry-common.o irq.o opcodes.o \ +obj-y := elf.o entry-common.o irq.o opcodes.o \ process.o ptrace.o return_address.o sched_clock.o \ setup.o signal.o stacktrace.o sys_arm.o time.o traps.o @@ -23,6 +23,12 @@ obj-$(CONFIG_ATAGS) += atags_parse.o obj-$(CONFIG_ATAGS_PROC) += atags_proc.o obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += atags_compat.o +ifeq ($(CONFIG_CPU_V7M),y) +obj-y += entry-v7m.o +else +obj-y += entry-armv.o +endif + obj-$(CONFIG_OC_ETM) += etm.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index cb812a13e299..cce78b070825 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -397,6 +397,15 @@ config CPU_V7 select CPU_PABRT_V7 select CPU_TLB_V7 if MMU +# ARMv7M +config CPU_V7M + bool + select CPU_32v7M + select CPU_ABRT_NOMMU + select CPU_CACHE_NOP + select CPU_PABRT_LEGACY + select CPU_THUMBONLY + config CPU_THUMBONLY bool # There are no CPUs available with MMU that don't implement an ARM ISA: @@ -441,6 +450,9 @@ config CPU_32v6K config CPU_32v7 bool +config CPU_32v7M + bool + # The abort model config CPU_ABRT_NOMMU bool @@ -494,6 +506,9 @@ config CPU_CACHE_V6 config CPU_CACHE_V7 bool +config CPU_CACHE_NOP + bool + config CPU_CACHE_VIVT bool @@ -616,7 +631,11 @@ config ARCH_DMA_ADDR_T_64BIT config ARM_THUMB bool "Support Thumb user binaries" if !CPU_THUMBONLY - depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || CPU_V7 || CPU_FEROCEON + depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || \ + CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || \ + CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || \ + CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || \ + CPU_V7 || CPU_FEROCEON || CPU_V7M default y help Say Y if you want to include kernel support for running user space diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 4e333fa2756f..317b57559340 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o +obj-$(CONFIG_CPU_CACHE_NOP) += cache-nop.o AFLAGS_cache-v6.o :=-Wa,-march=armv6 AFLAGS_cache-v7.o :=-Wa,-march=armv7-a @@ -88,6 +89,7 @@ obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o obj-$(CONFIG_CPU_V6) += proc-v6.o obj-$(CONFIG_CPU_V6K) += proc-v6.o obj-$(CONFIG_CPU_V7) += proc-v7.o +obj-$(CONFIG_CPU_V7M) += proc-v7m.o AFLAGS_proc-v6.o :=-Wa,-march=armv6 AFLAGS_proc-v7.o :=-Wa,-march=armv7-a -- cgit v1.2.3 From 6fae9cdafc92ae9958a3a45dd68205f72e3ad900 Mon Sep 17 00:00:00 2001 From: Uwe 
Kleine-König Date: Mon, 6 May 2013 11:35:42 +0200 Subject: ARM: ARMv7-M: implement read_cpuid_ext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On v7-M the extended cpuid registers are not available from CP15 but they are memory mapped in the System Control Space. There isn't an equivalent available for CPUID_{CACHETYPE,TCM,TLBTYPE,MPIDR}. Signed-off-by: Uwe Kleine-König --- arch/arm/include/asm/cputype.h | 38 ++++++++++++++++++++++++++++++++++---- arch/arm/mm/proc-v7m.S | 2 +- 2 files changed, 35 insertions(+), 5 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 4eb94a3add3c..ec635ff32f49 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -10,6 +10,22 @@ #define CPUID_TLBTYPE 3 #define CPUID_MPIDR 5 +#ifdef CONFIG_CPU_V7M +#define CPUID_EXT_PFR0 0x40 +#define CPUID_EXT_PFR1 0x44 +#define CPUID_EXT_DFR0 0x48 +#define CPUID_EXT_AFR0 0x4c +#define CPUID_EXT_MMFR0 0x50 +#define CPUID_EXT_MMFR1 0x54 +#define CPUID_EXT_MMFR2 0x58 +#define CPUID_EXT_MMFR3 0x5c +#define CPUID_EXT_ISAR0 0x60 +#define CPUID_EXT_ISAR1 0x64 +#define CPUID_EXT_ISAR2 0x68 +#define CPUID_EXT_ISAR3 0x6c +#define CPUID_EXT_ISAR4 0x70 +#define CPUID_EXT_ISAR5 0x74 +#else #define CPUID_EXT_PFR0 "c1, 0" #define CPUID_EXT_PFR1 "c1, 1" #define CPUID_EXT_DFR0 "c1, 2" @@ -24,6 +40,7 @@ #define CPUID_EXT_ISAR3 "c2, 3" #define CPUID_EXT_ISAR4 "c2, 4" #define CPUID_EXT_ISAR5 "c2, 5" +#endif #define MPIDR_SMP_BITMASK (0x3 << 30) #define MPIDR_SMP_VALUE (0x2 << 30) @@ -79,7 +96,23 @@ extern unsigned int processor_id; __val; \ }) -#else /* ifdef CONFIG_CPU_CP15 */ +#elif defined(CONFIG_CPU_V7M) + +#include +#include + +#define read_cpuid(reg) \ + ({ \ + WARN_ON_ONCE(1); \ + 0; \ + }) + +static inline unsigned int __attribute_const__ read_cpuid_ext(unsigned offset) +{ + return readl(BASEADDR_V7M_SCB + offset); +} + +#else /* ifdef CONFIG_CPU_CP15 / elif defined (CONFIG_CPU_V7M) */ /* * read_cpuid and read_cpuid_ext should only ever be called on machines that @@ -108,9 +141,6 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void) #elif defined(CONFIG_CPU_V7M) -#include -#include - static inline unsigned int __attribute_const__ read_cpuid_id(void) { return readl(BASEADDR_V7M_SCB + V7M_SCB_CPUID); diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 000499cfceb3..0c93588fcb91 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -144,7 +144,7 @@ __v7m_proc_info: b __v7m_setup @ proc_info_list.__cpu_flush .long cpu_arch_name .long cpu_elf_name - .long HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_IDIVT + .long HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT .long cpu_v7m_name .long v7m_processor_functions @ proc_info_list.proc .long 0 @ proc_info_list.tlb -- cgit v1.2.3 From 05774088391c7430f6a4c1d5d18196ef17bb3ba9 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 21 May 2013 14:24:11 +0000 Subject: arm: introduce psci_smp_ops Rename virt_smp_ops to psci_smp_ops and move them to arch/arm/kernel/psci_smp.c. Remove mach-virt/platsmp.c, now unused. Compile psci_smp if CONFIG_ARM_PSCI and CONFIG_SMP. Add a cpu_die smp_op based on psci_ops.cpu_off. Initialize PSCI before setting smp_ops in setup_arch. If PSCI is available on the platform, prefer psci_smp_ops over the platform smp_ops. 
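In simplified form (a restatement of the setup.c hunk further below, not new code), the resulting selection order in setup_arch() is:

	psci_init();
#ifdef CONFIG_SMP
	if (is_smp()) {
		if (psci_smp_available())
			smp_set_ops(&psci_smp_ops);	/* prefer firmware-provided PSCI ops */
		else if (mdesc->smp)
			smp_set_ops(mdesc->smp);	/* otherwise fall back to the platform ops */
		smp_init_cpus();
	}
#endif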
Signed-off-by: Stefano Stabellini Acked-by: Will Deacon CC: arnd@arndb.de CC: marc.zyngier@arm.com CC: linux@arm.linux.org.uk CC: nico@linaro.org CC: rob.herring@calxeda.com --- arch/arm/include/asm/psci.h | 9 +++++ arch/arm/kernel/Makefile | 5 ++- arch/arm/kernel/psci.c | 7 ++-- arch/arm/kernel/psci_smp.c | 84 ++++++++++++++++++++++++++++++++++++++++++++ arch/arm/kernel/setup.c | 7 +++- arch/arm/mach-virt/Makefile | 1 - arch/arm/mach-virt/platsmp.c | 50 -------------------------- arch/arm/mach-virt/virt.c | 3 -- 8 files changed, 106 insertions(+), 60 deletions(-) create mode 100644 arch/arm/kernel/psci_smp.c delete mode 100644 arch/arm/mach-virt/platsmp.c (limited to 'arch/arm') diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index ce0dbe7c1625..c4ae171850f8 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -32,5 +32,14 @@ struct psci_operations { }; extern struct psci_operations psci_ops; +extern struct smp_operations psci_smp_ops; + +#ifdef CONFIG_ARM_PSCI +void psci_init(void); +bool psci_smp_available(void); +#else +static inline void psci_init(void) { } +static inline bool psci_smp_available(void) { return false; } +#endif #endif /* __ASM_ARM_PSCI_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5f3338eacad2..dd9d90ab65d0 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -82,6 +82,9 @@ obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o -obj-$(CONFIG_ARM_PSCI) += psci.o +ifeq ($(CONFIG_ARM_PSCI),y) +obj-y += psci.o +obj-$(CONFIG_SMP) += psci_smp.o +endif extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index 36531643cc2c..46931880093d 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -158,7 +158,7 @@ static const struct of_device_id psci_of_match[] __initconst = { {}, }; -static int __init psci_init(void) +void __init psci_init(void) { struct device_node *np; const char *method; @@ -166,7 +166,7 @@ static int __init psci_init(void) np = of_find_matching_node(NULL, psci_of_match); if (!np) - return 0; + return; pr_info("probing function IDs from device-tree\n"); @@ -206,6 +206,5 @@ static int __init psci_init(void) out_put_node: of_node_put(np); - return 0; + return; } -early_initcall(psci_init); diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c new file mode 100644 index 000000000000..23a11424c568 --- /dev/null +++ b/arch/arm/kernel/psci_smp.c @@ -0,0 +1,84 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon + */ + +#include +#include +#include +#include + +#include +#include + +/* + * psci_smp assumes that the following is true about PSCI: + * + * cpu_suspend Suspend the execution on a CPU + * @state we don't currently describe affinity levels, so just pass 0. + * @entry_point the first instruction to be executed on return + * returns 0 success, < 0 on failure + * + * cpu_off Power down a CPU + * @state we don't currently describe affinity levels, so just pass 0. 
+ * no return on successful call + * + * cpu_on Power up a CPU + * @cpuid cpuid of target CPU, as from MPIDR + * @entry_point the first instruction to be executed on return + * returns 0 success, < 0 on failure + * + * migrate Migrate the context to a different CPU + * @cpuid cpuid of target CPU, as from MPIDR + * returns 0 success, < 0 on failure + * + */ + +extern void secondary_startup(void); + +static int __cpuinit psci_boot_secondary(unsigned int cpu, + struct task_struct *idle) +{ + if (psci_ops.cpu_on) + return psci_ops.cpu_on(cpu_logical_map(cpu), + __pa(secondary_startup)); + return -ENODEV; +} + +#ifdef CONFIG_HOTPLUG_CPU +void __ref psci_cpu_die(unsigned int cpu) +{ + const struct psci_power_state ps = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + if (psci_ops.cpu_off) + psci_ops.cpu_off(ps); + + /* We should never return */ + panic("psci: cpu %d failed to shutdown\n", cpu); +} +#else +#define psci_cpu_die NULL +#endif + +bool __init psci_smp_available(void) +{ + /* is cpu_on available at least? */ + return (psci_ops.cpu_on != NULL); +} + +struct smp_operations __initdata psci_smp_ops = { + .smp_boot_secondary = psci_boot_secondary, + .cpu_die = psci_cpu_die, +}; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1522c7ae31b0..8f7a6e9926a8 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -796,9 +797,13 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); arm_dt_init_cpu_maps(); + psci_init(); #ifdef CONFIG_SMP if (is_smp()) { - smp_set_ops(mdesc->smp); + if (psci_smp_available()) + smp_set_ops(&psci_smp_ops); + else if (mdesc->smp) + smp_set_ops(mdesc->smp); smp_init_cpus(); } #endif diff --git a/arch/arm/mach-virt/Makefile b/arch/arm/mach-virt/Makefile index 042afc1f8c44..7ddbfa60227f 100644 --- a/arch/arm/mach-virt/Makefile +++ b/arch/arm/mach-virt/Makefile @@ -3,4 +3,3 @@ # obj-y := virt.o -obj-$(CONFIG_SMP) += platsmp.o diff --git a/arch/arm/mach-virt/platsmp.c b/arch/arm/mach-virt/platsmp.c deleted file mode 100644 index f4143f5bfa5b..000000000000 --- a/arch/arm/mach-virt/platsmp.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Dummy Virtual Machine - does what it says on the tin. - * - * Copyright (C) 2012 ARM Ltd - * Author: Will Deacon - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include -#include - -#include -#include - -extern void secondary_startup(void); - -static void __init virt_smp_init_cpus(void) -{ -} - -static void __init virt_smp_prepare_cpus(unsigned int max_cpus) -{ -} - -static int __cpuinit virt_boot_secondary(unsigned int cpu, - struct task_struct *idle) -{ - if (psci_ops.cpu_on) - return psci_ops.cpu_on(cpu_logical_map(cpu), - __pa(secondary_startup)); - return -ENODEV; -} - -struct smp_operations __initdata virt_smp_ops = { - .smp_init_cpus = virt_smp_init_cpus, - .smp_prepare_cpus = virt_smp_prepare_cpus, - .smp_boot_secondary = virt_boot_secondary, -}; diff --git a/arch/arm/mach-virt/virt.c b/arch/arm/mach-virt/virt.c index 061f283f579e..a67d2dd5bb60 100644 --- a/arch/arm/mach-virt/virt.c +++ b/arch/arm/mach-virt/virt.c @@ -36,11 +36,8 @@ static const char *virt_dt_match[] = { NULL }; -extern struct smp_operations virt_smp_ops; - DT_MACHINE_START(VIRT, "Dummy Virtual Machine") .init_irq = irqchip_init, .init_machine = virt_init, - .smp = smp_ops(virt_smp_ops), .dt_compat = virt_dt_match, MACHINE_END -- cgit v1.2.3 From b382b940f821784107ca22de3455bb90e4512557 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Tue, 21 May 2013 13:40:51 +0000 Subject: ARM: Enable selection of SMP operations at boot time Add a new 'smp_init' hook to machine_desc so platforms can specify a function to be used to setup smp ops instead of having a statically defined value. The hook must return true when smp_ops are initialized. If false the static mdesc->smp_ops will be used by default. Add the definition of "bool" by including the linux/types.h file to asm/mach/arch.h and make it self-contained. Signed-off-by: Jon Medhurst Signed-off-by: Nicolas Pitre Signed-off-by: Stefano Stabellini Signed-off-by: Nicolas Ferre Reviewed-by: Santosh Shilimkar --- arch/arm/include/asm/mach/arch.h | 5 +++++ arch/arm/kernel/setup.c | 10 ++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 308ad7d6f98b..75bf07910b81 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -8,6 +8,8 @@ * published by the Free Software Foundation. 
*/ +#include + #ifndef __ASSEMBLY__ struct tag; @@ -16,8 +18,10 @@ struct pt_regs; struct smp_operations; #ifdef CONFIG_SMP #define smp_ops(ops) (&(ops)) +#define smp_init_ops(ops) (&(ops)) #else #define smp_ops(ops) (struct smp_operations *)NULL +#define smp_init_ops(ops) (bool (*)(void))NULL #endif struct machine_desc { @@ -41,6 +45,7 @@ struct machine_desc { unsigned char reserve_lp2 :1; /* never has lp2 */ char restart_mode; /* default restart mode */ struct smp_operations *smp; /* SMP operations */ + bool (*smp_init)(void); void (*fixup)(struct tag *, char **, struct meminfo *); void (*reserve)(void);/* reserve mem blocks */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 8f7a6e9926a8..2b3ba15408bd 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -800,10 +800,12 @@ void __init setup_arch(char **cmdline_p) psci_init(); #ifdef CONFIG_SMP if (is_smp()) { - if (psci_smp_available()) - smp_set_ops(&psci_smp_ops); - else if (mdesc->smp) - smp_set_ops(mdesc->smp); + if (!mdesc->smp_init || !mdesc->smp_init()) { + if (psci_smp_available()) + smp_set_ops(&psci_smp_ops); + else if (mdesc->smp) + smp_set_ops(mdesc->smp); + } smp_init_cpus(); } #endif -- cgit v1.2.3 From 926edcc747e2efb3c9add7ed4dbc4e7a3a959d02 Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Sun, 22 Jul 2012 13:40:38 -0400 Subject: ARM: LPAE: use signed arithmetic for mask definitions This patch applies to PAGE_MASK, PMD_MASK, and PGDIR_MASK, where forcing unsigned long math truncates the mask at the 32-bits. This clearly does bad things on PAE systems. This patch fixes this problem by defining these masks as signed quantities. We then rely on sign extension to do the right thing. Signed-off-by: Cyril Chemparathy Signed-off-by: Vitaly Andrianov Reviewed-by: Nicolas Pitre Reviewed-by: Catalin Marinas Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/include/asm/page.h | 2 +- arch/arm/include/asm/pgtable-3level.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index 812a4944e783..6363f3d1d505 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -13,7 +13,7 @@ /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 86b8fe398b95..5b85b218b0f0 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -48,16 +48,16 @@ #define PMD_SHIFT 21 #define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) +#define PMD_MASK (~((1 << PMD_SHIFT) - 1)) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PGDIR_MASK (~((1 << PGDIR_SHIFT) - 1)) /* * section address mask and size definitions. 
*/ #define SECTION_SHIFT 21 #define SECTION_SIZE (1UL << SECTION_SHIFT) -#define SECTION_MASK (~(SECTION_SIZE-1)) +#define SECTION_MASK (~((1 << SECTION_SHIFT) - 1)) #define USER_PTRS_PER_PGD (PAGE_OFFSET / PGDIR_SIZE) -- cgit v1.2.3 From 20d6956d8cd2452cec0889ff040f18afc03c2e6b Mon Sep 17 00:00:00 2001 From: Vitaly Andrianov Date: Tue, 10 Jul 2012 14:41:17 -0400 Subject: ARM: LPAE: use phys_addr_t in alloc_init_pud() This patch fixes the alloc_init_pud() function to use phys_addr_t instead of unsigned long when passing in the phys argument. This is an extension to commit 97092e0c56830457af0639f6bd904537a150ea4a (ARM: pgtable: use phys_addr_t for physical addresses), which applied similar changes elsewhere in the ARM memory management code. Signed-off-by: Vitaly Andrianov Signed-off-by: Cyril Chemparathy Acked-by: Nicolas Pitre Acked-by: Catalin Marinas Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/mm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e0d8565671a6..b7ce65a82371 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -673,7 +673,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, } static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys, const struct mem_type *type) + unsigned long end, phys_addr_t phys, + const struct mem_type *type) { pud_t *pud = pud_offset(pgd, addr); unsigned long next; -- cgit v1.2.3 From 56bc628666b39dc8cb395c7686d8c032efd731f4 Mon Sep 17 00:00:00 2001 From: Vitaly Andrianov Date: Thu, 21 Jun 2012 08:09:05 -0400 Subject: ARM: LPAE: use phys_addr_t in free_memmap() The free_memmap() was mistakenly using unsigned long type to represent physical addresses. This breaks on PAE systems where memory could be placed above the 32-bit addressible limit. This patch fixes this function to properly use phys_addr_t instead. Signed-off-by: Vitaly Andrianov Signed-off-by: Cyril Chemparathy Acked-by: Nicolas Pitre Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/mm/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9a5cdc01fcdf..68c914e8544e 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -442,7 +442,7 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) { struct page *start_pg, *end_pg; - unsigned long pg, pgend; + phys_addr_t pg, pgend; /* * Convert start_pfn/end_pfn to a struct page pointer. @@ -454,8 +454,8 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn) * Convert to physical addresses, and * round start upwards and end downwards. */ - pg = (unsigned long)PAGE_ALIGN(__pa(start_pg)); - pgend = (unsigned long)__pa(end_pg) & PAGE_MASK; + pg = PAGE_ALIGN(__pa(start_pg)); + pgend = __pa(end_pg) & PAGE_MASK; /* * If there are free pages between these, -- cgit v1.2.3 From de22cc6e33449d8d6fb339619e32138ea4fcc2a4 Mon Sep 17 00:00:00 2001 From: Vitaly Andrianov Date: Fri, 22 Jun 2012 14:26:04 -0400 Subject: ARM: LPAE: use phys_addr_t for initrd location This patch fixes the initrd setup code to use phys_addr_t instead of assuming 32-bit addressing. Without this we cannot boot on systems where initrd is located above the 4G physical address limit. 
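To make the truncation concrete (a stand-alone illustration, not part of this patch or the earlier mask patch; uint32_t stands in for the 32-bit unsigned long used on ARM):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t phys_addr_t;	/* LPAE physical addresses need more than 32 bits */

#define PAGE_SHIFT	12
#define PAGE_MASK_OLD	(~(uint32_t)((1UL << PAGE_SHIFT) - 1))	/* unsigned: zero-extends when widened */
#define PAGE_MASK_NEW	(~((1 << PAGE_SHIFT) - 1))		/* signed: sign-extends when widened */

int main(void)
{
	phys_addr_t pa = 0x880001234ULL;	/* above the 4G limit */

	printf("unsigned mask: %#llx\n", (unsigned long long)(pa & PAGE_MASK_OLD));	/* 0x80001000, top bits lost */
	printf("signed mask:   %#llx\n", (unsigned long long)(pa & PAGE_MASK_NEW));	/* 0x880001000 */
	return 0;
}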
Signed-off-by: Vitaly Andrianov Signed-off-by: Cyril Chemparathy Acked-by: Nicolas Pitre Acked-by: Catalin Marinas Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/mm/init.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 68c914e8544e..2ffee02d1d5c 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -36,12 +36,13 @@ #include "mm.h" -static unsigned long phys_initrd_start __initdata = 0; +static phys_addr_t phys_initrd_start __initdata = 0; static unsigned long phys_initrd_size __initdata = 0; static int __init early_initrd(char *p) { - unsigned long start, size; + phys_addr_t start; + unsigned long size; char *endp; start = memparse(p, &endp); @@ -350,14 +351,14 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) #ifdef CONFIG_BLK_DEV_INITRD if (phys_initrd_size && !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { - pr_err("INITRD: 0x%08lx+0x%08lx is not a memory region - disabling initrd\n", - phys_initrd_start, phys_initrd_size); + pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); phys_initrd_start = phys_initrd_size = 0; } if (phys_initrd_size && memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { - pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region - disabling initrd\n", - phys_initrd_start, phys_initrd_size); + pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); phys_initrd_start = phys_initrd_size = 0; } if (phys_initrd_size) { -- cgit v1.2.3 From 13f659b0f363114282679d06094337c5efa12fa8 Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Mon, 16 Jul 2012 15:37:06 -0400 Subject: ARM: LPAE: use phys_addr_t in switch_mm() This patch modifies the switch_mm() processor functions to use phys_addr_t. On LPAE systems, we now honor the upper 32-bits of the physical address that is being passed in, and program these into TTBR as expected. Signed-off-by: Cyril Chemparathy Signed-off-by: Vitaly Andrianov Reviewed-by: Nicolas Pitre Tested-by: Santosh Shilimkar Tested-by: Subash Patel [will: fixed up conflict in 3-level switch_mm with big-endian changes] Signed-off-by: Will Deacon --- arch/arm/include/asm/proc-fns.h | 4 ++-- arch/arm/mm/proc-v7-3level.S | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index f3628fb3d2b3..75b5f14617c3 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -60,7 +60,7 @@ extern struct processor { /* * Set the page table */ - void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm); + void (*switch_mm)(phys_addr_t pgd_phys, struct mm_struct *mm); /* * Set a possibly extended PTE. Non-extended PTEs should * ignore 'ext'. 
@@ -82,7 +82,7 @@ extern void cpu_proc_init(void); extern void cpu_proc_fin(void); extern int cpu_do_idle(void); extern void cpu_dcache_clean_area(void *, int); -extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); +extern void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); #ifdef CONFIG_ARM_LPAE extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte); #else diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 363027e811d6..995857d3b530 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -39,6 +39,14 @@ #define TTB_FLAGS_SMP (TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA) #define PMD_FLAGS_SMP (PMD_SECT_WBWA|PMD_SECT_S) +#ifndef __ARMEB__ +# define rpgdl r0 +# define rpgdh r1 +#else +# define rpgdl r1 +# define rpgdh r0 +#endif + /* * cpu_v7_switch_mm(pgd_phys, tsk) * @@ -47,10 +55,10 @@ */ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU - mmid r1, r1 @ get mm->context.id - asid r3, r1 - mov r3, r3, lsl #(48 - 32) @ ASID - mcrr p15, 0, r0, r3, c2 @ set TTB 0 + mmid r2, r2 + asid r2, r2 + orr rpgdh, rpgdh, r2, lsl #(48 - 32) @ upper 32-bits of pgd + mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0 isb #endif mov pc, lr -- cgit v1.2.3 From 1fc84ae84b5153e32a4b6ace507f9663e10b0cb2 Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Mon, 16 Jul 2012 17:20:17 -0400 Subject: ARM: LPAE: use 64-bit accessors for TTBR registers This patch adds TTBR accessor macros, and modifies cpu_get_pgd() and the LPAE version of cpu_set_reserved_ttbr0() to use these instead. In the process, we also fix these functions to correctly handle cases where the physical address lies beyond the 4G limit of 32-bit addressing. Signed-off-by: Cyril Chemparathy Signed-off-by: Vitaly Andrianov Acked-by: Nicolas Pitre Reviewed-by: Catalin Marinas Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/include/asm/proc-fns.h | 22 +++++++++++++++++----- arch/arm/mm/context.c | 9 ++------- 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index 75b5f14617c3..1c3cf9407a31 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -116,13 +116,25 @@ extern void cpu_resume(void); #define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm) #ifdef CONFIG_ARM_LPAE + +#define cpu_get_ttbr(nr) \ + ({ \ + u64 ttbr; \ + __asm__("mrrc p15, " #nr ", %Q0, %R0, c2" \ + : "=r" (ttbr)); \ + ttbr; \ + }) + +#define cpu_set_ttbr(nr, val) \ + do { \ + u64 ttbr = val; \ + __asm__("mcrr p15, " #nr ", %Q0, %R0, c2" \ + : : "r" (ttbr)); \ + } while (0) + #define cpu_get_pgd() \ ({ \ - unsigned long pg, pg2; \ - __asm__("mrrc p15, 0, %0, %1, c2" \ - : "=r" (pg), "=r" (pg2) \ - : \ - : "cc"); \ + u64 pg = cpu_get_ttbr(0); \ pg &= ~(PTRS_PER_PGD*sizeof(pgd_t)-1); \ (pgd_t *)phys_to_virt(pg); \ }) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 2ac37372ef52..3675e31473e3 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -20,6 +20,7 @@ #include #include #include +#include /* * On ARMv6, we have the following structure in the Context ID: @@ -55,17 +56,11 @@ static cpumask_t tlb_flush_pending; #ifdef CONFIG_ARM_LPAE static void cpu_set_reserved_ttbr0(void) { - unsigned long ttbl = __pa(swapper_pg_dir); - unsigned long ttbh = 0; - /* * Set TTBR0 to swapper_pg_dir which contains only global entries. The * ASID is set to 0. 
*/ - asm volatile( - " mcrr p15, 0, %0, %1, c2 @ set TTBR0\n" - : - : "r" (ttbl), "r" (ttbh)); + cpu_set_ttbr(0, __pa(swapper_pg_dir)); isb(); } #else -- cgit v1.2.3 From a7fbc0d62a4d46e642af889e7288fede5078bc46 Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Sat, 21 Jul 2012 19:47:52 -0400 Subject: ARM: LPAE: factor out T1SZ and TTBR1 computations This patch moves the TTBR1 offset calculation and the T1SZ calculation out of the TTB setup assembly code. This should not affect functionality in any way, but improves code readability as well as readability of subsequent patches in this series. Signed-off-by: Cyril Chemparathy Signed-off-by: Vitaly Andrianov Acked-by: Nicolas Pitre Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/include/asm/pgtable-3level-hwdef.h | 20 ++++++++++++++++++++ arch/arm/mm/proc-v7-3level.S | 29 ++++++++--------------------- 2 files changed, 28 insertions(+), 21 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index 18f5cef82ad5..c6c6e6df22f3 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -79,4 +79,24 @@ #define PHYS_MASK_SHIFT (40) #define PHYS_MASK ((1ULL << PHYS_MASK_SHIFT) - 1) +/* + * TTBR0/TTBR1 split (PAGE_OFFSET): + * 0x40000000: T0SZ = 2, T1SZ = 0 (not used) + * 0x80000000: T0SZ = 0, T1SZ = 1 + * 0xc0000000: T0SZ = 0, T1SZ = 2 + * + * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise + * booting secondary CPUs would end up using TTBR1 for the identity + * mapping set up in TTBR0. + */ +#if defined CONFIG_VMSPLIT_2G +#define TTBR1_OFFSET 16 /* skip two L1 entries */ +#elif defined CONFIG_VMSPLIT_3G +#define TTBR1_OFFSET (4096 * (1 + 3)) /* only L2, skip pgd + 3*pmd */ +#else +#define TTBR1_OFFSET 0 +#endif + +#define TTBR1_SIZE (((PAGE_OFFSET >> 30) - 1) << 16) + #endif diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 995857d3b530..58ab7477bb61 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -114,7 +114,7 @@ ENDPROC(cpu_v7_set_pte_ext) */ .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address - cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? (branch below) + cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register orr \tmp, \tmp, #TTB_EAE ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) @@ -122,27 +122,14 @@ ENDPROC(cpu_v7_set_pte_ext) ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP << 16) ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP << 16) /* - * TTBR0/TTBR1 split (PAGE_OFFSET): - * 0x40000000: T0SZ = 2, T1SZ = 0 (not used) - * 0x80000000: T0SZ = 0, T1SZ = 1 - * 0xc0000000: T0SZ = 0, T1SZ = 2 - * - * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise - * booting secondary CPUs would end up using TTBR1 for the identity - * mapping set up in TTBR0. + * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above), + * otherwise booting secondary CPUs would end up using TTBR1 for the + * identity mapping set up in TTBR0. */ - bhi 9001f @ PHYS_OFFSET > PAGE_OFFSET? 
- orr \tmp, \tmp, #(((PAGE_OFFSET >> 30) - 1) << 16) @ TTBCR.T1SZ -#if defined CONFIG_VMSPLIT_2G - /* PAGE_OFFSET == 0x80000000, T1SZ == 1 */ - add \ttbr1, \ttbr1, #1 << 4 @ skip two L1 entries -#elif defined CONFIG_VMSPLIT_3G - /* PAGE_OFFSET == 0xc0000000, T1SZ == 2 */ - add \ttbr1, \ttbr1, #4096 * (1 + 3) @ only L2 used, skip pgd+3*pmd -#endif - /* CONFIG_VMSPLIT_1G does not need TTBR1 adjustment */ -9001: mcr p15, 0, \tmp, c2, c0, 2 @ TTB control register - mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ + mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR + addls \ttbr1, \ttbr1, #TTBR1_OFFSET + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 .endm __CPUINIT -- cgit v1.2.3 From 4756dcbfd37819a8359d3c69a22be2ee41666d0f Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Sat, 21 Jul 2012 15:55:04 -0400 Subject: ARM: LPAE: accomodate >32-bit addresses for page table base This patch redefines the early boot time use of the R4 register to steal a few low order bits (ARCH_PGD_SHIFT bits) on LPAE systems. This allows for up to 38-bit physical addresses. Signed-off-by: Cyril Chemparathy Signed-off-by: Vitaly Andrianov Acked-by: Nicolas Pitre Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/include/asm/memory.h | 16 ++++++++++++++++ arch/arm/kernel/head.S | 10 ++++------ arch/arm/kernel/smp.c | 11 +++++++++-- arch/arm/mm/proc-v7-3level.S | 8 ++++++++ 4 files changed, 37 insertions(+), 8 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 57870ab313c5..e506088a7678 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -18,6 +18,8 @@ #include #include +#include + #ifdef CONFIG_NEED_MACH_MEMORY_H #include #endif @@ -141,6 +143,20 @@ #define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) #define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) +/* + * Minimum guaranted alignment in pgd_alloc(). The page table pointers passed + * around in head.S and proc-*.S are shifted by this amount, in order to + * leave spare high bits for systems with physical address extension. This + * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but + * gives us about 38-bits or so. 
+ */ +#ifdef CONFIG_ARM_LPAE +#define ARCH_PGD_SHIFT L1_CACHE_SHIFT +#else +#define ARCH_PGD_SHIFT 0 +#endif +#define ARCH_PGD_MASK ((1 << ARCH_PGD_SHIFT) - 1) + #ifndef __ASSEMBLY__ /* diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 8bac553fe213..45e8935cae4e 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -156,7 +156,7 @@ ENDPROC(stext) * * Returns: * r0, r3, r5-r7 corrupted - * r4 = physical page table address + * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) */ __create_page_tables: pgtbl r4, r8 @ page table address @@ -331,6 +331,7 @@ __create_page_tables: #endif #ifdef CONFIG_ARM_LPAE sub r4, r4, #0x1000 @ point to the PGD table + mov r4, r4, lsr #ARCH_PGD_SHIFT #endif mov pc, lr ENDPROC(__create_page_tables) @@ -408,7 +409,7 @@ __secondary_data: * r0 = cp#15 control register * r1 = machine ID * r2 = atags or dtb pointer - * r4 = page table pointer + * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) * r9 = processor ID * r13 = *virtual* address to jump to upon completion */ @@ -427,10 +428,7 @@ __enable_mmu: #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0, r0, #CR_I #endif -#ifdef CONFIG_ARM_LPAE - mov r5, #0 - mcrr p15, 0, r4, r5, c2 @ load TTBR0 -#else +#ifndef CONFIG_ARM_LPAE mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 550d63cef68e..217b755aadd4 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -78,6 +78,13 @@ void __init smp_set_ops(struct smp_operations *ops) smp_ops = *ops; }; +static unsigned long get_arch_pgd(pgd_t *pgd) +{ + phys_addr_t pgdir = virt_to_phys(pgd); + BUG_ON(pgdir & ARCH_PGD_MASK); + return pgdir >> ARCH_PGD_SHIFT; +} + int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -87,8 +94,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) * its stack and the page tables. */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; - secondary_data.pgdir = virt_to_phys(idmap_pgd); - secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir); + secondary_data.pgdir = get_arch_pgd(idmap_pgd); + secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); __cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data)); outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1)); diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 58ab7477bb61..5ffe1956c6d9 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -114,6 +114,7 @@ ENDPROC(cpu_v7_set_pte_ext) */ .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address + mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? 
mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register orr \tmp, \tmp, #TTB_EAE @@ -128,8 +129,15 @@ ENDPROC(cpu_v7_set_pte_ext) */ orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR + mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits addls \ttbr1, \ttbr1, #TTBR1_OFFSET mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 .endm __CPUINIT -- cgit v1.2.3 From 82f667046ec895552caf2f7a4c6841c530bfc215 Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Fri, 20 Jul 2012 12:01:23 -0400 Subject: ARM: mm: use physical addresses in highmem sanity checks This patch modifies the highmem sanity checking code to use physical addresses instead. This change eliminates the wrap-around problems associated with the original virtual address based checks, and this simplifies the code a bit. The one constraint imposed here is that low physical memory must be mapped in a monotonically increasing fashion if there are multiple banks of memory, i.e., x < y must => pa(x) < pa(y). Signed-off-by: Cyril Chemparathy Signed-off-by: Vitaly Andrianov Acked-by: Nicolas Pitre Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/mm/mmu.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index b7ce65a82371..fc6ff1a9db50 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -988,6 +988,7 @@ phys_addr_t arm_lowmem_limit __initdata = 0; void __init sanity_check_meminfo(void) { int i, j, highmem = 0; + phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; for (i = 0, j = 0; i < meminfo.nr_banks; i++) { struct membank *bank = &meminfo.bank[j]; @@ -997,8 +998,7 @@ void __init sanity_check_meminfo(void) highmem = 1; #ifdef CONFIG_HIGHMEM - if (__va(bank->start) >= vmalloc_min || - __va(bank->start) < (void *)PAGE_OFFSET) + if (bank->start >= vmalloc_limit) highmem = 1; bank->highmem = highmem; @@ -1007,8 +1007,8 @@ void __init sanity_check_meminfo(void) * Split those memory banks which are partially overlapping * the vmalloc area greatly simplifying things later. */ - if (!highmem && __va(bank->start) < vmalloc_min && - bank->size > vmalloc_min - __va(bank->start)) { + if (!highmem && bank->start < vmalloc_limit && + bank->size > vmalloc_limit - bank->start) { if (meminfo.nr_banks >= NR_BANKS) { printk(KERN_CRIT "NR_BANKS too low, " "ignoring high memory\n"); @@ -1017,12 +1017,12 @@ void __init sanity_check_meminfo(void) (meminfo.nr_banks - i) * sizeof(*bank)); meminfo.nr_banks++; i++; - bank[1].size -= vmalloc_min - __va(bank->start); - bank[1].start = __pa(vmalloc_min - 1) + 1; + bank[1].size -= vmalloc_limit - bank->start; + bank[1].start = vmalloc_limit; bank[1].highmem = highmem = 1; j++; } - bank->size = vmalloc_min - __va(bank->start); + bank->size = vmalloc_limit - bank->start; } #else bank->highmem = highmem; @@ -1042,8 +1042,7 @@ void __init sanity_check_meminfo(void) * Check whether this memory bank would entirely overlap * the vmalloc area. 
*/ - if (__va(bank->start) >= vmalloc_min || - __va(bank->start) < (void *)PAGE_OFFSET) { + if (bank->start >= vmalloc_limit) { printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx " "(vmalloc region overlap).\n", (unsigned long long)bank->start, @@ -1055,9 +1054,8 @@ void __init sanity_check_meminfo(void) * Check whether this memory bank would partially overlap * the vmalloc area. */ - if (__va(bank->start + bank->size - 1) >= vmalloc_min || - __va(bank->start + bank->size - 1) <= __va(bank->start)) { - unsigned long newsize = vmalloc_min - __va(bank->start); + if (bank->start + bank->size > vmalloc_limit) + unsigned long newsize = vmalloc_limit - bank->start; printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx " "to -%.8llx (vmalloc region overlap).\n", (unsigned long long)bank->start, -- cgit v1.2.3 From 5b20c5b2f014ecc0a6310988af69cd7ede9e7c67 Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Wed, 12 Sep 2012 10:19:05 -0400 Subject: ARM: fix type of PHYS_PFN_OFFSET to unsigned long On LPAE machines, PHYS_OFFSET evaluates to a phys_addr_t and this type is inherited by the PHYS_PFN_OFFSET definition as well. Consequently, the kernel build emits warnings of the form: init/main.c: In function 'start_kernel': init/main.c:588:7: warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'phys_addr_t' [-Wformat] This patch fixes this warning by pinning down the PFN type to unsigned long. Signed-off-by: Cyril Chemparathy Acked-by: Nicolas Pitre Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index e506088a7678..584786f740f9 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -223,7 +223,7 @@ static inline unsigned long __phys_to_virt(unsigned long x) * direct-mapped view. We assume this is the first page * of RAM in the mem_map as well. */ -#define PHYS_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT) +#define PHYS_PFN_OFFSET ((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT)) /* * These are *only* valid on the kernel direct mapped RAM memory. -- cgit v1.2.3 From adf2e9fda34c1cfff2ee4e47078b1e142adb2c30 Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Fri, 20 Jul 2012 12:24:45 -0400 Subject: ARM: mm: cleanup checks for membank overlap with vmalloc area On Keystone platforms, physical memory is entirely outside the 32-bit addressible range. Therefore, the (bank->start > ULONG_MAX) check below marks the entire system memory as highmem, and this causes unpleasentness all over. This patch eliminates the extra bank start check (against ULONG_MAX) by checking bank->start against the physical address corresponding to vmalloc_min instead. In the process, this patch also cleans up parts of the highmem sanity check code by removing what has now become a redundant check for banks that entirely overlap with the vmalloc range. 
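The physical-address comparison described above can be sketched as a small standalone model (hypothetical helper name, a single bank, and the CONFIG_HIGHMEM bank-splitting path omitted): a bank starting at or beyond vmalloc_limit is marked highmem, and a lowmem bank spilling past the limit is truncated at it.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t phys_addr_t;

struct membank {
	phys_addr_t start;
	phys_addr_t size;
	int highmem;
};

/* hypothetical helper, not the kernel function itself */
static void check_bank(struct membank *bank, phys_addr_t vmalloc_limit)
{
	if (bank->start >= vmalloc_limit)
		bank->highmem = 1;			/* whole bank above the limit */
	else if (bank->size > vmalloc_limit - bank->start)
		bank->size = vmalloc_limit - bank->start; /* truncate at the limit */
}

int main(void)
{
	/* 2G of RAM at 0x80000000 against a hypothetical 0xf0000000 limit */
	struct membank bank = { 0x80000000ULL, 0x80000000ULL, 0 };

	check_bank(&bank, 0xf0000000ULL);
	printf("start=%#llx size=%#llx highmem=%d\n",
	       (unsigned long long)bank.start,
	       (unsigned long long)bank.size, bank.highmem);
	return 0;
}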
Signed-off-by: Cyril Chemparathy Signed-off-by: Vitaly Andrianov Acked-by: Nicolas Pitre Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/mm/mmu.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index fc6ff1a9db50..ae249d1ab1d3 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -994,15 +994,12 @@ void __init sanity_check_meminfo(void) struct membank *bank = &meminfo.bank[j]; *bank = meminfo.bank[i]; - if (bank->start > ULONG_MAX) - highmem = 1; - -#ifdef CONFIG_HIGHMEM if (bank->start >= vmalloc_limit) highmem = 1; bank->highmem = highmem; +#ifdef CONFIG_HIGHMEM /* * Split those memory banks which are partially overlapping * the vmalloc area greatly simplifying things later. @@ -1025,8 +1022,6 @@ void __init sanity_check_meminfo(void) bank->size = vmalloc_limit - bank->start; } #else - bank->highmem = highmem; - /* * Highmem banks not allowed with !CONFIG_HIGHMEM. */ @@ -1038,18 +1033,6 @@ void __init sanity_check_meminfo(void) continue; } - /* - * Check whether this memory bank would entirely overlap - * the vmalloc area. - */ - if (bank->start >= vmalloc_limit) { - printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx " - "(vmalloc region overlap).\n", - (unsigned long long)bank->start, - (unsigned long long)bank->start + bank->size - 1); - continue; - } - /* * Check whether this memory bank would partially overlap * the vmalloc area. -- cgit v1.2.3 From 28d4bf7a2929c5e525171d249e12662e21130ec3 Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Fri, 20 Jul 2012 13:16:41 -0400 Subject: ARM: mm: clean up membank size limit checks This patch cleans up the highmem sanity check code by simplifying the range checks with a pre-calculated size_limit. This patch should otherwise have no functional impact on behavior. This patch also removes a redundant (bank->start < vmalloc_limit) check, since this is already covered by the !highmem condition. Signed-off-by: Cyril Chemparathy Signed-off-by: Vitaly Andrianov Acked-by: Nicolas Pitre Tested-by: Santosh Shilimkar Tested-by: Subash Patel Signed-off-by: Will Deacon --- arch/arm/mm/mmu.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index ae249d1ab1d3..280f91d02de2 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -992,10 +992,15 @@ void __init sanity_check_meminfo(void) for (i = 0, j = 0; i < meminfo.nr_banks; i++) { struct membank *bank = &meminfo.bank[j]; + phys_addr_t size_limit; + *bank = meminfo.bank[i]; + size_limit = bank->size; if (bank->start >= vmalloc_limit) highmem = 1; + else + size_limit = vmalloc_limit - bank->start; bank->highmem = highmem; @@ -1004,8 +1009,7 @@ void __init sanity_check_meminfo(void) * Split those memory banks which are partially overlapping * the vmalloc area greatly simplifying things later. 
*/ - if (!highmem && bank->start < vmalloc_limit && - bank->size > vmalloc_limit - bank->start) { + if (!highmem && bank->size > size_limit) { if (meminfo.nr_banks >= NR_BANKS) { printk(KERN_CRIT "NR_BANKS too low, " "ignoring high memory\n"); @@ -1014,12 +1018,12 @@ void __init sanity_check_meminfo(void) (meminfo.nr_banks - i) * sizeof(*bank)); meminfo.nr_banks++; i++; - bank[1].size -= vmalloc_limit - bank->start; + bank[1].size -= size_limit; bank[1].start = vmalloc_limit; bank[1].highmem = highmem = 1; j++; } - bank->size = vmalloc_limit - bank->start; + bank->size = size_limit; } #else /* @@ -1037,14 +1041,13 @@ void __init sanity_check_meminfo(void) * Check whether this memory bank would partially overlap * the vmalloc area. */ - if (bank->start + bank->size > vmalloc_limit) - unsigned long newsize = vmalloc_limit - bank->start; + if (bank->size > size_limit) { printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx " "to -%.8llx (vmalloc region overlap).\n", (unsigned long long)bank->start, (unsigned long long)bank->start + bank->size - 1, - (unsigned long long)bank->start + newsize - 1); - bank->size = newsize; + (unsigned long long)bank->start + size_limit - 1); + bank->size = size_limit; } #endif if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit) -- cgit v1.2.3 From e38a517578d6c0f764b0d0f6e26dcdf9f70c69d7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 May 2013 13:52:01 +0100 Subject: ARM: lpae: fix definition of PTE_HWTABLE_PTRS For 2-level page tables, PTE_HWTABLE_PTRS describes the offset between Linux PTEs and hardware PTEs. On LPAE, there is no distinction (since we have 64-bit descriptors with plenty of space) so PTE_HWTABLE_PTRS should be 0. Unfortunately, it is wrongly defined as PTRS_PER_PTE, meaning that current pte table flushing is off by a page. Luckily, all current LPAE implementations are SMP, so the hardware walker can snoop L1. This patch fixes the broken definition. Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm/include/asm/pgtable-3level.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 5b85b218b0f0..d03c589cf6b5 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -33,7 +33,7 @@ #define PTRS_PER_PMD 512 #define PTRS_PER_PGD 4 -#define PTE_HWTABLE_PTRS (PTRS_PER_PTE) +#define PTE_HWTABLE_PTRS (0) #define PTE_HWTABLE_OFF (0) #define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u64)) -- cgit v1.2.3 From a469abd0f868c902b75532579bf87553dcf1b360 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2013 17:13:12 +0100 Subject: ARM: elf: add new hwcap for identifying atomic ldrd/strd instructions CPUs implementing LPAE have atomic ldrd/strd instructions, meaning that userspace software can avoid having to use the exclusive variants of these instructions if they wish. This patch advertises the atomicity of these instructions via the hwcaps, so userspace can detect this CPU feature. 
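A userspace sketch of the detection path described above: read the ELF hwcaps with getauxval() and test the HWCAP_LPAE bit, whose value (1 << 20) is the one added by the uapi hunk below. The local fallback define is only there so the sketch builds against older headers.

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_LPAE
#define HWCAP_LPAE	(1 << 20)	/* value added by the uapi hunk below */
#endif

int main(void)
{
	unsigned long hwcaps = getauxval(AT_HWCAP);

	if (hwcaps & HWCAP_LPAE)
		printf("LPAE: ldrd/strd are atomic, exclusives not required\n");
	else
		printf("no LPAE hwcap: use ldrexd/strexd for atomic 64-bit accesses\n");
	return 0;
}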
Reported-by: Vladimir Danushevsky Signed-off-by: Will Deacon --- arch/arm/include/uapi/asm/hwcap.h | 2 +- arch/arm/kernel/setup.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h index 3688fd15a32d..6d34d080372a 100644 --- a/arch/arm/include/uapi/asm/hwcap.h +++ b/arch/arm/include/uapi/asm/hwcap.h @@ -25,6 +25,6 @@ #define HWCAP_IDIVT (1 << 18) #define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs (not 16) */ #define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT) - +#define HWCAP_LPAE (1 << 20) #endif /* _UAPI__ASMARM_HWCAP_H */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1522c7ae31b0..bdcd4dd13230 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -355,7 +355,7 @@ void __init early_print(const char *str, ...) static void __init cpuid_init_hwcaps(void) { - unsigned int divide_instrs; + unsigned int divide_instrs, vmsa; if (cpu_architecture() < CPU_ARCH_ARMv7) return; @@ -368,6 +368,11 @@ static void __init cpuid_init_hwcaps(void) case 1: elf_hwcap |= HWCAP_IDIVT; } + + /* LPAE implies atomic ldrd/strd instructions */ + vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0; + if (vmsa >= 5) + elf_hwcap |= HWCAP_LPAE; } static void __init feat_v6_fixup(void) @@ -872,6 +877,7 @@ static const char *hwcap_str[] = { "vfpv4", "idiva", "idivt", + "lpae", NULL }; -- cgit v1.2.3 From dde1b65110353517816bcbc58539463396202244 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 17 May 2013 12:32:55 +0100 Subject: ARM: mm: correct pte_same behaviour for LPAE. For 3 levels of paging the PTE_EXT_NG bit will be set for user address ptes that are written to a page table but not for ptes created with mk_pte. This can cause some comparison tests made by pte_same to fail spuriously and lead to other problems. To correct this behaviour, we mask off PTE_EXT_NG for any pte that is present before running the comparison. Signed-off-by: Steve Capper Reviewed-by: Will Deacon --- arch/arm/include/asm/pgtable-3level.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 86b8fe398b95..70f041cb50d1 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -166,6 +166,23 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) clean_pmd_entry(pmdp); \ } while (0) +/* + * For 3 levels of paging the PTE_EXT_NG bit will be set for user address ptes + * that are written to a page table but not for ptes created with mk_pte. + * + * In hugetlb_no_page, a new huge pte (new_pte) is generated and passed to + * hugetlb_cow, where it is compared with an entry in a page table. + * This comparison test fails erroneously leading ultimately to a memory leak. + * + * To correct this behaviour, we mask off PTE_EXT_NG for any pte that is + * present before running the comparison. + */ +#define __HAVE_ARCH_PTE_SAME +#define pte_same(pte_a,pte_b) ((pte_present(pte_a) ? pte_val(pte_a) & ~PTE_EXT_NG \ + : pte_val(pte_a)) \ + == (pte_present(pte_b) ? pte_val(pte_b) & ~PTE_EXT_NG \ + : pte_val(pte_b))) + #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,__pte(pte_val(pte)|(ext))) #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 0b19f93351dd68cb68a1a5b2d74e13d2ddfcfc64 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 17 May 2013 12:33:28 +0100 Subject: ARM: mm: Add support for flushing HugeTLB pages. 
On ARM we use the __flush_dcache_page function to flush the dcache of pages when needed; usually when the PG_dcache_clean bit is unset and we are setting a PTE. A HugeTLB page is represented as a compound page consisting of an array of pages. Thus to flush the dcache of a HugeTLB page, one must flush more than a single page. This patch modifies __flush_dcache_page such that all constituent pages of a HugeTLB page are flushed. Signed-off-by: Steve Capper Reviewed-by: Will Deacon --- arch/arm/mm/flush.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 0d473cce501c..370640780d57 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "mm.h" @@ -168,19 +169,23 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) * coherent with the kernels mapping. */ if (!PageHighMem(page)) { - __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); + size_t page_size = PAGE_SIZE << compound_order(page); + __cpuc_flush_dcache_area(page_address(page), page_size); } else { - void *addr; - + unsigned long i; if (cache_is_vipt_nonaliasing()) { - addr = kmap_atomic(page); - __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_atomic(addr); - } else { - addr = kmap_high_get(page); - if (addr) { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_atomic(page); __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_high(page); + kunmap_atomic(addr); + } + } else { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_high_get(page); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(page); + } } } } -- cgit v1.2.3 From 1355e2a6eb88f04d76125c057dc5fca64d4b6a9e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 25 Jul 2012 14:32:38 +0100 Subject: ARM: mm: HugeTLB support for LPAE systems. This patch adds support for hugetlbfs based on the x86 implementation. It allows mapping of 2MB sections (see Documentation/vm/hugetlbpage.txt for usage). The 64K pages configuration is not supported (section size is 512MB in this case). Signed-off-by: Catalin Marinas [steve.capper@linaro.org: symbolic constants replace numbers in places. Split up into multiple files, to simplify future non-LPAE support, removed huge_pmd_share code, as this is very rarely executed, Added PROT_NONE support]. Signed-off-by: Steve Capper Reviewed-by: Will Deacon --- arch/arm/Kconfig | 4 ++ arch/arm/include/asm/hugetlb-3level.h | 71 +++++++++++++++++++ arch/arm/include/asm/hugetlb.h | 84 +++++++++++++++++++++++ arch/arm/include/asm/pgtable-3level-hwdef.h | 2 + arch/arm/include/asm/pgtable-3level.h | 11 +++ arch/arm/mm/Makefile | 1 + arch/arm/mm/dma-mapping.c | 2 +- arch/arm/mm/fsr-3level.c | 2 +- arch/arm/mm/hugetlbpage.c | 101 ++++++++++++++++++++++++++++ 9 files changed, 276 insertions(+), 2 deletions(-) create mode 100644 arch/arm/include/asm/hugetlb-3level.h create mode 100644 arch/arm/include/asm/hugetlb.h create mode 100644 arch/arm/mm/hugetlbpage.c (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 49d993cee512..860f034653a8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1707,6 +1707,10 @@ config HW_PERF_EVENTS Enable hardware performance counter support for perf events. If disabled, perf events will use software events only. 
+config SYS_SUPPORTS_HUGETLBFS + def_bool y + depends on ARM_LPAE + source "mm/Kconfig" config FORCE_MAX_ZONEORDER diff --git a/arch/arm/include/asm/hugetlb-3level.h b/arch/arm/include/asm/hugetlb-3level.h new file mode 100644 index 000000000000..d4014fbe5ea3 --- /dev/null +++ b/arch/arm/include/asm/hugetlb-3level.h @@ -0,0 +1,71 @@ +/* + * arch/arm/include/asm/hugetlb-3level.h + * + * Copyright (C) 2012 ARM Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_ARM_HUGETLB_3LEVEL_H +#define _ASM_ARM_HUGETLB_3LEVEL_H + + +/* + * If our huge pte is non-zero then mark the valid bit. + * This allows pte_present(huge_ptep_get(ptep)) to return true for non-zero + * ptes. + * (The valid bit is automatically cleared by set_pte_at for PROT_NONE ptes). + */ +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + pte_t retval = *ptep; + if (pte_val(retval)) + pte_val(retval) |= L_PTE_VALID; + return retval; +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + ptep_clear_flush(vma, addr, ptep); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +#endif /* _ASM_ARM_HUGETLB_3LEVEL_H */ diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h new file mode 100644 index 000000000000..1f1b1cd112f3 --- /dev/null +++ b/arch/arm/include/asm/hugetlb.h @@ -0,0 +1,84 @@ +/* + * arch/arm/include/asm/hugetlb.h + * + * Copyright (C) 2012 ARM Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_ARM_HUGETLB_H +#define _ASM_ARM_HUGETLB_H + +#include +#include + +#include + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} + +#endif /* _ASM_ARM_HUGETLB_H */ diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index 18f5cef82ad5..42df407ee3e3 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -30,6 +30,7 @@ #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) +#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) #define PMD_BIT4 (_AT(pmdval_t, 0)) #define PMD_DOMAIN(x) (_AT(pmdval_t, 0)) #define PMD_APTABLE_SHIFT (61) @@ -66,6 +67,7 @@ #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) #define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) +#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_BUFFERABLE (_AT(pteval_t, 1) << 2) /* AttrIndx[0] */ #define PTE_CACHEABLE (_AT(pteval_t, 1) << 3) /* AttrIndx[1] */ #define PTE_EXT_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 70f041cb50d1..d1bcd8226cb1 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -61,6 +61,14 @@ #define USER_PTRS_PER_PGD (PAGE_OFFSET / PGDIR_SIZE) +/* + * Hugetlb definitions. + */ +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + /* * "Linux" PTE definitions for LPAE. 
* @@ -185,6 +193,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,__pte(pte_val(pte)|(ext))) +#define pte_huge(pte) (pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT)) +#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_PGTABLE_3LEVEL_H */ diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 9e51be96f635..224a9cc09877 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ef3e0f3aac96..9674476a75dc 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -250,7 +250,7 @@ static void __dma_free_buffer(struct page *page, size_t size) #ifdef CONFIG_MMU #ifdef CONFIG_HUGETLB_PAGE -#error ARM Coherent DMA allocator does not (yet) support huge TLB +#warning ARM Coherent DMA allocator does not (yet) support huge TLB #endif static void *__alloc_from_contiguous(struct device *dev, size_t size, diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c index 05a4e9431836..e115fc7a69bd 100644 --- a/arch/arm/mm/fsr-3level.c +++ b/arch/arm/mm/fsr-3level.c @@ -13,7 +13,7 @@ static struct fsr_info fsr_info[] = { { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, { do_bad, SIGBUS, 0, "reserved permission fault" }, { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_bad, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "asynchronous external abort" }, diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c new file mode 100644 index 000000000000..3d1e4a205b0b --- /dev/null +++ b/arch/arm/mm/hugetlbpage.c @@ -0,0 +1,101 @@ +/* + * arch/arm/mm/hugetlbpage.c + * + * Copyright (C) 2012 ARM Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot + * of type casting from pmd_t * to pte_t *. 
+ */ + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } + + return (pte_t *)pmd; +} + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) + pte = (pte_t *)pmd_alloc(mm, pud, addr); + + return pte; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~PMD_MASK) >> PAGE_SHIFT); + return page; +} + +int pmd_huge(pmd_t pmd) +{ + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} -- cgit v1.2.3 From 8d962507007357d6fbbcbdd1647faa389a9aed6d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 25 Jul 2012 14:39:26 +0100 Subject: ARM: mm: Transparent huge page support for LPAE systems. The patch adds support for THP (transparent huge pages) to LPAE systems. When this feature is enabled, the kernel tries to map anonymous pages as 2MB sections where possible. Signed-off-by: Catalin Marinas [steve.capper@linaro.org: symbolic constants used, value of PMD_SECT_SPLITTING adjusted, tlbflush.h included in pgtable.h, added PROT_NONE support.] Signed-off-by: Steve Capper Reviewed-by: Will Deacon --- arch/arm/Kconfig | 4 ++ arch/arm/include/asm/pgtable-3level-hwdef.h | 2 + arch/arm/include/asm/pgtable-3level.h | 60 +++++++++++++++++++++++++++++ arch/arm/include/asm/pgtable.h | 3 ++ arch/arm/include/asm/tlb.h | 6 +++ arch/arm/include/asm/tlbflush.h | 2 + arch/arm/mm/fsr-3level.c | 2 +- 7 files changed, 78 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 860f034653a8..f07a46271168 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1711,6 +1711,10 @@ config SYS_SUPPORTS_HUGETLBFS def_bool y depends on ARM_LPAE +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + depends on ARM_LPAE + source "mm/Kconfig" config FORCE_MAX_ZONEORDER diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index 42df407ee3e3..f088c864c992 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -42,6 +42,8 @@ */ #define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2) #define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3) +#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ +#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_nG (_AT(pmdval_t, 1) << 11) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index d1bcd8226cb1..54733e5ef7a1 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -87,6 +87,11 @@ #define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */ #define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */ +#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) +#define 
PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) +#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56) +#define PMD_SECT_NONE (_AT(pmdval_t, 1) << 57) + /* * To be used in assembly code with the upper page attributes. */ @@ -196,6 +201,61 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pte_huge(pte) (pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT)) #define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) +#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#endif + +#define PMD_BIT_FUNC(fn,op) \ +static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } + +PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); +PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); +PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); +PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); + +#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) + +#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +/* represent a notpresent pmd by zero, this is used by pmdp_invalidate */ +#define pmd_mknotpresent(pmd) (__pmd(0)) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY | + PMD_SECT_VALID | PMD_SECT_NONE; + pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); + return pmd; +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + BUG_ON(addr >= TASK_SIZE); + + /* create a faulting entry if PROT_NONE protected */ + if (pmd_val(pmd) & PMD_SECT_NONE) + pmd_val(pmd) &= ~PMD_SECT_VALID; + + *pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG); + flush_pmd_entry(pmdp); +} + +static inline int has_transparent_hugepage(void) +{ + return 1; +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_PGTABLE_3LEVEL_H */ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 9bcd262a9008..eaedce7b7e3a 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -24,6 +24,9 @@ #include #include + +#include + #ifdef CONFIG_ARM_LPAE #include #else diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 99a19512ee26..bdc62da2f212 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -223,6 +223,12 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, #endif } +static inline void +tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) +{ + tlb_add_flush(tlb, addr); +} + #define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr) #define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr) #define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index a3625d141c1d..c37459299fc9 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -535,6 +535,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, } #endif +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + #endif 
#endif /* CONFIG_MMU */ diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c index e115fc7a69bd..ab4409a2307e 100644 --- a/arch/arm/mm/fsr-3level.c +++ b/arch/arm/mm/fsr-3level.c @@ -9,7 +9,7 @@ static struct fsr_info fsr_info[] = { { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "reserved access flag fault" }, { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, { do_bad, SIGBUS, 0, "reserved permission fault" }, { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, -- cgit v1.2.3 From fdeb94b5dc5bf9db7b3e36f3f38089a554f6a108 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 3 Jun 2013 23:09:14 +0100 Subject: ARM: 7745/1: psci: fix building without HOTPLUG_CPU The cpu_die field in smp_operations is not valid with CONFIG_HOTPLUG_CPU, so we must enclose it in #ifdef, but at least that lets us remove two other lines. Signed-off-by: Arnd Bergmann Cc: Stefano Stabellini Cc: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/psci_smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 23a11424c568..219f1d73572a 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -68,8 +68,6 @@ void __ref psci_cpu_die(unsigned int cpu) /* We should never return */ panic("psci: cpu %d failed to shutdown\n", cpu); } -#else -#define psci_cpu_die NULL #endif bool __init psci_smp_available(void) @@ -80,5 +78,7 @@ bool __init psci_smp_available(void) struct smp_operations __initdata psci_smp_ops = { .smp_boot_secondary = psci_boot_secondary, +#ifdef CONFIG_HOTPLUG_CPU .cpu_die = psci_cpu_die, +#endif }; -- cgit v1.2.3 From 0af0b189abf73d232af782df2f999235cd2fed7f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 Jan 2013 18:17:49 +0000 Subject: ARM: hyp: initialize CNTVOFF to zero In order to be able to use the virtual counter in a safe way, make sure it is initialized to zero before dropping to SVC. Signed-off-by: Marc Zyngier Signed-off-by: Mark Rutland Acked-by: Santosh Shilimkar Cc: Dave Martin --- arch/arm/kernel/hyp-stub.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 1315c4ccfa56..dbe21107945a 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -153,6 +153,8 @@ THUMB( orr r7, #(1 << 30) ) @ HSCTLR.TE mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL orr r7, r7, #3 @ PL1PCEN | PL1PCTEN mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL + mov r7, #0 + mcrr p15, 4, r7, r7, c14 @ CNTVOFF 1: #endif -- cgit v1.2.3 From f793c23ebbe5afd1cabf4a42a3a297022213756f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 Mar 2013 13:41:35 +0000 Subject: ARM: KVM: arch_timers: zero CNTVOFF upon return to host To use the virtual counters from the host, we need to ensure that CNTVOFF doesn't change unexpectedly. When we change to a guest, we replace the host's CNTVOFF, but we don't restore it when returning to the host. As the host sets CNTVOFF to zero, and never changes it, we can simply zero CNTVOFF when returning to the host. This patch adds said zeroing to the return to host path. 
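A minimal model of the counter relationship both CNTVOFF patches rely on (an illustration of the architectural relation, not kernel code): the virtual counter reads as the physical counter minus CNTVOFF, so writing CNTVOFF = 0 on the host makes CNTVCT read the same as CNTPCT, which is what the "@ Ensure host CNTVCT == CNTPCT" comment in the hunk below asserts.

#include <stdint.h>
#include <assert.h>

/* CNTVCT = CNTPCT - CNTVOFF */
static uint64_t cntvct(uint64_t cntpct, uint64_t cntvoff)
{
	return cntpct - cntvoff;
}

int main(void)
{
	uint64_t cntpct = 123456789;

	/* guest view: the offset programmed for the guest is applied */
	assert(cntvct(cntpct, 1000) == cntpct - 1000);
	/* host view after the hyp/KVM code writes CNTVOFF = 0 */
	assert(cntvct(cntpct, 0) == cntpct);
	return 0;
}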
Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Acked-by: Santosh Shilimkar Acked-by: Christoffer Dall --- arch/arm/kvm/interrupts_head.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 3c8f2f0b4c5e..d43cfb5b37c4 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -497,6 +497,10 @@ vcpu .req r0 @ vcpu pointer always in r0 add r5, vcpu, r4 strd r2, r3, [r5] + @ Ensure host CNTVCT == CNTPCT + mov r2, #0 + mcrr p15, 4, r2, r2, c14 @ CNTVOFF + 1: #endif @ Allow physical timer/counter access for the host -- cgit v1.2.3 From 0d651e4e65e96989f72236bf83bd4c6e55eb6ce4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 30 Jan 2013 17:51:26 +0000 Subject: clocksource: arch_timer: use virtual counters Switching between reading the virtual or physical counters is problematic, as some core code wants a view of time before we're fully set up. Using a function pointer and switching the source after the first read can make time appear to go backwards, and having a check in the read function is an unfortunate block on what we want to be a fast path. Instead, this patch makes us always use the virtual counters. If we're a guest, or don't have hyp mode, we'll use the virtual timers, and as such don't care about CNTVOFF as long as it doesn't change in such a way as to make time appear to travel backwards. As the guest will use the virtual timers, a (potential) KVM host must use the physical timers (which can wake up the host even if they fire while a guest is executing), and hence a host must have CNTVOFF set to zero so as to have a consistent view of time between the physical timers and virtual counters. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Acked-by: Marc Zyngier Acked-by: Santosh Shilimkar Cc: Rob Herring --- arch/arm/include/asm/arch_timer.h | 9 --------- arch/arm64/include/asm/arch_timer.h | 10 ---------- drivers/clocksource/arm_arch_timer.c | 23 +++++------------------ include/clocksource/arm_arch_timer.h | 2 +- 4 files changed, 6 insertions(+), 38 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 7c1bfc0aea0c..accefe099182 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -80,15 +80,6 @@ static inline u32 arch_timer_get_cntfrq(void) return val; } -static inline u64 arch_counter_get_cntpct(void) -{ - u64 cval; - - isb(); - asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval)); - return cval; -} - static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index bf6ab242f047..d56ed11ba9a3 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -110,16 +110,6 @@ static inline void __cpuinit arch_counter_set_user_access(void) asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); } -static inline u64 arch_counter_get_cntpct(void) -{ - u64 cval; - - isb(); - asm volatile("mrs %0, cntpct_el0" : "=r" (cval)); - - return cval; -} - static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a2b254189782..053d846ab5b1 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -186,27 +186,19 @@ u32 arch_timer_get_rate(void) return arch_timer_rate; } -/* - * Some external users of 
arch_timer_read_counter (e.g. sched_clock) may try to - * call it before it has been initialised. Rather than incur a performance - * penalty checking for initialisation, provide a default implementation that - * won't lead to time appearing to jump backwards. - */ -static u64 arch_timer_read_zero(void) +u64 arch_timer_read_counter(void) { - return 0; + return arch_counter_get_cntvct(); } -u64 (*arch_timer_read_counter)(void) = arch_timer_read_zero; - static cycle_t arch_counter_read(struct clocksource *cs) { - return arch_timer_read_counter(); + return arch_counter_get_cntvct(); } static cycle_t arch_counter_read_cc(const struct cyclecounter *cc) { - return arch_timer_read_counter(); + return arch_counter_get_cntvct(); } static struct clocksource clocksource_counter = { @@ -287,7 +279,7 @@ static int __init arch_timer_register(void) cyclecounter.mult = clocksource_counter.mult; cyclecounter.shift = clocksource_counter.shift; timecounter_init(&timecounter, &cyclecounter, - arch_counter_get_cntpct()); + arch_counter_get_cntvct()); if (arch_timer_use_virtual) { ppi = arch_timer_ppi[VIRT_PPI]; @@ -376,11 +368,6 @@ static void __init arch_timer_init(struct device_node *np) } } - if (arch_timer_use_virtual) - arch_timer_read_counter = arch_counter_get_cntvct; - else - arch_timer_read_counter = arch_counter_get_cntpct; - arch_timer_register(); arch_timer_arch_init(); } diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index e6c9c4cc9b23..c463ce990c48 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -32,7 +32,7 @@ #ifdef CONFIG_ARM_ARCH_TIMER extern u32 arch_timer_get_rate(void); -extern u64 (*arch_timer_read_counter)(void); +extern u64 arch_timer_read_counter(void); extern struct timecounter *arch_timer_get_timecounter(void); #else -- cgit v1.2.3 From fb521a0da1551468a45f2e2a1c1941d0033357ea Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 20 Mar 2013 13:57:38 +0000 Subject: arm: fix up ARM_ARCH_TIMER selects In 8a4da6e: "arm: arch_timer: move core to drivers/clocksource", the selection of ARM_ARCH_TIMER was indirected via HAVE_ARM_ARCH_TIMER, though mach-exynos's selection of ARM_ARCH_TIMER was missed, and since then mach-shmobile, mach-tegra, and mach-virt have begun selecting ARM_ARCH_TIMER. This can lead to architected timer support erroneously appearing to not be selected in menuconfig. This patch fixes up the Kconfigs for those platforms to select HAVE_ARM_ARCH_TIMER. 
Signed-off-by: Mark Rutland Acked-by: Stephen Warren Acked-by: Santosh Shilimkar Acked-by: Simon Horman Cc: Kukjin Kim Cc: Marc Zyngier --- arch/arm/mach-exynos/Kconfig | 2 +- arch/arm/mach-shmobile/Kconfig | 4 ++-- arch/arm/mach-tegra/Kconfig | 2 +- arch/arm/mach-virt/Kconfig | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index d19edff0ea6e..3c3f36a8fbd4 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -76,7 +76,7 @@ config SOC_EXYNOS5440 default y depends on ARCH_EXYNOS5 select ARCH_HAS_OPP - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select AUTO_ZRELADDR select PINCTRL select PINCTRL_EXYNOS5440 diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 1a517e2fe449..767a6c3c27a4 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -23,7 +23,7 @@ config ARCH_R8A73A4 select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_GIC select CPU_V7 - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select SH_CLK_CPG select RENESAS_IRQC @@ -56,7 +56,7 @@ config ARCH_R8A7790 select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_GIC select CPU_V7 - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select SH_CLK_CPG select RENESAS_IRQC diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 84d72fc36dfe..65c5ae6fa386 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -60,7 +60,7 @@ config ARCH_TEGRA_3x_SOC config ARCH_TEGRA_114_SOC bool "Enable support for Tegra114 family" - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select ARM_GIC select ARM_L1_CACHE_SHIFT_6 select CPU_FREQ_TABLE if CPU_FREQ diff --git a/arch/arm/mach-virt/Kconfig b/arch/arm/mach-virt/Kconfig index 8958f0d896bc..081d46929436 100644 --- a/arch/arm/mach-virt/Kconfig +++ b/arch/arm/mach-virt/Kconfig @@ -2,7 +2,7 @@ config ARCH_VIRT bool "Dummy Virtual Machine" if ARCH_MULTI_V7 select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_GIC - select ARM_ARCH_TIMER + select HAVE_ARM_ARCH_TIMER select ARM_PSCI select HAVE_SMP select CPU_V7 -- cgit v1.2.3 From 3f71be237ce37e0131973ebfa33b326bc51d743e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 12 Mar 2013 14:56:12 +0000 Subject: ARM: arch_timer: stop virtual timer when booted in HYP mode When booting the kernel, a bootloader could have left the virtual timer ticking away, potentially generating interrupts. This could be troublesome if the user of the virtual timer is not careful when enabling the interrupt. In order to avoid any surprise, stop the virtual timer from interrupting us when booted in HYP mode, as we'll use the physical timer in this case. 
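[note: illustrative sketch only, not part of the patch — a rough C rendering of
the hyp-stub assembly added below, using the CNTV_CTL encoding (c14, c3, 1)
shown in the hunk; the helper name is hypothetical:

	static inline void cntv_ctl_disable(void)
	{
		u32 ctl;

		asm volatile("mrc p15, 0, %0, c14, c3, 1" : "=r" (ctl));	/* read CNTV_CTL */
		ctl &= ~1;							/* clear ENABLE */
		asm volatile("mcr p15, 0, %0, c14, c3, 1" : : "r" (ctl));	/* write CNTV_CTL */
	}
]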
Reported-by: Giridhar Maruthy Signed-off-by: Marc Zyngier Signed-off-by: Mark Rutland Acked-by: Santosh Shilimkar Cc: Dave Martin --- arch/arm/kernel/hyp-stub.S | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index dbe21107945a..4910232c4833 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -155,6 +155,11 @@ THUMB( orr r7, #(1 << 30) ) @ HSCTLR.TE mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL mov r7, #0 mcrr p15, 4, r7, r7, c14 @ CNTVOFF + + @ Disable virtual timer in case it was counting + mrc p15, 0, r7, c14, c3, 1 @ CNTV_CTL + bic r7, #1 @ Clear ENABLE + mcr p15, 0, r7, c14, c3, 1 @ CNTV_CTL 1: #endif -- cgit v1.2.3 From 01fafcab20fbbd2930691c7fdcf177eaa190d499 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Feb 2012 11:50:32 +0000 Subject: ARM: nommu: add entry point for secondary CPUs to head-nommu.S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a secondary_startup entry point to head-nommu.S so that we can boot secondary CPUs on an SMP nommu configuration. Signed-off-by: Will Deacon CC: Uwe Kleine-König CC: Nicolas Pitre --- arch/arm/kernel/head-nommu.S | 50 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 8812ce88f7a1..06ba9c8e62be 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -63,12 +63,56 @@ ENTRY(stext) movs r10, r5 @ invalid processor (r5=0)? beq __error_p @ yes, error 'p' - adr lr, BSYM(__after_proc_init) @ return (PIC) address + ldr r13, =__mmap_switched @ address to jump to after + @ initialising sctlr + adr lr, BSYM(1f) @ return (PIC) address ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) THUMB( mov pc, r12 ) + 1: b __after_proc_init ENDPROC(stext) +#ifdef CONFIG_SMP + __CPUINIT +ENTRY(secondary_startup) + /* + * Common entry point for secondary CPUs. + * + * Ensure that we're in SVC mode, and IRQs are disabled. Lookup + * the processor type - there is no need to check the machine type + * as it has already been validated by the primary processor. + */ + setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 +#ifndef CONFIG_CPU_CP15 + ldr r9, =CONFIG_PROCESSOR_ID +#else + mrc p15, 0, r9, c0, c0 @ get processor id +#endif + bl __lookup_processor_type @ r5=procinfo r9=cpuid + movs r10, r5 @ invalid processor? + beq __error_p @ yes, error 'p' + + adr r4, __secondary_data + ldmia r4, {r7, r12} + adr lr, BSYM(__after_proc_init) @ return address + mov r13, r12 @ __secondary_switched address + ARM( add pc, r10, #PROCINFO_INITFUNC ) + THUMB( add r12, r10, #PROCINFO_INITFUNC ) + THUMB( mov pc, r12 ) +ENDPROC(secondary_startup) + +ENTRY(__secondary_switched) + ldr sp, [r7, #8] @ set up the stack pointer + mov fp, #0 + b secondary_start_kernel +ENDPROC(__secondary_switched) + + .type __secondary_data, %object +__secondary_data: + .long secondary_data + .long __secondary_switched +#endif /* CONFIG_SMP */ + /* * Set the Control Register and Read the process ID. 
*/ @@ -99,9 +143,7 @@ __after_proc_init: #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #endif /* CONFIG_CPU_CP15 */ - - b __mmap_switched @ clear the BSS and jump - @ to start_kernel + mov pc, r13 ENDPROC(__after_proc_init) .ltorg -- cgit v1.2.3 From 5c709e699881afd1cf9244c719eb063c3476a405 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Feb 2012 12:56:06 +0000 Subject: ARM: nommu: define dummy TLB operations for nommu configurations nommu platforms do not perform address translation and therefore clearly don't have TLBs. However, some SMP code assumes the presence of the TLB flushing routines and will therefore fail to compile for a nommu system. This patch defines dummy local_* TLB operations and #defines tlb_ops_need_broadcast() as 0, therefore causing the usual ARM SMP TLB operations to call the local variants instead. Signed-off-by: Will Deacon CC: Lorenzo Pieralisi CC: Nicolas Pitre --- arch/arm/include/asm/smp_plat.h | 4 ++++ arch/arm/include/asm/tlbflush.h | 23 ++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index aaa61b6f50ff..1c7b6f8101ae 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -26,6 +26,9 @@ static inline bool is_smp(void) } /* all SMP configurations have the extended CPUID registers */ +#ifndef CONFIG_MMU +#define tlb_ops_need_broadcast() 0 +#else static inline int tlb_ops_need_broadcast(void) { if (!is_smp()) @@ -33,6 +36,7 @@ static inline int tlb_ops_need_broadcast(void) return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2; } +#endif #if !defined(CONFIG_SMP) || __LINUX_ARM_ARCH__ >= 7 #define cache_ops_need_broadcast() 0 diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index a3625d141c1d..cc8517e0f132 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -537,6 +537,27 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #endif -#endif /* CONFIG_MMU */ +#elif defined(CONFIG_SMP) /* !CONFIG_MMU */ + +#ifndef __ASSEMBLY__ + +#include + +static inline void local_flush_tlb_all(void) { } +static inline void local_flush_tlb_mm(struct mm_struct *mm) { } +static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { } +static inline void local_flush_tlb_kernel_page(unsigned long kaddr) { } +static inline void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } +static inline void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { } + +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr); +extern void flush_tlb_kernel_page(unsigned long kaddr); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +#endif /* __ASSEMBLY__ */ + +#endif #endif -- cgit v1.2.3 From 02ed1c7bba57b66c9a2f3146c935af12a93f2d76 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Feb 2012 14:26:42 +0000 Subject: ARM: nommu: provide dummy cpu_switch_mm implementation cpu_switch_mm is a logical nop on nommu systems, so define it as such when !CONFIG_MMU. 
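[note: editor's sketch of the intent behind this and the preceding TLB-stub
patch — generic SMP code keeps calling the same hooks, which simply collapse on
!MMU. For the TLB side, with tlb_ops_need_broadcast() hard-wired to 0, the
smp_tlb.c entry points reduce roughly to the empty local variants (shape
approximated, not quoted from smp_tlb.c):

	void flush_tlb_mm(struct mm_struct *mm)
	{
		if (tlb_ops_need_broadcast())	/* constant 0 when !CONFIG_MMU */
			on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
		else
			local_flush_tlb_mm(mm);	/* empty stub defined above */
	}
]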
Signed-off-by: Will Deacon --- arch/arm/include/asm/proc-fns.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index f3628fb3d2b3..a6c99fe62b82 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -137,6 +137,10 @@ extern void cpu_resume(void); }) #endif +#else /*!CONFIG_MMU */ + +#define cpu_switch_mm(pgd,mm) { } + #endif #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From c4a1f032ed35d744e3d74b8aebe8d85f29aecd88 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Feb 2012 13:02:59 +0000 Subject: ARM: nommu: do not initialise page tables in secondary_data structure nommu systems do not require any page tables, so don't try to initialise them when bringing up secondary cores. Signed-off-by: Will Deacon --- arch/arm/kernel/smp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 550d63cef68e..44d1c00dc45f 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -87,8 +87,10 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) * its stack and the page tables. */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; +#ifdef CONFIG_MMU secondary_data.pgdir = virt_to_phys(idmap_pgd); secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir); +#endif __cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data)); outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1)); -- cgit v1.2.3 From aa1aadc3305c4917c39f0291613a5ec81dd4c73b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Feb 2012 13:51:38 +0000 Subject: ARM: suspend: fix CPU suspend code for !CONFIG_MMU configurations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ARM CPU suspend code can be selected even for a !CONFIG_MMU configuration. The resulting kernel will not compile and, even if it did, would access undefined co-processor registers when executing. This patch fixes the v6 and v7 CPU suspend code for the nommu case. Signed-off-by: Will Deacon Tested-by: Jonathan Austin CC: Lorenzo Pieralisi (commit_signer:1/3=33%) CC: Santosh Shilimkar (commit_signer:1/3=33%) CC: Uwe Kleine-König --- arch/arm/kernel/suspend.c | 64 ++++++++++++++++++++++++++--------------------- arch/arm/mm/proc-v6.S | 6 ++++- arch/arm/mm/proc-v7.S | 14 +++++++---- 3 files changed, 50 insertions(+), 34 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index c59c97ea8268..38a50676213b 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -10,6 +10,42 @@ extern int __cpu_suspend(unsigned long, int (*)(unsigned long)); extern void cpu_resume_mmu(void); +#ifdef CONFIG_MMU +/* + * Hide the first two arguments to __cpu_suspend - these are an implementation + * detail which platform code shouldn't have to know about. + */ +int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) +{ + struct mm_struct *mm = current->active_mm; + int ret; + + if (!idmap_pgd) + return -EINVAL; + + /* + * Provide a temporary page table with an identity mapping for + * the MMU-enable code, required for resuming. On successful + * resume (indicated by a zero return code), we need to switch + * back to the correct page tables. 
+ */ + ret = __cpu_suspend(arg, fn); + if (ret == 0) { + cpu_switch_mm(mm->pgd, mm); + local_flush_bp_all(); + local_flush_tlb_all(); + } + + return ret; +} +#else +int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) +{ + return __cpu_suspend(arg, fn); +} +#define idmap_pgd NULL +#endif + /* * This is called by __cpu_suspend() to save the state, and do whatever * flushing is required to ensure that when the CPU goes to sleep we have @@ -46,31 +82,3 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr) outer_clean_range(virt_to_phys(save_ptr), virt_to_phys(save_ptr) + sizeof(*save_ptr)); } - -/* - * Hide the first two arguments to __cpu_suspend - these are an implementation - * detail which platform code shouldn't have to know about. - */ -int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) -{ - struct mm_struct *mm = current->active_mm; - int ret; - - if (!idmap_pgd) - return -EINVAL; - - /* - * Provide a temporary page table with an identity mapping for - * the MMU-enable code, required for resuming. On successful - * resume (indicated by a zero return code), we need to switch - * back to the correct page tables. - */ - ret = __cpu_suspend(arg, fn); - if (ret == 0) { - cpu_switch_mm(mm->pgd, mm); - local_flush_bp_all(); - local_flush_tlb_all(); - } - - return ret; -} diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 919405e20b80..2d1ef87328a1 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -140,8 +140,10 @@ ENTRY(cpu_v6_set_pte_ext) ENTRY(cpu_v6_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU mrc p15, 0, r5, c3, c0, 0 @ Domain ID mrc p15, 0, r6, c2, c0, 1 @ Translation table base 1 +#endif mrc p15, 0, r7, c1, c0, 1 @ auxiliary control register mrc p15, 0, r8, c1, c0, 2 @ co-processor access control mrc p15, 0, r9, c1, c0, 0 @ control register @@ -158,14 +160,16 @@ ENTRY(cpu_v6_do_resume) mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID ldmia r0, {r4 - r9} mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU mcr p15, 0, r5, c3, c0, 0 @ Domain ID ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) ALT_UP(orr r1, r1, #TTB_FLAGS_UP) mcr p15, 0, r1, c2, c0, 0 @ Translation table base 0 mcr p15, 0, r6, c2, c0, 1 @ Translation table base 1 + mcr p15, 0, ip, c2, c0, 2 @ TTB control register +#endif mcr p15, 0, r7, c1, c0, 1 @ auxiliary control register mcr p15, 0, r8, c1, c0, 2 @ co-processor access control - mcr p15, 0, ip, c2, c0, 2 @ TTB control register mcr p15, 0, ip, c7, c5, 4 @ ISB mov r0, r9 @ control register b cpu_resume_mmu diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 2c73a7301ff7..a851e3433afe 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -98,9 +98,11 @@ ENTRY(cpu_v7_do_suspend) mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID stmia r0!, {r4 - r5} +#ifdef CONFIG_MMU mrc p15, 0, r6, c3, c0, 0 @ Domain ID mrc p15, 0, r7, c2, c0, 1 @ TTB 1 mrc p15, 0, r11, c2, c0, 2 @ TTB control register +#endif mrc p15, 0, r8, c1, c0, 0 @ Control register mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control @@ -110,13 +112,14 @@ ENDPROC(cpu_v7_do_suspend) ENTRY(cpu_v7_do_resume) mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID ldmia r0!, {r4 - r5} mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID ldmia r0, {r6 - r11} 
+#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs mcr p15, 0, r6, c3, c0, 0 @ Domain ID #ifndef CONFIG_ARM_LPAE ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) @@ -125,14 +128,15 @@ ENTRY(cpu_v7_do_resume) mcr p15, 0, r1, c2, c0, 0 @ TTB 0 mcr p15, 0, r7, c2, c0, 1 @ TTB 1 mcr p15, 0, r11, c2, c0, 2 @ TTB control register - mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register - teq r4, r9 @ Is it already set? - mcrne p15, 0, r9, c1, c0, 1 @ No, so write it - mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control ldr r4, =PRRR @ PRRR ldr r5, =NMRR @ NMRR mcr p15, 0, r4, c10, c2, 0 @ write PRRR mcr p15, 0, r5, c10, c2, 1 @ write NMRR +#endif /* CONFIG_MMU */ + mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register + teq r4, r9 @ Is it already set? + mcrne p15, 0, r9, c1, c0, 1 @ No, so write it + mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control isb dsb mov r0, r8 @ control register -- cgit v1.2.3 From 8006b4d1a7c70f27a87cb753b5ed90483f0cfe26 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Wed, 24 Apr 2013 11:51:38 +0100 Subject: ARM: nommu: Don't build smp_tlb.c for !CONFIG_MMU Without an MMU we don't need to do any TLB maintenance. Until the addition of 93dc68876b60 (ARM: 7684/1: errata: Workaround for Cortex-A15 erratum 798181 (TLBI/DSB operations)) building the tlb maintenance ops in smp_tlb.c worked, though none of the contents were used. Since that commit, however, SMP NOMMU has not been able to build. This patch restores that ability by making the building of smp_tlb.c dependent on MMU. Signed-off-by: Jonathan Austin Acked-by: Catalin Marinas CC: Will Deacon --- arch/arm/kernel/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index f4285b5ffb05..fccfbdb03df1 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -38,7 +38,10 @@ obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o -obj-$(CONFIG_SMP) += smp.o smp_tlb.o +obj-$(CONFIG_SMP) += smp.o +ifdef CONFIG_MMU +obj-$(CONFIG_SMP) += smp_tlb.o +endif obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o -- cgit v1.2.3 From 8d655d835bc5c0cb7d485d147ba96249e356a697 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Wed, 24 Apr 2013 11:56:09 +0100 Subject: ARM: nommu: add stub local_flush_bp_all() for !CONFIG_MMUU Since the merging of Will's tlb-ops branch, specifically 89c7e4b8bbb3 (ARM: 7661/1: mm: perform explicit branch predictor maintenance when required), building SMP without CONFIG_MMU has been broken. The local_flush_bp_all function is only called for operations related to changing the kernel's view of memory and ASID rollover - both of which are irrelevant to an !MMU kernel. This patch adds a stub local_flush_bp_all() function to the other tlb maintenance stubs and restores the ability to build an SMP !MMU kernel. 
Signed-off-by: Jonathan Austin Acked-by: Will Deacon --- arch/arm/include/asm/tlbflush.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index cc8517e0f132..ded7c16f80cc 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -549,6 +549,7 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned lon static inline void local_flush_tlb_kernel_page(unsigned long kaddr) { } static inline void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } static inline void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { } +static inline void local_flush_bp_all(void) { } extern void flush_tlb_all(void); extern void flush_tlb_mm(struct mm_struct *mm); @@ -556,6 +557,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr); extern void flush_tlb_kernel_page(unsigned long kaddr); extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void flush_bp_all(void); #endif /* __ASSEMBLY__ */ #endif -- cgit v1.2.3 From 66567618f37269ec55febee4dcec2a1dec1033a0 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 12 Jul 2012 14:38:46 +0100 Subject: ARM: select CPU_CPU15_MMU/MPU appropriately Currently CPU_V7 selects CPU_CP15_MMU, however in the case of a V7 CPU implementing the PMSA, such as the Cortex-R7, the CP15_MMU operations are not available. Selecting CPU_CP15_MPU is appropriate in this case. This patch makes CPU_CP15_MMU dependent on the use of the MMU, selecting CPU_CP15_MPU for v7 processors when !MMU is chosen. Signed-off-by: Jonathan Austin --- arch/arm/mm/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 9e8101ecd63e..6cacdc8dd654 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -392,7 +392,8 @@ config CPU_V7 select CPU_CACHE_V7 select CPU_CACHE_VIPT select CPU_COPY_V6 if MMU - select CPU_CP15_MMU + select CPU_CP15_MMU if MMU + select CPU_CP15_MPU if !MMU select CPU_HAS_ASID if MMU select CPU_PABRT_V7 select CPU_TLB_V7 if MMU -- cgit v1.2.3 From c90ad5c940583525f46938149b91187e75acc546 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 15 Mar 2012 14:27:07 +0000 Subject: ARM: add Cortex-R7 Processor Info This patch adds processor info for ARM Ltd. Cortex-R7. The R7 has many similarities to the A9 and though the ACTLR layout is not identical, the bits associated with cache operations broadcasting and SMP modes are the same for A9, A5 and R7 (Though in the A-class processors the same bits toggle TLB-ops broadcasting as well as cache-ops) Signed-off-by: Jonathan Austin Reviewed-by: Will Deacon CC: Catalin Marinas CC: Stephen Boyd --- arch/arm/mm/proc-v7.S | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index a851e3433afe..f85ae8cad17f 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -159,7 +159,8 @@ ENDPROC(cpu_v7_do_resume) */ __v7_ca5mp_setup: __v7_ca9mp_setup: - mov r10, #(1 << 0) @ TLB ops broadcasting +__v7_cr7mp_setup: + mov r10, #(1 << 0) @ Cache/TLB ops broadcasting b 1f __v7_ca7mp_setup: __v7_ca15mp_setup: @@ -418,6 +419,16 @@ __v7_pj4b_proc_info: __v7_proc __v7_pj4b_setup .size __v7_pj4b_proc_info, . 
- __v7_pj4b_proc_info + /* + * ARM Ltd. Cortex R7 processor. + */ + .type __v7_cr7mp_proc_info, #object +__v7_cr7mp_proc_info: + .long 0x410fc170 + .long 0xff0ffff0 + __v7_proc __v7_cr7mp_setup + .size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info + /* * ARM Ltd. Cortex A7 processor. */ -- cgit v1.2.3 From ed18bdc875328921114139e17ade1d2569e82c85 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 30 Aug 2012 13:46:44 +0100 Subject: ARM: vexpress: Add Cortex-R Series UART, selectable via DEBUG_LL The Cortex-R series processors on Versatile Express have a different memory map to the RS1 and CA9X4 tiles. Most of the platform difference can be expressed in device-trees, but the UART definitions for LL_DEBUG cannot. This patch defines the UART location for R-Series processors on versatile-express, allowing low-level debug and output from the decompressor. These definitions are selectable via Kconfig Signed-off-by: Jonathan Austin CC: Pawel Moll --- arch/arm/Kconfig.debug | 10 +++++++++- arch/arm/include/debug/vexpress.S | 10 ++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 1d41908d5cda..f2623b25ff9a 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -476,6 +476,13 @@ choice of the tiles using the RS1 memory map, including all new A-class core tiles, FPGA-based SMMs and software models. + config DEBUG_VEXPRESS_UART0_CRX + bool "Use PL011 UART0 at 0xb0090000 (Cortex-R compliant tiles)" + depends on ARCH_VEXPRESS && !MMU + help + This option selects UART0 at 0xb0090000. This is appropriate for + Cortex-R series tiles and SMMs, such as Cortex-R5 and Cortex-R7 + config DEBUG_VT8500_UART0 bool "Use UART0 on VIA/Wondermedia SoCs" depends on ARCH_VT8500 @@ -645,7 +652,8 @@ config DEBUG_LL_INCLUDE default "debug/tegra.S" if DEBUG_TEGRA_UART default "debug/ux500.S" if DEBUG_UX500_UART default "debug/vexpress.S" if DEBUG_VEXPRESS_UART0_DETECT || \ - DEBUG_VEXPRESS_UART0_CA9 || DEBUG_VEXPRESS_UART0_RS1 + DEBUG_VEXPRESS_UART0_CA9 || DEBUG_VEXPRESS_UART0_RS1 || \ + DEBUG_VEXPRESS_UART0_CRX default "debug/vt8500.S" if DEBUG_VT8500_UART0 default "debug/zynq.S" if DEBUG_ZYNQ_UART0 || DEBUG_ZYNQ_UART1 default "mach/debug-macro.S" diff --git a/arch/arm/include/debug/vexpress.S b/arch/arm/include/debug/vexpress.S index dc8e882a6257..acafb229e2b6 100644 --- a/arch/arm/include/debug/vexpress.S +++ b/arch/arm/include/debug/vexpress.S @@ -16,6 +16,8 @@ #define DEBUG_LL_PHYS_BASE_RS1 0x1c000000 #define DEBUG_LL_UART_OFFSET_RS1 0x00090000 +#define DEBUG_LL_UART_PHYS_CRX 0xb0090000 + #define DEBUG_LL_VIRT_BASE 0xf8000000 #if defined(CONFIG_DEBUG_VEXPRESS_UART0_DETECT) @@ -67,6 +69,14 @@ #include +#elif defined(CONFIG_DEBUG_VEXPRESS_UART0_CRX) + + .macro addruart,rp,tmp,tmp2 + ldr \rp, =DEBUG_LL_UART_PHYS_CRX + .endm + +#include + #else /* CONFIG_DEBUG_LL_UART_NONE */ .macro addruart, rp, rv, tmp -- cgit v1.2.3 From aca7e5920c8e80a2a49c1e37664675d78b23398b Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 21 Feb 2013 15:21:34 +0000 Subject: ARM: mpu: add PMSA related registers and bitfields to existing headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the following definitions relevant to the PMSA: Add SCTLR bit 17, (CR_BR - Background Region bit) to the list of CR_* bitfields. This bit determines whether to use the architecturally defined memory map Add the MPUIR to the available registers when using read_cpuid macro. 
The MPUIR is the MPU type register. Signed-off-by: Jonathan Austin Reviewed-by: Will Deacon CC:"Uwe Kleine-König" --- arch/arm/include/asm/cp15.h | 5 +++++ arch/arm/include/asm/cputype.h | 1 + 2 files changed, 6 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index 1f3262e99d81..a524a23d8627 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -23,6 +23,11 @@ #define CR_RR (1 << 14) /* Round Robin cache replacement */ #define CR_L4 (1 << 15) /* LDR pc can set T bit */ #define CR_DT (1 << 16) +#ifdef CONFIG_MMU +#define CR_HA (1 << 17) /* Hardware management of Access Flag */ +#else +#define CR_BR (1 << 17) /* MPU Background region enable (PMSA) */ +#endif #define CR_IT (1 << 18) #define CR_ST (1 << 19) #define CR_FI (1 << 21) /* Fast interrupt (lower latency mode) */ diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index ec635ff32f49..3b704dfb6c58 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -8,6 +8,7 @@ #define CPUID_CACHETYPE 1 #define CPUID_TCM 2 #define CPUID_TLBTYPE 3 +#define CPUID_MPUIR 4 #define CPUID_MPIDR 5 #ifdef CONFIG_CPU_V7M -- cgit v1.2.3 From a2b45b0da8bbb98cb2f062cf16c6fdebab4243a1 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 21 Feb 2013 17:49:24 +0000 Subject: ARM: mpu: add header for MPU register layouts and region data This commit adds definitions relevant to the ARM v7 PMSA compliant MPU. The register layouts and region configuration data is made accessible to asm as well as C-code so that it can be used in early bring-up of the MPU. The mpu region information structs assume that the properties for the I/D side are the same, though the implementation could be trivially extended for future platforms where this is no-longer true. The MPU_*_REGION defines are used for the basic, static MPU region setup. 
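[note: minimal usage sketch (editor's illustration, anticipating helpers added
later in this series) of how the new defines are intended to be combined:

	/* Number of data-side/unified regions advertised by the MPU */
	static unsigned int mpu_data_regions(void)
	{
		u32 mpuir = read_cpuid(CPUID_MPUIR);

		return (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION;
	}

	/* D/IRSR value for an enabled region of 2^size_order bytes:
	 * the SZ field takes N for a 2^(N+1) byte region, hence order - 1 */
	static u32 mpu_rsr(unsigned int size_order)
	{
		return ((size_order - 1) << MPU_RSR_SZ) | (1 << MPU_RSR_EN);
	}
]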
Signed-off-by: Jonathan Austin Reviewed-by: Will Deacon --- arch/arm/include/asm/mpu.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 arch/arm/include/asm/mpu.h (limited to 'arch/arm') diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h new file mode 100644 index 000000000000..bd48c0cb544e --- /dev/null +++ b/arch/arm/include/asm/mpu.h @@ -0,0 +1,72 @@ +#ifndef __ARM_MPU_H +#define __ARM_MPU_H + +#ifdef CONFIG_ARM_MPU + +/* MPUIR layout */ +#define MPUIR_nU 1 +#define MPUIR_DREGION 8 +#define MPUIR_IREGION 16 +#define MPUIR_DREGION_SZMASK (0xFF << MPUIR_DREGION) +#define MPUIR_IREGION_SZMASK (0xFF << MPUIR_IREGION) + +/* ID_MMFR0 data relevant to MPU */ +#define MMFR0_PMSA (0xF << 4) +#define MMFR0_PMSAv7 (3 << 4) + +/* MPU D/I Size Register fields */ +#define MPU_RSR_SZ 1 +#define MPU_RSR_EN 0 + +/* The D/I RSR value for an enabled region spanning the whole of memory */ +#define MPU_RSR_ALL_MEM 63 + +/* Individual bits in the DR/IR ACR */ +#define MPU_ACR_XN (1 << 12) +#define MPU_ACR_SHARED (1 << 2) + +/* C, B and TEX[2:0] bits only have semantic meanings when grouped */ +#define MPU_RGN_CACHEABLE 0xB +#define MPU_RGN_SHARED_CACHEABLE (MPU_RGN_CACHEABLE | MPU_ACR_SHARED) +#define MPU_RGN_STRONGLY_ORDERED 0 + +/* Main region should only be shared for SMP */ +#ifdef CONFIG_SMP +#define MPU_RGN_NORMAL (MPU_RGN_CACHEABLE | MPU_ACR_SHARED) +#else +#define MPU_RGN_NORMAL MPU_RGN_CACHEABLE +#endif + +/* Access permission bits of ACR (only define those that we use)*/ +#define MPU_AP_PL1RW_PL0RW (0x3 << 8) +#define MPU_AP_PL1RW_PL0R0 (0x2 << 8) +#define MPU_AP_PL1RW_PL0NA (0x1 << 8) + +/* For minimal static MPU region configurations */ +#define MPU_PROBE_REGION 0 +#define MPU_BG_REGION 1 +#define MPU_RAM_REGION 2 + +/* Maximum number of regions Linux is interested in */ +#define MPU_MAX_REGIONS 16 + +#ifndef __ASSEMBLY__ + +struct mpu_rgn { + /* Assume same attributes for d/i-side */ + u32 drbar; + u32 drsr; + u32 dracr; +}; + +struct mpu_rgn_info { + u32 mpuir; + struct mpu_rgn rgns[MPU_MAX_REGIONS]; +}; +extern struct mpu_rgn_info mpu_rgn_info; + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_ARM_MPU */ + +#endif -- cgit v1.2.3 From 67c9845beab16a0c97b9c07f72a4b36b7175bb86 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Fri, 22 Feb 2013 17:48:56 +0000 Subject: ARM: mpu: add early bring-up code for the ARMv7 PMSA-compliant MPU This patch adds initial support for using the MPU, which is necessary for SMP operation on PMSAv7 processors because it is the only way to ensure memory is shared. This is an initial patch and full SMP support is added later in this series. The setup of the MPU is performed in a way analagous to that for the MMU: Very early initialisation before the C environment is brought up, followed by a sanity check and more complete initialisation in C. This patch provides the simplest possible memory region configuration: MPU_PROBE_REGION: Reserved for probing MPU details, not enabled MPU_BG_REGION: A 'background' region that specifies all memory strongly ordered MPU_RAM_REGION: A single shared, cacheable, normal region for the valid RAM. In this early initialisation code we simply map the whole of the address space with the BG_REGION and (at least) the kernel with the RAM_REGION. The MPU has region alignment constraints that require us to round past the end of the kernel. As region 2 has a higher priority than region 1, it overrides the strongly- ordered behaviour for RAM only. 
Subsequent patches will add more complete initialisation from the C-world and support for bringing up secondary CPUs. Signed-off-by: Jonathan Austin Reviewed-by: Will Deacon CC: Hyok S. Choi --- arch/arm/include/asm/mpu.h | 3 ++ arch/arm/kernel/head-nommu.S | 87 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h index bd48c0cb544e..001483465aa4 100644 --- a/arch/arm/include/asm/mpu.h +++ b/arch/arm/include/asm/mpu.h @@ -50,6 +50,9 @@ /* Maximum number of regions Linux is interested in */ #define MPU_MAX_REGIONS 16 +#define MPU_DATA_SIDE 0 +#define MPU_INSTR_SIDE 1 + #ifndef __ASSEMBLY__ struct mpu_rgn { diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 06ba9c8e62be..659912c49571 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -17,9 +17,11 @@ #include #include #include +#include #include #include #include +#include /* * Kernel startup entry point. @@ -63,6 +65,17 @@ ENTRY(stext) movs r10, r5 @ invalid processor (r5=0)? beq __error_p @ yes, error 'p' +#ifdef CONFIG_ARM_MPU + /* Calculate the size of a region covering just the kernel */ + ldr r5, =PHYS_OFFSET @ Region start: PHYS_OFFSET + ldr r6, =(_end) @ Cover whole kernel + sub r6, r6, r5 @ Minimum size of region to map + clz r6, r6 @ Region size must be 2^N... + rsb r6, r6, #31 @ ...so round up region size + lsl r6, r6, #MPU_RSR_SZ @ Put size in right field + orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit + bl __setup_mpu +#endif ldr r13, =__mmap_switched @ address to jump to after @ initialising sctlr adr lr, BSYM(1f) @ return (PIC) address @@ -147,4 +160,78 @@ __after_proc_init: ENDPROC(__after_proc_init) .ltorg +#ifdef CONFIG_ARM_MPU + + +/* Set which MPU region should be programmed */ +.macro set_region_nr tmp, rgnr + mov \tmp, \rgnr @ Use static region numbers + mcr p15, 0, \tmp, c6, c2, 0 @ Write RGNR +.endm + +/* Setup a single MPU region, either D or I side (D-side for unified) */ +.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE + mcr p15, 0, \bar, c6, c1, (0 + \side) @ I/DRBAR + mcr p15, 0, \acr, c6, c1, (4 + \side) @ I/DRACR + mcr p15, 0, \sr, c6, c1, (2 + \side) @ I/DRSR +.endm + +/* + * Setup the MPU and initial MPU Regions. We create the following regions: + * Region 0: Use this for probing the MPU details, so leave disabled. + * Region 1: Background region - covers the whole of RAM as strongly ordered + * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6 + * + * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION +*/ + +ENTRY(__setup_mpu) + + /* Probe for v7 PMSA compliance */ + mrc p15, 0, r0, c0, c1, 4 @ Read ID_MMFR0 + and r0, r0, #(MMFR0_PMSA) @ PMSA field + teq r0, #(MMFR0_PMSAv7) @ PMSA v7 + bne __error_p @ Fail: ARM_MPU on NOT v7 PMSA + + /* Determine whether the D/I-side memory map is unified. 
We set the + * flags here and continue to use them for the rest of this function */ + mrc p15, 0, r0, c0, c0, 4 @ MPUIR + ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU + beq __error_p @ Fail: ARM_MPU and no MPU + tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified + + /* Setup second region first to free up r6 */ + set_region_nr r0, #MPU_RAM_REGION + isb + /* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */ + ldr r0, =PHYS_OFFSET @ RAM starts at PHYS_OFFSET + ldr r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL) + + setup_region r0, r5, r6, MPU_DATA_SIDE @ PHYS_OFFSET, shared, enabled + beq 1f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ PHYS_OFFSET, shared, enabled +1: isb + + /* First/background region */ + set_region_nr r0, #MPU_BG_REGION + isb + /* Execute Never, strongly ordered, inaccessible to PL0, rw PL1 */ + mov r0, #0 @ BG region starts at 0x0 + ldr r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA) + mov r6, #MPU_RSR_ALL_MEM @ 4GB region, enabled + + setup_region r0, r5, r6, MPU_DATA_SIDE @ 0x0, BG region, enabled + beq 2f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled +2: isb + + /* Enable the MPU */ + mrc p15, 0, r0, c1, c0, 0 @ Read SCTLR + bic r0, r0, #CR_BR @ Disable the 'default mem-map' + orr r0, r0, #CR_M @ Set SCTRL.M (MPU on) + mcr p15, 0, r0, c1, c0, 0 @ Enable MPU + isb + mov pc,lr +ENDPROC(__setup_mpu) +#endif #include "head-common.S" -- cgit v1.2.3 From 5ad7dcbe40ae52bee67d7ed61efb6a3fccb6dc2b Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 28 Feb 2013 17:46:36 +0000 Subject: ARM: mpu: add MPU probe and initialisation functions in C This patch adds new functions for probing and initialising the ARMv7 PMSA-compliant MPU. These use the pre-defined and reserved MPU_PROBE_REGION for establishing properties of the MPU, which is necessary because certain probe operations require modifying region properties and reading back the results. This patch also introduces a minimal sanity_check_meminfo_mpu function, that ensures that the memory set-up passed to the kernel can be used in conjunction with the MPU. The base address of a region must be aligned to the region size, otherwise behavior is unpredictable and region sizes can only be specified as a power-of-two. To simplify the satisfaction of these requirements this implementation currently enforces that all memory is contiguous from PHYS_OFFSET, merging banks that are contiguous but passed in separately. The functions are added in this patch but wired in to the boot process later in the series. Signed-off-by: Jonathan Austin Reviewed-by: Will Deacon CC: Hyok S. 
Choi --- arch/arm/mm/nommu.c | 249 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 249 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index dd3a6c670f08..0897d6bd9c2d 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -15,9 +16,257 @@ #include #include #include +#include +#include #include "mm.h" +#ifdef CONFIG_ARM_MPU +struct mpu_rgn_info mpu_rgn_info; + +/* Region number */ +static void rgnr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c2, 0" : : "r" (v)); +} + +/* Data-side / unified region attributes */ + +/* Region access control register */ +static void dracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 4" : : "r" (v)); +} + +/* Region size register */ +static void drsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 2" : : "r" (v)); +} + +/* Region base address register */ +static void drbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 0" : : "r" (v)); +} + +static u32 drbar_read(void) +{ + u32 v; + asm("mrc p15, 0, %0, c6, c1, 0" : "=r" (v)); + return v; +} +/* Optional instruction-side region attributes */ + +/* I-side Region access control register */ +static void iracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 5" : : "r" (v)); +} + +/* I-side Region size register */ +static void irsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 3" : : "r" (v)); +} + +/* I-side Region base address register */ +static void irbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 1" : : "r" (v)); +} + +static unsigned long irbar_read(void) +{ + unsigned long v; + asm("mrc p15, 0, %0, c6, c1, 1" : "=r" (v)); + return v; +} + +/* MPU initialisation functions */ +void __init sanity_check_meminfo_mpu(void) +{ + int i; + struct membank *bank = meminfo.bank; + phys_addr_t phys_offset = PHYS_OFFSET; + phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; + + /* Initially only use memory continuous from PHYS_OFFSET */ + if (bank_phys_start(&bank[0]) != phys_offset) + panic("First memory bank must be contiguous from PHYS_OFFSET"); + + /* Banks have already been sorted by start address */ + for (i = 1; i < meminfo.nr_banks; i++) { + if (bank[i].start <= bank_phys_end(&bank[0]) && + bank_phys_end(&bank[i]) > bank_phys_end(&bank[0])) { + bank[0].size = bank_phys_end(&bank[i]) - bank[0].start; + } else { + pr_notice("Ignoring RAM after 0x%.8lx. " + "First non-contiguous (ignored) bank start: 0x%.8lx\n", + (unsigned long)bank_phys_end(&bank[0]), + (unsigned long)bank_phys_start(&bank[i])); + break; + } + } + /* All contiguous banks are now merged in to the first bank */ + meminfo.nr_banks = 1; + specified_mem_size = bank[0].size; + + /* + * MPU has curious alignment requirements: Size must be power of 2, and + * region start must be aligned to the region size + */ + if (phys_offset != 0) + pr_info("PHYS_OFFSET != 0 => MPU Region size constrained by alignment requirements\n"); + + /* + * Maximum aligned region might overflow phys_addr_t if phys_offset is + * 0. Hence we keep everything below 4G until we take the smaller of + * the aligned_region_size and rounded_mem_size, one of which is + * guaranteed to be smaller than the maximum physical address. 
+ */ + aligned_region_size = (phys_offset - 1) ^ (phys_offset); + /* Find the max power-of-two sized region that fits inside our bank */ + rounded_mem_size = (1 << __fls(bank[0].size)) - 1; + + /* The actual region size is the smaller of the two */ + aligned_region_size = aligned_region_size < rounded_mem_size + ? aligned_region_size + 1 + : rounded_mem_size + 1; + + if (aligned_region_size != specified_mem_size) + pr_warn("Truncating memory from 0x%.8lx to 0x%.8lx (MPU region constraints)", + (unsigned long)specified_mem_size, + (unsigned long)aligned_region_size); + + meminfo.bank[0].size = aligned_region_size; + pr_debug("MPU Region from 0x%.8lx size 0x%.8lx (end 0x%.8lx))\n", + (unsigned long)phys_offset, + (unsigned long)aligned_region_size, + (unsigned long)bank_phys_end(&bank[0])); + +} + +static int mpu_present(void) +{ + return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7); +} + +static int mpu_max_regions(void) +{ + /* + * We don't support a different number of I/D side regions so if we + * have separate instruction and data memory maps then return + * whichever side has a smaller number of supported regions. + */ + u32 dregions, iregions, mpuir; + mpuir = read_cpuid(CPUID_MPUIR); + + dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION; + + /* Check for separate d-side and i-side memory maps */ + if (mpuir & MPUIR_nU) + iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION; + + /* Use the smallest of the two maxima */ + return min(dregions, iregions); +} + +static int mpu_iside_independent(void) +{ + /* MPUIR.nU specifies whether there is *not* a unified memory map */ + return read_cpuid(CPUID_MPUIR) & MPUIR_nU; +} + +static int mpu_min_region_order(void) +{ + u32 drbar_result, irbar_result; + /* We've kept a region free for this probing */ + rgnr_write(MPU_PROBE_REGION); + isb(); + /* + * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum + * region order + */ + drbar_write(0xFFFFFFFC); + drbar_result = irbar_result = drbar_read(); + drbar_write(0x0); + /* If the MPU is non-unified, we use the larger of the two minima*/ + if (mpu_iside_independent()) { + irbar_write(0xFFFFFFFC); + irbar_result = irbar_read(); + irbar_write(0x0); + } + isb(); /* Ensure that MPU region operations have completed */ + /* Return whichever result is larger */ + return __ffs(max(drbar_result, irbar_result)); +} + +static int mpu_setup_region(unsigned int number, phys_addr_t start, + unsigned int size_order, unsigned int properties) +{ + u32 size_data; + + /* We kept a region free for probing resolution of MPU regions*/ + if (number > mpu_max_regions() || number == MPU_PROBE_REGION) + return -ENOENT; + + if (size_order > 32) + return -ENOMEM; + + if (size_order < mpu_min_region_order()) + return -ENOMEM; + + /* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^N+1 */ + size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN; + + dsb(); /* Ensure all previous data accesses occur with old mappings */ + rgnr_write(number); + isb(); + drbar_write(start); + dracr_write(properties); + isb(); /* Propagate properties before enabling region */ + drsr_write(size_data); + + /* Check for independent I-side registers */ + if (mpu_iside_independent()) { + irbar_write(start); + iracr_write(properties); + isb(); + irsr_write(size_data); + } + isb(); + + /* Store region info (we treat i/d side the same, so only store d) */ + mpu_rgn_info.rgns[number].dracr = properties; + mpu_rgn_info.rgns[number].drbar = start; + mpu_rgn_info.rgns[number].drsr = 
size_data; + return 0; +} + +/* +* Set up default MPU regions, doing nothing if there is no MPU +*/ +void __init mpu_setup(void) +{ + int region_err; + if (!mpu_present()) + return; + + region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET, + ilog2(meminfo.bank[0].size), + MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); + if (region_err) { + panic("MPU region initialization failure! %d", region_err); + } else { + pr_info("Using ARMv7 PMSA Compliant MPU. " + "Region independence: %s, Max regions: %d\n", + mpu_iside_independent() ? "Yes" : "No", + mpu_max_regions()); + } +} +#endif /* CONFIG_ARM_MPU */ + void __init arm_mm_memblock_reserve(void) { #ifndef CONFIG_CPU_V7M -- cgit v1.2.3 From 9a271567fe9980a7e7ded0c6250d56200c3678ee Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 28 Feb 2013 17:51:05 +0000 Subject: ARM: mpu: Complete initialisation of the MPU after reaching the C-world Much like with the MMU, MPU initialisation is performed in two stages; the first in the pre-C world and the 'real' initialisation during arch setup. This patch wires in previously added MPU initialisation functions so that the whole of memory is mapped with the appropriate region properties for 'normal' RAM (the appropriate properties depend on whether the system is SMP). Stub initialisation functions are added for the case that there MPU support is not configured in to the kernel. Signed-off-by: Jonathan Austin Reviewed-by: Will Deacon CC: Hyok S. Choi --- arch/arm/mm/nommu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 0897d6bd9c2d..c1b494d43ed1 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -265,6 +265,9 @@ void __init mpu_setup(void) mpu_max_regions()); } } +#else +static void sanity_check_meminfo_mpu(void) {} +static void __init mpu_setup(void) {} #endif /* CONFIG_ARM_MPU */ void __init arm_mm_memblock_reserve(void) @@ -286,7 +289,9 @@ void __init arm_mm_memblock_reserve(void) void __init sanity_check_meminfo(void) { - phys_addr_t end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]); + phys_addr_t end; + sanity_check_meminfo_mpu(); + end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]); high_memory = __va(end - 1) + 1; } @@ -297,6 +302,7 @@ void __init sanity_check_meminfo(void) void __init paging_init(struct machine_desc *mdesc) { early_trap_init((void *)CONFIG_VECTORS_BASE); + mpu_setup(); bootmem_init(); } -- cgit v1.2.3 From eb08375ea66e63c5e11dea69b43c5633d531ce81 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Fri, 22 Feb 2013 18:51:30 +0000 Subject: ARM: mpu: add MPU initialisation for secondary cores The MPU initialisation on the primary core is performed in two stages, one minimal stage to ensure the CPU can boot and a second one after sanity_check_meminfo. As the memory configuration is known by the time we boot secondary cores only a single step is necessary, provided the values for DRSR are passed to secondaries. This patch implements this arrangement. The configuration generated for the MPU regions is made available to the secondary core, which can then use the asm MPU intialisation code to program a complete region configuration. This is necessary for SMP configurations without an MMU, as the MPU initialisation is the only way to ensure that memory is specified as 'shared'. 
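[note: condensed C rendering (editor's sketch; the helper is hypothetical and
the cp15 encodings are taken from the nommu.c accessors earlier in the series)
of what a secondary core does with the DRSR value handed over in
secondary_data before the MPU is enabled:

	static void secondary_mpu_ram_region(u32 drsr)
	{
		asm volatile("mcr p15, 0, %0, c6, c2, 0" : : "r" (MPU_RAM_REGION));	/* RGNR */
		asm volatile("isb");
		asm volatile("mcr p15, 0, %0, c6, c1, 0" : : "r" (PHYS_OFFSET));	/* DRBAR */
		asm volatile("mcr p15, 0, %0, c6, c1, 4"
			     : : "r" (MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL));		/* DRACR */
		asm volatile("mcr p15, 0, %0, c6, c1, 2" : : "r" (drsr));		/* DRSR */
		asm volatile("isb");
	}

In the real boot path this is done in assembly by __setup_mpu, which also sets
up the background region.]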
Signed-off-by: Jonathan Austin Reviewed-by: Will Deacon CC: Nicolas Pitre --- arch/arm/include/asm/smp.h | 5 ++++- arch/arm/kernel/head-nommu.S | 7 +++++++ arch/arm/kernel/smp.c | 8 ++++++-- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index d3a22bebe6ce..a8cae71caceb 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -65,7 +65,10 @@ asmlinkage void secondary_start_kernel(void); * Initial data for bringing up a secondary CPU. */ struct secondary_data { - unsigned long pgdir; + union { + unsigned long mpu_rgn_szr; + unsigned long pgdir; + }; unsigned long swapper_pg_dir; void *stack; }; diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 659912c49571..13741d004de5 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -107,6 +107,13 @@ ENTRY(secondary_startup) adr r4, __secondary_data ldmia r4, {r7, r12} + +#ifdef CONFIG_ARM_MPU + /* Use MPU region info supplied by __cpu_up */ + ldr r6, [r7] @ get secondary_data.mpu_szr + bl __setup_mpu @ Initialize the MPU +#endif + adr lr, BSYM(__after_proc_init) @ return address mov r13, r12 @ __secondary_switched address ARM( add pc, r10, #PROCINFO_INITFUNC ) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 44d1c00dc45f..e17d9346baee 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -45,6 +45,7 @@ #include #include #include +#include /* * as from 2.5, kernels no longer have an init_tasks structure @@ -87,6 +88,10 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) * its stack and the page tables. */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; +#ifdef CONFIG_ARM_MPU + secondary_data.mpu_rgn_szr = mpu_rgn_info.rgns[MPU_RAM_REGION].drsr; +#endif + #ifdef CONFIG_MMU secondary_data.pgdir = virt_to_phys(idmap_pgd); secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir); @@ -114,9 +119,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) pr_err("CPU%u: failed to boot: %d\n", cpu, ret); } - secondary_data.stack = NULL; - secondary_data.pgdir = 0; + memset(&secondary_data, 0, sizeof(secondary_data)); return ret; } -- cgit v1.2.3 From 801bb21c600161c8626d7d9cb649be869ee4dbbe Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Fri, 22 Feb 2013 18:56:04 +0000 Subject: ARM: mpu: Allow enabling of the MPU via kconfig Allows the user to select MPU support when compiling for ARM processors that support the PMSAv7. This ensures that CONFIG_SMP depends on the MPU in the case that no MMU is present. CONFIG_SMP_ON_UP is not implemented for nommu, so introduce an MMU dependency there. Signed-off-by: Jonathan Austin Reviewed-by: Will Deacon --- arch/arm/Kconfig | 4 ++-- arch/arm/Kconfig-nommu | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5543d36c2834..ec38cacb3d08 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1414,7 +1414,7 @@ config SMP depends on CPU_V6K || CPU_V7 depends on GENERIC_CLOCKEVENTS depends on HAVE_SMP - depends on MMU + depends on MMU || ARM_MPU select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. 
If you have @@ -1435,7 +1435,7 @@ config SMP config SMP_ON_UP bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)" - depends on SMP && !XIP_KERNEL + depends on SMP && !XIP_KERNEL && MMU default y help SMP kernels contain instructions which fail on non-SMP processors. diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index c859495da480..db597e1c7572 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -50,3 +50,14 @@ config REMAP_VECTORS_TO_RAM Otherwise, say 'y' here. In this case, the kernel will require external support to redirect the hardware exception vectors to the writable versions located at DRAM_BASE. + +config ARM_MPU + bool 'Use the ARM v7 PMSA Compliant MPU' + default y + help + Some ARM systems without an MMU have instead a Memory Protection + Unit (MPU) that defines the type and permissions for regions of + memory. + + If your CPU has an MPU then you should choose 'y' here unless you + know that you do not want to use the MPU. -- cgit v1.2.3 From 9dfc28b6308096e48b54c28259825a1200f60742 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Thu, 18 Apr 2013 18:37:24 +0100 Subject: ARM: mpu: protect the vectors page with an MPU region Without an MMU it is possible for userspace programs to start executing code in places that they have no business executing. The MPU allows some level of protection against this. This patch protects the vectors page from access by userspace processes. Userspace tasks that dereference a null pointer are already protected by an svc at 0x0 that kills them. However when tasks use an offset from a null pointer (eg a function in a null struct) they miss this carefully placed svc and enter the exception vectors in user mode, ending up in the kernel. This patch causes programs that do this to receive a SEGV instead of happily entering the kernel in user-mode, and hence avoid a 'Bad Mode' panic. As part of this change it is necessary to make sigreturn happen via the stack when there is not an sa_restorer function. This change is invisible to userspace, and irrelevant to code compiled using a uClibc toolchain, which always uses an sa_restorer function. Because we don't get to remap the vectors in !MMU kuser_helpers are not in a defined location, and hence aren't usable. This means we don't need to worry about keeping them accessible from PL0 Signed-off-by: Jonathan Austin Reviewed-by: Will Deacon CC: Nicolas Pitre CC: Catalin Marinas --- arch/arm/include/asm/mpu.h | 1 + arch/arm/kernel/head-nommu.S | 16 ++++++++++++++++ arch/arm/kernel/signal.c | 9 +++++++-- 3 files changed, 24 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h index 001483465aa4..c3247cc2fe08 100644 --- a/arch/arm/include/asm/mpu.h +++ b/arch/arm/include/asm/mpu.h @@ -46,6 +46,7 @@ #define MPU_PROBE_REGION 0 #define MPU_BG_REGION 1 #define MPU_RAM_REGION 2 +#define MPU_VECTORS_REGION 3 /* Maximum number of regions Linux is interested in */ #define MPU_MAX_REGIONS 16 diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 13741d004de5..75f14cc3e073 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -22,6 +22,7 @@ #include #include #include +#include /* * Kernel startup entry point. @@ -188,6 +189,7 @@ ENDPROC(__after_proc_init) * Region 0: Use this for probing the MPU details, so leave disabled. * Region 1: Background region - covers the whole of RAM as strongly ordered * Region 2: Normal, Shared, cacheable for RAM. 
From PHYS_OFFSET, size from r6 + * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page * * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION */ @@ -232,6 +234,20 @@ ENTRY(__setup_mpu) setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled 2: isb + /* Vectors region */ + set_region_nr r0, #MPU_VECTORS_REGION + isb + /* Shared, inaccessible to PL0, rw PL1 */ + mov r0, #CONFIG_VECTORS_BASE @ Cover from VECTORS_BASE + ldr r5,=(MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL) + /* Writing N to bits 5:1 (RSR_SZ) --> region size 2^N+1 */ + mov r6, #(((PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN) + + setup_region r0, r5, r6, MPU_DATA_SIDE @ VECTORS_BASE, PL0 NA, enabled + beq 3f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ VECTORS_BASE, PL0 NA, enabled +3: isb + /* Enable the MPU */ mrc p15, 0, r0, c1, c0, 0 @ Read SCTLR bic r0, r0, #CR_BR @ Disable the 'default mem-map' diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 296786bdbb73..1c16c35c271a 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -392,14 +392,19 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, if (ksig->ka.sa.sa_flags & SA_SIGINFO) idx += 3; + /* + * Put the sigreturn code on the stack no matter which return + * mechanism we use in order to remain ABI compliant + */ if (__put_user(sigreturn_codes[idx], rc) || __put_user(sigreturn_codes[idx+1], rc+1)) return 1; - if (cpsr & MODE32_BIT) { + if ((cpsr & MODE32_BIT) && !IS_ENABLED(CONFIG_ARM_MPU)) { /* * 32-bit code can use the new high-page - * signal return code support. + * signal return code support except when the MPU has + * protected the vectors page from PL0 */ retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb; } else { -- cgit v1.2.3 From de8297765def67ec40b69522f3e405a61e0217b3 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Fri, 24 May 2013 20:47:57 +0100 Subject: ARM: mpu: Ensure that MPU depends on CPU_V7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The support for the MPU is currently implemented only for R-class (PMSAv7/R). Since the merge of V7M support in to the kernel it is possible to select MPU support on V7M. This patch ensures that until MPU support for M-class processors is implemented, the MPU can only be selected with R-class CPUs Signed-off-by: Jonathan Austin Acked-by: Uwe Kleine-König --- arch/arm/Kconfig-nommu | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index db597e1c7572..aed66d5df7f1 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -53,6 +53,7 @@ config REMAP_VECTORS_TO_RAM config ARM_MPU bool 'Use the ARM v7 PMSA Compliant MPU' + depends on CPU_V7 default y help Some ARM systems without an MMU have instead a Memory Protection -- cgit v1.2.3 From 8cf72172d739639f2699131821a3ebc291287cf2 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 16 May 2013 10:32:09 +0100 Subject: ARM: kernel: build MPIDR hash function data structure On ARM SMP systems, cores are identified by their MPIDR register. The MPIDR guidelines in the ARM ARM do not provide strict enforcement of MPIDR layout, only recommendations that, if followed, split the MPIDR on ARM 32 bit platforms in three affinity levels. In multi-cluster systems like big.LITTLE, if the affinity guidelines are followed, the MPIDR can not be considered an index anymore. 
This means that the association between logical CPU in the kernel and the HW
CPU identifier becomes somewhat more complicated, requiring methods like
hashing to associate a given MPIDR with a CPU logical index, in order for the
look-up to be carried out in an efficient and scalable way.

This patch provides a function in the kernel that, starting from the
cpu_logical_map, implements collision-free hashing of MPIDR values by checking
all significant bits of the MPIDR affinity level bitfields. The hashing can
then be carried out through bit shifting and ORing; the resulting hash
algorithm is a collision-free though not minimal hash that can be executed
with few assembly instructions.

The MPIDR is filtered through an MPIDR mask that is built by checking all bits
that toggle in the set of MPIDRs corresponding to possible CPUs. Bits that do
not toggle do not carry information, so they do not contribute to the
resulting hash.

Pseudo code:

/* check all bits that toggle, so they are required */
for (i = 1, mpidr_mask = 0; i < num_possible_cpus(); i++)
	mpidr_mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));

/*
 * Build shifts to be applied to aff0, aff1, aff2 values to hash the mpidr
 * fls() returns the last bit set in a word, 0 if none
 * ffs() returns the first bit set in a word, 0 if none
 */
fs0 = mpidr_mask[7:0] ? ffs(mpidr_mask[7:0]) - 1 : 0;
fs1 = mpidr_mask[15:8] ? ffs(mpidr_mask[15:8]) - 1 : 0;
fs2 = mpidr_mask[23:16] ? ffs(mpidr_mask[23:16]) - 1 : 0;
ls0 = fls(mpidr_mask[7:0]);
ls1 = fls(mpidr_mask[15:8]);
ls2 = fls(mpidr_mask[23:16]);
bits0 = ls0 - fs0;
bits1 = ls1 - fs1;
bits2 = ls2 - fs2;
aff0_shift = fs0;
aff1_shift = 8 + fs1 - bits0;
aff2_shift = 16 + fs2 - (bits0 + bits1);
u32 hash(u32 mpidr) {
	u32 l0, l1, l2;
	u32 mpidr_masked = mpidr & mpidr_mask;
	l0 = mpidr_masked & 0xff;
	l1 = mpidr_masked & 0xff00;
	l2 = mpidr_masked & 0xff0000;
	return (l0 >> aff0_shift | l1 >> aff1_shift | l2 >> aff2_shift);
}

The hashing algorithm relies on the inherent properties set in the ARM ARM
recommendations for the MPIDR. Exotic configurations, where for instance the
MPIDR values at a given affinity level have large holes, can end up requiring
big hash tables, since the compression of values that can be achieved through
shifting is somewhat crippled when holes are present. The kernel warns if the
number of buckets of the resulting hash table exceeds the number of possible
CPUs by a factor of 4, which is a symptom of a very sparse HW MPIDR
configuration.

The hash algorithm is quite simple and can easily be implemented in assembly
code, to be used in code paths where the kernel virtual address space is not
set up (ie cpu_resume) and instruction and data fetches are strongly ordered,
so code must be compact and must carry out few data accesses.
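As an illustration only, the pseudo code above can be rendered as a small
self-contained user-space C sketch. This is not the kernel's API: the helpers
mpidr_hash_init() and mpidr_hash(), the GCC/Clang bit-scan builtins standing
in for ffs()/fls(), and the sample MPIDR values are all assumptions made for
the example, with 8-bit affinity fields as in the commit.

#include <stdint.h>
#include <stdio.h>

static uint32_t mask, shift_aff[3];

/* ffs(x) - 1 and fls(x), returning 0 for x == 0 as in the pseudo code */
static int first_set(uint32_t x) { return x ? __builtin_ctz(x) : 0; }
static int last_set(uint32_t x)  { return x ? 32 - __builtin_clz(x) : 0; }

static void mpidr_hash_init(const uint32_t *cpu_map, int ncpus)
{
	uint32_t fs[3], bits[3];
	int i;

	/* keep only the MPIDR bits that toggle across the possible CPUs */
	for (i = 1; i < ncpus; i++)
		mask |= cpu_map[i] ^ cpu_map[0];

	for (i = 0; i < 3; i++) {
		uint32_t aff = (mask >> (8 * i)) & 0xff;

		fs[i] = first_set(aff);
		bits[i] = last_set(aff) - fs[i];
	}
	shift_aff[0] = fs[0];
	shift_aff[1] = 8 + fs[1] - bits[0];
	shift_aff[2] = 16 + fs[2] - (bits[0] + bits[1]);
}

static uint32_t mpidr_hash(uint32_t mpidr)
{
	uint32_t m = mpidr & mask;

	return ((m & 0xff)     >> shift_aff[0]) |
	       ((m & 0xff00)   >> shift_aff[1]) |
	       ((m & 0xff0000) >> shift_aff[2]);
}

int main(void)
{
	/* hypothetical two-cluster system: MPIDR = (cluster << 8) | cpu */
	const uint32_t cpus[] = { 0x000, 0x001, 0x100, 0x101 };
	int i;

	mpidr_hash_init(cpus, 4);
	for (i = 0; i < 4; i++)
		printf("mpidr 0x%03x -> bucket %u\n", cpus[i], mpidr_hash(cpus[i]));
	return 0;
}

For the two-cluster layout in main() the mask works out to 0x101 and the four
MPIDRs hash to buckets 0 through 3 with no collisions, which is the behaviour
that smp_build_mpidr_hash() precomputes in the diff that follows.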
Cc: Will Deacon Cc: Catalin Marinas Cc: Russell King Cc: Colin Cross Cc: Santosh Shilimkar Cc: Daniel Lezcano Cc: Amit Kucheria Signed-off-by: Lorenzo Pieralisi Reviewed-by: Dave Martin Reviewed-by: Nicolas Pitre Tested-by: Shawn Guo Tested-by: Kevin Hilman Tested-by: Stephen Warren --- arch/arm/include/asm/smp_plat.h | 12 ++++++++ arch/arm/kernel/setup.c | 67 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 1c7b6f8101ae..f75f8a234b3f 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -70,4 +70,16 @@ static inline int get_logical_index(u32 mpidr) return -EINVAL; } +struct mpidr_hash { + u32 mask; + u32 shift_aff[3]; + u32 bits; +}; + +extern struct mpidr_hash mpidr_hash; + +static inline u32 mpidr_hash_size(void) +{ + return 1 << mpidr_hash.bits; +} #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index ca34224f891f..9048513cbe0d 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -478,6 +478,72 @@ void __init smp_setup_processor_id(void) printk(KERN_INFO "Booting Linux on physical CPU 0x%x\n", mpidr); } +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + * level in order to build a linear index from an + * MPIDR value. Resulting algorithm is a collision + * free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ + u32 i, affinity; + u32 fs[3], bits[3], ls, mask = 0; + /* + * Pre-scan the list of MPIDRS and filter out bits that do + * not contribute to affinity levels, ie they never toggle. + */ + for_each_possible_cpu(i) + mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); + pr_debug("mask of set bits 0x%x\n", mask); + /* + * Find and stash the last and first bit set at all affinity levels to + * check how many bits are required to represent them. + */ + for (i = 0; i < 3; i++) { + affinity = MPIDR_AFFINITY_LEVEL(mask, i); + /* + * Find the MSB bit and LSB bits position + * to determine how many bits are required + * to express the affinity level. + */ + ls = fls(affinity); + fs[i] = affinity ? ffs(affinity) - 1 : 0; + bits[i] = ls - fs[i]; + } + /* + * An index can be created from the MPIDR by isolating the + * significant bits at each affinity level and by shifting + * them in order to compress the 24 bits values space to a + * compressed set of values. This is equivalent to hashing + * the MPIDR through shifting and ORing. It is a collision free + * hash though not minimal since some levels might contain a number + * of CPUs that is not an exact power of 2 and their bit + * representation might contain holes, eg MPIDR[7:0] = {0x2, 0x80}. + */ + mpidr_hash.shift_aff[0] = fs[0]; + mpidr_hash.shift_aff[1] = MPIDR_LEVEL_BITS + fs[1] - bits[0]; + mpidr_hash.shift_aff[2] = 2*MPIDR_LEVEL_BITS + fs[2] - + (bits[1] + bits[0]); + mpidr_hash.mask = mask; + mpidr_hash.bits = bits[2] + bits[1] + bits[0]; + pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] mask[0x%x] bits[%u]\n", + mpidr_hash.shift_aff[0], + mpidr_hash.shift_aff[1], + mpidr_hash.shift_aff[2], + mpidr_hash.mask, + mpidr_hash.bits); + /* + * 4x is an arbitrary value used to warn on a hash table much bigger + * than expected on most systems. 
+ */ + if (mpidr_hash_size() > 4 * num_possible_cpus()) + pr_warn("Large number of MPIDR hash buckets detected\n"); + sync_cache_w(&mpidr_hash); +} +#endif + static void __init setup_processor(void) { struct proc_info_list *list; @@ -825,6 +891,7 @@ void __init setup_arch(char **cmdline_p) smp_set_ops(mdesc->smp); } smp_init_cpus(); + smp_build_mpidr_hash(); } #endif -- cgit v1.2.3 From 7604537bbb5720376e8c9e6bc74a8e6305e3094d Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 16 May 2013 10:34:30 +0100 Subject: ARM: kernel: implement stack pointer save array through MPIDR hashing Current implementation of cpu_{suspend}/cpu_{resume} relies on the MPIDR to index the array of pointers where the context is saved and restored. The current approach works as long as the MPIDR can be considered a linear index, so that the pointers array can simply be dereferenced by using the MPIDR[7:0] value. On ARM multi-cluster systems, where the MPIDR may not be a linear index, to properly dereference the stack pointer array, a mapping function should be applied to it so that it can be used for arrays look-ups. This patch adds code in the cpu_{suspend}/cpu_{resume} implementation that relies on shifting and ORing hashing method to map a MPIDR value to a set of buckets precomputed at boot to have a collision free mapping from MPIDR to context pointers. The hashing algorithm must be simple, fast, and implementable with few instructions since in the cpu_resume path the mapping is carried out with the MMU off and the I-cache off, hence code and data are fetched from DRAM with no-caching available. Simplicity is counterbalanced with a little increase of memory (allocated dynamically) for stack pointers buckets, that should be anyway fairly limited on most systems. Memory for context pointers is allocated in a early_initcall with size precomputed and stashed previously in kernel data structures. Memory for context pointers is allocated through kmalloc; this guarantees contiguous physical addresses for the allocated memory which is fundamental to the correct functioning of the resume mechanism that relies on the context pointer array to be a chunk of contiguous physical memory. Virtual to physical address conversion for the context pointer array base is carried out at boot to avoid fiddling with virt_to_phys conversions in the cpu_resume path which is quite fragile and should be optimized to execute as few instructions as possible. Virtual and physical context pointer base array addresses are stashed in a struct that is accessible from assembly using values generated through the asm-offsets.c mechanism. Cc: Will Deacon Cc: Catalin Marinas Cc: Russell King Cc: Colin Cross Cc: Santosh Shilimkar Cc: Daniel Lezcano Cc: Amit Kucheria Signed-off-by: Lorenzo Pieralisi Reviewed-by: Dave Martin Reviewed-by: Nicolas Pitre Tested-by: Shawn Guo Tested-by: Kevin Hilman Tested-by: Stephen Warren --- arch/arm/include/asm/smp_plat.h | 10 ++++- arch/arm/include/asm/suspend.h | 5 +++ arch/arm/kernel/asm-offsets.c | 6 +++ arch/arm/kernel/sleep.S | 97 +++++++++++++++++++++++++++++++++-------- arch/arm/kernel/suspend.c | 20 +++++++++ 5 files changed, 118 insertions(+), 20 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index f75f8a234b3f..6e63f29f41b7 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -70,9 +70,15 @@ static inline int get_logical_index(u32 mpidr) return -EINVAL; } +/* + * NOTE ! 
Assembly code relies on the following + * structure memory layout in order to carry out load + * multiple from its base address. For more + * information check arch/arm/kernel/sleep.S + */ struct mpidr_hash { - u32 mask; - u32 shift_aff[3]; + u32 mask; /* used by sleep.S */ + u32 shift_aff[3]; /* used by sleep.S */ u32 bits; }; diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h index 1c0a551ae375..cd20029bcd94 100644 --- a/arch/arm/include/asm/suspend.h +++ b/arch/arm/include/asm/suspend.h @@ -1,6 +1,11 @@ #ifndef __ASM_ARM_SUSPEND_H #define __ASM_ARM_SUSPEND_H +struct sleep_save_sp { + u32 *save_ptr_stash; + u32 save_ptr_stash_phys; +}; + extern void cpu_resume(void); extern int cpu_suspend(unsigned long, int (*)(unsigned long)); diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index ee68cce6b48e..ded041711beb 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -144,6 +145,11 @@ int main(void) #endif #ifdef MULTI_CACHE DEFINE(CACHE_FLUSH_KERN_ALL, offsetof(struct cpu_cache_fns, flush_kern_all)); +#endif +#ifdef CONFIG_ARM_CPU_SUSPEND + DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); + DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); + DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); #endif BLANK(); DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 987dcf33415c..db1536b8b30b 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -6,6 +6,49 @@ #include .text +/* + * Implementation of MPIDR hash algorithm through shifting + * and OR'ing. + * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @mpidr: register containing MPIDR value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) { + * u32 aff0, aff1, aff2; + * u32 mpidr_masked = mpidr & mask; + * aff0 = mpidr_masked & 0xff; + * aff1 = mpidr_masked & 0xff00; + * aff2 = mpidr_masked & 0xff0000; + * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2); + *} + * Input registers: rs0, rs1, rs2, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets + (eg: a macro instance with mpidr = r1 and dst = r1 is invalid) + */ + .macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask + and \mpidr, \mpidr, \mask @ mask out MPIDR bits + and \dst, \mpidr, #0xff @ mask=aff0 + ARM( mov \dst, \dst, lsr \rs0 ) @ dst=aff0>>rs0 + THUMB( lsr \dst, \dst, \rs0 ) + and \mask, \mpidr, #0xff00 @ mask = aff1 + ARM( orr \dst, \dst, \mask, lsr \rs1 ) @ dst|=(aff1>>rs1) + THUMB( lsr \mask, \mask, \rs1 ) + THUMB( orr \dst, \dst, \mask ) + and \mask, \mpidr, #0xff0000 @ mask = aff2 + ARM( orr \dst, \dst, \mask, lsr \rs2 ) @ dst|=(aff2>>rs2) + THUMB( lsr \mask, \mask, \rs2 ) + THUMB( orr \dst, \dst, \mask ) + .endm + /* * Save CPU state for a suspend. 
This saves the CPU general purpose * registers, and allocates space on the kernel stack to save the CPU @@ -29,12 +72,18 @@ ENTRY(__cpu_suspend) mov r1, r4 @ size of save block mov r2, r5 @ virtual SP ldr r3, =sleep_save_sp -#ifdef CONFIG_SMP - ALT_SMP(mrc p15, 0, lr, c0, c0, 5) - ALT_UP(mov lr, #0) - and lr, lr, #15 + ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] + ALT_SMP(mrc p15, 0, r9, c0, c0, 5) + ALT_UP_B(1f) + ldr r8, =mpidr_hash + /* + * This ldmia relies on the memory layout of the mpidr_hash + * struct mpidr_hash. + */ + ldmia r8, {r4-r7} @ r4 = mpidr mask (r5,r6,r7) = l[0,1,2] shifts + compute_mpidr_hash lr, r5, r6, r7, r9, r4 add r3, r3, lr, lsl #2 -#endif +1: bl __cpu_suspend_save adr lr, BSYM(cpu_suspend_abort) ldmfd sp!, {r0, pc} @ call suspend fn @@ -81,15 +130,23 @@ ENDPROC(cpu_resume_after_mmu) .data .align ENTRY(cpu_resume) -#ifdef CONFIG_SMP - adr r0, sleep_save_sp - ALT_SMP(mrc p15, 0, r1, c0, c0, 5) - ALT_UP(mov r1, #0) - and r1, r1, #15 - ldr r0, [r0, r1, lsl #2] @ stack phys addr -#else - ldr r0, sleep_save_sp @ stack phys addr -#endif + mov r1, #0 + ALT_SMP(mrc p15, 0, r0, c0, c0, 5) + ALT_UP_B(1f) + adr r2, mpidr_hash_ptr + ldr r3, [r2] + add r2, r2, r3 @ r2 = struct mpidr_hash phys address + /* + * This ldmia relies on the memory layout of the mpidr_hash + * struct mpidr_hash. + */ + ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts + compute_mpidr_hash r1, r4, r5, r6, r0, r3 +1: + adr r0, _sleep_save_sp + ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] + ldr r0, [r0, r1, lsl #2] + setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1 @ set SVC, irqs off @ load phys pgd, stack, resume fn ARM( ldmia r0!, {r1, sp, pc} ) @@ -98,7 +155,11 @@ THUMB( mov sp, r2 ) THUMB( bx r3 ) ENDPROC(cpu_resume) -sleep_save_sp: - .rept CONFIG_NR_CPUS - .long 0 @ preserve stack phys ptr here - .endr + .align 2 +mpidr_hash_ptr: + .long mpidr_hash - . @ mpidr_hash struct offset + + .type sleep_save_sp, #object +ENTRY(sleep_save_sp) +_sleep_save_sp: + .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 38a50676213b..41cf3cbf756d 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -1,9 +1,12 @@ #include +#include +#include #include #include #include #include +#include #include #include @@ -82,3 +85,20 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr) outer_clean_range(virt_to_phys(save_ptr), virt_to_phys(save_ptr) + sizeof(*save_ptr)); } + +extern struct sleep_save_sp sleep_save_sp; + +static int cpu_suspend_alloc_sp(void) +{ + void *ctx_ptr; + /* ctx_ptr is an array of physical addresses */ + ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(u32), GFP_KERNEL); + + if (WARN_ON(!ctx_ptr)) + return -ENOMEM; + sleep_save_sp.save_ptr_stash = ctx_ptr; + sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); + sync_cache_w(&sleep_save_sp); + return 0; +} +early_initcall(cpu_suspend_alloc_sp); -- cgit v1.2.3 From 809e660f438fc5a69bf57630a85bcd8112263f37 Mon Sep 17 00:00:00 2001 From: Steven Capper Date: Tue, 25 Jun 2013 08:45:51 +0100 Subject: ARM: 7775/1: mm: Remove do_sect_fault from LPAE code For LPAE, do_sect_fault used to be invoked as the second level access flag handler. When transparent huge pages were introduced for LPAE, do_page_fault was used instead. Unfortunately, do_sect_fault remains defined but not used for LPAE code resulting in a compile warning. This patch surrounds do_sect_fault with #ifndef CONFIG_ARM_LPAE to fix this warning. 
Signed-off-by: Steve Capper
Acked-by: Arnd Bergmann
Signed-off-by: Russell King
---
 arch/arm/mm/fault.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm')

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 5dbf13f954f6..c97f7940cb95 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -491,12 +491,14 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
  * Some section permission faults need to be handled gracefully.
  * They can happen due to a __{get,put}_user during an oops.
  */
+#ifndef CONFIG_ARM_LPAE
 static int
 do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
 	do_bad_area(addr, fsr, regs);
 	return 0;
 }
+#endif /* CONFIG_ARM_LPAE */
 
 /*
  * This abort handler always returns "fault".
-- 
cgit v1.2.3